FsRegEx


Capture Groups

FsGroup

FsGroup provides the functionality of the System.Text.RegularExpressions.Group class, but returns arrays of objects instead of special collections and exposes the group name as a property.

anonymous groups

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
// Anonymous (numbered) groups: the outer group wraps a word plus optional
// punctuation/whitespace, the inner group wraps just the word.
let pattern = @"(\b(\w+?)[,:;]?\s?)+[?.!]"
let input = "This is one sentence. This is a second sentence."

let m = FsRegEx.firstMatch pattern input
printfn "Match: %s" m.Value

// Print every group of the first match, followed by each capture recorded for that group.
m.Groups()
|> Array.iteri (fun groupIndex grp ->
    printfn "   Group %i: '%s'" groupIndex grp.Value
    let captures = grp.Captures()
    for captureIndex in 0 .. captures.Length - 1 do
        printfn "      Capture %i: '%s'" captureIndex captures.[captureIndex].Value)

// Match: This is one sentence.
//    Group 0: 'This is one sentence.'
//        Capture 0: 'This is one sentence.'
//    Group 1: 'sentence'
//        Capture 0: 'This '
//        Capture 1: 'is '
//        Capture 2: 'one '
//        Capture 3: 'sentence'
//    Group 2: 'sentence'
//        Capture 0: 'This'
//        Capture 1: 'is'
//        Capture 2: 'one'
//        Capture 3: 'sentence'

named groups

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
// Named groups (FirstWord, LastWord, Punctuation) mixed with two unnamed groups.
let pattern' = @"\b(?<FirstWord>\w+)\s?((\w+)\s)*(?<LastWord>\w+)?(?<Punctuation>\p{Po})"
let input' = "The cow jumped over the moon."

let m' = FsRegEx.firstMatch pattern' input'

printfn "Named Groups:"

// One line per group: the group's name followed by the text it matched.
for grp in m'.Groups() do
    printfn "   %s: '%s'" grp.Name grp.Value

// Named Groups:
//    0: 'The cow jumped over the moon.'
//    1: 'the '
//    2: 'the'
//    FirstWord: 'The'
//    LastWord: 'moon'
//    Punctuation: '.'

capturing groups

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
// Group 1 captures the word immediately preceding a mark; group 2 captures the
// mark itself: the registered sign (U+00AE) or the trademark sign (U+2122).
let regExpr = @"\b(\w+?)([\u00AE\u2122])"

// All matches of regExpr in the sample text.
// The text after "Silverlight" must be the real trademark sign (U+2122); a
// mis-encoded byte sequence there would not be matched by the character class.
let matches = 
    "Microsoft® Office Professional Edition combines several office " +
        "productivity products, including Word, Excel®, Access®, Outlook®, " +
        "PowerPoint®, and several others. Some guidelines for creating " +
        "corporate documents using these productivity tools are available " +
        "from the documents created using Silverlight™ on the corporate " +
        "intranet site."
    |> FsRegEx.matches regExpr

// For each match print the mark (group 2) followed by the product name (group 1).
matches
|> Array.iter (fun m ->
    let groups = m.Groups()
    printfn "%s: %s" groups.[2].Value groups.[1].Value)

printfn ""
printfn "Found %i trademarks or registered trademarks." matches.Length

// ®: Microsoft
// ®: Excel
// ®: Access
// ®: Outlook
// ®: PowerPoint
// ™: Silverlight
//
// Found 6 trademarks or registered trademarks.

group numbers and names

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
open System

// Pattern with one unnamed group and two named groups, "word" and "end".
let pattern'' = @"\b((?<word>\w+)\s*)+(?<end>[.?!])"
let input'' = "This is a sentence. This is a second sentence."

// Group numbers and group names for the pattern; they are paired up positionally below.
let groupNumbers' = FsRegEx.groupNumbers pattern''
let groupNames = FsRegEx.groupNames pattern''

let m'' = FsRegEx.firstMatch pattern'' input''

printfn "Match: %s" m''.Value

// Print each group number, its name in parentheses when it has one, and the
// text that group matched in m''.
Array.zip groupNumbers' groupNames
|> Array.iter (fun (n, name) ->
    // A name that parses as an integer is treated as an unnamed group,
    // so no "(name)" suffix is printed for it.
    let isNumber, _ = Int32.TryParse(name)
    printfn "   Group %i%s: '%s'" 
        n 
        (if (not isNumber) then
            " (" + name + ")"
         else String.Empty)
        // Look the group up in m'' (the match for pattern''), not in a match
        // produced from a different pattern.
        (m''.Groups()).[n].Value
    )

// Match: This is a sentence.
//    Group 0: 'This is a sentence.'
//    Group 1: 'sentence'
//    Group 2 (word): 'sentence'
//    Group 3 (end): '.'
val pattern : string
val input : string
val m : obj
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
module Array

from Microsoft.FSharp.Collections
val iteri : action:(int -> 'T -> unit) -> array:'T [] -> unit
val i : int
val g : obj
val c : obj
val pattern' : string
val input' : string
val m' : obj
val iter : action:('T -> unit) -> array:'T [] -> unit
val regExpr : string
val matches : obj []
val groups : obj
property System.Array.Length: int
namespace System
val pattern'' : string
val input'' : string
val groupNumbers' : int []
val groupNames : string []
val m'' : obj
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...
val zip : array1:'T1 [] -> array2:'T2 [] -> ('T1 * 'T2) []
val n : int
val name : string
val isNumber : bool
type Int32 =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MaxValue : int
    static val MinValue : int
    static member Parse : s:string -> int + 3 overloads
    static member TryParse : s:string * result:int -> bool + 1 overload
  end
Int32.TryParse(s: string, result: byref<int>) : bool
Int32.TryParse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider, result: byref<int>) : bool
val not : value:bool -> bool
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

--------------------
String(value: nativeptr<char>) : String
String(value: nativeptr<sbyte>) : String
String(value: char []) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: char [], startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String
field string.Empty: string
Fork me on GitHub