【问题标题】:Parse the string into a list of tuples将字符串解析为元组列表
【发布时间】:2020-07-10 01:25:21
【问题描述】:

我正在寻找可以解析这种类型字符串的 F# 中的一段代码:

"x=1,y=42,A=[1,3,4,8]"

进入如下所示的元组列表:

[("x",1);("y",42);("A",1);("A",3);("A",4);("A",8)]

提前致谢:)

【问题讨论】:

  • 您可以发布您尝试过的任何代码吗?

标签: functional-programming f#


【解决方案1】:

建议遵循Tomas Petricek's answer 概述的方法,使用已建立的 FParsec 解析器组合器库。

出于学习目的,您也可以自己动手实现一套解析器组合器;为此,Scott W. 的博客("Understanding parser combinators""Building a useful set of parser combinators")提供了很有价值的信息。

解析看起来很相似:

// A bracketed, comma-separated list of one or more integers, e.g. "[1,3,4,8]".
let plist =
    let comma = pchar ','
    pchar '[' >>. sepBy1 pint comma .>> pchar ']'

// Right-hand side of an assignment: either a bare integer (wrapped into a
// one-element list) or a bracketed list of integers.
let intOrList =
    let asSingleton x = [x]
    (pint |>> asSingleton) <|> plist

// Flattens parsed (key, values) groups into one (key, value) tuple per value.
let ungroup p =
    let expand (key, xs) = List.map (fun x -> key, x) xs
    p |>> List.collect expand

// Zero or more "name=value" assignments separated by ','.
let parser =
    let assignment = letters .>> pchar '=' .>>. intOrList
    ungroup (sepBy assignment (pchar ','))

run parser "x=1,y=42,A=[1,3,4,8]"
// val it : ((String * int) list * string) option =
//   Some ([("x", 1); ("y", 42); ("A", 1); ("A", 3); ("A", 4); ("A", 8)], "")

这个简单的语法仍然需要 15 个左右的解析器组合子。另一个区别是,为简单起见,Parser 类型是基于 FSharp 的 Option 类型建模的。

// A parser wraps a function from an input 'T to an optional pair of
// (parsed value 'U, remaining input 'T); None signals that parsing failed.
type Parser<'T,'U> = Parser of ('T -> ('U * 'T) option)

// Unwraps the parser's function and applies it to the input x.
let run parser x =
    let (Parser apply) = parser
    apply x

// Lifts a value into a parser that always succeeds with that value and
// hands the input back untouched.
let returnP arg =
    let succeed input = Some (arg, input)
    Parser succeed

// Bind: runs the first parser, passes its value to f, and runs the parser
// produced by f on the remaining input. Fails as soon as either step fails.
let (>>=) (Parser f1) f =
    Parser (fun input ->
        match f1 input with
        | None -> None
        | Some (value, rest) -> run (f value) rest)

// Map: transforms the value produced by p with f, leaving the remaining
// input as p left it.
let (|>>) p f =
    p >>= (fun value -> returnP (f value))

// Sequence: runs p1 and then p2, returning both results as a pair.
let (.>>.) p1 p2 =
    p1 >>= (fun first ->
        p2 |>> (fun second -> (first, second)))

// Sequence p1 then p2, keeping only the result of p1.
let (.>>) p1 p2 =
    (p1 .>>. p2) |>> (fun (kept, _) -> kept)

// Sequence p1 then p2, keeping only the result of p2.
let (>>.) p1 p2 =
    (p1 .>>. p2) |>> (fun (_, kept) -> kept)

// Parses exactly the character c at the start of a string input. On success
// returns c together with the rest of the string; otherwise fails.
let pchar c =
    let tryHead s =
        if String.length s = 0 || s.[0] <> c then None
        else Some (c, s.Substring 1)
    Parser tryHead

// Alternative: tries the first parser; if it fails, runs the second parser
// on the same, original input.
let (<|>) (Parser f1) (Parser f2) =
    Parser (fun arg ->
        match f1 arg with
        | Some _ as hit -> hit
        | None -> f2 arg)

// Tries each parser in order and returns the result of the first one that
// succeeds. An empty list yields a parser that always fails; List.reduce on
// its own would throw on an empty list when the parser is constructed.
let choice parsers =
    match parsers with
    | [] -> Parser (fun _ -> None)
    | _ -> List.reduce (<|>) parsers

// Builds a parser that accepts any single character from the given list.
let anyOf chars =
    chars
    |> List.map pchar
    |> choice

// Applies the parser repeatedly until it fails and collects the results in
// order. Always succeeds; zero matches give an empty list and the input is
// returned unchanged.
// The loop is tail-recursive with an accumulator so that long runs of
// matches do not grow the call stack.
// Note: if the wrapped parser can succeed without consuming input, the loop
// never terminates.
let many (Parser f) =
    let rec loop acc input =
        match f input with
        | None -> List.rev acc, input
        | Some (x, rest) -> loop (x :: acc) rest
    Parser (fun arg -> Some (loop [] arg))

// Matches p one or more times: one mandatory occurrence followed by any
// number of further occurrences.
let many1 p =
    (p .>>. many p) |>> (fun (x, xs) -> x :: xs)

// Turns a parser producing a list of characters into one producing a string.
let stringP p =
    let ofChars (chars: char list) = System.String(Array.ofList chars)
    p |>> ofChars

// Matches one or more ASCII letters (A-Z or a-z) and returns them as a string.
let letters =
    let upper = anyOf ['A'..'Z']
    let lower = anyOf ['a'..'z']
    stringP (many1 (upper <|> lower))

// Matches one or more decimal digits and converts them to an int.
// A digit run that does not fit in a 32-bit integer makes the parser fail
// (None) instead of throwing from the conversion, in keeping with the
// Option-based failure model of this Parser type.
let pint =
    (many1 (anyOf ['0'..'9']) |> stringP) >>= (fun digits ->
        match System.Int32.TryParse digits with
        | true, n -> returnP n
        | false, _ -> Parser (fun _ -> None))

// Matches p one or more times with sep between consecutive occurrences;
// the separator results are discarded.
let sepBy1 p sep =
    p >>= fun head ->
    many (sep >>. p) >>= fun tail ->
    returnP (head :: tail)

// Matches p zero or more times separated by sep; when not even one p
// matches, succeeds with an empty list.
let sepBy p sep =
    let oneOrMore = sepBy1 p sep
    let nothing = returnP []
    oneOrMore <|> nothing

【讨论】:

    【解决方案2】:

    您可以使用 FParsec 解析器组合器库很好地解决这个问题。这也可以用正则表达式来完成,但不够优雅。解析器组合器能非常清晰地表达出所能处理的输入语法,您还可以轻松添加其他功能,例如对空白字符的处理。

    以下实际上产生了一个string * Value 对列表,其中Value 是一种新数据类型,对应于输入中可能的右侧:

    // Right-hand side of an assignment: a single integer or a list of integers.
    type Value = Int of int | List of int list
    

    现在,您可以使用以下方法进行解析:

    // Identifier on the left-hand side, using FParsec's default identifier options.
    let ident = identifier (IdentifierOptions())
    
    // Right-hand side: either a single integer or a bracketed list
    // [ .. ] of integers separated by ','.
    let rhs = 
      let single = pint32 |>> Int
      let bracketed =
        pchar '[' >>. sepBy pint32 (pchar ',') .>> pchar ']' |>> List
      single <|> bracketed
    
    // A single tuple is written as identifier = right-hand-side.
    let tuple = 
      let equals = pchar '='
      (ident .>> equals) .>>. rhs
    
    // The whole input is a comma-separated list of tuples.
    let p = 
      let comma = pchar ','
      sepBy tuple comma
    
    "x=1,y=42,A=[1,3,4,8]" |> run p
    

    【讨论】:

      【解决方案3】:

      有时,使用命名捕获组的正则表达式可以让代码更易读,即使正则表达式本身并不易读。

      (?<id>\w+)=((\[((?<list>(\d+))*,?\s*)*\])|(?<number>\d+))
      

      它的读法如下:标识符 = [零个或多个数字,每个数字后可跟逗号或空格] | 数字

      // Extracts (identifier, integer) pairs from text such as "x=1,A=[1,3]".
      // Every regex match contributes one pair per capture of the "list" group,
      // or a single pair from the "number" group; matches where neither group
      // succeeded (for example "empty=[]") contribute nothing.
      let parse input =
          let regex = Regex("(?<id>\w+)=((\[((?<list>(\d+))*,?\s*)*\])|(?<number>\d+))")
          regex.Matches input
          |> Seq.cast<Match>
          |> Seq.collect (fun expr ->
              let id = expr.Groups.["id"]
              let list = expr.Groups.["list"]
              let number = expr.Groups.["number"]
              if list.Success then
                  // A repeated group keeps every capture; emit one pair per element.
                  list.Captures
                  |> Seq.cast<Capture>
                  |> Seq.map (fun capture -> id.Value, int capture.Value)
              elif number.Success then
                  Seq.singleton (id.Value, int number.Value)
              else
                  Seq.empty)
          |> List.ofSeq
      

      测试

      // Sample covering scalars, lists with spaces, an empty list and malformed entries.
      let input = "var1=1, var2=2, list=[1, 2, 3, 4], single=[1], empty=[], bad=[,,], bad=var"    
      parse input |> printfn "%A"
      

      输出

      [("var1", 1); ("var2", 2); ("list", 1); ("list", 2); ("list", 3); ("list", 4); ("single", 1)]
      

      【讨论】:

        【解决方案4】:

        试试这个:

        open System.Text.RegularExpressions
        
        let input = "x=1,y=42,A=[1,3,4,8]"
        
        // Split only at commas that are followed by a letter, so commas inside
        // a bracketed list stay with their assignment:
        // [|"x=1"; "y=42"; "A=[1,3,4,8]"|]
        [
            for assignment in Regex.Split(input, ",(?=[A-Za-z])") do
                // label is the text before '=', value the text after it
                let parts = Regex.Split(assignment, "=")
                let label = Array.head parts
                let value = Array.last parts
                for item in Regex.Split(value, ",") do
                    // strip the list brackets before converting to int
                    yield label, int (Regex.Replace(item, "\[|\]", ""))
        ]
        

        【讨论】:

          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 2011-09-19
          • 2010-12-21
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2015-06-20
          相关资源
          最近更新 更多