【发布时间】:2020-07-10 01:25:21
【问题描述】:
我正在寻找可以解析这种类型字符串的 F# 中的一段代码:
"x=1,y=42,A=[1,3,4,8]"
进入如下所示的元组列表:
[("x",1);("y",42);("A",1);("A",3);("A",4);("A",8 )]
提前致谢:)
【问题讨论】:
-
您可以发布您尝试过的任何代码吗?
我正在寻找可以解析这种类型字符串的 F# 中的一段代码:
"x=1,y=42,A=[1,3,4,8]"
进入如下所示的元组列表:
[("x",1);("y",42);("A",1);("A",3);("A",4);("A",8 )]
提前致谢:)
【问题讨论】:
建议遵循Tomas Petricek's answer 概述的方法,使用已建立的 FParsec 解析器组合器库。
出于教育目的,您可能想要推出自己的解析器组合器,为此,Scott W. 的博客("Understanding parser combinators" 和 "Building a useful set of parser combinators")包含有价值的信息。
解析看起来很相似:
// parse a list of integers enclosed in brackets and separated by ','
let plist = pchar '[' >>. sepBy1 pint (pchar ',') .>> pchar ']'
// parser for the right hand side, singleton integer or a list of integers
let intOrList = pint |>> (fun x -> [x]) <|> plist
// projection for generation of string * integer tuples
let ungroup p =
p |>> List.collect (fun (key, xs) -> xs |> List.map (fun x -> key, x))
// parser for an input of zero or more string value pairs separated by ','
let parser =
sepBy (letters .>> pchar '=' .>>. intOrList) (pchar ',')
|> ungroup
"x=1,y=42,A=[1,3,4,8]"
|> run parser
// val it : ((String * int) list * string) option =
// Some ([("x", 1); ("y", 42); ("A", 1); ("A", 3); ("A", 4); ("A", 8)], "")
这个简单的语法仍然需要 15 个左右的解析器组合子。另一个区别是,为简单起见,Parser 类型是基于 FSharp 的 Option 类型建模的。
type Parser<'T,'U> = Parser of ('T -> ('U * 'T) option)
let run (Parser f1) x = // run the parser with input
f1 x
let returnP arg = // lift a value to a Parser
Parser (fun x -> Some(arg, x))
let (>>=) (Parser f1) f = // apply parser-producing function
Parser(f1 >> Option.bind (fun (a, b) -> run (f a) b))
let (|>>) p f = // apply function to value inside Parser
p >>= (f >> returnP)
let (.>>.) p1 p2 = // andThen combinator
p1 >>= fun r1 ->
p2 >>= fun r2 ->
returnP (r1, r2)
let (.>>) p1 p2 = // andThen, but keep first value only
(p1 .>>. p2) |>> fst
let (>>.) p1 p2 = // andThen, keep second value only
(p1 .>>. p2) |>> snd
let pchar c = // parse a single character
Parser (fun s ->
if String.length s > 0 && s.[0] = c then Some(c, s.[1..])
else None )
let (<|>) (Parser f1) (Parser f2) = // orElse combinator
Parser(fun arg ->
match f1 arg with None -> f2 arg | res -> res )
let choice parsers = // choose any of a list of combinators
List.reduce (<|>) parsers
let anyOf = // choose any of a list of characters
List.map pchar >> choice
let many (Parser f) = // matches zero or more occurrences
let rec aux input =
match f input with
| None -> [], input
| Some (x, rest1) ->
let xs, rest2 = aux rest1
x::xs, rest2
Parser (fun arg -> Some(aux arg))
let many1 p = // matches one or more occurrences of p
p >>= fun x ->
many p >>= fun xs ->
returnP (x::xs)
let stringP p = // converts list of characters to string
p |>> (fun xs -> System.String(List.toArray xs))
let letters = // matches one or more letters
many1 (anyOf ['A'..'Z'] <|> anyOf ['a'..'z']) |> stringP
let pint = // matches an integer
many1 (anyOf ['0'..'9']) |> stringP |>> int
let sepBy1 p sep = // matches p one or more times, separated by sep
p .>>. many (sep >>. p) |>> (fun (x,xs) -> x::xs)
let sepBy p sep = // matches p zero or more times, separated by sep
sepBy1 p sep <|> returnP []
【讨论】:
您可以使用FParsec 解析器组合库很好地解决这个问题。这可以使用正则表达式进行管理,但它不是很优雅。解析器组合器非常清楚您可以处理的输入语法是什么。您还可以轻松添加其他功能,例如空格。
以下实际上产生了一个string * Value 对列表,其中Value 是一种新数据类型,对应于输入中可能的右侧:
type Value = Int of int | List of int list
现在,您可以使用以下方法进行解析:
let ident = identifier (IdentifierOptions())
let rhs =
// Right-hand-side is either an integer...
( pint32 |>> Int ) <|>
// Or a list [ .. ] of integers separated by ','
( pchar '[' >>. (sepBy pint32 (pchar ',')) .>> pchar ']' |>> List )
let tuple =
// A single tuple is an identifier = right-hand-side
ident .>> pchar '=' .>>. rhs
let p =
// The input is a comma separated list of tuples
sepBy tuple (pchar ',')
run p "x=1,y=42,A=[1,3,4,8]"
【讨论】:
有时命名的正则表达式可以使代码可读,即使不是正则表达式。
(?<id>\w+)=((\[((?<list>(\d+))*,?\s*)*\])|(?<number>\d+))
如下:标识符 = [数字后跟逗号或空格,零个或多个] | 数字
let parse input =
[
let regex = Regex("(?<id>\w+)=((\[((?<list>(\d+))*,?\s*)*\])|(?<number>\d+))")
let matches = regex.Matches input
for (expr : Match) in matches do
let group name = expr.Groups.[string name]
let id = group "id"
let list = group "list"
let number = group "number"
if list.Success then
for (capture : Capture) in list.Captures do
yield (id.Value, int capture.Value)
else if number.Success then
yield (id.Value, int number.Value)
]
测试
let input = "var1=1, var2=2, list=[1, 2, 3, 4], single=[1], empty=[], bad=[,,], bad=var"
printfn "%A" (parse input)
输出
[("var1", 1); ("var2", 2); ("list", 1); ("list", 2); ("list", 3); ("list", 4); "single", 1)]
【讨论】:
试试这个:
open System.Text.RegularExpressions
let input = "x=1,y=42,A=[1,3,4,8]"
Regex.Split(input,",(?=[A-Za-z])") //output: [|"x=1"; "y=42"; "A=[1,3,4,8]"|]
|> Array.collect (fun x ->
let l,v = Regex.Split(x,"=") |> fun t -> Array.head t,Array.last t //label and value
Regex.Split(v,",") |> Array.map (fun x -> l,Regex.Replace(x,"\[|\]","") |> int))
|> List.ofArray
【讨论】: