删除所有不在引号之间的空格。
例子:
A B " C" -> AB" C"
A B ' C' -> AB' C'
A B ` C` -> AB` C`
A B \'C\' -> AB\'C\'
A B '\` C \`' -> AB'\` C \`'
A B "\' C C \"\'\"" -> AB"\' C C \"\'\""
代码必须正确;而要做到正确,代码就必须可读。到目前为止,还没有人给出正确或可读的正则表达式解决方案。
在 Go 中解析文本。
例如,
package main
import (
"fmt"
"unicode"
)
// removeUnquotedSpace returns s with every whitespace character removed,
// except whitespace that lies between a pair of matching unescaped quotes.
//
// The quote characters are the backquote (`), the double quote (") and the
// single quote ('). A quoted section is closed only by an unescaped
// occurrence of the same character that opened it; the other quote
// characters are ordinary text while inside it. A backslash escapes the
// character that follows it, so an escaped quote neither opens nor closes a
// quoted section, and the second backslash of a pair escapes nothing.
// Whitespace is whatever unicode.IsSpace reports.
//
// Kept text is copied from s byte for byte, so bytes that are not valid
// UTF-8 pass through unchanged instead of being re-encoded as U+FFFD.
//
// If s ends inside a quoted section, removeUnquotedSpace returns the empty
// string and an error that names the unmatched quote character.
func removeUnquotedSpace(s string) (string, error) {
	// noQuote marks "not currently inside a quoted section".
	const noQuote = rune(0)

	// Removing characters can only shrink the text, so len(s) bytes always
	// suffices.
	buf := make([]byte, 0, len(s))
	quote := noQuote
	escaped := false // the previous rune was a backslash that escapes this one
	runStart := -1   // byte offset where the current run of kept text began, or -1

	for i, r := range s {
		if !escaped && (r == '`' || r == '"' || r == '\'') {
			switch quote {
			case noQuote:
				// Start of an unescaped quote.
				quote = r
			case r:
				// End of the (matching) unescaped quote.
				quote = noQuote
			}
		}
		// A backslash is the escape character, except when it is itself
		// escaped (the second backslash of a pair).
		escaped = !escaped && r == '\\'

		// Keep everything inside quotes, and all non-whitespace outside them.
		keep := quote != noQuote || !unicode.IsSpace(r)
		switch {
		case keep && runStart < 0:
			runStart = i
		case !keep && runStart >= 0:
			// Flush the run that ends just before this dropped rune. Slicing
			// the input (rather than re-encoding r) preserves the original
			// bytes exactly.
			buf = append(buf, s[runStart:i]...)
			runStart = -1
		}
	}

	if quote != noQuote {
		return "", fmt.Errorf("unmatched unescaped quote: %q", quote)
	}
	if runStart >= 0 {
		// Flush the final run, which extends to the end of the input.
		buf = append(buf, s[runStart:]...)
	}
	return string(buf), nil
}
// main runs removeUnquotedSpace over a table of sample inputs and prints one
// row per sample: the input, the actual result, whether the result equals the
// expected output, and the returned error.
func main() {
	type sample struct {
		input string
		want  string
	}
	samples := []sample{
		{input: `A B " C"`, want: `AB" C"`},
		{input: "A B ' C'", want: "AB' C'"},
		{input: "A B ` C`", want: "AB` C`"},
		{input: `A B \'C\'`, want: `AB\'C\'`},
		{input: "A B '\\` C \\`'", want: "AB'\\` C \\`'"},
		{input: `A B "\' C C \"\'\""`, want: `AB"\' C C \"\'\""`},
		{input: `A B " C \\"`, want: `AB" C \\"`},
	}

	const rowFormat = "|| %v || %v || %v || %v\n"
	for i := range samples {
		c := samples[i]
		got, err := removeUnquotedSpace(c.input)
		matches := got == c.want
		fmt.Printf(rowFormat, c.input, got, matches, err)
	}
}
Go Playground:https://play.golang.org/p/-3cDwSaakIb
输出:
|| A B " C" || AB" C" || true || <nil>
|| A B ' C' || AB' C' || true || <nil>
|| A B ` C` || AB` C` || true || <nil>
|| A B \'C\' || AB\'C\' || true || <nil>
|| A B '\` C \`' || AB'\` C \`' || true || <nil>
|| A B "\' C C \"\'\"" || AB"\' C C \"\'\"" || true || <nil>
|| A B " C \\" || AB" C \\" || true || <nil>