一个简单的解决方案是使用带有 \b 的正则表达式来匹配“单词边界”,例如:
let searchString = "bro"
let sentence = "Hey, Bro! Your brother is also her brother."

// Escape the search term so that any regex metacharacters it contains (".", "+", "(" …)
// are matched literally instead of being interpreted as pattern syntax.
let escapedSearchString = NSRegularExpression.escapedPattern(for: searchString)

// `\b` anchors the term at word boundaries, so "Bro" matches but "brother" does not.
// `try!` is acceptable here: with the term escaped, the pattern is always well-formed.
let regex = try! NSRegularExpression(pattern: #"\b\#(escapedSearchString)\b"#, options: .caseInsensitive)

regex.enumerateMatches(in: sentence, range: NSRange(sentence.startIndex..., in: sentence)) { match, _, _ in
    guard let match = match else { return }

    // The match location as an NSRange.
    print(match.range)

    // or, if you want a String.Range
    if let range = Range(match.range, in: sentence) {
        print(sentence[range])
    }
}
还有其他更丰富的 API(例如 Natural Language 框架),虽然并不完美,但可以提供更丰富的自然语言文本解析。例如,下面将区分动词“saw”和名词“saw”:
import NaturalLanguage
let text = "I saw the hammer. I did not see a saw."

// Tag every word of `text` with its lexical class (part of speech).
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = text

// Skip whitespace tokens and treat contractions as a single token.
let options: NLTagger.Options = [.omitWhitespace, .joinContractions]

tagger.enumerateTags(
    in: text.startIndex..<text.endIndex,
    unit: .word,
    scheme: .lexicalClass,
    options: options
) { lexicalClass, tokenRange in
    // Tokens without a tag are skipped; returning `true` keeps the enumeration going.
    if let lexicalClass = lexicalClass {
        let token = String(text[tokenRange])
        print(lexicalClass, token)
    }
    return true
}
输出:
NLTag(_rawValue: Pronoun) I
NLTag(_rawValue: Verb) saw
NLTag(_rawValue: Determiner) the
NLTag(_rawValue: Noun) hammer
NLTag(_rawValue: SentenceTerminator) .
NLTag(_rawValue: Pronoun) I
NLTag(_rawValue: Verb) did
NLTag(_rawValue: Adverb) not
NLTag(_rawValue: Verb) see
NLTag(_rawValue: Determiner) a
NLTag(_rawValue: Noun) saw
NLTag(_rawValue: SentenceTerminator) .