您可以使用 N-gram tokenizer:每当遇到指定字符列表中的某个字符时,
它首先将文本分解为单词,
然后为每个单词生成指定长度的 N-gram。
下面给出一个包含索引数据、映射、搜索查询和结果的可运行示例。
索引映射
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter",
"digit"
]
}
}
},
"max_ngram_diff": 50
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_analyzer",
"search_analyzer": "standard"
}
}
}
}
分析 API
POST /stof_63976447/_analyze
{
"analyzer" : "my_analyzer",
"text" : "34567"
}
生成以下词元(token):
{
"tokens": [
{
"token": "34",
"start_offset": 0,
"end_offset": 2,
"type": "word",
"position": 0
},
{
"token": "345",
"start_offset": 0,
"end_offset": 3,
"type": "word",
"position": 1
},
{
"token": "3456",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 2
},
{
"token": "34567",
"start_offset": 0,
"end_offset": 5,
"type": "word",
"position": 3
},
{
"token": "45",
"start_offset": 1,
"end_offset": 3,
"type": "word",
"position": 4
},
{
"token": "456",
"start_offset": 1,
"end_offset": 4,
"type": "word",
"position": 5
},
{
"token": "4567",
"start_offset": 1,
"end_offset": 5,
"type": "word",
"position": 6
},
{
"token": "56",
"start_offset": 2,
"end_offset": 4,
"type": "word",
"position": 7
},
{
"token": "567",
"start_offset": 2,
"end_offset": 5,
"type": "word",
"position": 8
},
{
"token": "67",
"start_offset": 3,
"end_offset": 5,
"type": "word",
"position": 9
}
]
}
搜索查询:
{
"query": {
"match": {
"title": "456"
}
}
}
搜索结果:
"hits": [
{
"_index": "stof_63976447",
"_type": "_doc",
"_id": "1",
"_score": 0.074107975,
"_source": {
"title": 34567
}
},
{
"_index": "stof_63976447",
"_type": "_doc",
"_id": "2",
"_score": 0.074107975,
"_source": {
"title": 34568
}
},
{
"_index": "stof_63976447",
"_type": "_doc",
"_id": "3",
"_score": 0.074107975,
"_source": {
"title": 34569
}
},
{
"_index": "stof_63976447",
"_type": "_doc",
"_id": "4",
"_score": 0.074107975,
"_source": {
"title": 45691
}
},
{
"_index": "stof_63976447",
"_type": "_doc",
"_id": "5",
"_score": 0.074107975,
"_source": {
"title": 45692
}
},
{
"_index": "stof_63976447",
"_type": "_doc",
"_id": "6",
"_score": 0.074107975,
"_source": {
"title": 45693
}
}
]