【问题标题】:ElasticSearch Synonym usage / syntax in my index(我的索引中的 ElasticSearch 同义词用法/语法)
【发布时间】:2026-02-15 19:35:01
【问题描述】:

我是 ElasticSearch 的新手。我正在 ES 中尝试一个简单的家谱项目,并希望对名字和姓氏使用同义词。我有以下 ElasticSearch 索引设置,希望在其中添加两个同义词分析器,然后在搜索不同字段时分别用它们进行同义词处理。我不知道如何将正确的条目添加到我的“设置”JSON 文件中。

我的两个同义词文件分别称为 given_synonyms.txt 和 surname_synonyms.txt。如果有人能帮我写出这个 json 文件的正确语法,我将不胜感激。

我已经为我想做的事情添加了(大写的)非常通用的伪代码,希望这样对读者更有意义。

    {
      "settings": {
        "index": {
          "number_of_shards": "128",
          "number_of_replicas": "0",
          "analysis": {
            "filter": {
              "dbl_metaphone": {
                "type":    "phonetic",
                "encoder": "double_metaphone",
                "max_code_len" : 5
              }
            },
            "analyzer": {
              "dbl_metaphone": {
                "tokenizer": "standard",
                "filter":    "dbl_metaphone"
              }
            }
    I ASSUME THE SYNONYM ENTRIES GO HERE FOR THE TWO SYNONYM FILES - BUT I DON'T KNOW HOW THAT SHOULD BE DONE
          }
        }
      },
      "mappings": {
        "test": {
          "_all": {
            "enabled": false
          },
          "_source": {
            "enabled": true
          },
          "properties": {
            "GivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
                I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
              }
            },
            "Surnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
                I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
              }
            },
            "FatherGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "FatherSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "MotherGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "MotherSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "SpouseGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "SpouseSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "ChildrenGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "BirthYears": {
              "type": "short"
            },
            "BirthLocations": {
              "type": "integer"
            },
            "DeathYears": {
              "type": "short"
            },
            "DeathLocations": {
              "type": "integer"
            },
            "MarriageLocations": {
              "type": "integer"
            },
            "MarriageYears": {
              "type": "integer"
            },
            "ResidenceLocations": {
              "type": "integer"
            }
          }
        }
      }
    }

【问题讨论】:

    标签: elasticsearch synonym


    【解决方案1】:

    首先,让我们看看同义词分析器以及如何引入同义词文件。从这里 https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-tokenfilter.html 你可以看到如何配置它。你提到了两个文件,最好为每个同义词文件各定义一个同义词过滤器,如下所示:

    {
      "settings": {
        "index": {
          "number_of_shards": "128",
          "number_of_replicas": "0",
          "analysis": {
            "filter": {
              "dbl_metaphone": {
                "type": "phonetic",
                "encoder": "double_metaphone",
                "max_code_len": 5
              },
              "given_synonyms": {
                "type": "synonym",
                "synonyms_path": "analysis/given_synonyms.txt"
              },
              "surname_synonyms": {
                "type": "synonym",
                "synonyms_path": "analysis/surname_synonyms.txt"
              }
            },
            "analyzer": {
              "dbl_metaphone": {
                "tokenizer": "standard",
                "filter": "dbl_metaphone"
              },
              "given_synonyms": {
                "tokenizer": "whitespace",
                "filter": "given_synonyms"
              },
              "surname_synonyms": {
                "tokenizer": "whitespace",
                "filter": "surname_synonyms"
              }
            }
          }
        }
      }
    }
    

    请记住,当您使用一个文件(或多个文件)作为同义词时,您需要确保每个 elasticsearch 节点都可以访问该文件。另一种方法是在设置部分直接指定同义词。如果同义词的数量不是很大,或者在使文件可供 elasticsearch 节点访问时存在问题,这可能会更好。您可以在文档中查看更多定义同义词的方法。

    {
      "settings": {
        "index": {
          "number_of_shards": "128",
          "number_of_replicas": "0",
          "analysis": {
            "filter": {
              "dbl_metaphone": {
                "type": "phonetic",
                "encoder": "double_metaphone",
                "max_code_len": 5
              },
              "given_synonyms": {
                "type": "synonym",
                "synonyms": [ ... YOUR SYNONYMS HERE ...]
              },
              "surname_synonyms": {
                "type": "synonym",
                "synonyms": [ ... YOUR SYNONYMS HERE ...]
              }
            },
            "analyzer": {
              "dbl_metaphone": {
                "tokenizer": "standard",
                "filter": "dbl_metaphone"
              },
              "given_synonyms": {
                "tokenizer": "standard",
                "filter": "given_synonyms"
              },
              "surname_synonyms": {
                "tokenizer": "standard",
                "filter": "surname_synonyms"
              }
            }
          }
        }
      }
    }
    

    为了达到最终的解决方案,你可以有这样的东西

    {
      "settings": {
        "index": {
          "number_of_shards": "128",
          "number_of_replicas": "0",
          "analysis": {
            "filter": {
              "dbl_metaphone": {
                "type": "phonetic",
                "encoder": "double_metaphone",
                "max_code_len": 5
              },
              "given_synonyms": {
                "type": "synonym",
                "synonyms_path": "analysis/given_synonyms.txt"
              },
              "surname_synonyms": {
                "type": "synonym",
                "synonyms_path": "analysis/surname_synonyms.txt"
              }
            },
            "analyzer": {
              "dbl_metaphone": {
                "tokenizer": "standard",
                "filter": "dbl_metaphone"
              },
              "dbl_metaphone_given_synonym": {
                "tokenizer": "standard",
                "filter": [
                  "given_synonyms",
                  "dbl_metaphone"
                ]
              },
              "dbl_metaphone_surname_synonym": {
                "tokenizer": "standard",
                "filter": [
                  "surname_synonyms",
                  "dbl_metaphone"
                ]
              }
            }
          }
        }
      }
    }
    

    这里总共有三个分析器,其中两个各自组合了两个过滤器(前一个过滤器的输出是后一个的输入,所以顺序很重要)。在 elasticsearch 中,您可以指定在索引期间用一个分析器分析某个字段,而在搜索时用另一个分析器分析搜索输入。所以你可以有这样的东西(来自 https://www.elastic.co/guide/en/elasticsearch/reference/current/search-analyzer.html ):

    {
      "mappings": {
        "properties": {
          "text": {
            "type": "text",
            "analyzer": "autocomplete",
            "search_analyzer": "standard"
          }
        }
      }
    }
    

    所以你的设置/映射变成了这样

    {
      "settings": {
        "index": {
          "number_of_shards": "128",
          "number_of_replicas": "0",
          "analysis": {
            "filter": {
              "dbl_metaphone": {
                "type": "phonetic",
                "encoder": "double_metaphone",
                "max_code_len": 5
              },
              "given_synonyms": {
                "type": "synonym",
                "synonyms_path": "analysis/given_synonyms.txt"
              },
              "surname_synonyms": {
                "type": "synonym",
                "synonyms_path": "analysis/surname_synonyms.txt"
              }
            },
            "analyzer": {
              "dbl_metaphone": {
                "tokenizer": "standard",
                "filter": "dbl_metaphone"
              },
              "dbl_metaphone_given_synonym": {
                "tokenizer": "standard",
                "filter": [
                  "given_synonyms",
                  "dbl_metaphone"
                ]
              },
              "dbl_metaphone_surname_synonym": {
                "tokenizer": "standard",
                "filter": [
                  "surname_synonyms",
                  "dbl_metaphone"
                ]
              }
            }
          }
        }
      },
      "mappings": {
        "test": {
          "_all": {
            "enabled": false
          },
          "_source": {
            "enabled": true
          },
          "properties": {
            "GivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_given_synonym"
                }
              }
            },
            "Surnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_surname_synonym"
                }
              }
            },
            "FatherGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_given_synonym"
                }
              }
            },
            "FatherSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_surname_synonym"
                }
              }
            },
            "MotherGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_given_synonym"
                }
              }
            },
            "MotherSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_surname_synonym"
                }
              }
            },
            "SpouseGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_given_synonym"
                }
              }
            },
            "SpouseSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_surname_synonym"
                }
              }
            },
            "ChildrenGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone",
                  "search_analyzer": "dbl_metaphone_given_synonym"
                }
              }
            },
            "BirthYears": {
              "type": "short"
            },
            "BirthLocations": {
              "type": "integer"
            },
            "DeathYears": {
              "type": "short"
            },
            "DeathLocations": {
              "type": "integer"
            },
            "MarriageLocations": {
              "type": "integer"
            },
            "MarriageYears": {
              "type": "integer"
            },
            "ResidenceLocations": {
              "type": "integer"
            }
          }
        }
      }
    }
    

    【讨论】: