【发布时间】:2015-03-20 11:16:12
【问题描述】:
我正在尝试设置 lucene.net 3 以便能够在两个字段中搜索一个短语,但我遇到了困难。这是我希望查询返回的内容:
我希望查询返回完全匹配的词组,例如:
短语:“清云红舒缓草药膏”
结果:“清云红舒缓草药膏”
以及通配符匹配:
短语:“CHING WAN HUNG SOO”或“CHING WAN HUN”
结果:“CHING WAN HUNG SOOTHING HERBAL BALM”和其他可能匹配此短语或任何其他不完整短语的变体。
我最初的解决方案是创建一个布尔查询,其中包含短语查询和解析和通配符每个单词的查询。
但这会返回匹配项以及太多不接近适用的结果。 (它会返回“HERBAL TEA”,因为“HERBAL”是解析后的术语之一……)由于解析了 OR 查询。
这与我的原始帖子有关:How to set up a query to return phrases and parts of phrases in lucene.net? 想知道是否可以在标准 Lucene.net 中执行此操作,而无需使用提到的 Java 端口。
有人可以给我任何指导吗? 谢谢!
public override List<TT> ExecuteSearch(string searchQuery, string searchField = "")
{
if (string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", ""))) return new List<TT>();
using (var searcher = new IndexSearcher(Directory, false))
{
var hits_limit = 1000;
var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30, new HashSet<string>());
var fields = new[] {"CompositeName", "SubstanceName"};
var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, analyzer);
parser.AllowLeadingWildcard = true;
parser.PhraseSlop = 0;
var query = ParseWholeQueryWc(searchQuery, fields, parser);
searcher.SetDefaultFieldSortScoring(true, true);
var hits = searcher.Search(query, null, hits_limit, Sort.RELEVANCE).ScoreDocs;
var results = MapLuceneToDataList(hits, searcher);
analyzer.Close();
searcher.Dispose();
return results;
}
}
public Query ParseWholeQueryWc(string searchQuery, string[] fields, QueryParser parser)
{
Query query = new PhraseQuery();
Query query2 = new PhraseQuery();
Query mq = new BooleanQuery();
try
{
var bld = ParseTermWithWildcards(searchQuery);
// phrase
query = parser.Parse("\"" + searchQuery.Trim() + "\"");
// or
query2 = parser.Parse(searchQuery + "*");
// main
((BooleanQuery)mq).Add(query, Occur.SHOULD);
((BooleanQuery)mq).Add(query2, Occur.SHOULD);
}
catch (ParseException ex)
{
throw;
}
return mq;
}
更新
public BooleanQuery ParseWholeQueryWc(string searchQuery, string[] fields, QueryParser parser)
{
BooleanQuery mq = new BooleanQuery();
try
{
string[] qrArr = searchQuery.Split(null);
SpanQuery[] compNmQ = new SpanQuery[qrArr.Length];
SpanQuery[] subsNmQ = new SpanQuery[qrArr.Length];
for (var i = 0; i < qrArr.Length; i++)
{
//CompositeName", "SubstanceName
if (i == qrArr.Length - 1)
{
compNmQ[i] = new SpanTermQuery(new Term("CompositeName", qrArr[i] + "*"));
subsNmQ[i] = new SpanTermQuery(new Term("SubstanceName", qrArr[i] + "*"));
}
else
{
compNmQ[i] = new SpanTermQuery(new Term("CompositeName", qrArr[i]));
subsNmQ[i] = new SpanTermQuery(new Term("SubstanceName", qrArr[i]));
}
}
SpanQuery compNameQ = new SpanNearQuery(compNmQ, 0, true);
SpanQuery subsNameQ = new SpanNearQuery(subsNmQ, 0, true);
// main
((BooleanQuery) mq).Add(compNameQ, Occur.SHOULD);
((BooleanQuery)mq).Add(subsNameQ, Occur.SHOULD);
}
catch (ParseException ex)
{
throw new ArgumentException("BaseLuceneStrategy:ParseWholeQueryWc():" + ex.Message);
}
return mq;
}
这将返回零命中。
【问题讨论】:
标签: lucene lucene.net