【问题标题】:Combine queries in Lucene with BooleanQuery(将 Lucene 中的查询与 BooleanQuery 结合起来)
【发布时间】:2012-08-03 16:00:30
【问题描述】:

我创建了一个 Lucene 示例代码片段,它为一个小文件建立索引。我能够正确地建立索引,并按单个字段值进行搜索。但是,我想同时查询多个字段。我正在使用 BooleanQuery,但它不起作用。

有人可以给点建议吗?这是我的代码片段。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.6 example that indexes a space-delimited log file on the
 * local file system and searches it with a {@link BooleanQuery} built from
 * field/value pairs.
 */
public class LocalFSLucene {

    private final Version version = Version.LUCENE_36;

    private final String indexDirectory = "/Work/Lucene/LocalFSIndex";

    private final String dataFile = "/Work/Lucene/data.txt";

    /* Field names, in the same order as the columns of each data row. */
    private final String fields[] = {"date", "time", "cs-method", "cs-uri",
                                     "sc-status", "time-taken"};

    /* Separator between the columns of a data row. */
    private final String rowDelimiter = " ";

    private IndexWriterConfig config = null;

    /**
     * Lazily creates the {@link IndexWriterConfig} (backed by a
     * {@link StandardAnalyzer}) and keeps it for later calls.
     */
    public void setConfig() {
        if (config == null) {
            config = new IndexWriterConfig(version, new StandardAnalyzer(version));
        }
    }

    /**
     * Reads {@code dataFile} line by line and adds one document per row to
     * the index in {@code indexDirectory}. Every column is stored and indexed
     * as an analyzed field named after the matching entry of {@code fields}.
     *
     * @throws Exception if the data file cannot be read or the index cannot
     *                   be written
     */
    public void buildIndex() throws Exception {

        /* Create the Configuration object for writing index files */
        setConfig();

        /* Get the handle to the directory where indexes will be created */
        Directory dir = new SimpleFSDirectory(new File(indexDirectory));
        try {
            /* Initialize the index writer object */
            IndexWriter indexWriter = new IndexWriter(dir, config);
            try {
                /* Reader object to read the data file */
                BufferedReader reader = new BufferedReader(new FileReader(dataFile));
                try {
                    String row;
                    while ((row = reader.readLine()) != null) {

                        /* Get each field in the current row */
                        String fieldValues[] = row.split(rowDelimiter);

                        /* Blank or truncated rows do not have a value for every
                         * field; skip them instead of failing with an
                         * ArrayIndexOutOfBoundsException half-way through.
                         */
                        if (fieldValues.length < fields.length) {
                            continue;
                        }

                        /* Create a document for each row to store the index information */
                        Document doc = new Document();
                        for (int i = 0; i < fields.length; i++) {
                            doc.add(new Field(fields[i], fieldValues[i],
                                              Field.Store.YES, Field.Index.ANALYZED));
                        }

                        /* Add the document to index */
                        indexWriter.addDocument(doc);
                    }
                } finally {
                    /* Always release the data file, even if indexing failed */
                    reader.close();
                }

                /* Push the index files on the File System */
                indexWriter.commit();
            } finally {
                /* Always release the writer (and its write lock) */
                indexWriter.close();
            }
        } finally {
            dir.close();
        }

        System.out.println("Indexing is complete");
    }

    /**
     * Searches the index with one optional ({@code SHOULD}) {@link TermQuery}
     * per entry of {@code params} and prints up to ten matching documents.
     *
     * <p>NOTE: a {@link TermQuery} is not analyzed; its text is matched
     * verbatim against the indexed terms. {@link #buildIndex()} indexes every
     * field with {@code Field.Index.ANALYZED} through a
     * {@link StandardAnalyzer}, so a value only matches if it equals the token
     * the analyzer produced at index time.
     *
     * @param params field name to term text; one clause is added per entry
     * @throws Exception if the index cannot be opened or read
     */
    public void search(Map<String, String> params) throws Exception {

        /* Get the handle to the directory where indexes are created */
        Directory dir = new SimpleFSDirectory(new File(indexDirectory));
        try {
            /* Create the Index Reader object to read the indexes created */
            IndexReader reader = IndexReader.open(dir);
            try {
                /* Create the detective object which will perform search operation */
                IndexSearcher detective = new IndexSearcher(reader);

                System.out.println("Total Number of Documents - " + detective.maxDoc());

                /* Build the query containing the clues which the detective will
                 * use to solve the case: one optional clause per field/value pair.
                 */
                BooleanQuery q = new BooleanQuery();
                for (Map.Entry<String, String> param : params.entrySet()) {
                    q.add(new TermQuery(new Term(param.getKey(), param.getValue())),
                          BooleanClause.Occur.SHOULD);
                }

                /* The TopScoreDocCollector will create the bag where the detective
                 * will put all the found clues to solve the case.
                 */
                TopScoreDocCollector clueBag = TopScoreDocCollector.create(10, true);

                /* Ask the detective to start */
                detective.search(q, clueBag);

                /* Get all the clues which the detective found during investigation
                 * and display them.
                 */
                ScoreDoc clues[] = clueBag.topDocs().scoreDocs;

                System.out.println("Total Clues Found - " + clues.length);
                System.out.println();

                for (int i = 0; i < clues.length; i++) {

                    /* Get the actual clue from the clue bag */
                    Document clue = detective.doc(clues[i].doc);

                    /* Print every stored field of the document on one line */
                    List<Fieldable> lstFields = clue.getFields();

                    System.out.print((i + 1) + " --> ");
                    for (Fieldable fld : lstFields) {
                        String strField = fld.name();
                        String strValue = clue.get(strField);
                        System.out.print(strField + ":" + strValue + "  ");
                    }
                    System.out.println();
                }
            } finally {
                /* The reader was opened here, so it is released here */
                reader.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Runs a sample search for {@code cs-method=GET} / {@code cs-uri=/blank}.
     * Index building is disabled by default; enable the call below to
     * (re)create the index first.
     */
    public static void main(String args[]) throws Exception {
        LocalFSLucene obj = new LocalFSLucene();

        //obj.buildIndex();

        Map<String, String> searchParams = new HashMap<String, String>();
        searchParams.put("cs-method", "GET");
        searchParams.put("cs-uri", "/blank");
        obj.search(searchParams);
    }
}

这是我正在使用的data.txt

2010-04-21 02:24:01 GET /blank 200 120
2010-04-21 02:24:01 GET /US/registrationFrame 200 605
2010-04-21 02:24:02 GET /US/kids/boys 200 785
2010-04-21 02:24:02 POST /blank 304 56
2010-04-21 02:24:04 GET /blank 304 233
2010-04-21 02:24:04 GET /blank 500 567
2010-04-21 02:24:04 GET /blank 200 897
2010-04-21 02:24:04 POST /blank 200 567
2010-04-21 02:24:05 GET /US/search 200 658
2010-04-21 02:24:05 POST /US/shop 200 768
2010-04-21 02:24:05 GET /blank 200 347

【问题讨论】:

  • 你能更新一下你传递的参数和搜索输出吗?

标签: java solr lucene


【解决方案1】:

终于搞定了。以下是您应该如何使用它。

  1. 使用您的字段和参数在 BooleanQuery 中构建您的查询。
  2. 将该 BooleanQuery 转换为字符串(toString()),再交给 QueryParser 解析。

下面是对应的代码片段。

BooleanQuery b = new BooleanQuery();

StandardAnalyzer analyzer = new StandardAnalyzer(version);

// Each TermQuery must be closed before the Occur argument is passed to add().
b.add(new TermQuery(new Term("cs-method", "GET")), BooleanClause.Occur.SHOULD);
b.add(new TermQuery(new Term("cs-uri", "/blank")), BooleanClause.Occur.SHOULD);

// Re-parse the query's string form so that the analyzer is applied to the
// term text; "cs-method" is only the default field for unqualified terms.
Query q = new QueryParser(version, "cs-method", analyzer).parse(b.toString());

【讨论】:

    【解决方案2】:

    这是上面代码片段下方的 main() 方法:

    /* Entry point: searches the existing index for the sample criteria. */
    public static void main(String args[]) throws Exception {
        /* Field name -> term text to look for */
        Map<String, String> criteria = new HashMap<String, String>();
        criteria.put("cs-method", "GET");
        criteria.put("cs-uri", "/blank");

        LocalFSLucene lucene = new LocalFSLucene();

        //lucene.buildIndex();

        lucene.search(criteria);
    }
    

    另外,输出如下:

    Total Number of Documents - 11
    Total Clues Found - 0
    

    QueryParser 和 BooleanQuery 生成的查询不同。我在 QueryParser 版本中看到了 + 标志,而 BooleanQuery 版本中没有。请看以下内容。

    使用QueryParser

    // Parse the query string with "cs-method" as the default field.
    StandardAnalyzer analyzer = new StandardAnalyzer(version);
    QueryParser parser = new QueryParser(version, "cs-method", analyzer);
    Query q = parser.parse("cs-method:GET AND cs-uri:/blank");
    

    QueryParser 的输出

    Total Number of Documents - 11
    Query --> +cs-method:get +cs-uri:blank
    Total Clues Found - 5
    

    使用BooleanQuery

    Map<String, String> searchParams = new HashMap<String, String>();
    searchParams.put("cs-method", "GET");
    searchParams.put("cs-uri", "/blank");

    BooleanQuery q = new BooleanQuery();

    // Iterate the map populated above: one optional (SHOULD) clause per
    // field/value pair. The term text is used as-is, without analysis.
    for (Map.Entry<String, String> param : searchParams.entrySet()) {
        q.add(new TermQuery(new Term(param.getKey(), param.getValue())), BooleanClause.Occur.SHOULD);
    }
    

    BooleanQuery 的输出

    Total Number of Documents - 11
    Query --> cs-method:GET cs-uri:/blank
    Total Clues Found - 0
    

    【讨论】:

    • 您是否使用 luke 检查过您的索引?看看索引是否如你所愿。 code.google.com/p/luke
    • 我没有,但让我也检查一下。
    • 有趣的是,当我使用以下查询时,文档被正确获取。 Query q = new QueryParser(version, "cs-method", new StandardAnalyzer(version)).parse("cs-method:GET AND cs-uri:/blank");
    • 打印您使用 API 生成的查询,看看它与上述查询的不同之处
    • 您是要我打印 QueryParser 和 BooleanQuery 各自生成的查询吗?
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2013-11-27
    • 2020-08-03
    • 1970-01-01
    • 2011-08-25
    • 1970-01-01
    相关资源
    最近更新 更多