Several methods changed significantly in Lucene 4; the code below can be compared against Lucene 3 to see the differences. The main ones visible here: IndexWriter is now configured through an IndexWriterConfig, IndexSearcher wraps an IndexReader opened with DirectoryReader.open, QueryParser has moved to the org.apache.lucene.queryparser.classic package (the separate lucene-queryparser module), and a TokenStream must be reset() before iterating it.

Notes on the code:
- The File2DocumentUtils class is not included in the listing; it just converts a File into a Lucene Document (a minimal sketch is appended after the main class so the example can be run).
- The Lucene version used is 4.1.0.
- Two analyzers are used: IKAnalyzer and Lucene's standard StandardAnalyzer.
- Each of the simple queries has a @Test method and can be run directly with JUnit.
```java
package helloworld;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import utils.File2DocumentUtils;

public class HelloWorld {

    String filePath1 = "C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneDatasource\\README.txt";
    String filePath2 = "C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneDatasource\\Copy of README.txt";
    File indexFile = new File("C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneIndex");

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
    // Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_41);

    // Lucene 4: IndexWriter is configured via IndexWriterConfig instead of the
    // old (Directory, Analyzer, MaxFieldLength) constructors from Lucene 3.
    @Test
    public void createIndex() throws IOException {
        Directory indexPath = FSDirectory.open(indexFile);
        Document document1 = File2DocumentUtils.file2Document(filePath1);
        Document document2 = File2DocumentUtils.file2Document(filePath2);

        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_41, analyzer);
        IndexWriter indexWriter = new IndexWriter(indexPath, indexWriterConfig);
        indexWriter.addDocument(document1);
        indexWriter.addDocument(document2);
        indexWriter.close();
        indexPath.close();
    }

    // Lucene 4: IndexSearcher no longer opens a Directory itself; it wraps an
    // IndexReader obtained from DirectoryReader.open().
    public void search(Query query) throws Exception {
        Directory directory = FSDirectory.open(indexFile);
        IndexReader indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        // Highlighter setup. The pre/post tags in the original post were stripped
        // by its HTML rendering; plain <b></b> tags are substituted here.
        Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);

        TopDocs topDocs = indexSearcher.search(query, 1000);
        // List<Document> documents = new ArrayList<Document>();
        System.out.println("Total hits: " + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int docID = scoreDoc.doc;
            Document document = indexSearcher.doc(docID);
            // Replace the stored content with the highlighted best fragment, if any.
            String hc = highlighter.getBestFragment(analyzer, "content", document.get("content"));
            if (hc != null) {
                document.removeField("content");
                document.add(new Field("content", hc, Store.YES, Index.ANALYZED));
            }
            File2DocumentUtils.printDocumentInfo(document);
        }
        indexReader.close();
        directory.close();
    }

    // Lucene 4: QueryParser lives in the separate lucene-queryparser module
    // (org.apache.lucene.queryparser.classic).
    @Test
    public void search() throws Exception {
        String[] fields = { "name", "content" };
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_41, fields, analyzer);
        Query query = queryParser.parse("Introduction");
        search(query);
    }

    @Test
    public void testTerm() throws Exception {
        Term t = new Term("content", "introduction");
        Query query = new TermQuery(t);
        search(query);
    }

    @Test
    public void testRange() throws Exception {
        Query query = NumericRangeQuery.newLongRange("size", Long.valueOf(400), Long.valueOf(1000), true, true);
        search(query);
    }

    @Test
    public void testWildcardQuery() throws Exception {
        Term term = new Term("content", "luc?");
        Query query = new WildcardQuery(term);
        search(query);
    }

    @Test
    public void testBooleanQuery() throws Exception {
        Query query1 = NumericRangeQuery.newLongRange("size", Long.valueOf(400), Long.valueOf(1000), true, true);
        Term term = new Term("content", "luc*");
        Query query2 = new WildcardQuery(term);
        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(query1, Occur.MUST_NOT);
        booleanQuery.add(query2, Occur.MUST);
        search(booleanQuery);
    }

    @Test
    public void testPhraseQuery() throws Exception {
        Term term1 = new Term("content", "main");
        Term term2 = new Term("content", "page");
        PhraseQuery query = new PhraseQuery();
        query.add(term1);
        query.add(term2);
        query.setSlop(1);
        search(query);
    }

    // Lucene 4: a TokenStream must be reset() before incrementToken(), and
    // end()/close() called when done.
    @Test
    public void testAnalyzer() throws Exception {
        String string = "我是一个中国人 sdjfkajsdfjak "; // mixed Chinese/English sample input
        Analyzer analyzer = new IKAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(string));
        CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = tokenStream.getAttribute(TypeAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.print("[" + termAtt.toString() + "]");
            System.out.println(typeAtt.type());
        }
        tokenStream.end();
        tokenStream.close();
    }
}
```
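As promised above, here is a minimal sketch of what File2DocumentUtils would need to look like for the tests to run. The field names (name, content, size, path) and the use of a LongField for size are assumptions inferred from the queries in HelloWorld (e.g. NumericRangeQuery.newLongRange on "size"), not the original helper's code.

```java
package utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;

public class File2DocumentUtils {

    // Builds a Document with name/content/size/path fields from a file on disk.
    // (Field names here are assumptions based on how HelloWorld queries them.)
    public static Document file2Document(String path) throws Exception {
        File file = new File(path);
        Document document = new Document();
        document.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        // LongField so that NumericRangeQuery.newLongRange("size", ...) can match it
        // (both use the default precision step).
        document.add(new LongField("size", file.length(), Store.YES));
        document.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
        return document;
    }

    // Prints the stored field values of a search hit.
    public static void printDocumentInfo(Document document) {
        System.out.println("name    = " + document.get("name"));
        System.out.println("content = " + document.get("content"));
        System.out.println("size    = " + document.get("size"));
        System.out.println("path    = " + document.get("path"));
        System.out.println("------------------------------------");
    }

    // Reads the whole file as text using the platform default charset.
    private static String readFileContent(File file) throws Exception {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file)));
        try {
            StringBuilder content = new StringBuilder();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            reader.close();
        }
    }
}
```

With this in place, run createIndex() first to build the index; after that, any of the query tests should find hits in the two README files.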