博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
lucene4中简单的query,以及与lucene3的一些区别
阅读量:6896 次
发布时间:2019-06-27

本文共 6453 字,大约阅读时间需要 21 分钟。

hot3.png

lucene4中有一些方法有很大的变动,通过以下代码可以看出与lucene3的一些区别。

下面代码中解释

  1. File2DocumentUtils类就不上传了,就是将File转换成lucene中的Document,如果需要运行的话再贴出来
  2. lucene包用的是4.1.0
  3. 分词器用了IKAnalyzer和lucene标准的分词器StandardAnalyzer
  4. 一些简单的查询都加了@Test注解,可以直接用JUnit运行

package helloworld;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import utils.File2DocumentUtils;

/**
 * Lucene 4.1 query demo, showing API differences from Lucene 3: index
 * creation, several query types (term, numeric range, wildcard, boolean,
 * phrase), result highlighting, and tokenization with IKAnalyzer vs. the
 * standard analyzer. Each query has a JUnit {@code @Test} entry point.
 *
 * <p>Relies on the project helper {@code File2DocumentUtils} (not shown in
 * the post) to convert a file into a Lucene {@link Document} and to print
 * one.</p>
 */
public class HelloWorld {

    // Sample input files and index location (hard-coded demo paths).
    String filePath1 = "C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneDatasource\\README.txt";
    String filePath2 = "C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneDatasource\\Copy of README.txt";
    File indexFile = new File("C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneIndex");

    // Lucene 4 analyzers take an explicit Version (a change from Lucene 3).
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
    // Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_41);

    /**
     * Builds the index from the two sample files. In Lucene 4 the writer is
     * configured via {@link IndexWriterConfig} instead of the Lucene 3
     * constructor arguments.
     *
     * @throws IOException if the index directory or documents cannot be written
     */
    @Test
    public void createIndex() throws IOException {
        Directory indexPath = FSDirectory.open(indexFile);
        Document document1 = File2DocumentUtils.file2Document(filePath1);
        Document document2 = File2DocumentUtils.file2Document(filePath2);

        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_41, analyzer);
        IndexWriter indexWriter = new IndexWriter(indexPath, indexWriterConfig);
        indexWriter.addDocument(document1);
        indexWriter.addDocument(document2);
        indexWriter.close();
        indexPath.close();
    }

    /**
     * Runs the given query against the index and prints each hit, replacing
     * the stored "content" field with a highlighted best fragment when the
     * highlighter finds a match. In Lucene 4 the searcher is opened through
     * {@link DirectoryReader} (Lucene 3 opened it from the directory
     * directly).
     *
     * @param query the query to execute
     * @throws Exception on any index-access or highlighting failure
     */
    public void search(Query query) throws Exception {
        Directory directory = FSDirectory.open(indexFile);
        IndexReader indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        // FIX: the blog extraction stripped the HTML tags out of the string
        // literals, leaving ("", "") — which makes highlighting a no-op.
        // Restored the conventional red-font prefix/suffix pair.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);

        TopDocs topDocs = indexSearcher.search(query, 1000);
        System.out.println("搜索到的总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int docID = scoreDoc.doc;
            Document document = indexSearcher.doc(docID);
            // getBestFragment returns null when the query matched on another
            // field (e.g. "name"), so keep the original content in that case.
            String hc = highlighter.getBestFragment(analyzer, "content", document.get("content"));
            if (hc != null) {
                document.removeField("content");
                document.add(new Field("content", hc, Store.YES, Index.ANALYZED));
            }
            File2DocumentUtils.printDocumentInfo(document);
        }
        indexReader.close();
        directory.close();
    }

    /**
     * Parsed query over multiple fields; note the Version argument required
     * by the Lucene 4 {@link MultiFieldQueryParser}.
     */
    @Test
    public void search() throws Exception {
        String[] fields = { "name", "content" };
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_41, fields, analyzer);
        Query query = queryParser.parse("Introduction");
        search(query);
    }

    /** Exact-term query (term text must already be lower-cased/analyzed). */
    @Test
    public void testTerm() throws Exception {
        Term t = new Term("content", "introduction");
        Query query = new TermQuery(t);
        search(query);
    }

    /** Numeric range query on the "size" field, bounds inclusive. */
    @Test
    public void testRange() throws Exception {
        Query query = NumericRangeQuery.newLongRange("size",
                Long.valueOf(400), Long.valueOf(1000), true, true);
        search(query);
    }

    /** Wildcard query: '?' matches exactly one character. */
    @Test
    public void testWildcardQuery() throws Exception {
        Term term = new Term("content", "luc?");
        Query query = new WildcardQuery(term);
        search(query);
    }

    /** Boolean combination: wildcard match required, size range excluded. */
    @Test
    public void testBooleanQuery() throws Exception {
        Query query1 = NumericRangeQuery.newLongRange("size",
                Long.valueOf(400), Long.valueOf(1000), true, true);
        Term term = new Term("content", "luc*");
        Query query2 = new WildcardQuery(term);

        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(query1, Occur.MUST_NOT);
        booleanQuery.add(query2, Occur.MUST);
        search(booleanQuery);
    }

    /** Phrase query: "main" then "page" with at most one position in between. */
    @Test
    public void testPhraseQuery() throws Exception {
        Term term1 = new Term("content", "main");
        Term term2 = new Term("content", "page");
        PhraseQuery query = new PhraseQuery();
        query.add(term1);
        query.add(term2);
        query.setSlop(1);
        search(query);
    }

    /**
     * Tokenizes a mixed Chinese/ASCII string with IKAnalyzer and prints each
     * token and its type. Lucene 4 requires {@code reset()} before consuming
     * a TokenStream — a notable difference from Lucene 3.
     */
    @Test
    public void testAnalyzer() throws Exception {
        String string = "我是一个中国人 sdjfkajsdfjak ";
        Analyzer analyzer = new IKAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(string));
        CharTermAttribute termAtt = (CharTermAttribute) tokenStream.getAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = (TypeAttribute) tokenStream.getAttribute(TypeAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.print("[" + termAtt.toString() + "]");
            System.out.println(typeAtt.type());
        }
        // FIX: complete the Lucene TokenStream lifecycle
        // (reset -> incrementToken* -> end -> close) and release the analyzer.
        tokenStream.end();
        tokenStream.close();
        analyzer.close();
    }
}

转载于:https://my.oschina.net/u/942651/blog/149095

你可能感兴趣的文章
sqlite实现新闻收藏和取消收藏
查看>>
Unity中的基础光照
查看>>
Final发布——视频博客
查看>>
SqlHelper类
查看>>
服务器端控件Button会自动刷新页面
查看>>
Sass函数:Sass Maps的函数-map-get($map,$key)
查看>>
HDU 1230 火星A+B
查看>>
C# foreach 为什么循环使用Foreach 效率要高
查看>>
oracle创建透明网关出现的问题
查看>>
对象和类
查看>>
分布式事务
查看>>
udp,select超时和recvfrom收不到数据原因
查看>>
将任意程序(如.bat文件)作为Windows服务运行
查看>>
【ElasticSearch篇】--ElasticSearch从初识到安装和应用
查看>>
Java命令参数说明大全
查看>>
PIE SDK创建掩膜
查看>>
(四)springmvc+mybatis+dubbo+zookeeper分布式架构 整合 - maven代码结构
查看>>
SQL查询到的数据放到DataSet中
查看>>
mybatis的selectOne和selectList没有数据返回时的问题
查看>>
批处理+组策略 实现规定时间段无法开机and定时关机
查看>>