1. 准备依赖:将 lucene-3.5.0.jar 加入项目的 classpath。
2. 新建目录 C:\testsource,新建目录 C:\testindex。
3. 在 C:\testsource 下新建 test1.txt、test2.txt,内容分别为:“商务休闲品牌男装西裤衬衫”和“潮流休闲品牌女装裙子大衣”。
4.创建索引
import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStreamReader;import java.util.Date;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;/** * 给text文件建立索引 * @author liam.huang@foxmail.com */public class TextFileIndexer { public static void main(String[] args) throws Exception{ //text文件路径 File sourceDir = new File("C:\\testsource"); File[] sourceFiles = sourceDir.listFiles(); //索引文件路径 File indexDir = new File("C:\\testindex"); Directory indexFilesDir = FSDirectory.open(indexDir); //构建analyzer Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); //配置IndexWriter IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer); iwConfig.setOpenMode(OpenMode.CREATE); //构建IndexWriter IndexWriter indexWriter = new IndexWriter(indexFilesDir, iwConfig); long startTime = new Date().getTime(); for(int i=0; i
输出结果:
File C:\testsource\test1.txt正在被索引......商务休闲品牌男装西裤衬衫
File C:\testsource\test2.txt正在被索引......潮流休闲品牌女装裙子大衣
花费了569 毫秒把文档增加到索引里面去!索引文件地址:C:\testsource
5.关键字检索
import java.io.File;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;/** * 关键字检索 * @author liam.huang@foxmail.com */public class TextQuery { public static void main(String[] args) throws Exception{ String queryString = "休闲 装"; //索引文件路径 String indexDir = "C:\\testindex"; IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); Query query = null; Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); QueryParser queryParser = new QueryParser(Version.LUCENE_35, "body", analyzer); queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); query = queryParser.parse(queryString); ScoreDoc[] hits = null; if(indexSearcher!=null){ //返回最多为10条记录 TopDocs results = indexSearcher.search(query, 10); hits = results.scoreDocs; if(hits.length>0){ System.out.println("找到:" + hits.length + " 个结果!"); }else{ System.out.println("没有找到"); } indexSearcher.close(); } }}
输出结果:
找到:2 个结果!