Lucene简单应用
这里介绍lucene 3.4的应用
1、下载lucene相关JAR包:(主要是以下三个JAR包)
http://jakarta.apache.org/lucene/
lucene-analyzers-3.4.0.jar
lucene-core-3.4.0.jar
lucene-highlighter-3.4.0.jar
将这三个JAR包引入项目,并加入项目的classpath(构建路径)中
2、建立索引文件夹
在D盘下建立文件夹luceneIndex,用于存放生成的索引信息;再建立文件夹luceneDataSource,用于存放待索引的文件
3、建立待索引的文件hello.txt,内容如下:
中华人民共和国全国人民2006年
将该文件(hello.txt)放在D盘下的luceneDataSource文件夹下
4、新建类FSDirectoryTest,代码如下
public class FSDirectoryTest {
private String filePath = “d://luceneDataSource//hello.txt”;
private File indexPath = new File(“d://luceneIndex”);
private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
public void creatIndex() throws Exception {
IndexWriterConfig iwicf=new IndexWriterConfig(Version.LUCENE_34,analyzer);
Document doc1 = File2Document.file2Document(filePath);
IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexPath), iwicf);
indexWriter.addDocument(doc1);
indexWriter.close();
}
public void search() throws Exception {
String queryString = “华”;
String[] fields = {“name”,”content”};
QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_34, fields, analyzer); //查询解析器
Query query = queryParser.parse(queryString);
//查询
IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.open(indexPath));
Filter filter = null;
TopDocs topDocs = indexSearcher.search(query, filter, 10000);//topDocs 类似集合
System.out.println(“总共有【”+topDocs.totalHits+”】条匹配结果.”);
//输出
for(ScoreDoc scoreDoc:topDocs.scoreDocs){
int docSn = scoreDoc.doc;//文档内部编号
Document doc = indexSearcher.doc(docSn);//根据文档编号取出相应的文档
File2Document.printDocumentInfo(doc);//打印出文档信息
}
}
public void deleteFiles(){
File[] files=indexPath.listFiles();
for(int i=0;i<files.length;i++){
if(files[i]!=null){
files[i].delete();
}
}
}
public static void main(String[] args) {
try {
FSDirectoryTest fst=new FSDirectoryTest();
fst.creatIndex();
fst.search();
fst.deleteFiles();
} catch (Exception e) {
e.printStackTrace();
}
}
}
5、新建File2Document类,读取待索引文件的内容并转换为Document,代码如下:
public class File2Document {
public static Document file2Document(String path){
File file = new File(path);
Document doc = new Document();
//Store.YES 是否存储 yes no compress
//Index 是否进行索引 Index.ANALYZED 分词后进行索引
////Document是一个记录。用来表示一个条目。就是搜索建立的倒排索引的条目。
//比如说,你要搜索自己电脑上的文件。这个时候就可以创建field
doc.add(new Field(“name”,file.getName(),Store.YES,Index.ANALYZED));
doc.add(new Field(“content”,readFileContent(file),Store.YES,Index.ANALYZED));
//readFileContent()读取文件类容
doc.add(new Field(“size”,String.valueOf(file.length()),Store.YES,Index.NOT_ANALYZED));
//不分词,文件大小(int)转换成String
doc.add(new Field(“path”,file.getAbsolutePath(),Store.YES,Index.NOT_ANALYZED));
//不需要根据文件的路径来查询
return doc;
}
private static String readFileContent(File file) {
try {
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
StringBuffer content = new StringBuffer();
for(String line=null;(line = reader.readLine())!=null;){
content.append(line).append(“/n”);
}
return content.toString();
} catch (IOException e) {
}
return null;
}
public static void printDocumentInfo(Document doc){
System.out.println(“name –>”+doc.get(“name”));
System.out.println(“content –>”+doc.get(“content”));
System.out.println(“path –>”+doc.get(“path”));
System.out.println(“size –>”+doc.get(“size”));
}
}