全文检索例子Word文档格式.docx
《全文检索例子Word文档格式.docx》由会员分享,可在线阅读,更多相关《全文检索例子Word文档格式.docx(16页珍藏版)》请在冰豆网上搜索。
indexFile.canWrite()){
if(logger.isDebugEnabled())
logger.error("
索引文件目录创建失败或不可写入!
"
);
publicvoidinit(){
confirmDirs();
try{
Filef=newFile(dirPath);
directory=FSDirectory.open(f);
}catch(Exceptione){
if(logger.isDebugEnabled()){
解除索引文件锁定失败!
+e.getCause());
public void createIndex(List<User> userList) {
init();
//第一个参数是存放索引的目录,有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中)两种,
//第二个参数是使用的分词器,第三个参数:true表示建立全新的索引,false表示建立增量索引,
//第四个参数是每个字段被索引的最大词条数(MaxFieldLength)
writer=newIndexWriter(directory,analyzer,true,IndexWriter.MaxFieldLength.LIMITED);
writer.setMergeFactor(500);
writer.setMaxBufferedDocs(155);
writer.setMaxFieldLength(Integer.MAX_VALUE);
writeIndex(writer,userList);
writer.optimize();
writer.close();
}catch(IOExceptione){
//TODOAuto-generatedcatchblock
e.printStackTrace();
public List<User> search(String keyword) {
returnnull;
Directorydir;
dir=FSDirectory.open(indexFile);
indexSearcher=newIndexSearcher(dir);
indexSearcher.setSimilarity(newIKSimilarity());
//单字段查询,单条件查询
//Queryquery=IKQueryParser.parse("
userInfo"
keyword);
//多字段,单条件查询
String[]fields=newString[]{"
"
parameter1"
};
Queryquery=IKQueryParser.parseMultiField(fields,keyword);
//多字段,单条件,多BooleanClause.Occur[]flags,查询条件的组合方式(Or/And)
//BooleanClause.Occur[]数组,它表示多个条件之间的关系,
//BooleanClause.Occur.MUST表示and,
//BooleanClause.Occur.MUST_NOT表示not,
//BooleanClause.Occur.SHOULD表示or.
//String[]fields=newString[]{"
"
};
//BooleanClause.Occur[]flags=new
//BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};
//Queryquery=IKQueryParser.parseMultiField(fields,
//keyword,flags);
////多Field,多条件查询分析
//String[]queries=newString[]{keyword,keyword};
//Queryquery=IKQueryParser.parseMultiField(fields,queries);
//多Field,多条件,多Occur查询
//Queryquery=
//IKQueryParser.parseMultiField(fields,queries,flags);
//搜索相似度最高的20条记录
TopDocstopDocs=indexSearcher.search(query,20);
ScoreDoc[]hits=topDocs.scoreDocs;
returnhitsToQuery(hits,query);
private List<User> hitsToQuery(ScoreDoc[] hits, Query query) {
List<User> list = new ArrayList<User>();
for (int i = 0; i < hits.length; i++) {
Useru=newUser();
Documentdoc=indexSearcher.doc(hits[i].doc);
u.setUserId(Integer.parseInt(doc.get("
userId"
)));
u.setUserName(doc.get("
userName"
));
u.setUserAge(Integer.parseInt(doc.get("
userAge"
//高亮设置
SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
"<font color=\"red\">", "</font>");
Highlighterhighlighter=newHighlighter(simpleHtmlFormatter,
newQueryScorer(query));
TokenStreamtokenStream=analyzer.tokenStream("
text"
newStringReader(doc.get("
StringuserInfo=highlighter.getBestFragment(tokenStream,doc
.get("
if (userInfo != null) {
u.setUserInfo(userInfo);
}else{
u.setUserInfo(doc.get("
SimpleHTMLFormattersimpleHtmlFormatter1=newSimpleHTMLFormatter(
Highlighterhighlighter1=newHighlighter(
simpleHtmlFormatter1,newQueryScorer(query));
TokenStreamtokenStream1=analyzer.tokenStream("
text1"
Stringp1=highlighter1.getBestFragment(tokenStream1,doc
if (p1 != null) {
u.setParameter1(p1);
u.setParameter1(doc.get("
u.setParameter2(doc.get("
parameter2"
u.setParameter3(doc.get("
parameter3"
u.setParameter4(doc.get("
parameter4"
list.add(u);
indexSearcher.close();
returnlist;
}catch(CorruptIndexExceptione){
}catch(InvalidTokenOffsetsExceptione){
publicvoidwriteIndex(IndexWriterwriter,List&
for(Useru:
Documentdoc=getDoc(u);
writer.addDocument(doc);
privateDocumentgetDoc(Useruser){
System.out.println("
用户ID为"
+user.getUserId()+"
索引被创建"
Documentdoc=newDocument();
addField2Doc(doc,user,"
Store.YES,Index.NOT_ANALYZED);
//Index.NOT_ANALYZED
//不分词,但建立索引
//Index.ANALYZED
//分词并且建立索引
Store.YES,Index.ANALYZED);
returndoc;
privatevoidaddField2Doc(Documentdoc,Objectbean,Stringname,Stores,
Indexi){
Stringvalue;
value=BeanUtils.getProperty(bean,name);
if (value != null) {
doc.add(newField(name,value,s,i,
Field.TermVector.WITH_POSITIONS_OFFSETS));
}catch(IllegalAccessExceptione){
getbeanpropertyerror"
e);
}catch(InvocationTargetExceptione){
}catch(NoSuchMethodExceptione){
/**
 * 没有排序,有高亮,有分页
 *
 * @param pageNo
 * @param pageSize
 * @param keyword
 * @return
 */
public PageBean getPageQuery(int pageNo, int pageSize, String keyword) {
Listresult=newArrayList();
BooleanClause.Occur[]flags=newBooleanClause.Occur[]{
BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};
Queryquery=IKQueryParser.parseMultiField(fields,keyword,flags);
TopScoreDocCollectortopCollector=TopScoreDocCollector.create(
indexSearcher.maxDoc(),true);
indexSearcher.search(query,topCollector);
//查询当页的记录
ScoreDoc[]docs=topCollector.topDocs((pageNo-1)*pageSize,
pageSize).scoreDocs;
//String[]highlightCol={"
for(ScoreDocscdoc:
docs){
Documentdoc=indexSearcher.doc(scdoc.doc);
//
//for(Fieldablefa:
doc.getFields()){
//System.out.println(fa.name());
//Stringvalue=doc.get(fa.name());
//for(Stringcol:
highlightCol){
//if(fa.name().equals(col)){
////设置高亮显示内容
//TokenStreamtokenStream=analyzer.tokenStream("
new
//StringReader(value));
//value=highlighter.getBestFragment(tokenStream,value);
//}
Stringp1=highlighter.getBestFragment(tokenStream1,doc
result.add(u);
PageBeanpb=newPageBean();
pb.setCurrentPage(pageNo);
//当前页
pb.setPageSize(pageSize);
pb.setAllRow(topCollector.getTotalHits());
//hit中的记录数目
pb.setList(result);
returnpb;
e.pr