用户名: 密 码:
您现在的位置:首页 >> SEO开发技巧 >> 内容

相关搜索提高高亮显示的速度

时间:2010/4/7 20:59:35 点击:8466

  核心提示:public class TermVectorTest { Analyzer analyzer = new SimpleAnalyzer(); Directory ramDir = new RAMDirectory(); public void createRamIndex() throws CorruptIndexExceptio...
public class TermVectorTest {  
  •       
  •     Analyzer analyzer = new SimpleAnalyzer();  
  •     Directory ramDir = new RAMDirectory();  
  •       
  •     public void createRamIndex() throws CorruptIndexException, LockObtainFailedException, IOException{  
  •           
  •         IndexWriter writer = new IndexWriter(ramDir,analyzer,IndexWriter.MaxFieldLength.LIMITED);  
  •           
  •         Document doc1 = new Document();  
  •         doc1.add(new Field("title","java",Store.YES,Index.ANALYZED));  
  •         doc1.add(new Field("author","callan",Store.YES,Index.ANALYZED));  
  •         doc1.add(new Field("subject","java一门编程语言,用java的人很多,编程语言也不少,但是java最流行",Store.YES,Index.ANALYZED,TermVector.WITH_POSITIONS_OFFSETS));  
  •           
  •         Document doc2 = new Document();  
  •         doc2.add(new Field("title","english",Store.YES,Index.ANALYZED));  
  •         doc2.add(new Field("author","wcq",Store.YES,Index.ANALYZED));  
  •         doc2.add(new Field("subject","英语用的人很多",Store.YES,Index.ANALYZED,TermVector.WITH_POSITIONS_OFFSETS));  
  •       
  •         Document doc3 = new Document();  
  •         doc3.add(new Field("title","asp",Store.YES,Index.ANALYZED));  
  •         doc3.add(new Field("author","ca",Store.YES,Index.ANALYZED));  
  •         doc3.add(new Field("subject","英语用的人很多",Store.YES,Index.ANALYZED,TermVector.WITH_POSITIONS_OFFSETS));  
  •           
  •         writer.addDocument(doc1);  
  •         writer.addDocument(doc2);  
  •         writer.addDocument(doc3);  
  •           
  •         writer.optimize();  
  •         writer.close();  
  •     }  
  •       
  •     public void search() throws CorruptIndexException, IOException{  
  •         IndexReader reader = IndexReader.open(ramDir);  
  •         IndexSearcher searcher = new IndexSearcher(reader);  
  •         Term term = new Term("title","java");   //在title里查询java词条  
  •         TermQuery query = new TermQuery(term);  
  •         Hits hits = searcher.search(query);  
  •         for (int i = 0; i < hits.length(); i++)  
  •         {  
  •             Document doc = hits.doc(i);  
  •             System.out.println(doc.get("title"));  
  •             System.out.println(doc.get("subject"));  
  •             System.out.println("moreLike search: ");  
  •               
  •             morelikeSearch(reader,hits.id(i));  
  •         }  
  •     }  
  •   
  •     private void morelikeSearch(IndexReader reader,int id) throws IOException  
  •     {  
  •         //根据这个document的id获取这个field的Term Vector 信息,就是这个field分词之后在这个field里的频率、位置、等信息  
  •         TermFreqVector vector = reader.getTermFreqVector(id, "subject");  
  •           
  •         BooleanQuery query = new BooleanQuery();    
  •           
  •         for (int i = 0; i < vector.size(); i++)  
  •         {  
  •              TermQuery tq = new TermQuery(new Term("subject",     
  •                         vector.getTerms()[i]));   //获取每个term保存的Token  
  •                      
  •                  query.add(tq, BooleanClause.Occur.SHOULD);     
  •   
  •         }  
  •           
  •         IndexSearcher searcher = new IndexSearcher(ramDir);     
  •              
  •         Hits hits = searcher.search(query);     
  •           
  •         //显示代码,略  
  •   
  •           
  •     }  
  •   
  • //Lucene使用TermVector提高高亮显示性能  
  •     public void highterLightSearch() throws CorruptIndexException, IOException{  
  •         IndexReader reader = IndexReader.open(ramDir);     
  •           
  •         IndexSearcher searcher = new IndexSearcher(reader);     
  •              
  •         TermQuery query = new TermQuery(new Term("subject","java"));     
  •              
  •         Hits hits = searcher.search(query);     
  •              
  •         //高亮显示设置     
  •         SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>","</font>");  
  •              
  •         Highlighter highlighter =new Highlighter(simpleHTMLFormatter,new QueryScorer(query));     
  •           
  •          // 这个100是指定关键字字符串的context的长度,你可以自己设定,因为不可能返回整篇正文内容     
  •         highlighter.setTextFragmenter(new SimpleFragmenter(100));     
  •     
  •         for(int i = 0; i < hits.length(); i++){     
  •                  
  •             Document doc = hits.doc(i);     
  •                  
  •             TermPositionVector termFreqVector = (TermPositionVector)reader.getTermFreqVector(hits.id(i), "subject");     
  •                
  •             TermFreqVector vector = reader.getTermFreqVector(hits.id(i), "subject");  
  •             TokenStream tokenStream = TokenSources.getTokenStream(termFreqVector);     
  •                  
  •             String result = highlighter.getBestFragment(tokenStream, doc.get("subject"));     
  •     
  •             System.out.println(doc.get("title"));     
  •                  
  •             System.out.println(result);     
  •                  
  •         }     
  •   
  •           
  •     }  
  •       
  •     public static void main(String[] args) throws CorruptIndexException, IOException  
  •     {  
  •         TermVectorTest  t = new TermVectorTest();  
  •         t.createRamIndex();  
  •         t.search();  
  •     }  
  •   

  • 文章来源:http://www.xinxilong.com

    作者:不详 来源:网络
    相关评论
    发表我的评论
    • 大名:
    • 内容:
  • 论坛群发大师(www.xinxilong.com) © 2008 版权所有 All Rights Reserved.
  • Email:4984672[at]qq.com 沪ICP备12025887号-1
  • Powered by 论坛群发大师