文章来源:http://www.blogjava.net/wangdei/archive/2008/06/17/208696.html
Lucene不是一个完整的全文索引应用,而是一个用Java写的全文索引引擎工具包,它可以方便地嵌入到各种应用中,实现针对应用的全文索引/检索功能。
为了更快地体验Lucene,本文作者写了一个比较简单的类。大家可以上 BT下载 或是 小说520网 看看其效果。
/**
 * Small demonstration of building and querying a Lucene full-text index over
 * the titles of {@code Bt285} records.
 *
 * <p>{@link #batchCreate()} writes the index, {@link #query(String, String)}
 * searches it, and {@link #main(String[])} runs a sample search.
 */
public class BtLucene {
    private static final Logger logger = Logger.getLogger(BtLucene.class);

    /** Stop words handed to the {@code StandardAnalyzer} for both indexing and searching. */
    public static String[] StopStrs = {"BT285","BT软件","BT电影","BT下载"};

    /**
     * Index of the last hit that is copied into the result list. Hits 0..200
     * are returned, i.e. at most 201 entries.
     * NOTE(review): a cap of 200 may have been intended — confirm before changing.
     */
    private static final int LAST_RESULT_INDEX = 200;

    /**
     * Text placed directly before / after the matched query inside a display title.
     * NOTE(review): both markers are empty in this file; presumably highlight
     * markup is meant to go here — confirm.
     */
    private static final String MATCH_PREFIX = "";
    private static final String MATCH_SUFFIX = "";

    /** Paging parameters used when reading the records to index. */
    private static final int FIRST_PAGE = 1;
    private static final int PAGE_LIMIT = 214; // exclusive upper bound
    private static final int PAGE_SIZE = 1000;

    /**
     * Searches the {@code title} field of the index.
     *
     * @param queryStr    URL-encoded (UTF-8) query text
     * @param lucennePath location of the index to search
     * @return a model carrying the total hit count, the elapsed time in whole
     *         seconds (as a string) and the first hits (see {@link #LAST_RESULT_INDEX})
     * @throws Exception if the query cannot be decoded or parsed, or the index
     *                   cannot be opened or read
     */
    public LuceneModel query(String queryStr,String lucennePath) throws Exception {
        String queryUTF8 = URLDecoder.decode(queryStr,"UTF-8");
        LuceneModel luceneModel = new LuceneModel();
        List<LuceneInfo> lucneneInfoList = new ArrayList<LuceneInfo>();
        long begin = System.currentTimeMillis();

        StandardAnalyzer analyzer = new StandardAnalyzer(StopStrs);
        QueryParser queryParser = new QueryParser("title", analyzer);
        Query query = queryParser.parse(queryUTF8);

        Searcher searcher = new IndexSearcher(lucennePath);
        try {
            Hits hits = searcher.search(query);
            int size = hits.length();
            if (logger.isDebugEnabled()) {
                logger.debug("result size is " + size);
            }
            // The model reports the total number of hits, not the number returned.
            luceneModel.setSize(size);

            int limit = Math.min(size, LAST_RESULT_INDEX + 1);
            for (int i = 0; i < limit; i++) {
                Document doc = hits.doc(i);
                String title = doc.get("title");

                LuceneInfo lucneneInfo = new LuceneInfo();
                lucneneInfo.setId(doc.get("id"));
                lucneneInfo.setTitle(markFirstMatch(title, queryUTF8));
                lucneneInfo.setTrip(title);
                lucneneInfoList.add(lucneneInfo);
            }
        } finally {
            // Hits are read through the searcher, so it is closed only after
            // the loop; closing it also covers the failure paths.
            searcher.close();
        }

        long needsTime = System.currentTimeMillis() - begin;
        long compiteTime = needsTime / 1000;
        luceneModel.setTime(String.valueOf(compiteTime));
        luceneModel.setLuceneInfoList(lucneneInfoList);
        logger.info("query the " + queryUTF8 + " needs " + needsTime +" ms" );
        return luceneModel;
    }

    /**
     * Wraps the first literal occurrence of {@code query} in {@code title}
     * with {@link #MATCH_PREFIX} and {@link #MATCH_SUFFIX}.
     *
     * <p>The query is matched as plain text rather than as a regular
     * expression, so characters such as {@code [} or {@code +} are safe. The
     * title is returned unchanged when it is {@code null}, when the query is
     * empty, or when the query text does not occur in it.
     */
    private static String markFirstMatch(String title, String query) {
        if (title == null || query == null || query.length() == 0) {
            return title;
        }
        int at = title.indexOf(query);
        if (at < 0) {
            return title;
        }
        return title.substring(0, at)
                + MATCH_PREFIX + query + MATCH_SUFFIX
                + title.substring(at + query.length());
    }

    /**
     * Builds the index from the {@code Bt285} records, reading them page by page.
     *
     * <p>Each document stores the record id (not indexed) and the tokenized
     * title with the site suffix removed.
     *
     * @throws Exception if configuration, data access or index writing fails
     */
    public void batchCreate() throws Exception {
        Configure.propertiesConfigure();
        // NOTE(review): contentCreate is populated but not otherwise used in
        // this method; kept in case its construction or setters matter elsewhere.
        BtBatchContentCreate contentCreate = new BtBatchContentCreate();
        ClassPathXmlApplicationContext appContext = new ClassPathXmlApplicationContext(
                "./mysqlContext.xml");
        try {
            WNewsDAO newsDAO = (WNewsDAO) appContext.getBean("wNewDaoProxy");
            Bt285DAO bt285DAO = (Bt285DAO) appContext.getBean("bt285DAO");
            contentCreate.setNewsDAO(newsDAO);
            contentCreate.setBt285DAO(bt285DAO);
            LieService lieService = (LieService) appContext.getBean("lieService");
            contentCreate.setLieService(lieService);

            StandardAnalyzer analyzer = new StandardAnalyzer(StopStrs);
            IndexWriter writer = new IndexWriter(Configure.getCreateBtLucenePath(), analyzer, true);
            try {
                for (int i = FIRST_PAGE; i < PAGE_LIMIT; i++) {
                    Page page = new Page();
                    logger.info("i=" + i);
                    page.setPageIndex(i);
                    page.setPageSize(PAGE_SIZE);
                    List<Bt285> list = bt285DAO.findPageByQuery(
                            "select t from Bt285 t ",null, page);
                    for (Bt285 news : list) {
                        logger.debug("news Id=" + news.getId());
                        writer.addDocument(toDocument(news));
                    }
                }
                writer.optimize();
            } finally {
                // Close the writer even when indexing fails part-way.
                writer.close();
            }
        } finally {
            appContext.close();
        }
    }

    /** Converts one record into the Lucene document that gets indexed. */
    private static Document toDocument(Bt285 news) {
        String title = news.getTitle();
        if (title == null) {
            title = "no title";
        }
        // Literal (non-regex) removal of the site suffix.
        String newTitle = title.replace("|BT285.cn|BT下载|BT电影|BT软件", "");

        Document doc = new Document();
        doc.add(new Field("id", String.valueOf(news.getId()), Field.Store.YES, Field.Index.NO));
        doc.add(new Field("title", newTitle, Field.Store.YES,
                Field.Index.TOKENIZED));
        // The boost grows with the record id.
        doc.setBoost(news.getId() * 10);
        return doc;
    }

    /**
     * Runs a sample search for "nba" against the configured index and prints a
     * URL-encoded sample string.
     */
    public static void main(String[] args) throws Exception {
        System.out.println("server begin!");
        Configure.propertiesConfigure();
        BtLucene action = new BtLucene();
        String path = Configure.getCreateBtLucenePath();
        action.query("nba",path);
        System.out.println(URLEncoder.encode("天兆","UTF-8"));//%E5%A4%A9%E5%85%86
        System.out.println("server finish!");
    }
}
搜索
法证先锋II 出来的效果.
[TVB连续剧][法证先锋II][粤语中字][TV-RMVB]
阅读(905) | 评论(0) | 转发(0) |