oracle中文全文索引
basic_lexer:默认词法分析器,只支持英语
chinese_vgram_lexer:支持中文字符集,非智能分词,效率一般
chinese_lexer:只支持utf8字符集(较新版本的Oracle Text还支持ZHS16GBK等中文字符集,请以所用版本的官方文档为准),智能中文分词。
/* Create an Oracle Text preference named oracle_lexer that is based on
   the chinese_lexer lexer type. */
BEGIN
    ctx_ddl.create_preference(
        'oracle_lexer',
        'chinese_lexer'
    );
END;
/
/* Build a full-text (CONTEXT) index on the single column send_addr,
   using the lexer preference oracle_lexer. */
CREATE INDEX idx_test
    ON test (send_addr)
    INDEXTYPE IS ctxsys.context
    PARAMETERS ('lexer oracle_lexer');
/* Query: return the rows of test whose send_addr matches the term
   through the full-text index (CONTAINS score greater than zero). */
SELECT *
  FROM test
 WHERE CONTAINS(send_addr, '闵行') > 0;
/* Synchronize the index. */
BEGIN
    ctx_ddl.sync_index('idx_test');
END;
/
/* Optimize the index (optimization level 'full'). */
BEGIN
    ctx_ddl.optimize_index('idx_test', 'full');
END;
/
mysql中文全文索引
下载mysqlcft.so 放入mysql plugin目录下
mysql> show variables like '%plugin%';
+---------------+-------------------------+
| Variable_name | Value |
+---------------+-------------------------+
| plugin_dir | /usr/lib64/mysql/plugin |
+---------------+-------------------------+
-- Register the mysqlcft plugin from the shared library mysqlcft.so,
-- then list the installed plugins.
INSTALL PLUGIN mysqlcft
    SONAME 'mysqlcft.so';
SHOW PLUGINS;
创建中文全文索引
-- Add a FULLTEXT index on send_addr that is parsed by the mysqlcft plugin.
-- Multi-column indexes are supported as well.
ALTER IGNORE TABLE test
    ADD FULLTEXT INDEX idx_test (send_addr)
    WITH PARSER mysqlcft;
以下为测试结果
-- Full-text search for the term on send_addr, in boolean mode.
SELECT *
FROM test
WHERE MATCH(send_addr) AGAINST ('闵行' IN BOOLEAN MODE);
1938 rows in set (0.01 sec)
-- Comparison query: the same term searched as a substring with LIKE.
SELECT *
FROM test
WHERE send_addr LIKE '%闵行%';
1938 rows in set (0.37 sec)
sphinx中文全文索引
rpm安装
配置
指定数据库连接,索引字段等,注意需要表有自增列作为sphinx id
建立index
#indexer --all
测试结果 /* 权重(weight)越高匹配越精确,结果按权重从高到低排列;document为表id字段 */
using config file '/etc/sphinx/sphinx.conf'...
index 'test1': query '安亭 ': returned 71 matches of 71 total in 0.005 sec
displaying matches:
1. document=18378, weight=2608
2. document=10415, weight=2579
3. document=19085, weight=2579
4. document=20185, weight=2579
5. document=29761, weight=2579
6. document=30447, weight=2579
7. document=31996, weight=2579
8. document=34205, weight=2579
9. document=36125, weight=2579
mysql> select * from test where send_addr like '%安亭%';
22 rows in set (0.26 sec)
/* 注:sphinx返回71条而like只返回22条,很可能是因为ngram_len = 1时'安亭'被拆成'安'和'亭'两个单字分别匹配,只要求两个字都出现,不要求相邻 */
以下为sphinx.conf配置
# Data source: reads rows from the MySQL table `test` for indexing.
source src1
{
    type          = mysql

    # Connection settings.
    sql_host      = localhost
    sql_port      = 3306 # optional, default is 3306
    sql_user      = test
    sql_pass      = test
    sql_db        = test

    # Issued before the main query: switch the connection to utf8.
    sql_query_pre = SET NAMES utf8

    # Main fetch query: id identifies each document, send_addr is the indexed text.
    sql_query     = SELECT id,send_addr FROM test
}
# Index definition: built from source src1 and stored under /var/lib/sphinx/test1.
index test1
{
    source        = src1
    path          = /var/lib/sphinx/test1

    # Document encoding.
    charset_type  = utf-8

    # Accepted characters and case folding: digits, Latin letters (A..Z folded
    # to a..z), underscore, and U+410..U+44F (U+410..U+42F folded to U+430..U+44F).
    charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F

    # N-gram indexing with length 1 for the characters in U+3000..U+2FA1F.
    ngram_len     = 1
    ngram_chars   = U+3000..U+2FA1F
}
阅读(902) | 评论(0) | 转发(0) |