######### Install the mmseg word-segmentation engine ###########
# Work in /dist/src; the archive is read from ../dist relative to it (i.e. /dist/dist).
cd /dist/src
unzip ../dist/mmseg-0.7.2.zip
cd mmseg-0.7.2/
# Give the owner read/write/execute on configure so it can be run directly.
chmod 700 configure
./configure
# Install only when the build succeeds.
make && make install
######### Install the scws word-segmentation engine ###########
cd /dist/src
tar xzf ../dist/scws-1.0.0.tar.gz
cd scws-1.0.0/
./configure --prefix=/usr/local/scws
make && make install
# Copy the .txt files from the source tree's etc/ into the installed etc/ directory.
cp etc/*.txt /usr/local/scws/etc/
# Show the help text of the dictionary generator
/usr/local/scws/bin/gen_scws_dict -h
cd /usr/local/scws/etc
/usr/local/scws/bin/gen_scws_dict -c gbk -i dict_chs_gbk.txt -o dict_gbk.xdb
# This takes a while; the resulting usable xdb file ends up in /usr/local/scws/etc/
# Note: to use utf8 encoding, convert dict_chs_gbk.txt to utf8 first and then run gen_scws_dict on it.
# [PHP extension installation]
cd /dist/src/scws-1.0.0/phpext/
/usr/local/php/bin/phpize
./configure --with-scws=/usr/local/scws --with-php-config=/usr/local/php/bin/php-config
make && make install
# Add the following lines to php.ini
# NOTE(review): the four lines below are php.ini content, not shell commands; do not run them in a shell.
[scws]
extension = scws.so
scws.default.charset = gbk
scws.default.fpath = /usr/local/scws/etc
######### Install the sphinx full-text search system (the Chinese-support patch must be applied) ###########
cd /dist/src
tar xzf ../dist/sphinx-0.9.8-rc2.tar.gz
cd sphinx-0.9.8-rc2/
patch -p1 < ../../dist/sphinx-0.98rc2.zhcn-support.patch
patch -p1 < ../../dist/fix-crash-in-excerpts.patch
# Register the MySQL client library directory with the dynamic linker
# persistently, before building. Earlier revisions of these notes ran
# `ldconfig /usr/local/mysql/lib/mysql` and `ldconfig /usr/local/mysql/include/mysql`
# here instead. A directory passed on the ldconfig command line is only
# included in that one cache rebuild, and the include directory holds headers
# rather than shared libraries, so indexer still failed afterwards with
# "libmysqlclient.so.15 not found". A drop-in file under /etc/ld.so.conf.d
# survives every later ldconfig run.
echo '/usr/local/mysql/lib/mysql' > /etc/ld.so.conf.d/mysqlclient15.conf
ldconfig
./configure --prefix=/usr/local/sphinx --with-mmseg --with-mysql=/usr/local/mysql
# The --with-mysql option is required; without it sphinx cannot connect to the MySQL database.
# The build fails complaining that this header cannot be found, so copy it into place.
cp ../mmseg-0.7.2/src/csr_typedefs.h /usr/local/include/mmseg/
make && make install
## Creating a symbolic link to the library inside the sphinx bin directory does NOT work:
## ln -s /usr/local/mysql/lib/mysql/libmysqlclient.so.15 /usr/local/sphinx/bin/
######### Install the sphinxSE storage engine (optional; without it only the sphinx API can be used, not the SQL interface) ###########
cd /dist/src
cd mysql-5.0.45/
# Apply the sphinx storage-engine patch (named for 5.0.37) to the MySQL 5.0.45 source tree.
patch -p1 < ../sphinx-0.9.8-rc2/mysqlse/sphinx.5.0.37.diff
sh BUILD/autorun.sh
# Place the sphinxSE sources inside the MySQL tree at sql/sphinx.
mkdir -p sql/sphinx
cp -R ../sphinx-0.9.8-rc2/mysqlse/* sql/sphinx
# NOTE(review): three ./configure invocations follow. The first is built and
# installed with no --prefix, and the second is immediately superseded by the
# third before any make runs. They look like alternative option sets rather
# than steps meant to be executed in sequence; confirm which one is intended.
./configure --with-sphinx-storage-engine
make && make install
# Variant with Pentium 4 compiler flags and statically linked client/server.
CHOST="i686-pc-linux-gnu" \
CFLAGS="-O3 -msse2 -mmmx -mfpmath=sse -mcpu=pentium4 -march=pentium4 -pipe -fomit-frame-pointer" \
CXXFLAGS="-O3 -msse2 -mmmx -mfpmath=sse -funroll-loops -mcpu=pentium4 -march=pentium4 -pipe -fomit-frame-pointer" \
./configure --prefix=/usr/local/mysql \
--with-unix-socket-path=/tmp/mysql.sock \
--with-comment=Source \
--with-server-suffix=-Community-Server \
--with-charset=gbk \
--with-extra-charsets=all \
--with-mysqld-user=mysql \
--without-debug \
--with-max-indexes=128 \
--with-archive-storage-engine \
--with-csv-storage-engine \
--with-pthread \
--enable-static \
--enable-thread-safe-client \
--with-client-ldflags=-all-static \
--with-mysqld-ldflags=-all-static \
--enable-assembler \
--with-readline \
--with-big-tables \
--with-sphinx-storage-engine \
--without-ndb-debug
# Variant with a shorter option list: no custom compiler flags and no static-link options.
./configure --prefix=/usr/local/mysql \
--with-unix-socket-path=/tmp/mysql.sock \
--with-charset=gbk \
--with-extra-charsets=all \
--with-mysqld-user=mysql \
--with-archive-storage-engine \
--with-csv-storage-engine \
--with-pthread \
--enable-thread-safe-client \
--enable-assembler \
--with-readline \
--with-big-tables \
--with-sphinx-storage-engine
# Discard objects from the earlier build before rebuilding with the new configuration.
make clean
make && make install
# Check whether the engine was installed successfully
# NOTE(review): the next line is an SQL statement, presumably meant to be typed in the mysql client rather than the shell.
show engines;
----------------------------------
# Run indexer with --all, start searchd, and run the search tool, all against cc.conf.
# NOTE(review): the trailing `sphinx` argument to search is presumably the query term; confirm.
/usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/cc.conf --all
/usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/cc.conf
/usr/local/sphinx/bin/search -c /usr/local/sphinx/etc/cc.conf sphinx
# Same indexer and search steps against ddd.conf (searchd is not started for this config here).
/usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/ddd.conf --all
/usr/local/sphinx/bin/search -c /usr/local/sphinx/etc/ddd.conf sphinx
-------------------------------------------------------------------------------
转换现有表的数据
# Convert dede_archives.txt from GB18030 to UTF-8, writing the result to ddd.txt.
iconv -f GB18030 -t utf-8 -o ddd.txt dede_archives.txt
也可以不转换,直接使用现有的GBK的数据,但是需要设置连接方式
// Declare the client and connection character sets as GBK while requesting results in UTF-8.
mysql_query("SET character_set_client = 'gbk'", $conn);
mysql_query("SET character_set_connection ='gbk'", $conn); // setting character_set_connection also sets collation_connection
//mysql_query("SET collation_connection = 'gbk'", $conn);
mysql_query("SET character_set_results ='utf8'", $conn);
这三项设置完成后,查询得到的结果就是 UTF-8 编码的,适合给 SPHINX 使用。
mysql_query("SET SESSION query_cache_type=OFF", $conn); // queries issued while indexer builds the index do not need to be cached
阅读(1014) | 评论(0) | 转发(0) |