裁剪libiconv
项目里面只需要UTF8与GB2312的转换。
1.下载源代码并编译,编写测试代码,用gdb调试,查看UTF-8与GB2312转换时调用的函数,分别是xxx_wctomb和xxx_mbtowc(定义在utf8.h和euc_cn.h中);aliases.gperf文件里面定义了“UTF-8, ei_utf8”和“GB2312, ei_euc_cn”这两项。
2.对encodings.def文件进行修改,最后只剩下:
-
DEFENCODING(( "US-ASCII", /* IANA */
-
"ASCII", /* IANA, JDK 1.1 */
-
"ISO646-US", /* IANA */
-
"ISO_646.IRV:1991", /* IANA */
-
"ISO-IR-6", /* IANA */
-
"ANSI_X3.4-1968", /* IANA */
-
"ANSI_X3.4-1986", /* IANA */
-
"CP367", /* IANA */
-
"IBM367", /* IANA */
-
"US", /* IANA */
-
"csASCII", /* IANA */
-
/*"ISO646.1991-IRV", X11R6.4 */
-
),
-
ascii,
-
{ ascii_mbtowc, NULL }, { ascii_wctomb, NULL })

/* General multi-byte encodings */

DEFENCODING(( "UTF-8", /* IANA, RFC 2279 */
-
/*"UTF8", JDK 1.1 */
-
/*"CP65001", Windows */
-
),
-
utf8,
-
{ utf8_mbtowc, NULL }, { utf8_wctomb, NULL })
-
-
-
DEFENCODING(( "GB_2312-80", /* IANA */
-
"ISO-IR-58", /* IANA */
-
"csISO58GB231280", /* IANA */
-
"CHINESE", /* IANA */
-
/*"GB2312.1980-0", X11R6.4 */
-
),
-
gb2312,
-
{ gb2312_mbtowc, NULL }, { gb2312_wctomb, NULL })
-
-
DEFENCODING(( "EUC-CN", /* glibc */
-
"EUCCN", /* glibc, IRIX */
-
"GB2312", /* IANA */
-
"CN-GB", /* RFC 1922 */
-
"csGB2312", /* IANA */
-
/*"EUC_CN", JDK 1.1 */
-
/*"CP51936", Windows */
-
),
-
euc_cn,
-
{ euc_cn_mbtowc, NULL }, { euc_cn_wctomb, NULL })
3.然后对aliases.h进行修改,删除没有用的编码。
-
/*
 * String pool holding the text of the alias names that remain after
 * trimming.  Every member is sized with sizeof on its literal, so it has
 * room for the name plus the terminating NUL.
 */
struct stringpool_t
{
  char stringpool_str117[sizeof("CHINESE")];
  char stringpool_str120[sizeof("ASCII")];
  char stringpool_str165[sizeof("GB2312")];
  char stringpool_str255[sizeof("UTF-8")];
};

/* Pool contents; the initializers follow the member order of the struct. */
static const struct stringpool_t stringpool_contents =
{
  "CHINESE",
  "ASCII",
  "GB2312",
  "UTF-8",
};
-
/* 与上面的struct stringpool_t对应起来 */
-
static const struct alias aliases[] =
-
{
-
#line 14 "lib/aliases.gperf" /* 与gperf文件中的行号对应起来*/
-
{(int)(long)&((struct stringpool_t *)0)->stringpool_str117, ei_gb2312},
-
#line 12 "lib/aliases.gperf"
-
{(int)(long)&((struct stringpool_t *)0)->stringpool_str120, ei_ascii},
-
#line 15 "lib/aliases.gperf"
-
{(int)(long)&((struct stringpool_t *)0)->stringpool_str165, ei_euc_cn},
-
#line 13 "lib/aliases.gperf"
-
{(int)(long)&((struct stringpool_t *)0)->stringpool_str255, ei_utf8},
-
};
-
-
/*修改为只支持UTF8和GB2312,不用在查找,可以删除多余的代码*/
-
const struct alias *
-
aliases_lookup (register const char *str, register unsigned int len)
-
{
-
if (!strcmp(str, "UTF-8"))
-
return &aliases[3];
-
else if (!strcmp(str, "GB2312"))
-
return &aliases[2];
-
return 0;
-
}
4.修改aliases.gperf,删除大部分编码,只剩下你需要的编码类型。
-
struct alias { int name; unsigned int encoding_index; };
%struct-type
%language=ANSI-C
%define hash-function-name aliases_hash
%define lookup-function-name aliases_lookup
%7bit
%readonly-tables
%global-table
%define word-array-name aliases
%pic
%%
ASCII, ei_ascii
UTF-8, ei_utf8
CHINESE, ei_gb2312
GB2312, ei_euc_cn
# Keep the four keywords above on lines 12-15 of this file: the "#line"
# directives in the hand-edited aliases.h cite exactly those line numbers.
5.然后再make,会有几个文件报错(canonical.h等),对报错的地方进行屏蔽即可,具体改动请参考已上传的裁剪后的libiconv源代码包。
裁剪过后源代码包在这个帖子里面可以下载:
阅读(2441) | 评论(0) | 转发(0) |