Chinaunix首页 | 论坛 | 博客
  • 博客访问: 372471
  • 博文数量: 242
  • 博客积分: 10
  • 博客等级: 民兵
  • 技术积分: 1134
  • 用 户 组: 普通用户
  • 注册时间: 2012-10-20 10:53
文章分类

全部博文(242)

文章存档

2015年(1)

2014年(10)

2013年(18)

2012年(213)

分类:

2012-11-07 10:39:12

使用iconv实现不同编码之间的转换

怎样学习使用libiconv库

    libiconv库是一个基于GNU协议的开源库,主要是解决多语言编码处理转换等应用问题。
    怎样学习使用libiconv库?对于刚接触该库的人来说,不妨看一看这篇文章;已经用过该库的人,如果在应用过程中遇到问题,我们可以一起探讨,我的联系方式是 。

    几个函数原型:
iconv_t iconv_open(const char *tocode, const char *fromcode);
size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
int iconv_close(iconv_t cd);
    其中:
iconv_open是打开一个编码流,类似于打开一个编码管道(通道),出错则返回 (iconv_t)-1 并设置errno;
iconv用于具体输入的转换,如果出错,则返回 (size_t)-1 并设置errno,否则返回本次调用中不可逆转换的字符个数(通常为 0);
iconv_close是关闭该管道(通道)。
    举个例子:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>

#define OUTLEN 255
int covert(char *desc, char *src, char *input, size_t ilen, char *output, size_t olen);

/*
 * Demo driver: converts a fixed string with covert() and prints the result.
 *
 * covert() takes the target charset first and the source charset second, so
 * this call converts from GBK to UTF-8.
 *
 * Returns 0 on success, 1 if the output buffer cannot be allocated or the
 * conversion fails.
 */
int main(int argc, char *argv[])
{
    char *input = "中国";
    size_t len = strlen(input);
    char *output = malloc(OUTLEN);

    (void)argc;
    (void)argv;

    if (output == NULL)
    {
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    /* Do not print the buffer unless the conversion actually succeeded. */
    if (covert("UTF-8", "GBK", input, len, output, OUTLEN) != 0)
    {
        fprintf(stderr, "conversion failed\n");
        free(output);
        return 1;
    }

    printf("%s\n", output);
    free(output);
    return 0;
}

/*
 * Convert ilen bytes at input from charset src to charset desc and store the
 * result in output, which has room for olen bytes.
 *
 * output is zero-filled before converting, so the result is NUL-terminated
 * whenever the converted text is shorter than olen bytes.
 *
 * Returns 0 on success, -1 if an argument is NULL, the charset pair is not
 * supported, or the conversion itself fails.
 */
int covert(char *desc, char *src, char *input, size_t ilen, char *output, size_t olen)
{
    iconv_t cd;
    size_t rc;

    if (desc == NULL || src == NULL || input == NULL || output == NULL)
    {
        return -1;
    }

    cd = iconv_open(desc, src);
    if (cd == (iconv_t)-1)
    {
        return -1;
    }

    memset(output, 0, olen);

    /*
     * iconv() advances the pointers and decrements the counters it is given;
     * input/output/ilen/olen are this function's own copies, so the caller's
     * variables are untouched.
     */
    rc = iconv(cd, &input, &ilen, &output, &olen);

    /* Close the descriptor on every path, including conversion failure. */
    iconv_close(cd);

    /* Only (size_t)-1 signals failure; other values count irreversible conversions. */
    return (rc == (size_t)-1) ? -1 : 0;
}
    这里面covert函数就是用于将编码进行转换,其中要注意的地方是iconv函数的传递参数:
1,iconv传递有5个参数;
2,第3个参数和第5个参数是指向字节数的指针(size_t *),分别表示输入缓冲区中尚未转换的字节数和输出缓冲区中剩余的可用字节数,iconv返回时会更新这两个值;对于以'\0'结尾的输入字符串,输入字节数一般取 strlen(string);
3,第2个和第4个参数不能直接传递原缓冲区指针变量的地址,因为iconv函数会改变指针的值(使其指向已处理数据之后的位置),所以需要复制一份指针变量再传入;
    如果对于大量需要转换的编码,上述函数covert不适合该方式,一是内存的限制不能一次调用,二是若分多次调用会频繁打开一个编码管道(通道),导致资源浪费,最好的办法还是拆开该函数根据情况使用。
    这里补充一下代码:
translateSP.h:
 #ifndef __TRANSLATESP_H_
 #define __TRANSLATESP_H_
 #include <iconv.h>
 #include <stdio.h>
 #include <string.h>
 
 class TranslateSP
 {
     public:
         TranslateSP():i_cd(0){}
         TranslateSP(const char *from_charset,const char *to_charset)
         {
             i_cd = iconv_open(to_charset, from_charset);
             if ((iconv_t)-1 == i_cd) printf("iconv open error!\n");
         }
         ~TranslateSP()
         {   
             if (i_cd)
                 iconv_close(i_cd);
         }
 
     public:
         size_t translate(char *src, size_t srcLen, char *desc, size_t descLen);
         size_t convert(const char *from_charset, const char *to_charset, 
                 char *src, size_t srcLen, char *desc, size_t descLen);
 
     private:
         iconv_t i_cd;
 };
 
 #endif
translateSP.cpp:
 #include "translateSP.h"
 
 #define MAX_LEN 200
 
 size_t TranslateSP::translate(char *src, size_t srcLen, char *desc, size_t descLen)
 {
     char **inbuf = &src;
     char **outbuf = &desc;
     memset(desc, 0, descLen);
     return iconv(i_cd, inbuf, &srcLen, outbuf, &descLen);
 }
 
 size_t TranslateSP::convert(const char *from_charset, const char *to_charset, 
         char *src, size_t srcLen, char *desc, size_t descLen)
 {
     char **inbuf = &src;
     char **outbuf = &desc;
     iconv_t cd = iconv_open(to_charset, from_charset);
     if ((iconv_t)-1 == cd) return (size_t)-1;
     memset(desc, 0, descLen);
     size_t n = iconv(cd, inbuf, &srcLen, outbuf, &descLen);
     iconv_close(cd);
     return n;
 }
 
 int main(int argc, char *argv[])
 {
     char *str = "我爱zhong国! %#@#";
     char *str1 = "i大量需要转换的编码";
     char *str2 = "函数就是用于将hello进行转换";
     char newstr[MAX_LEN];
     TranslateSP tsp;
     tsp.convert("utf-8", "gbk", str, strlen(str), newstr, MAX_LEN);
     printf("%s\n", newstr);
     TranslateSP newtsp("UTF-8", "GBK");
     newtsp.translate(str1, strlen(str1), newstr, MAX_LEN);
     printf("%s\n", newstr);
     newtsp.translate(str2, strlen(str2), newstr, MAX_LEN);
     printf("%s\n", newstr);
     return 0;
 }
编译:
g++ translateSP.cpp -o test
./test
我爱zhong国! %#@#
i大量需要转换的编码
函数就是用于将hello进行转换
(以上输出是GBK编码)

在Linux上进行编码转换时,可以利用iconv函数族编程实现。在虚拟机上我们直接使用系统自带的iconv即可,但要在开发板上运行,就需要自己交叉编译一个iconv库。下面大概说一下我的编译步骤和使用的例程。
1.首先下载一个libiconv-1.11.tar.gz压缩包,就放到/root/cross下,然后右击解压到此目录。
下载地址为:[url]ftp://ftp.eenet.ee/pub/gentoo/distfiles/libiconv-1.11.tar.gz[/url] ,可以直接用迅雷下载。
2.在终端中进入此目录:
[root@localhost ~]# [color=red]cd /root/cross/libiconv-1.11[/color]
3.配置并生成Makefile:
[root@localhost libiconv-1.11]# [color=red]./configure CC=arm-linux-gcc --build=i686-pc-linux --target=arm-linux --host=arm-linux prefix=/usr/local/arm/3.4.1/arm-linux --enable-shared --enable-shared --enable-static[/color]
4.编译并安装:
[root@localhost libiconv-1.11]# [color=red]make && make install[/color]
5.将生成好的iconv库拷贝到nfs下:
[root@localhost libiconv-1.11]# [color=red]cp /usr/local/arm/3.4.1/arm-linux/lib/libiconv.* /nfs/lib/[/color]
6.编写应用程序:
[root@localhost libiconv-1.11]# [color=red]cd /nfs/
[/color][root@localhost nfs]# [color=red]gedit conv.c[/color]
/* iconv.h was installed into the cross toolchain's own prefix, so the
   toolchain's default search path finds it without an absolute path. */
#include <iconv.h>
#include <stdio.h>
#include <string.h>

/*
 * Convert inlen bytes at inbuf from from_charset to to_charset and store the
 * result in outbuf, which has room for outlen bytes.
 *
 * outbuf is zero-filled first, so the result is NUL-terminated whenever the
 * converted text is shorter than outlen bytes.
 *
 * Returns 0 on success, -1 on a NULL buffer, a negative length, an
 * unsupported charset pair, or a failed conversion.
 */
int code_convert(char *from_charset,char *to_charset,char *inbuf,int inlen,char *outbuf,int outlen)
{
    iconv_t cd;
    size_t inleft;
    size_t outleft;
    size_t rc;

    if (inbuf == NULL || outbuf == NULL || inlen < 0 || outlen < 0)
    {
        return -1;
    }

    /* iconv_open() reports failure as (iconv_t)-1, not 0. */
    cd = iconv_open(to_charset,from_charset);
    if (cd == (iconv_t)-1)
    {
        return -1;
    }

    memset(outbuf,0,(size_t)outlen);

    /* iconv() wants size_t counters; handing it the address of an int is
       wrong wherever size_t is wider than int. */
    inleft = (size_t)inlen;
    outleft = (size_t)outlen;
    rc = iconv(cd,&inbuf,&inleft,&outbuf,&outleft);

    /* Close the descriptor on every path, including conversion failure. */
    iconv_close(cd);

    return (rc == (size_t)-1) ? -1 : 0;
}
/* Convert UTF-8 text in inbuf to GB2312 in outbuf; returns code_convert()'s result. */
int u2g(char *inbuf,int inlen,char *outbuf,int outlen)
{
    int status;

    status = code_convert("utf-8","gb2312",inbuf,inlen,outbuf,outlen);
    return status;
}
/*
 * Convert GB2312 text in inbuf to UTF-8 in outbuf.
 *
 * Returns code_convert()'s result, or -1 when a length does not fit in the
 * int parameters that code_convert() takes.
 */
int g2u(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
{
    /* Largest int value, assuming unsigned int has one more value bit than
       int (true on common platforms); avoids depending on <limits.h>. */
    const size_t int_max = (size_t)(~0u >> 1);

    /* Refuse lengths that would be silently truncated by the int conversion. */
    if (inlen > int_max || outlen > int_max)
    {
        return -1;
    }
    return code_convert("gb2312","utf-8",inbuf,(int)inlen,outbuf,(int)outlen);
}

/*
 * Demo: converts a fixed UTF-8 byte sequence to GB2312 with u2g() and prints it.
 *
 * Returns 0 on success, 1 if the conversion fails.
 */
int main(void)
{
    unsigned char in_utf8[] = {0x20,0xe8,0xbf,0x99,0xe6,0x98,0xaf,0xe4,0xb8,0x80,0xe4,0xb8,0xaa,0x69,0x63,0x6f,0x6e,0x76,0xe7,0x9a,0x84,0xe6,0xb5,0x8b,0xe8,0xaf,0x95,0xe4,0xbe,0x8b,0xe7,0xa8,0x8b,0xef,0xbc,0x9a,0xa,0xe2,0x80,0x9c,0xe7,0xba,0xb5,0xe6,0xb5,0xb7,0xe6,0x9d,0xaf,0xe2,0x80,0x9d,0xe4,0xb8,0x9c,0xe5,0x8d,0x97,0xe5,0xa4,0xa7,0xe5,0xad,0xa6,0xe7,0xac,0xac,0xe4,0xb8,0x89,0xe5,0xb1,0x8a,0xe5,0xb5,0x8c,0xe5,0x85,0xa5,0xe5,0xbc,0x8f,0xe7,0xb3,0xbb,0xe7,0xbb,0x9f,0xe8,0xae,0xbe,0xe8,0xae,0xa1,0xe9,0x82,0x80,0xe8,0xaf,0xb7,0xe8,0xb5,0x9b};    /* UTF-8 encoded input */
    char out[100];
    int rc;

    /* The initializer has no terminating 0 byte, so strlen() would read past
       the end of the array; the length has to come from sizeof. */
    rc = u2g((char *)in_utf8,(int)sizeof(in_utf8),out,100);
    if (rc != 0)
    {
        printf("u2g failed\n");
        return 1;
    }
    printf("%s\n",out);
    return 0;
}
保存并退出。
7.交叉编译此程序:
[root@localhost nfs]#  [color=red]arm-linux-gcc -o iconv conv.c -liconv[/color]
8.在开发板上运行此程序:
/ # [b]./iconv[/b] 
[color=magenta] 这是一个iconv的测试例程:
“纵海杯”东南大学第三届嵌入式系统设计邀请赛[/color]

[[i] 本帖最后由 youren4548 于 2010-5-10 22:40 编辑 [/i]]

zyfo2 发表于 2010-5-10 23:53

回复 1# youren4548 的帖子

学长,我试了下,utf8和gb2312的确可以转换,但unicode 却不识别,是少什么库文件吧?

youren4548 发表于 2010-5-11 00:00

iconv也可以实现Unicode至GB2312的转换,下面是一个测试代码:
#include "/usr/local/arm/3.4.1/arm-linux/include/iconv.h"
#include <stdio.h>
#include <string.h>
/*
 * Convert inlen bytes at inbuf from from_charset to to_charset and store the
 * result in outbuf, which has room for outlen bytes.
 *
 * outbuf is zero-filled first, so the result is NUL-terminated whenever the
 * converted text is shorter than outlen bytes.
 *
 * Returns 0 on success, -1 on a NULL buffer, a negative length, an
 * unsupported charset pair, or a failed conversion.
 */
int code_convert(char *from_charset,char *to_charset,char *inbuf,int inlen,char *outbuf,int outlen)
{
        iconv_t cd;
        size_t inleft;
        size_t outleft;
        size_t rc;

        if (inbuf == NULL || outbuf == NULL || inlen < 0 || outlen < 0)
        {
                return -1;
        }

        /* iconv_open() reports failure as (iconv_t)-1, not 0. */
        cd = iconv_open(to_charset,from_charset);
        if (cd == (iconv_t)-1)
        {
                return -1;
        }

        memset(outbuf,0,(size_t)outlen);

        /* iconv() wants size_t counters; handing it the address of an int is
           wrong wherever size_t is wider than int. */
        inleft = (size_t)inlen;
        outleft = (size_t)outlen;
        rc = iconv(cd,&inbuf,&inleft,&outbuf,&outleft);

        /* Close the descriptor on every path, including conversion failure. */
        iconv_close(cd);

        return (rc == (size_t)-1) ? -1 : 0;
}

/* Convert UTF-16LE text in inbuf to GB2312 in outbuf; returns code_convert()'s result. */
int u2g(char *inbuf,int inlen,char *outbuf,int outlen)
{
        int status;

        status = code_convert("UTF-16LE","gb2312",inbuf,inlen,outbuf,outlen);
        return status;
}


/*
 * Demo: converts a fixed UTF-16LE byte sequence to GB2312 with u2g() and
 * prints it.
 *
 * Returns 0 on success, 1 if the conversion fails.
 */
int main(void)
{
        unsigned char utf_16le[] = {0xd9,0x8f,0x2f,0x66,0x00,0x4e,0x2a,0x4e,0x69,0x0,0x63,0x00,0x6f,0x0,0x6e,0x0,0x76,0x0,0x84,0x76,0x4b,0x6d,0xd5,0x8b,0x8b,0x4f,0xb,0x7a,0x1a,0xff,0xa,0x0,0x1c,0x20,0xb5,0x7e,0x77,0x6d,0x6f,0x67,0x1d,0x20,0x1c,0x4e,0x57,0x53,0x27,0x59,0x66,0x5b,0x2c,0x7b,0x9,0x4e,0x4a,0x5c,0x4c,0x5d,0x65,0x51,0xf,0x5f,0xfb,0x7c,0xdf,0x7e,0xbe,0x8b,0xa1,0x8b};                                /* UTF-16LE encoded input */
        char out[100];
        int rc;

        /* The data contains 0x00 bytes, so its length is taken from sizeof. */
        rc = u2g((char *)utf_16le,(int)sizeof(utf_16le),out,100);
        if (rc != 0)
        {
                printf("u2g failed\n");
                return 1;
        }
        printf("%s\n",out);
        return 0;
}

阅读(401) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~