Chinaunix首页 | 论坛 | 博客
  • 博客访问: 130645
  • 博文数量: 42
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 354
  • 用 户 组: 普通用户
  • 注册时间: 2014-07-01 15:34
个人简介

不晓得说啥子

文章分类

全部博文(42)

文章存档

2015年(41)

2014年(1)

我的朋友

分类: C/C++

2015-04-03 16:40:51

1、gb2312编码无法与utf-8编码直接转换

2、我采用的方式是:  gb2312--->unicode--->utf-8


     由于gb2312编码与unicode编码有现成的对应码表, 所以可以根据码表将gb2312编码先转换为unicode编码; 又由于utf-8编码是unicode的表示方式之一, 所以可以进一步得到对应的utf-8编码

     下面是gb2312编码(经由unicode)转换成utf-8编码的代码: 


点击(此处)折叠或打开

  1. int gb2312_to_utf8(unsigned char *gb2312string, int gb2312string_len, unsigned char *utf8string)
  2.  {
  3.          int i;
  4.          unsigned int index1,index2;
  5.  
  6.          wchar tmp;
  7.          wchar *p;
  8.  
  9.          uint_8 *gb2312_char;
  10.          uint_8 *utf8_char;
  11.          int utf8string_len = 0;
  12.          wchar *twochar;
  13.          gb2312_char =(uint_8*)gb2312string;
  14.          utf8_char = (uint_8 *)utf8string;
  15.          i = 0;
  16.          while(i<gb2312string_len){
  17.                  if(*gb2312_char >= 0x0 && *gb2312_char <= 0x7f){
  18.                   //是字母 直接存 不用转换 (因为字母编码都一样)
  19.                          *utf8_char = *gb2312_char;
  20.                          utf8_char++;
  21.                          utf8string_len++;
  22.                          gb2312_char++;
  23.                          i++;
  24.                  }
  25.                  else{
  26.                          twochar = (wchar *)gb2312_char;
  27.                          // 检测当前的两个字符是否是正确的gb2312编码
  28.                          if(tmp <= 0xa0a0 && tmp >=0xf7ff) {
  29.                                  fprintf(stderr,"Invalid Gb3212 code\n");
  30.                                  return -1;
  31.                          }
  32.                          tmp = *twochar - 0xa0a0;
  33.                          index1 = tmp >> 8;
  34.                          index2 = tmp & 0xff;

  35.                          tmp = gb_2_uni[index1][index2];
  36.                          *(utf8_char) = ((tmp >> 12) & 0x0f) | 0xe0;
  37.                          *(utf8_char+2) = (tmp & 0x3f) | 0x80;
  38.                         *(utf8_char+1) = ((tmp >> 6) & 0x3f) |0x80;

  39.                          utf8_char += 3;
  40.                          utf8string_len += 3;
  41.                          gb2312_char += 2;
  42.                          i +=2;
  43.                  }
  44.          }
  45.          return utf8string_len;
  46.  }


阅读(1530) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~