Chinaunix首页 | 论坛 | 博客
  • 博客访问: 17912488
  • 博文数量: 7460
  • 博客积分: 10434
  • 博客等级: 上将
  • 技术积分: 78178
  • 用 户 组: 普通用户
  • 注册时间: 2008-03-02 22:54
文章分类

全部博文(7460)

文章存档

2011年(1)

2009年(669)

2008年(6790)

分类:

2008-05-29 21:46:51

如果使用 iconv() 函数转换编码就相对比较简单了,不过很多虚拟主机并不支持这个组件。我在网上找了半天,才找到一个 gb2312 转 utf-8 的方法,但它不能逆向转换。

 

这个函数如下:

以下为引用的内容:
/*******************************
//GB转UTF-8编码
*******************************/
/**
 * Convert a GB2312-encoded byte string to UTF-8.
 *
 * Bytes <= 0x7F are copied through unchanged. Any byte above 0x7F is taken as
 * the first byte of a two-byte sequence; (sequence - 0x8080) is looked up in
 * the mapping table, and the mapped code point is encoded through u2utf8().
 * Sequences that have no table entry (including a dangling lead byte at the
 * end of the input) are dropped from the output.
 *
 * The table is read once from "gb2312-utf8.table" in this file's directory and
 * cached in the global $CODETABLE. Each record is read with fgets($fp, 15):
 * characters 0-5 are the hex key, characters 7-12 the hex value.
 *
 * @param string $gbstr Input bytes, expected to be GB2312.
 * @return string UTF-8 text. Empty or whitespace-only input is returned unchanged.
 */
function gb2utf8($gbstr) {
    global $CODETABLE;
    if (trim($gbstr) == "") return $gbstr;

    // hexdec() ignores non-hex characters (e.g. a "0x" prefix or a newline) but
    // reports them as deprecated on newer PHP versions, so strip them up front.
    $nonHex = '/[^0-9A-Fa-f]/';

    if (empty($CODETABLE)) {
        $CODETABLE = array();
        $filename = dirname(__FILE__)."/gb2312-utf8.table";
        $fp = is_readable($filename) ? fopen($filename, "r") : false;
        if ($fp === false) {
            // Without the table only ASCII can be converted; say so instead of
            // failing inside fgets()/fclose() on a non-resource.
            trigger_error("gb2utf8(): cannot open mapping table ".$filename, E_USER_WARNING);
        } else {
            while (($l = fgets($fp, 15)) !== false) {
                // Chunks too short to hold a value column (e.g. a bare line ending) carry no mapping.
                if (strlen($l) <= 7) continue;
                $CODETABLE[hexdec(preg_replace($nonHex, '', substr($l, 0, 6)))] = substr($l, 7, 6);
            }
            fclose($fp);
        }
    }

    $gbstr = (string) $gbstr;
    $ret = "";
    $len = strlen($gbstr);
    $pos = 0;
    // Walk by index rather than testing the remaining string for truthiness:
    // a remaining "0" is falsy in PHP and would silently end the conversion.
    while ($pos < $len) {
        if (ord($gbstr[$pos]) <= 127) {
            $ret .= $gbstr[$pos];
            $pos++;
            continue;
        }

        $thisW = substr($gbstr, $pos, 2);
        $pos += 2;

        $key = hexdec(bin2hex($thisW)) - 0x8080;
        if (!isset($CODETABLE[$key])) continue;

        $hex = preg_replace($nonHex, '', $CODETABLE[$key]);
        if ($hex == "") continue;

        // u2utf8() returns the UTF-8 bytes as concatenated decimal numbers,
        // three digits per byte for every code point >= 0x80.
        $utf8 = u2utf8(hexdec($hex));
        for ($i = 0; $i < strlen($utf8); $i += 3) {
            $ret .= chr((int) substr($utf8, $i, 3));
        }
    }
    return $ret;
}
/**
 * Encode a Unicode code point as UTF-8, returned as decimal byte values.
 *
 * The return value is NOT a raw byte string: each UTF-8 byte is appended as
 * its decimal number, e.g. 0x4E2D (bytes E4 B8 AD) yields "228184173". For
 * code points >= 0x80 every byte is >= 128 and therefore exactly three digits
 * long, which is what allows gb2utf8() to cut the result into three-character
 * groups. A code point below 0x80 yields just its own decimal value ("65").
 *
 * @param int $c Unicode code point.
 * @return string Concatenated decimal byte values; "" when $c >= 0x200000.
 */
function u2utf8($c) {
    // The original initialised $str inside a loop bounded by count($c);
    // count() on an integer is an error on current PHP, so initialise directly.
    $str = "";
    if ($c < 0x80) {
        $str .= $c;
    } elseif ($c < 0x800) {
        $str .= (0xC0 | ($c >> 6));
        $str .= (0x80 | ($c & 0x3F));
    } elseif ($c < 0x10000) {
        $str .= (0xE0 | ($c >> 12));
        $str .= (0x80 | (($c >> 6) & 0x3F));
        $str .= (0x80 | ($c & 0x3F));
    } elseif ($c < 0x200000) {
        $str .= (0xF0 | ($c >> 18));
        $str .= (0x80 | (($c >> 12) & 0x3F));
        $str .= (0x80 | (($c >> 6) & 0x3F));
        $str .= (0x80 | ($c & 0x3F));
    }
    return $str;
}

因为 gb2312 都是双字节的,因此转换为 utf-8 相对比较简单,但反过来就很麻烦了。我尝试了一下:

这样

/**
 * Convert a UTF-8 string to GB2312 (byte-wise variant).
 *
 * ASCII bytes are copied through. Two- and three-byte UTF-8 sequences are
 * decoded to a code point and looked up in the global $UC2GBTABLE; the mapped
 * value plus 0x8080 is written as two bytes. Code points with no table entry,
 * malformed or truncated sequences, and four-byte sequences are dropped.
 *
 * The table is read once from "gb2312-utf8.table" in this file's directory:
 * characters 7-12 of each record are the hex key (code point), characters 0-5
 * the hex value.
 *
 * @param string $utfstr UTF-8 input.
 * @return string Converted bytes, passed through trim(). Empty or
 *                whitespace-only input is returned unchanged.
 */
function utf82gb($utfstr)
{
    global $UC2GBTABLE;
    if (trim($utfstr) == "") return $utfstr;

    if (empty($UC2GBTABLE)) {
        $UC2GBTABLE = array();
        // hexdec() ignores non-hex characters (e.g. a "0x" prefix) but reports
        // them as deprecated on newer PHP versions, so strip them first.
        $nonHex = '/[^0-9A-Fa-f]/';
        $filename = dirname(__FILE__)."/gb2312-utf8.table";
        $fp = is_readable($filename) ? fopen($filename, "r") : false;
        if ($fp === false) {
            trigger_error("utf82gb(): cannot open mapping table ".$filename, E_USER_WARNING);
        } else {
            while (($l = fgets($fp, 15)) !== false) {
                // Chunks too short to hold the second column carry no mapping.
                if (strlen($l) <= 7) continue;
                $u = hexdec(preg_replace($nonHex, '', substr($l, 7, 6)));
                $UC2GBTABLE[$u] = hexdec(preg_replace($nonHex, '', substr($l, 0, 6)));
            }
            fclose($fp);
        }
    }

    $utfstr = (string) $utfstr;
    $okstr = "";
    $ulen = strlen($utfstr);
    for ($i = 0; $i < $ulen; $i++) {
        $lead = ord($utfstr[$i]);
        if ($lead < 0x80) {
            $okstr .= $utfstr[$i];
            continue;
        }

        // Sequence length and payload bits come from the lead byte.
        if ($lead >= 0xC2 && $lead <= 0xDF) {
            $extra = 1;
            $u = $lead & 0x1F;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $extra = 2;
            $u = $lead & 0x0F;
        } else {
            // Stray continuation byte, overlong lead, or a four-byte lead.
            continue;
        }

        $valid = ($i + $extra < $ulen);
        for ($k = 1; $valid && $k <= $extra; $k++) {
            $b = ord($utfstr[$i + $k]);
            if (($b & 0xC0) != 0x80) {
                $valid = false;
            } else {
                $u = ($u << 6) | ($b & 0x3F);
            }
        }
        // On a malformed sequence skip only the lead byte and resynchronise.
        if (!$valid) continue;

        // Step over the continuation bytes that were just consumed; without
        // this they would be re-read as lead bytes on the next iterations.
        $i += $extra;

        if (isset($UC2GBTABLE[$u])) {
            $gb = $UC2GBTABLE[$u] + 0x8080;
            $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
        }
    }
    return trim($okstr);
}

/**
 * Decode a three-byte UTF-8 sequence into its code point.
 *
 * The first byte is masked with 0x1F and the other two with 0x3F. For lead
 * bytes 0xE0-0xEF the fifth masked bit is always zero, so the result equals
 * the code point of the sequence.
 *
 * @param string $c At least three bytes; only the first three are read.
 * @return int
 */
function utf82u_3($c)
{
      $high = ord($c[0]) & 0x1f;
      $mid  = ord($c[1]) & 0x3f;
      $low  = ord($c[2]) & 0x3f;
      // The three bit fields never overlap, so OR-ing them equals adding them.
      return ($high << 12) | ($mid << 6) | $low;
}

按这种方法,大部分字符也算是能转换成功了,不过总是有些不妥之处,于是我把程序改成了这样:

/**
 * Convert a UTF-8 string to GB2312 (URL-encoding variant).
 *
 * The input is run through urlencode() and the encoded text is walked:
 *  - "+" becomes a space, unescaped characters are copied as they are;
 *  - "%XX" below 0x80 becomes that single byte;
 *  - a three-byte group "%EX%XX%XX" is decoded with url_utf2u(), a two-byte
 *    group "%XX%XX" (lead 0xC2-0xDF) is decoded inline; the code point is
 *    looked up in the global $UC2GBTABLE and the mapped value plus 0x8080 is
 *    written as two bytes;
 *  - anything else (unmapped code points, malformed or truncated sequences,
 *    four-byte sequences) is dropped.
 *
 * The table is read once from "gb2312-utf8.table" in this file's directory:
 * characters 7-12 of each record are the hex key (code point), characters 0-5
 * the hex value.
 *
 * @param string $utfstr UTF-8 input.
 * @return string Converted bytes, passed through trim(). Empty or
 *                whitespace-only input is returned unchanged.
 */
function utf82gb($utfstr)
{
    global $UC2GBTABLE;
    if (trim($utfstr) == "") return $utfstr;

    if (empty($UC2GBTABLE)) {
        $UC2GBTABLE = array();
        // hexdec() ignores non-hex characters (e.g. a "0x" prefix) but reports
        // them as deprecated on newer PHP versions, so strip them first.
        $nonHex = '/[^0-9A-Fa-f]/';
        $filename = dirname(__FILE__)."/gb2312-utf8.table";
        $fp = is_readable($filename) ? fopen($filename, "r") : false;
        if ($fp === false) {
            trigger_error("utf82gb(): cannot open mapping table ".$filename, E_USER_WARNING);
        } else {
            while (($l = fgets($fp, 15)) !== false) {
                // Chunks too short to hold the second column carry no mapping.
                if (strlen($l) <= 7) continue;
                $key = hexdec(preg_replace($nonHex, '', substr($l, 7, 6)));
                $UC2GBTABLE[$key] = hexdec(preg_replace($nonHex, '', substr($l, 0, 6)));
            }
            fclose($fp);
        }
    }

    $okstr = "";
    $utfstr = urlencode($utfstr);
    $ulen = strlen($utfstr);
    for ($i = 0; $i < $ulen; $i++) {
        $ch = $utfstr[$i];
        if ($ch == "+") {
            $okstr .= " ";
            continue;
        }
        if ($ch != "%") {
            $okstr .= $ch;
            continue;
        }

        // urlencode() always writes an escaped byte as "%" plus two hex digits.
        $byte = hexdec(substr($utfstr, $i + 1, 2));
        if ($byte < 0x80) {
            $okstr .= chr($byte);
            $i += 2;
            continue;
        }

        $u = null;
        $three = substr($utfstr, $i + 1, 8);   // "XX%XX%XX"
        $two = substr($utfstr, $i + 1, 5);     // "XX%XX"
        if (preg_match('/^E[0-9A-F]%[89AB][0-9A-F]%[89AB][0-9A-F]$/', $three)) {
            $u = url_utf2u($three);
            $i += 8;
        } elseif ($byte >= 0xC2 && $byte <= 0xDF
                  && preg_match('/^[0-9A-F]{2}%[89AB][0-9A-F]$/', $two)) {
            $u = (($byte & 0x1F) << 6) | (hexdec(substr($utfstr, $i + 4, 2)) & 0x3F);
            $i += 5;
        } else {
            // Not the start of a convertible sequence: skip this one escaped
            // byte so its hex digits are not copied to the output as text.
            $i += 2;
        }

        if ($u !== null && isset($UC2GBTABLE[$u])) {
            $gb = $UC2GBTABLE[$u] + 0x8080;
            $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
        }
    }
    return trim($okstr);
}
/**
 * Decode a URL-encoded three-byte UTF-8 character into its code point.
 *
 * @param string $c Three hex byte values separated by "%", without a leading
 *                  "%", e.g. "E4%B8%AD".
 * @return int Code point built from the first three groups; missing groups
 *             count as 0.
 */
function url_utf2u($c)
{
    // explode() replaces split(), which no longer exists in PHP 7+.
    // The array union pads absent groups with 0 so indexes 0..2 always exist.
    $bytes = array_map('hexdec', explode("%", $c)) + array(0, 0, 0);

    $n = ($bytes[0] & 0x1f) << 12;
    $n += ($bytes[1] & 0x3f) << 6;
    $n += $bytes[2] & 0x3f;
    return $n;
}

阅读(603) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~