// The following is quoted content:
/*******************************
 * GB2312 to UTF-8 conversion
 *******************************/

/**
 * Converts a GB2312-encoded byte string to UTF-8.
 *
 * The mapping table is read once from "gb2312-utf8.table" in this file's
 * directory and cached in the global $CODETABLE. For every line of that file,
 * the first six characters are parsed as a hexadecimal key (the two-byte GB
 * code minus 0x8080) and the six characters starting at offset 7 are stored
 * as the hexadecimal Unicode code point.
 * NOTE(review): the exact on-disk layout (e.g. a "0x" prefix) is not visible
 * here; non-hex characters are stripped before parsing, which is what
 * hexdec() itself does implicitly.
 *
 * Bytes <= 127 are copied unchanged. Any byte > 127 is consumed together with
 * the following byte as one GB character. Characters with no table entry, and
 * a lone high byte at the very end of the input, are dropped.
 *
 * @param string $gbstr GB2312-encoded input.
 * @return string UTF-8 output; blank/whitespace-only input is returned as is.
 * @throws RuntimeException If the mapping table cannot be opened.
 */
function gb2utf8($gbstr)
{
    global $CODETABLE;

    if (trim($gbstr) == "") {
        return $gbstr;
    }

    $nonHex = '/[^0-9A-Fa-f]/';

    if (empty($CODETABLE)) {
        $filename = dirname(__FILE__) . "/gb2312-utf8.table";
        $fp = fopen($filename, "r");
        if ($fp === false) {
            throw new RuntimeException("Cannot open GB2312 mapping table: " . $filename);
        }
        // Compare against false explicitly so a falsy line cannot end the loop early.
        while (($l = fgets($fp, 15)) !== false) {
            $key = hexdec(preg_replace($nonHex, '', (string) substr($l, 0, 6)));
            $CODETABLE[$key] = substr($l, 7, 6);
        }
        fclose($fp);
    }

    $gbstr = (string) $gbstr;
    $len = strlen($gbstr);
    $ret = "";
    $i = 0;

    // Walk by index: testing the remaining string for truthiness would stop
    // as soon as the remainder is the string "0".
    while ($i < $len) {
        if (ord($gbstr[$i]) <= 127) {
            $ret .= $gbstr[$i];
            $i++;
            continue;
        }

        // A high byte starts a two-byte GB character (substr yields a single
        // byte when the input is truncated; that never matches a table key).
        $thisW = substr($gbstr, $i, 2);
        $i += 2;

        $key = hexdec(bin2hex($thisW)) - 0x8080;
        if (!isset($CODETABLE[$key])) {
            continue;
        }
        $codepoint = hexdec(preg_replace($nonHex, '', (string) $CODETABLE[$key]));
        if ($codepoint <= 0) {
            // Empty or unparsable table value: treat as unmapped instead of emitting NUL.
            continue;
        }

        // u2utf8() returns the byte values as concatenated decimal numbers;
        // every byte of a multi-byte sequence is >= 128, i.e. three digits.
        foreach (str_split(u2utf8($codepoint), 3) as $decimalByte) {
            $ret .= chr((int) $decimalByte);
        }
    }

    return $ret;
}

/**
 * Encodes a Unicode code point as UTF-8, returned as DECIMAL byte values
 * concatenated into one string (not as raw bytes).
 *
 * Example: 0x4E2D yields "228184173" (bytes 228, 184, 173). For code points
 * below 0x80 the code point itself is returned in decimal (e.g. 0x41 -> "65").
 * Code points of 0x200000 and above yield an empty string.
 *
 * @param int $c Unicode code point.
 * @return string Concatenated decimal byte values.
 */
function u2utf8($c)
{
    $str = "";

    if ($c < 0x80) {
        $str .= $c;
    } else if ($c < 0x800) {
        $str .= (0xC0 | $c >> 6);
        $str .= (0x80 | $c & 0x3F);
    } else if ($c < 0x10000) {
        $str .= (0xE0 | $c >> 12);
        $str .= (0x80 | $c >> 6 & 0x3F);
        $str .= (0x80 | $c & 0x3F);
    } else if ($c < 0x200000) {
        $str .= (0xF0 | $c >> 18);
        $str .= (0x80 | $c >> 12 & 0x3F);
        $str .= (0x80 | $c >> 6 & 0x3F);
        $str .= (0x80 | $c & 0x3F);
    }

    return $str;
}
因为gb2312的汉字都是双字节的,因此转换为utf-8就相对比较简单,但反过来转换就很麻烦了,我尝试了一下:
这样
/**
 * Converts a UTF-8 string to GB2312.
 *
 * The reverse mapping (Unicode code point => GB code minus 0x8080) is read
 * once from "gb2312-utf8.table" in this file's directory and cached in the
 * global $UC2GBTABLE. For every line, the six characters at offset 7 are
 * parsed as the hexadecimal key and the first six characters as the
 * hexadecimal value.
 * NOTE(review): the exact on-disk layout (e.g. a "0x" prefix) is not visible
 * here; non-hex characters are stripped before parsing, which is what
 * hexdec() itself does implicitly.
 *
 * ASCII bytes are copied unchanged. Well-formed 2-, 3- and 4-byte UTF-8
 * sequences are decoded and consumed as a whole; if the code point has no
 * table entry the character is dropped. Bytes that do not start a complete,
 * well-formed sequence are copied through unchanged.
 *
 * The result is passed through trim(), so leading and trailing whitespace of
 * the converted text is removed.
 *
 * @param string $utfstr UTF-8 input.
 * @return string GB2312 output; blank/whitespace-only input is returned as is.
 * @throws RuntimeException If the mapping table cannot be opened.
 */
function utf82gb($utfstr)
{
    global $UC2GBTABLE;

    if (trim($utfstr) == "") {
        return $utfstr;
    }

    if (empty($UC2GBTABLE)) {
        $nonHex = '/[^0-9A-Fa-f]/';
        $filename = dirname(__FILE__) . "/gb2312-utf8.table";
        $fp = fopen($filename, "r");
        if ($fp === false) {
            throw new RuntimeException("Cannot open GB2312 mapping table: " . $filename);
        }
        while (($l = fgets($fp, 15)) !== false) {
            $unicode = hexdec(preg_replace($nonHex, '', (string) substr($l, 7, 6)));
            $UC2GBTABLE[$unicode] = hexdec(preg_replace($nonHex, '', (string) substr($l, 0, 6)));
        }
        fclose($fp);
    }

    $utfstr = (string) $utfstr;
    $ulen = strlen($utfstr);
    $okstr = "";
    $i = 0;

    while ($i < $ulen) {
        $lead = ord($utfstr[$i]);

        if ($lead < 0x80) {
            $okstr .= $utfstr[$i];
            $i++;
            continue;
        }

        // Sequence length and payload bits are determined by the lead byte.
        if ($lead >= 0xC0 && $lead <= 0xDF) {
            $seqlen = 2;
            $cp = $lead & 0x1F;
        } else if ($lead >= 0xE0 && $lead <= 0xEF) {
            $seqlen = 3;
            $cp = $lead & 0x0F;
        } else if ($lead >= 0xF0 && $lead <= 0xF7) {
            $seqlen = 4;
            $cp = $lead & 0x07;
        } else {
            $seqlen = 0;
            $cp = 0;
        }

        $valid = ($seqlen > 0 && $i + $seqlen <= $ulen);
        for ($k = 1; $valid && $k < $seqlen; $k++) {
            $cont = ord($utfstr[$i + $k]);
            if (($cont & 0xC0) != 0x80) {
                $valid = false;
            } else {
                $cp = ($cp << 6) | ($cont & 0x3F);
            }
        }

        if (!$valid) {
            // Stray or truncated byte: pass it through untouched.
            $okstr .= $utfstr[$i];
            $i++;
            continue;
        }

        // Skip the whole sequence so continuation bytes are never re-read
        // as the start of another character.
        $i += $seqlen;

        if (isset($UC2GBTABLE[$cp])) {
            $gb = $UC2GBTABLE[$cp] + 0x8080;
            $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
        }
    }

    return trim($okstr);
}
/**
 * Decodes one three-byte UTF-8 sequence into its Unicode code point.
 *
 * Only the first three bytes of $c are read. The lead byte is masked with
 * 0x1F and each of the two following bytes with 0x3F before they are combined.
 *
 * @param string $c String whose first three bytes form the sequence.
 * @return int The decoded code point.
 */
function utf82u_3($c)
{
    $high = ord($c[0]) & 0x1f;
    $middle = ord($c[1]) & 0x3f;
    $low = ord($c[2]) & 0x3f;

    return ($high << 12) + ($middle << 6) + $low;
}
按这种方法,大部分字符也算是能转换成功了,不过总是有点不妥之处,于是我把程序改成这样子:
/**
 * Converts a UTF-8 string to GB2312 by walking its urlencode()d form.
 *
 * The reverse mapping (Unicode code point => GB code minus 0x8080) is read
 * once from "gb2312-utf8.table" in this file's directory and cached in the
 * global $UC2GBTABLE. For every line, the six characters at offset 7 are
 * parsed as the hexadecimal key and the first six characters as the
 * hexadecimal value.
 * NOTE(review): the exact on-disk layout (e.g. a "0x" prefix) is not visible
 * here; non-hex characters are stripped before parsing, which is what
 * hexdec() itself does implicitly.
 *
 * In the encoded text "+" becomes a space, unescaped characters are copied,
 * and "%XX" escapes below 0x80 are turned back into the single byte. An
 * escape >= 0x80 starts a 2-, 3- or 4-byte UTF-8 sequence: when the following
 * escapes form a complete, well-formed sequence it is decoded with
 * url_utf2u() and consumed as a whole (dropped if the code point has no table
 * entry); otherwise that single byte is copied through unchanged.
 *
 * The result is passed through trim(), so leading and trailing whitespace of
 * the converted text is removed.
 *
 * @param string $utfstr UTF-8 input.
 * @return string GB2312 output; blank/whitespace-only input is returned as is.
 * @throws RuntimeException If the mapping table cannot be opened.
 */
function utf82gb($utfstr)
{
    global $UC2GBTABLE;

    if (trim($utfstr) == "") {
        return $utfstr;
    }

    if (empty($UC2GBTABLE)) {
        $nonHex = '/[^0-9A-Fa-f]/';
        $filename = dirname(__FILE__) . "/gb2312-utf8.table";
        $fp = fopen($filename, "r");
        if ($fp === false) {
            throw new RuntimeException("Cannot open GB2312 mapping table: " . $filename);
        }
        while (($l = fgets($fp, 15)) !== false) {
            $unicode = hexdec(preg_replace($nonHex, '', (string) substr($l, 7, 6)));
            $UC2GBTABLE[$unicode] = hexdec(preg_replace($nonHex, '', (string) substr($l, 0, 6)));
        }
        fclose($fp);
    }

    $okstr = "";
    $utfstr = urlencode($utfstr);
    $ulen = strlen($utfstr);
    $i = 0;

    while ($i < $ulen) {
        $ch = $utfstr[$i];

        if ($ch == "+") {
            $okstr .= " ";
            $i++;
            continue;
        }
        if ($ch != "%" || $i + 2 >= $ulen) {
            // Unescaped character (or a dangling "%", which urlencode() never emits).
            $okstr .= $ch;
            $i++;
            continue;
        }

        $lead = hexdec(substr($utfstr, $i + 1, 2));
        if ($lead < 0x80) {
            // Escaped ASCII, including %7F.
            $okstr .= chr($lead);
            $i += 3;
            continue;
        }

        // Number of bytes in the sequence, derived from the lead byte.
        if ($lead >= 0xC0 && $lead <= 0xDF) {
            $seqlen = 2;
        } else if ($lead >= 0xE0 && $lead <= 0xEF) {
            $seqlen = 3;
        } else if ($lead >= 0xF0 && $lead <= 0xF7) {
            $seqlen = 4;
        } else {
            $seqlen = 0;
        }

        // Each byte occupies three characters ("%XX") in the encoded text.
        $valid = ($seqlen > 0 && $i + 3 * $seqlen <= $ulen);
        for ($k = 1; $valid && $k < $seqlen; $k++) {
            $pos = $i + 3 * $k;
            if ($utfstr[$pos] != "%") {
                $valid = false;
            } else {
                $cont = hexdec(substr($utfstr, $pos + 1, 2));
                if (($cont & 0xC0) != 0x80) {
                    $valid = false;
                }
            }
        }

        if (!$valid) {
            // Stray or truncated byte: pass it through and skip only its own escape.
            $okstr .= chr($lead);
            $i += 3;
            continue;
        }

        // "XX%YY[%ZZ[%WW]]" without the leading "%".
        $unicode = url_utf2u(substr($utfstr, $i + 1, 3 * $seqlen - 1));
        $i += 3 * $seqlen;

        if (isset($UC2GBTABLE[$unicode])) {
            $gb = $UC2GBTABLE[$unicode] + 0x8080;
            $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
        }
    }

    return trim($okstr);
}

/**
 * Decodes a URL-escaped UTF-8 character into its Unicode code point.
 *
 * The input is the escape group without its leading "%", e.g. "E4%B8%AD" for
 * a three-byte character. Groups of two and four bytes are decoded as well.
 * Any other number of bytes is not a supported sequence and yields 0.
 *
 * @param string $c Hex byte values separated by "%".
 * @return int The decoded code point, or 0 for an unsupported group.
 */
function url_utf2u($c)
{
    $bytes = array();
    foreach (explode("%", $c) as $hex) {
        if ($hex !== "") {
            $bytes[] = hexdec($hex);
        }
    }

    $count = count($bytes);
    if ($count == 2) {
        $n = $bytes[0] & 0x1f;
    } else if ($count == 3) {
        $n = $bytes[0] & 0x0f;
    } else if ($count == 4) {
        $n = $bytes[0] & 0x07;
    } else {
        return 0;
    }

    // Every continuation byte contributes its low six bits.
    for ($i = 1; $i < $count; $i++) {
        $n = ($n << 6) | ($bytes[$i] & 0x3f);
    }

    return $n;
}