happy fish的博客
happy_fish100
全部博文(14)
2011年(1)
2009年(4)
2008年(9)
轨迹16
wang2kk
zyhlxf12
wto432
louyefei
hzqbbc
happy_fa
ivan2046
yyf007
liuchx
mengxch
丸喵喵
zhongshi
aomeilwl
cool666
回归平淡
分类: C/C++
2008-07-31 13:09:19
/*
 * UTF-8 byte layouts accepted as valid by filter_none_utf8_chars():
 *   0xxxxxxx
 *   110xxxxx 10xxxxxx
 *   1110xxxx 10xxxxxx 10xxxxxx
 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Only this bit structure is checked. Overlong encodings, surrogate values
 * and the 5-/6-byte forms are NOT rejected.
 */

/*
 * Return the total length in bytes (1..6) of the well-formed sequence that
 * starts at p, or 0 when the byte at p does not start a complete, well-formed
 * sequence inside [p, end). The caller must guarantee p < end.
 */
static int utf8_sequence_length(const unsigned char *p,
        const unsigned char *end)
{
    int trailing;   /* number of 10xxxxxx continuation bytes expected */
    int i;

    if (*p < 0x80) {
        return 1;
    }

    if ((*p & 0xE0) == 0xC0) {          /* 110xxxxx */
        trailing = 1;
    } else if ((*p & 0xF0) == 0xE0) {   /* 1110xxxx */
        trailing = 2;
    } else if ((*p & 0xF8) == 0xF0) {   /* 11110xxx */
        trailing = 3;
    } else if ((*p & 0xFC) == 0xF8) {   /* 111110xx */
        trailing = 4;
    } else if ((*p & 0xFE) == 0xFC) {   /* 1111110x */
        trailing = 5;
    } else {
        /* stray continuation byte (10xxxxxx) or 0xFE / 0xFF */
        return 0;
    }

    /*
     * Compare against the number of bytes left instead of computing
     * p + 1 + trailing: that pointer could lie more than one element past
     * the end of the buffer, which is undefined behaviour.
     */
    if (end - p - 1 < trailing) {
        return 0;   /* sequence is cut off by the end of the input */
    }

    for (i = 1; i <= trailing; i++) {
        if ((p[i] & 0xC0) != 0x80) {
            return 0;
        }
    }

    return trailing + 1;
}

/*
 * Remove, in place, every byte of src[0 .. *len) that is not part of a
 * well-formed sequence (see the layout list above).
 *
 * A byte that cannot start a complete sequence is dropped on its own and
 * scanning resumes at the very next byte, so the tail of a broken sequence
 * is examined (and normally dropped) byte by byte as well.
 *
 * Parameters:
 *   src - buffer holding *len bytes; it is modified in place.
 *   len - in: number of bytes in src; out: number of bytes kept.
 *
 * Returns src.
 *
 * When at least one byte is removed, *len is updated and a '\0' is stored at
 * src[*len]; that index is always inside the original *len bytes. When no
 * byte is removed, neither the buffer nor *len is touched and no terminator
 * is written. A NULL src, a NULL len or *len <= 0 returns src unchanged.
 *
 * No memory is allocated: the write position never gets ahead of the read
 * position, so the data can be compacted inside the caller's buffer.
 */
char *filter_none_utf8_chars(char *src, int *len)
{
    unsigned char *read_pos;
    unsigned char *write_pos;
    unsigned char *end;
    int seq_len;

    if (src == NULL || len == NULL || *len <= 0) {
        return src;
    }

    read_pos = (unsigned char *)src;
    write_pos = read_pos;
    end = read_pos + *len;

    while (read_pos < end) {
        seq_len = utf8_sequence_length(read_pos, end);
        if (seq_len == 0) {
            read_pos++;     /* drop this single invalid byte */
            continue;
        }

        if (write_pos == read_pos) {
            /* nothing removed so far: the bytes are already in place */
            read_pos += seq_len;
            write_pos += seq_len;
            continue;
        }

        /*
         * Source and destination may overlap (the gap can be smaller than
         * the sequence), so copy forward byte by byte rather than memcpy.
         */
        while (seq_len-- > 0) {
            *write_pos++ = *read_pos++;
        }
    }

    if (write_pos != end) {     /* at least one byte was removed */
        *len = (int)(write_pos - (unsigned char *)src);
        *write_pos = '\0';
    }

    return src;
}
上一篇:没有了
下一篇:FastDFS一个高效的分布式文件系统
登录 注册