Legend
lhui_089
全部博文(70)
2015年(8)
2014年(14)
2011年(1)
2010年(21)
2009年(26)
Bean_lee
bcqttqo2
acxiuvv2
nyfkxneb
iiimad
kdsfan
abssky0
manshukw
九阳神功
分类: C/C++
2010-01-23 20:33:43
/*
 * UTF-8 byte patterns accepted by this filter (original UTF-8 layout,
 * including the legacy 5- and 6-byte forms):
 *
 *   0xxxxxxx
 *   110xxxxx 10xxxxxx
 *   1110xxxx 10xxxxxx 10xxxxxx
 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Only the bit patterns above are checked; overlong encodings, surrogate
 * code points and values above U+10FFFF are not rejected.
 */

/* Number of continuation bytes announced by a lead byte, or -1 if the byte
 * is not a multi-byte lead (stray continuation byte, 0xFE or 0xFF). */
static int utf8_trail_count(unsigned char lead)
{
    if ((lead & 0xE0) == 0xC0) { /* 110xxxxx */
        return 1;
    }
    if ((lead & 0xF0) == 0xE0) { /* 1110xxxx */
        return 2;
    }
    if ((lead & 0xF8) == 0xF0) { /* 11110xxx */
        return 3;
    }
    if ((lead & 0xFC) == 0xF8) { /* 111110xx */
        return 4;
    }
    if ((lead & 0xFE) == 0xFC) { /* 1111110x */
        return 5;
    }
    return -1;
}

/* Length in bytes of the well-formed sequence starting at p (p < end),
 * or 0 if the byte at p does not start one. */
static size_t utf8_valid_seq_len(const unsigned char *p,
                                 const unsigned char *end)
{
    int trail;
    int i;

    if (*p < 0x80) {
        return 1;
    }

    trail = utf8_trail_count(*p);
    /* Compare lengths instead of forming p + trail + 1, which could point
     * beyond one-past-the-end of the buffer. */
    if (trail < 0 || (end - p) <= trail) {
        return 0;
    }

    for (i = 1; i <= trail; i++) {
        if ((p[i] & 0xC0) != 0x80) {
            return 0;
        }
    }
    return (size_t)trail + 1;
}

/*
 * Remove, in place, every byte of src[0 .. *len) that is not part of a
 * well-formed sequence from the table above.
 *
 * When a lead byte is not followed by the required continuation bytes, only
 * the lead byte is dropped; the following bytes are re-examined on their own.
 *
 * src  buffer to filter; modified in place.
 * len  in: number of bytes to examine; out: number of bytes kept.
 *
 * Returns src. If at least one byte was removed, *len is updated and a '\0'
 * is written at src[*len]. If every byte was valid, neither the buffer nor
 * *len is touched (no terminator is written). A NULL src, NULL len or
 * non-positive *len is returned unchanged.
 *
 * The filtering is done without a scratch allocation, so it cannot fail.
 */
char *filter_none_utf8_chars(char *src, int *len)
{
    unsigned char *read_pos;
    unsigned char *write_pos;
    const unsigned char *end;
    size_t seq_len;

    if (src == NULL || len == NULL || *len <= 0) {
        return src;
    }

    read_pos = (unsigned char *)src;
    write_pos = read_pos;
    end = read_pos + *len;

    while (read_pos < end) {
        seq_len = utf8_valid_seq_len(read_pos, end);
        if (seq_len == 0) {
            read_pos++; /* drop this single invalid byte */
            continue;
        }
        /* write_pos never passes read_pos, so compaction is safe in place;
         * memmove because the two ranges may overlap. */
        if (write_pos != read_pos) {
            memmove(write_pos, read_pos, seq_len);
        }
        write_pos += seq_len;
        read_pos += seq_len;
    }

    if (write_pos == read_pos) { /* all chars are valid */
        return src;
    }

    *len = (int)(write_pos - (unsigned char *)src);
    *write_pos = '\0';
    return src;
}
上一篇:最全的mysql 查询语句
下一篇:编写可移植C/C++程序的要点
登录 注册