iconv用法解读-Aquester-ChinaUnix博客

岚天逸见aquester.blog.chinaunix.net

博客访问： 8229718
博文数量： 595
博客积分： 13065
博客等级：上将
技术积分： 10334
用户组：普通用户
注册时间： 2008-03-26 16:44

个人简介

推荐: blog.csdn.net/aquester https://github.com/eyjian https://www.cnblogs.com/aquester http://blog.chinaunix.net/uid/20682147.html

文章分类

全部博文（595）

maven（0）
flink（1）
gRPC（2）
go（3）
Kubernetes（1）
微服务（4）

skywalking（3）
Docker（1）
raft（1）
微码分享（2）
一致性协议（1）
iptables（0）
crontab（9）
python（1）
svn（1）
redis（42）
java（4）
json（2）
nginx（1）
海量服务（1）
微信编程（0）
js&html（2）
github（1）
android（1）
互联网金融（0）
thrift（10）
推荐转载（5）
原创推荐（16）
平淡生活（22）
生活与设计（3）
hadoop（51）

kafka（3）

hue（1）

hive（1）

hbase（8）

spark（2）

zookeeper（4）

hdfs（13）

storm（1）
有感而发（19）
mooon（28）
下载（1）
TCP/IP（3）
MYSQL（26）
question（4）
linux（89）

LVS（1）

性能（11）

WEB服务器（8）
转载（15）
C/C++（162）

汇编（3）
OO（4）
UML（1）
常用脚本（45）
未分配的博文（10）

推荐博文

相关博文

iconv用法解读

分类： C/C++

2016-01-29 13:54:01

iconv是一个字符集转换函数，原型为：
size_t iconv(iconv_t cd,
char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft);

// Converts the from_size bytes starting at `from` using the already-opened
// conversion descriptor `cd` and stores the converted bytes in *to.
// The caller is expected to pass input that is entirely convertible.
//
// Returns 0 on success; otherwise returns the errno value reported by iconv()
// and leaves *to untouched.
static int do_convert(iconv_t cd, const char* from, size_t from_size, std::string* to)
{
    // iconv() is declared with a char** input argument, hence the cast.
    char* in_buf_ptr = const_cast<char*>(from);
    size_t in_bytes_left = from_size;

    // from_size * 3 + 1 is only a first guess: some target charsets need more
    // (e.g. 4 output bytes per ASCII input byte for UCS-4), so the buffer is
    // grown below whenever iconv() reports E2BIG.
    std::string out(from_size * 3 + 1, '\0');
    size_t out_bytes_used = 0;

    for (;;)
    {
        char* out_buf_ptr = &out[0] + out_bytes_used;
        size_t out_bytes_left = out.size() - out_bytes_used;

        // iconv() returns size_t; (size_t)-1 signals failure.
        const size_t rc = iconv(cd, &in_buf_ptr, &in_bytes_left, &out_buf_ptr, &out_bytes_left);
        // Capture errno immediately, before any other call can overwrite it.
        const int errcode = (static_cast<size_t>(-1) == rc) ? errno : 0;
        out_bytes_used = out.size() - out_bytes_left;

        if (0 == errcode)
            break;
        if (errcode != E2BIG)
            return errcode;

        // Output buffer exhausted: iconv() has already advanced the input and
        // output positions, so enlarge the buffer and resume where it stopped.
        out.resize(out.size() * 2);
    }

    to->assign(out, 0, out_bytes_used);
    return 0;
}
// Converts `from` (encoded in from_charset) to to_charset and stores the result in *to.
//
// ignore_error: when false, the first invalid (EILSEQ) or incomplete (EINVAL)
//               input sequence raises an exception; when true, conversion
//               continues past the offending byte.
// skip_error:   only meaningful when ignore_error is true. When true, bytes
//               that cannot be converted are dropped; when false, they are
//               copied unchanged into the result.
//
// Raises an exception (via THROW_EXCEPTION) if the descriptor cannot be opened
// or closed, or if iconv() reports an error that is not being ignored.
//
// The full implementation can be browsed at:
// https://github.com/eyjian/mooon/blob/master/common_library/src/utils/charset_utils.cpp
void CCharsetUtils::convert(const std::string& from_charset, const std::string& to_charset,
                            const std::string& from, std::string* to,
                            bool ignore_error, bool skip_error) throw (CException)
{
    std::string result; // accumulates the converted output
    char* in_buf = const_cast<char*>(from.c_str()); // iconv() is declared with char** input
    size_t in_bytes_left = from.size(); // bytes not yet consumed

    iconv_t cd = iconv_open(to_charset.c_str(), from_charset.c_str());
    if ((iconv_t)(-1) == cd)
    {
        THROW_EXCEPTION(strerror(errno), errno);
    }

    try
    {
        while (in_bytes_left > 0)
        {
            // Heuristic size only; a too-small buffer is handled through E2BIG below.
            const size_t out_bytes = in_bytes_left * 3 + 1;
            size_t out_bytes_left = out_bytes;
            std::string out(out_bytes, '\0');
            char* out_buf = &out[0];
            const char* const in_buf_start = in_buf;

            // On success in_bytes_left becomes 0 and in_buf points at the end of the input.
            // On failure in_buf points at the first byte that could not be consumed and
            // in_bytes_left is the number of bytes remaining from there.
            // Either way, out_bytes - out_bytes_left bytes of output have been written.
            const size_t rc = iconv(cd, &in_buf, &in_bytes_left, &out_buf, &out_bytes_left);
            // Capture errno immediately, before any other call can overwrite it.
            const int errcode = (static_cast<size_t>(-1) == rc) ? errno : 0;
            const size_t produced = out_bytes - out_bytes_left;

            // Keep whatever was converted before iconv() stopped, so the valid
            // prefix does not have to be converted a second time.
            result.append(out, 0, produced);

            if (0 == errcode)
            {
                break;
            }

            if (E2BIG == errcode)
            {
                // E2BIG: not sufficient room in the output buffer. This is not an
                // input error; retry with a fresh buffer for the remaining input,
                // unless no progress at all was made (guards against spinning).
                if ((in_buf == in_buf_start) && (0 == produced))
                {
                    THROW_EXCEPTION(strerror(errcode), errcode);
                }
                continue;
            }

            // EILSEQ: an invalid multibyte sequence has been encountered in the input.
            // EINVAL: an incomplete multibyte sequence has been encountered in the input.
            // Anything else, or any error when errors are not ignored, is fatal.
            if (!ignore_error || ((errcode != EINVAL) && (errcode != EILSEQ)))
            {
                THROW_EXCEPTION(strerror(errcode), errcode);
            }

            // skip_error decides whether the unconvertible byte appears in the result.
            if (!skip_error)
            {
                result.append(in_buf, 1);
            }

            // Step over the offending byte and try again from the next one.
            --in_bytes_left;
            ++in_buf;
        }
    }
    catch (...)
    {
        // Release the descriptor on every failure path, including allocation failures.
        iconv_close(cd);
        throw;
    }

    if (-1 == iconv_close(cd))
    {
        THROW_EXCEPTION(strerror(errno), errno);
    }

    // Assign only at the end: `to` may refer to the same string as `from`.
    *to = result;
}
// Converts GBK-encoded `from` to UTF-8 and stores the result in *to.
// ignore_error and skip_error are forwarded unchanged to convert().
void CCharsetUtils::gbk_to_utf8(const std::string& from, std::string* to, bool ignore_error, bool skip_error) throw (CException)
{
    const std::string source_charset("gbk");
    const std::string target_charset("utf-8");
    convert(source_charset, target_charset, from, to, ignore_error, skip_error);
}
// Converts UTF-8 encoded `from` to GBK and stores the result in *to.
// ignore_error and skip_error are forwarded unchanged to convert().
void CCharsetUtils::utf8_to_gbk(const std::string& from, std::string* to, bool ignore_error, bool skip_error) throw (CException)
{
    const std::string source_charset("utf-8");
    const std::string target_charset("gbk");
    convert(source_charset, target_charset, from, to, ignore_error, skip_error);
}
// Converts GB2312-encoded `from` to UTF-8 and stores the result in *to.
// ignore_error and skip_error are forwarded unchanged to convert().
void CCharsetUtils::gb2312_to_utf8(const std::string& from, std::string* to, bool ignore_error, bool skip_error) throw (CException)
{
    const std::string source_charset("gb2312");
    const std::string target_charset("utf-8");
    convert(source_charset, target_charset, from, to, ignore_error, skip_error);
}
// Converts UTF-8 encoded `from` to GB2312 and stores the result in *to.
// ignore_error and skip_error are forwarded unchanged to convert().
void CCharsetUtils::utf8_to_gb2312(const std::string& from, std::string* to, bool ignore_error, bool skip_error) throw (CException)
{
    const std::string source_charset("utf-8");
    const std::string target_charset("gb2312");
    convert(source_charset, target_charset, from, to, ignore_error, skip_error);
}

阅读(3137) | 评论(0) | 转发(0) |

上一篇：HBase & thrift & C++编程

下一篇：mysql-5.7.10普通安装

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6