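libxml2 的手册地址为: http://xmlsoft.org/
手册讲得很详细, 可参考的例子在其中的 "Code Examples" 部分。
其中要注意的是: 编译时要加上头文件路径和链接参数才能成功, 例如使用 `xml2-config --cflags --libs` 的输出 (通常是 -I/usr/include/libxml2 -lxml2); 本文还用到了 iconv, 在 glibc 以外的平台上可能还需要加上 -liconv.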
这里把一些基本的解析封装成一个类:
XMLLib.cpp
#include "XMLLib.h"
// Default constructor: starts with no parsed document and no cached root element.
XMLLib2::XMLLib2()
{
    this->m_rootElement = NULL;
    this->m_doc = NULL;
}
/**
 * Releases the document tree owned by this object, if one was loaded.
 *
 * Only per-instance state is freed here. xmlCleanupParser() tears down
 * library-wide libxml2 state, so running it from every object's destructor
 * could pull that state out from under other XMLLib2 instances (or any other
 * libxml2 user in the process) that are still alive. The application should
 * call xmlCleanupParser() itself, once, right before it exits.
 */
XMLLib2::~XMLLib2()
{
    if (this->m_doc)
    {
        xmlFreeDoc(this->m_doc);
        this->m_doc = NULL;
    }
    // The root element belonged to the tree that was just freed.
    this->m_rootElement = NULL;
}
bool
XMLLib2::loadXML(const std::string& filename)//从XML文件中解析
{
#ifdef XMLLIB2_DEBUG
std::cout << "XMLLib2::loadXML" << std::endl;
#endif
xmlDocPtr doc;
doc = xmlReadFile(filename.c_str(),NULL,0);
if(NULL == doc)
{
std::cerr<< "XMLLib2::loadXML::xmlReadfile::Failed to open file" << std::endl;
this->m_doc = NULL;
return false;
}
this->m_doc = doc;
this->parseXML();
return true;
}
bool
XMLLib2::readXML(const std::string& content)//从内存中解析
{
#ifdef XMLLIB2_DEBUG
std::cout << "XMLLib2::readXML" << std::endl;
#endif
size_t iLength = content.size();
xmlDocPtr doc;
doc = xmlReadMemory(content.c_str(), iLength, "gb2312", NULL, 0);
if(NULL == doc)
{
std::cerr << "XMLLib2::readXML::xmlReadMemeory::Failed to parse document" << std::endl;
this->m_doc = NULL;
return false;
}
this->m_doc = doc;
this->parseXML();
return true;
}
/**
 * Caches the root element of the loaded document and walks the tree with
 * recursionNode(), starting from an empty key path.
 *
 * Does nothing when no document is loaded or the document has no root
 * element. With XMLLIB2_DEBUG defined, every key/value pair in m_xmlMap is
 * printed afterwards.
 */
void
XMLLib2::parseXML(void)
{
    if (this->m_doc)
    {
        this->m_rootElement = xmlDocGetRootElement(this->m_doc);
        // xmlDocGetRootElement() yields NULL for a document without a root;
        // never dereference it.
        if (this->m_rootElement)
        {
            recursionNode(this->m_rootElement,"");
        }
    }
#ifdef XMLLIB2_DEBUG
    // NOTE(review): the template arguments were missing from this listing;
    // this assumes m_xmlMap is std::map<std::string, std::string> - confirm
    // against XMLLib.h.
    std::map<std::string, std::string>::iterator it;
    for(it=this->m_xmlMap.begin(); it!=this->m_xmlMap.end(); ++it)
        std::cout << "key is:\t" << it->first << "\nvalue is:\t" << it->second << std::endl;
#endif
}
/**
 * Converts the NUL-terminated string `in` from encoding `encFrom` to encoding
 * `encTo` with iconv.
 *
 * Per the original author's note, libxml works in UTF-8, so this helper is
 * used to move Chinese text between UTF-8 and GB2312, e.g.
 *     szNode = ConvertEnc("gb2312", "utf-8", sTmp.c_str());
 *
 * @param encFrom  iconv name of the source encoding.
 * @param encTo    iconv name of the target encoding.
 * @param in       NUL-terminated text to convert.
 * @return Pointer to a static 1024-byte buffer holding the converted,
 *         terminated text, or NULL when an argument is NULL, the conversion
 *         cannot be opened, or iconv() fails (including output that does not
 *         fit the buffer). The buffer is reused by every call, so copy the
 *         result before calling again.
 */
char* ConvertEnc(char *encFrom, char *encTo, const char *in)
{
    static char bufout[1024];

    if (encFrom == NULL || encTo == NULL || in == NULL)
    {
        return NULL;
    }

    iconv_t c_pt = iconv_open(encTo, encFrom);
    if (c_pt == (iconv_t)-1)
    {
        printf("iconv_open false: %s ==> %s\n", encFrom, encTo);
        return NULL;
    }

    // iconv() updates its byte counters through size_t pointers. They must be
    // real size_t objects: casting the address of an int makes iconv read and
    // write past the int wherever size_t is wider.
    size_t lenin = strlen(in) + 1;   // +1 converts the terminator as well
    size_t lenout = sizeof(bufout);
    char *sin = const_cast<char *>(in);
    char *sout = bufout;

    const size_t ret = iconv(c_pt, &sin, &lenin, &sout, &lenout);

    // Close the descriptor on every path, not only on success.
    iconv_close(c_pt);

    if (ret == static_cast<size_t>(-1))
    {
        return NULL;
    }
    return bufout;
}
/**
 * Walks `node` and all of its following siblings depth-first and stores the
 * text found on the way in m_xmlMap.
 *
 * The key of an element is its ancestors' names and its own name joined with
 * '_' (for example "root_item_name"). A node that carries content is stored
 * under the key of the path it was reached with, after converting the content
 * from UTF-8 to GB2312 with ConvertEnc(). std::map::insert keeps the first
 * value seen for a key.
 *
 * @param node   First node of the sibling list to walk; may be NULL.
 * @param sPath  Key path of the parent of `node` ("" at the top level).
 */
void
XMLLib2::recursionNode(xmlNode* node,std::string sPath)
{
    for(xmlNode* curNode = node; curNode; curNode = curNode->next)
    {
        // Build the path for this node from the parent's path every time.
        // Extending sPath in place would leak one sibling's name into the
        // keys of the siblings that follow it ("root_a_b" instead of
        // "root_b").
        std::string nodePath = sPath;
        if(curNode->type == XML_ELEMENT_NODE && curNode->name)
        {
            const std::string sName = reinterpret_cast<const char *>(curNode->name);
            if(nodePath.empty())
                nodePath = sName;
            else
                nodePath = nodePath + "_" + sName;
#ifdef XMLLIB2_DEBUG
            std::cout << "Key is:\t" << nodePath << std::endl;
#endif
        }

        if(curNode->content)
        {
            // Writable copies: ConvertEnc() takes the encoding names as char*.
            char encFrom[] = "utf-8";
            char encTo[] = "gb2312";
            const char* result = ConvertEnc(encFrom, encTo, reinterpret_cast<const char *>(curNode->content));
            if(result)
            {
                // Copy straight into a std::string; a fixed buffer filled by
                // strncpy is not guaranteed to be terminated.
                const std::string sValue(result);
#ifdef XMLLIB2_DEBUG
                std::cout << "Value is:\t" << sValue << std::endl;
#endif
                this->m_xmlMap.insert(std::make_pair(nodePath, sValue));
            }
            else
            {
                // ConvertEnc() returns NULL on failure; skip the value rather
                // than reading through a null pointer.
                std::cerr << "XMLLib2::recursionNode::ConvertEnc::Failed to convert content" << std::endl;
            }
        }

        recursionNode(curNode->children, nodePath);
    }
}
这里还要特别注意: 调用 Libxml 的函数之后, 要释放它分配的内存. 可以用 valgrind ./XXX 来检查是否存在内存泄漏 (XXX 为可执行程序).
我在对 XML 中的中文进行转换时, 调用了 xmlNodeGetContent(), 却没有用 xmlFree() 释放它返回的字符串, 因此造成了内存泄漏. 正确的做法是:
#include <iconv.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;
/**
 * Converts the NUL-terminated UTF-8 string `in` to GB2312 with iconv.
 *
 * @param in  NUL-terminated UTF-8 text.
 * @return Pointer to a static 1024-byte buffer holding the converted,
 *         terminated text, or NULL when `in` is NULL, the conversion cannot be
 *         opened, or iconv() fails (including output that does not fit the
 *         buffer). The buffer is reused by every call, so copy the result
 *         before calling again.
 */
char * ConvertEnc( const char * in)
{
    static char bufout[1024];

    if (in == NULL)
    {
        return NULL;
    }

    iconv_t c_pt = iconv_open("gb2312", "utf-8");
    if (c_pt == (iconv_t)-1)
    {
        printf("iconv_open false: utf-8 ==> gb2312\n" );
        return NULL;
    }

    // iconv() updates its byte counters through size_t pointers. They must be
    // real size_t objects: casting the address of an int makes iconv read and
    // write past the int wherever size_t is wider.
    size_t lenin = strlen(in) + 1;   // +1 converts the terminator as well
    size_t lenout = sizeof(bufout);
    char *sin = const_cast<char *>(in);
    char *sout = bufout;

    const size_t ret = iconv(c_pt, &sin, &lenin, &sout, &lenout);

    // Close the descriptor on every path, not only on success.
    iconv_close(c_pt);

    if (ret == static_cast<size_t>(-1))
    {
        return NULL;
    }
    return bufout;
}
int main(void)
{
static const char *document = "
222.333.110.666你好8888<
/cr>222.333.1.47233000t>";
xmlDocPtr doc = NULL;
xmlNode * cur = NULL;
doc = xmlParseFile("test_zh.xml");
cur = xmlDocGetRootElement(doc);
xmlChar * stmp = xmlNodeGetContent(cur);//该函数需要释放内存
std::string sTmp = (char *)stmp;
std::cout << ConvertEnc(sTmp.c_str()) << std::endl;
xmlFreeDoc(doc);
xmlFree(stmp);//释放xmlNodeGetContent()分配的内存
xmlDocPtr doc2 = NULL;
int iLength = strlen(document)+1;
doc2 = xmlReadMemory(document, iLength, "", "gb2312", 0);
cur = xmlDocGetRootElement(doc2);
xmlChar * stmp2 = xmlNodeGetContent(cur);
cout<<"cur is "< char * result2 = ConvertEnc((char *)stmp2);
xmlFree(stmp2);
printf("result2 is %s\n",result2);
xmlFreeDoc(doc2);//释放内存
xmlCleanupParser();
xmlMemoryDump();
}
阅读(1611) | 评论(0) | 转发(0) |