Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1042038
  • 博文数量: 326
  • 博客积分: 10135
  • 博客等级: 上将
  • 技术积分: 2490
  • 用 户 组: 普通用户
  • 注册时间: 2006-04-22 23:53
文章分类

全部博文(326)

文章存档

2014年(1)

2012年(4)

2011年(1)

2010年(4)

2009年(41)

2008年(44)

2007年(63)

2006年(168)

我的朋友

分类: LINUX

2008-10-22 14:37:14

libxml2 的手册地址为: http://xmlsoft.org/
手册讲得很详细,可参考的例子在其中的 Code Examples 部分。
需要注意的是: 链接时要加上参数 -lxml2 才能成功(头文件路径和链接参数可分别通过 xml2-config --cflags 和 xml2-config --libs 获得)。
这里把一些基本的解析封装成一个类:
XMLLib.cpp
#include "XMLLib.h"

/// Builds an empty wrapper: no document is loaded and no root element is cached yet.
XMLLib2::XMLLib2()
{
        this->m_rootElement = NULL;
        this->m_doc = NULL;
}

/**
 * Frees the document owned by this object, if one is loaded.
 *
 * Only per-instance state is released here. The process-wide libxml2
 * teardown (xmlCleanupParser() / xmlMemoryDump()) is intentionally no longer
 * done in the destructor: those calls affect library-global state, so running
 * them whenever one XMLLib2 dies can pull the library out from under other
 * live instances or other users of libxml2 in the same process. Call them
 * once, right before the program exits, if leak-free valgrind output is wanted.
 */
XMLLib2::~XMLLib2()
{
        if(this->m_doc)
        {
                xmlFreeDoc(this->m_doc);
                this->m_doc = NULL;
        }
        // The cached root element belonged to the document freed above.
        this->m_rootElement = NULL;
}

bool
XMLLib2::loadXML(const std::string& filename)//从XML文件中解析
{
#ifdef XMLLIB2_DEBUG
        std::cout << "XMLLib2::loadXML" << std::endl;
#endif
        xmlDocPtr doc;
        doc = xmlReadFile(filename.c_str(),NULL,0);
        if(NULL == doc)
        {
                std::cerr<< "XMLLib2::loadXML::xmlReadfile::Failed to open file" << std::endl;
                this->m_doc = NULL;
                return false;
        }

        this->m_doc = doc;
        this->parseXML();
        return true;
}

bool
XMLLib2::readXML(const std::string& content)//从内存中解析
{
#ifdef XMLLIB2_DEBUG
        std::cout << "XMLLib2::readXML" << std::endl;
#endif
        size_t iLength = content.size();
        xmlDocPtr doc;
        doc = xmlReadMemory(content.c_str(), iLength, "gb2312", NULL, 0);
        if(NULL == doc)
        {
                std::cerr << "XMLLib2::readXML::xmlReadMemeory::Failed to parse document" << std::endl;
                this->m_doc = NULL;
                return false;
        }

        this->m_doc = doc;
        this->parseXML();
        return true;
}
void
XMLLib2::parseXML(void)
{
        xmlNode* curNode;
        if(this->m_doc)
        {
                this->m_rootElement = xmlDocGetRootElement(this->m_doc);
                curNode = this->m_rootElement->xmlChildrenNode;
                recursionNode(this->m_rootElement,"");
        }
#ifdef XMLLIB2_DEBUG
        std::map::iterator it;
        for(it=this->m_xmlMap.begin(); it!=this->m_xmlMap.end(); ++it)
                std::cout << "key is:\t" << it->first << "\nvalue is:\t" << it->second << std::endl;
#endif
}

/**
 * Converts the NUL-terminated string `in` from encoding `encFrom` to encoding
 * `encTo` with iconv.
 *
 * libxml2 hands out node text as UTF-8, so this is used to turn it into
 * GB2312 (or back) when Chinese text has to be handled, e.g.
 *     szNode = ConvertEnc("gb2312", "utf-8", sTmp.c_str());
 *
 * @param encFrom name of the source encoding.
 * @param encTo   name of the target encoding.
 * @param in      text to convert; its terminating NUL is converted too, so the
 *                result is terminated.
 * @return pointer to a function-local static buffer of 1024 bytes holding the
 *         converted text, or NULL when an argument is NULL, the conversion
 *         pair is not available, or iconv() fails (which includes output that
 *         does not fit in the buffer). The buffer is overwritten by the next
 *         call, so copy the result if it must outlive it.
 */
char * ConvertEnc( char *encFrom, char *encTo, const char * in)
{
        static char bufout[1024];

        if (in == NULL || encFrom == NULL || encTo == NULL)
        {
                return NULL;
        }

        iconv_t c_pt = iconv_open(encTo, encFrom);
        if (c_pt == (iconv_t)-1)
        {
                printf("iconv_open false: %s ==> %s\n", encFrom, encTo);
                return NULL;
        }
        // Put the descriptor into its initial shift state before converting.
        iconv(c_pt, NULL, NULL, NULL, NULL);

        // iconv() reads and updates these through size_t pointers, so they must
        // really be size_t; casting the address of an int is out-of-bounds
        // access wherever size_t is wider than int.
        size_t lenin  = strlen(in) + 1;
        size_t lenout = sizeof(bufout);
        char *sin  = const_cast<char *>(in);   // iconv() does not modify the input bytes
        char *sout = bufout;
        const size_t ret = iconv(c_pt, &sin, &lenin, &sout, &lenout);

        // Close on every path so a failed conversion does not leak the descriptor.
        iconv_close(c_pt);

        if (ret == (size_t)-1)
        {
                return NULL;
        }
        return bufout;
}

void
XMLLib2::recursionNode(xmlNode* node,std::string sPath)
{
        xmlNode* curNode;
        for(curNode = node; curNode; curNode = curNode->next)
        {
                if(curNode->type == XML_ELEMENT_NODE)
                {
                        char sname[MAX_XML_LENGTH] = {0};
                        strncpy(sname,(const char *)curNode->name,MAX_XML_LENGTH); //把xmlChar 型转换为Char型来存放时,不能用memcpy.
                        if(sPath.empty())
                                sPath += sname;
                        else
                                sPath = sPath + "_" + sname;
#ifdef XMLLIB2_DEBUG
                        std::cout << "Key is:\t" << sPath << std::endl;
#endif
                }
                std::string sValue;
                if(curNode->content)
                {
                        char scontent[MAX_XML_LENGTH] = {0};
                        char * result = ConvertEnc("utf-8", "gb2312", (char *)curNode->content);
                        strncpy(scontent,(const char *)result,MAX_XML_LENGTH);
                        sValue = scontent;
#ifdef XMLLIB2_DEBUG
                        std::cout << "Value is:\t" << sValue << std::endl;
#endif
                        this->m_xmlMap.insert(std::map::value_type(sPath,sValue));

                }

                recursionNode(curNode->children,sPath);
        }
}

这里还要特别注意: 调用 libxml2 的函数之后,要释放它们分配的内存。可以用 valgrind ./XXX(XXX 为可执行程序)来检查是否存在内存泄漏。
我在对 XML 中的中文进行转换时调用了 xmlNodeGetContent(),但没有释放它返回的内存,因此造成了内存泄漏。正确的做法是用 xmlFree() 释放该返回值:
#include
#include
#include
#include
using namespace std;
/**
 * Converts the NUL-terminated UTF-8 string `in` to GB2312 with iconv.
 *
 * @param in text to convert; its terminating NUL is converted too, so the
 *           result is terminated.
 * @return pointer to a function-local static buffer of 1024 bytes holding the
 *         converted text, or NULL when `in` is NULL, the utf-8 -> gb2312
 *         conversion is not available, or iconv() fails (which includes
 *         output that does not fit in the buffer). The buffer is overwritten
 *         by the next call, so copy the result if it must outlive it.
 */
char * ConvertEnc( const char * in)
{
        static char bufout[1024];

        if (in == NULL)
        {
                return NULL;
        }

        iconv_t c_pt = iconv_open("gb2312", "utf-8");
        if (c_pt == (iconv_t)-1)
        {
                printf("iconv_open false: utf-8 ==> gb2312\n" );
                return NULL;
        }

        // iconv() reads and updates these through size_t pointers, so they must
        // really be size_t; casting the address of an int is out-of-bounds
        // access wherever size_t is wider than int.
        size_t lenin  = strlen(in) + 1;
        size_t lenout = sizeof(bufout);
        char *sin  = const_cast<char *>(in);   // iconv() does not modify the input bytes
        char *sout = bufout;
        const size_t ret = iconv(c_pt, &sin, &lenin, &sout, &lenout);

        // Close on every path so a failed conversion does not leak the descriptor.
        iconv_close(c_pt);

        if (ret == (size_t)-1)
        {
                return NULL;
        }
        return bufout;
}
// Sample program: reads node text from an XML file and from an in-memory
// document, converts it with ConvertEnc() and prints it, freeing every
// libxml2 allocation it makes along the way.
//
// NOTE(review): this listing looks damaged by HTML extraction - the XML
// literal assigned to `document` appears to have lost its tags, and the
// `cout<<"cur is "<` statement below is cut off and fused with the next
// declaration. It will not compile as shown; restore both from the original
// source before use.
// NOTE(review): none of the libxml2 or ConvertEnc() return values are checked
// for NULL before they are used.
int main(void)
{
        static const char *document = "
222.333.110.666你好
8888< /cr>
222.333.1.47
23
3000t>
";
        xmlDocPtr doc = NULL;
        xmlNode * cur = NULL;

        // Part 1: parse a file on disk and print the root element's text.
        doc = xmlParseFile("test_zh.xml");
        cur = xmlDocGetRootElement(doc);
        xmlChar * stmp = xmlNodeGetContent(cur);// the returned buffer must be freed by the caller
        std::string sTmp = (char *)stmp;
        std::cout << ConvertEnc(sTmp.c_str()) << std::endl;
        xmlFreeDoc(doc);
        xmlFree(stmp);// free the memory allocated by xmlNodeGetContent()

        // Part 2: parse the in-memory document, passing "gb2312" as its encoding.
        xmlDocPtr doc2 = NULL;
        // The length handed to xmlReadMemory() includes the terminating NUL.
        int iLength = strlen(document)+1;
        doc2 = xmlReadMemory(document, iLength, "", "gb2312", 0);
        cur = xmlDocGetRootElement(doc2);
        xmlChar * stmp2 = xmlNodeGetContent(cur);
        // NOTE(review): the next line is truncated in this copy (see header comment).
        cout<<"cur is "<        char * result2 = ConvertEnc((char *)stmp2);
        // result2 points at ConvertEnc()'s static buffer, so stmp2 can be
        // freed before result2 is printed.
        xmlFree(stmp2);
        printf("result2 is %s\n",result2);

        xmlFreeDoc(doc2);// free the parsed document
        // Library-wide cleanup, done once at the end of the program.
        xmlCleanupParser();
        xmlMemoryDump();

}
阅读(1600) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~