Chinaunix首页 | 论坛 | 博客
  • 博客访问: 385946
  • 博文数量: 32
  • 博客积分: 2410
  • 博客等级: 大尉
  • 技术积分: 687
  • 用 户 组: 普通用户
  • 注册时间: 2006-01-10 11:34
文章分类
文章存档

2012年(2)

2011年(6)

2010年(6)

2009年(7)

2008年(11)

分类: C/C++

2008-03-17 15:17:40

上个星期所做的项目中涉及到xml文件的解析,所以首先需要选择一个合适的xml解析器,我以前用过libxml2,经过评估觉得它对dom和sax支持得不够全。最后选择了xerces-c这个开源的xml解析器。关于xerces-c的历史及具体细节,可以在对其维护的网站上查看。
  编译安装好后,可以在sample目录下查看示例代码,但是很多API接口描述得并不清晰。故现将代码整理一下,方便以后自己查阅,同时也希望对其它人有些帮助。
  我将一些常用的接口封装成了一个类,包括查找结点、取结点值、更新结点值、删除结点。其中还包括从内存块中读取xml格式的内容并解析,以及将dom树中的内容输出为字节流。
 
XercesParserXml.h
 

#ifndef XERCES_PARSER_XML_H__
#define XERCES_PARSER_XML_H__

#include <string>

/**
 * Stateless helper that looks up, reads, rewrites and removes nodes of an XML
 * document held in memory as a byte string.
 *
 * Every method parses the supplied bytes from scratch. Nodes are addressed by
 * a dot-separated path such as "root.child.grandson"; the first component
 * stands for the document element and is not compared against its name, the
 * remaining components are matched against child node names level by level.
 */
class XercesParserXml {
public:
    /**
     * Reports whether the path @p node resolves to a node of the document in
     * @p srcByte. Returns false when it does not, when Xerces initialization
     * fails, or when parsing yields no document.
     */
    bool has(const std::string& srcByte, const std::string& node) const;
    /**
     * Returns the text content of the node addressed by @p node, or an empty
     * string when the node is not found, Xerces initialization fails, or
     * parsing yields no document.
     */
    std::string get(const std::string& srcByte, const std::string& node) const;
    /**
     * Sets the text content of the node addressed by @p node to @p value and
     * overwrites @p srcByte with the re-serialized document element
     * (pretty-printing is requested when the serializer supports it).
     * If the node is not found the tree is left unchanged, but @p srcByte is
     * still re-serialized and true is returned. Returns false when
     * initialization, parsing or serializer creation fails.
     */
    bool set(std::string& srcByte, const std::string& node, const std::string& value);
    /**
     * Removes the node addressed by @p node and overwrites @p srcByte with the
     * re-serialized document element. Returns false when the node is not
     * found, or when initialization, parsing or serializer creation fails.
     */
    bool del(std::string& srcByte, const std::string& node);

private:

};

#endif

 

XercesParserXml.cpp

 

#include "XercesParserXml.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>

#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>

// NOTE(review): XERCES_NEW_IOSTREAMS is presumably defined by the Xerces
// headers, so this conditional is kept after them.
#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif

XERCES_CPP_NAMESPACE_USE

    DOMNode* getNode(DOMNode* node, const char* nodeName) {
        //char* ptr = (char *)nodeName;
        char ptr[4096] = {0};
        sprintf(ptr, "%s", nodeName);
        if (NULL == ptr)
            return NULL;
        char* d = ".";
        char* p = NULL;
        p = strtok(ptr, d);
        std::vector<std::string> vec;
        while (p) {
            //printf("name: %s\n", p);
            vec.push_back(p);
            p = strtok(NULL, d);
        }
        DOMNode* child;
        DOMNode* curNode = node;

        for (unsigned int i = 1; i < vec.size(); i++) {
            //std::cout << vec[i] << std::endl;
            if (0 == curNode) {
                return NULL;
            }
            for (child = curNode->getFirstChild(); child != 0; child = child->getNextSibling()) { // have no child ???
                char *name = XMLString::transcode(child->getNodeName());
                if (vec[i] == name) {
                    //printf("vec[%d]: %s\tname: %s\n", i, vec[i].c_str(), name);
                    XMLString::release(&name);
                    curNode = child;
                    break;
                }
                XMLString::release(&name);
                if (child == curNode->getLastChild()) {
                    std::cout << "such node isn't exist" << std::endl;
                    return NULL;
                }
            }
        }

        return curNode;
    }


class XStr
{
public :
    // -----------------------------------------------------------------------
    // Constructors and Destructor
    // -----------------------------------------------------------------------
    XStr(const char* const toTranscode)
    {
        // Call the private transcoding method
        fUnicodeForm = XMLString::transcode(toTranscode);
    }

    ~XStr()
    {
        XMLString::release(&fUnicodeForm);
    }


    // -----------------------------------------------------------------------
    // Getter methods
    // -----------------------------------------------------------------------
    const XMLCh* unicodeForm() const
    {
        return fUnicodeForm;
    }

private :
    // -----------------------------------------------------------------------
    // Private data members
    //
    // fUnicodeForm
    // This is the Unicode XMLCh format of the string.
    // -----------------------------------------------------------------------
    XMLCh* fUnicodeForm;
};


#define X(str) XStr(str).unicodeForm()

bool XercesParserXml::has(const std::string& srcBytes, const std::string& node) const
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return false;
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return false;
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return false;
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    // release source
    delete parser;
    delete errHandler;
    delete pInputSource;
    //doc->release();
    if (testNode == NULL) {
        return false;
    }
    else {
        return true;
    }
}


std::string XercesParserXml::get(const std::string& srcBytes, const std::string& node) const
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return "";
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return "";
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return "";
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return "";
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    // get content
    std::string result;
    char* nodeValue = NULL;
    if (testNode) {
        nodeValue = XMLString::transcode(testNode->getTextContent());
        result = nodeValue;
        XMLString::release(&nodeValue);
    }

    // release source
    delete parser;
    delete errHandler;
    delete pInputSource;
    //doc->release();

    return result;
}



bool XercesParserXml::set(std::string& srcBytes, const std::string& node, const std::string& value)
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return false;
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return false;
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return false;
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    // 4. set new content
    if (testNode) {
        testNode->setTextContent(X(value.c_str()));
    }

    // 5. serialize DOM tree and save it
    char* result;
    DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(X("Core"));
    if (NULL == impl) {
        XERCES_STD_QUALIFIER cerr << "Requested implementation is not supported" << XERCES_STD_QUALIFIER endl;
        XMLPlatformUtils::Terminate();
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    DOMWriter* theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter();
    if (NULL == theSerializer) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true);
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true);
    XMLCh* unicodeStr = theSerializer->writeToString(*rootElem);
    result = XMLString::transcode(unicodeStr);
    srcBytes = result;
    //printf("result: %s\n", result);
    XMLString::release(&unicodeStr);
    XMLString::release(&result);

    // release source
    theSerializer->release();
    delete parser;
    delete errHandler;
    delete pInputSource;

    return true;
}



bool XercesParserXml::del(std::string& srcBytes, const std::string& node)
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return false;
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return false;
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return false;
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    if (NULL == testNode) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 4. remove identifier node from DOM tree
    DOMNode* parentNode = testNode->getParentNode();
    DOMNode* oldNode = parentNode->removeChild(testNode);
    oldNode->release();

    // 5. serialize DOM tree and save it
    char* result;
    DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(X("Core"));
    if (NULL == impl) {
        XERCES_STD_QUALIFIER cerr << "Requested implementation is not supported" << XERCES_STD_QUALIFIER endl;
        XMLPlatformUtils::Terminate();
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    DOMWriter* theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter();
    if (NULL == theSerializer) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true);
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true);
    XMLCh* unicodeStr = theSerializer->writeToString(*rootElem);
    result = XMLString::transcode(unicodeStr);
    srcBytes = result;
    //printf("result: %s\n", result);
    XMLString::release(&unicodeStr);
    XMLString::release(&result);


    // release source
    theSerializer->release();
    delete parser;
    delete errHandler;
    delete pInputSource;

    return true;
}



int main()
{
        char* xmlFile = "x2.xml";

        char buf[4096] = {0};
        FILE* fp = fopen(xmlFile, "rb");
        if (!fp) {
            perror(xmlFile);
            exit(1);
        }
        size_t size;
        size = fread(buf, 1, sizeof(buf), fp);
        fclose(fp);
        std::cout << "size to be parser: " << size << std::endl;
        std::string srcDocBytes;
        srcDocBytes.assign(buf, size);
        std::string backup = srcDocBytes;

        XercesParserXml xml;
        bool flag = false;
        char testNode[1000] = {0};
        sprintf(testNode, "%s", "SendRoutingInfoRes.imsi"); //其中各结点以“.”分隔,格式如下:root.child.grandson
        std::string result;
        for (int i = 0; i < 1; i++) {
            srcDocBytes = backup;
            printf("\nfind node test ... \n");
            flag = xml.has(srcDocBytes, testNode);
            printf("\nget node test ...\n");
            result = xml.get(srcDocBytes, testNode);
            printf("get value: %s\n", result.c_str());
            printf("\nset node test ...\n");
            xml.set(srcDocBytes, testNode, "gnu means gnu not unix");
            printf("new content: %s\n", srcDocBytes.c_str());
            printf("\nremove test ...\n");
            xml.del(srcDocBytes, testNode);
            printf("after del operation: %s\n", srcDocBytes.c_str());
        }
        if (flag) {
            printf("%s has found\n", testNode);
        }
        else {
            printf("%s hasn't found\n", testNode);
        }
}

阅读(4342) | 评论(0) | 转发(0) |
1

上一篇:成也魔羯 败也魔羯

下一篇:string c_str()

给主人留下些什么吧!~~