#python项目练习三：万能的XML-mhxy13867806343-ChinaUnix博客

博客访问： 46537
博文数量： 14
博客积分： 10
博客等级：民兵
技术积分： 132
用户组：普通用户
注册时间： 2012-05-07 08:56

个人简介

此博客以后永不更新阴险阴险阴险阴险阴险阴险国内唯一更新博客在此处不在更新，谢谢，请看下面的地址和邮箱! 我的博客园：http://www.cnblogs.com/mhxy13867806343/ 欢迎访问！邮箱:mhxy13867806343@sina.cn/ 国内更新博客在:https://github.com/mhxy13867806343 微信公众号:pyjs

文章分类

全部博文（14）

2017（1）
2016（3）
2015（3）
未分配的博文（7）

文章存档

2017年（1）

2016年（4）

2015年（8）

2014年（1）

我的朋友

相关博文

#python项目练习三：万能的XML

分类： Python/Ruby

2015-09-15 20:13:51

#python解析xml和java中一样，有2种方式:sax,dom,蛤处理方式不同：速度和范围
#前者讲究的是效率，每次只处理文档的一小部分，快速而能有效的利用内存，后者是相反的处理方式，

先把所有的文档载入到内存，然后再进行处理，速度比较慢，也比较消耗内存，唯一的好处就是可以操

作整个文档
from xml.sax.handler import ContentHandler
from xml.sax import parse
import os
class Dispatcher:
    def dispatch(self,prefix,name,attrs=None):
        mname=prefix+name.capitalize()
        dname='default'+prefix.capitalize()
        method=getattr(self,mname,None)
        if callable(method):
            args=()
        else:
            method=getattr(self,dname,None)
            args=name,
        if prefix=='start':
            args+=args
        if callable(method):
            method('args')
    def startElement(self,name,attrs):
        self.dispatch('start',name,attrs)
    def endElement(self,name):
        self.dispatch('end',name)

class WebsiteConstructor(Dispatcher,ContentHandler):
    parssthrough=False
    def __init__(self,directory):
        self.directory=directory
        self.endsureDurectory()
    def endsureDurectory(self):
        path=os.path.join(*self.directory)
        print path
        print '-----'
        if not os.path.isdir(path):
            os.makedirs(path)
    def defaultStart(self,name,attrs):
        if self.parssthrough:
            self.out.write('<'+name)
            for k,v in attrs.items():
                self.out.write('%s=%s'%(k,v))
            self.out.write('>')
    def defaultEnd(self,name):
        if self.parssthrough:
            self.out.write('</%s>'%name)
    def startDirectory(self,attrs):
        self.directory.append(attrs['name'])
        self.endsureDurectory()
    def endDirectory(self):
        print 'endDirectory'
        self.directory.pop()
    def startPage(self,attrs):
        print 'startPage'
        filename=os.path.join(*self.directory+[attrs['name']+'.html'])
        self.out=ope(filename,'w')
        self.writeHeader(attrs['title'])
        self.passthrough=True
    def endPage(self):
        print 'endPage'
        self.passthrough=False
        self.writeFooter()
        self.out.close()
    def characters(self, s):
        if self.passthrough:
            self.out.write(s)
    def writeHeader(self,title):
        self.out.write('<html>\n<head>\n <title>')
        self.out.write(title)
        self.out.write('</title>\n</head>\n <body>\n')
    def writeFooter(self):
        self.out.write('\n </body>\n</html>\n')
parse('website.xml',WebsiteConstructor('public_html'))

Traceback (most recent call last):
-----
File "G:/py_py/python_object_xyz/obj_python/obj_xml.py", line 74, in <module>
    parse('website.xml',WebsiteConstructor('public_html'))
File "C:\Python27\lib\xml\sax\__init__.py", line 33, in parse
    parser.parse(source)
File "C:\Python27\lib\xml\sax\expatreader.py", line 107, in parse
    xmlreader.IncrementalParser.parse(self, source)
File "C:\Python27\lib\xml\sax\xmlreader.py", line 123, in parse
    self.feed(buffer)
File "C:\Python27\lib\xml\sax\expatreader.py", line 210, in feed
    self._parser.Parse(data, isFinal)
File "C:\Python27\lib\xml\sax\expatreader.py", line 304, in start_element
    self._cont_handler.startElement(name, AttributesImpl(attrs))
File "G:/py_py/python_object_xyz/obj_python/obj_xml.py", line 24, in startElement
    self.dispatch('start',name,attrs)
File "G:/py_py/python_object_xyz/obj_python/obj_xml.py", line 22, in dispatch
    method('args')
TypeError: defaultStart() takes exactly 3 arguments (2 given)

那位朋友帮看下，出现这样的问题，求帮助

管理员在2009年8月13日编辑了该文章文章。

-->

阅读(1046) | 评论(0) | 转发(0) |

上一篇：#python项目练习二：画幅好画

下一篇：#python项目练习四：新闻聚合

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6