此博客以后永不更新阴险阴险阴险阴险阴险阴险 国内唯一更新博客在此处不在更新,谢谢,请看下面的地址和邮箱! 我的博客园:http://www.cnblogs.com/mhxy13867806343/ 欢迎访问! 邮箱:mhxy13867806343@sina.cn/ 国内更新博客在:https://github.com/mhxy13867806343 微信公众号:pyjs
分类: Python/Ruby
2015-09-15 20:13:51
#python解析xml和java中一样,有2种方式:sax,dom,蛤处理方式不同:速度和范围
#前者讲究的是效率,每次只处理文档的一小部分,快速而能有效的利用内存,后者是相反的处理方式,
先把所有的文档载入到内存,然后再进行处理,速度比较慢,也比较消耗内存,唯一的好处就是可以操
作整个文档
from xml.sax.handler import ContentHandler
from xml.sax import parse
import os
class Dispatcher:
def dispatch(self,prefix,name,attrs=None):
mname=prefix+name.capitalize()
dname='default'+prefix.capitalize()
method=getattr(self,mname,None)
if callable(method):
args=()
else:
method=getattr(self,dname,None)
args=name,
if prefix=='start':
args+=args
if callable(method):
method('args')
def startElement(self,name,attrs):
self.dispatch('start',name,attrs)
def endElement(self,name):
self.dispatch('end',name)
class WebsiteConstructor(Dispatcher,ContentHandler):
parssthrough=False
def __init__(self,directory):
self.directory=directory
self.endsureDurectory()
def endsureDurectory(self):
path=os.path.join(*self.directory)
print path
print '-----'
if not os.path.isdir(path):
os.makedirs(path)
def defaultStart(self,name,attrs):
if self.parssthrough:
self.out.write('<'+name)
for k,v in attrs.items():
self.out.write('%s=%s'%(k,v))
self.out.write('>')
def defaultEnd(self,name):
if self.parssthrough:
self.out.write('</%s>'%name)
def startDirectory(self,attrs):
self.directory.append(attrs['name'])
self.endsureDurectory()
def endDirectory(self):
print 'endDirectory'
self.directory.pop()
def startPage(self,attrs):
print 'startPage'
filename=os.path.join(*self.directory+[attrs['name']+'.html'])
self.out=ope(filename,'w')
self.writeHeader(attrs['title'])
self.passthrough=True
def endPage(self):
print 'endPage'
self.passthrough=False
self.writeFooter()
self.out.close()
def characters(self, s):
if self.passthrough:
self.out.write(s)
def writeHeader(self,title):
self.out.write('<html>\n<head>\n <title>')
self.out.write(title)
self.out.write('</title>\n</head>\n <body>\n')
def writeFooter(self):
self.out.write('\n </body>\n</html>\n')
parse('website.xml',WebsiteConstructor('public_html'))
Traceback (most recent call last):
-----
File "G:/py_py/python_object_xyz/obj_python/obj_xml.py", line 74, in <module>
parse('website.xml',WebsiteConstructor('public_html'))
File "C:\Python27\lib\xml\sax\__init__.py", line 33, in parse
parser.parse(source)
File "C:\Python27\lib\xml\sax\expatreader.py", line 107, in parse
xmlreader.IncrementalParser.parse(self, source)
File "C:\Python27\lib\xml\sax\xmlreader.py", line 123, in parse
self.feed(buffer)
File "C:\Python27\lib\xml\sax\expatreader.py", line 210, in feed
self._parser.Parse(data, isFinal)
File "C:\Python27\lib\xml\sax\expatreader.py", line 304, in start_element
self._cont_handler.startElement(name, AttributesImpl(attrs))
File "G:/py_py/python_object_xyz/obj_python/obj_xml.py", line 24, in startElement
self.dispatch('start',name,attrs)
File "G:/py_py/python_object_xyz/obj_python/obj_xml.py", line 22, in dispatch
method('args')
TypeError: defaultStart() takes exactly 3 arguments (2 given)
那位朋友帮看下,出现这样的问题,求帮助