使用 Python 的 xml.sax 库读取 XML 文件,并将指定字段导出为 CSV:
#-*- coding:UTF-8 -*-
import string
import xml.sax
from xml.sax.handler import *
class QuotationHandler(ContentHandler):
    """SAX handler that exports selected element text as CSV fields.

    The text of every ``offer_id``, ``title`` and ``mobile_url`` element is
    collected and, when the element closes, written to ``out`` as one
    GBK-encoded CSV field.  A ``mobile_url`` element terminates the current
    row.  Fields are emitted in the order the elements appear in the
    document; all other elements are ignored.

    Everything handed to ``out.write`` is a GBK-encoded byte string.
    """

    # Tracked element name -> text written right after the element's value.
    # ``mobile_url`` closes the row, keeping the trailing comma of the
    # original output format.
    _SEPARATORS = {
        'offer_id': ',',
        'title': ',',
        'mobile_url': ',\n',
    }

    def __init__(self, out):
        """Remember the output sink.

        out -- object with a ``write`` method accepting byte strings.
        """
        ContentHandler.__init__(self)
        self.out = out
        self.string = ''   # text collected for the element being read
        self.inread = 0    # non-zero while inside a tracked element

    @staticmethod
    def _quote(text):
        """Return ``text`` as a CSV field, quoting it only when required.

        A field containing a comma, a double quote or a line break is
        wrapped in double quotes with embedded quotes doubled; any other
        field is returned unchanged.
        """
        if any(special in text for special in ',"\r\n'):
            return '"' + text.replace('"', '""') + '"'
        return text

    def _emit(self, text):
        """Write ``text`` to the output sink, encoded as GBK."""
        self.out.write(text.encode("GBK"))

    def startDocument(self):
        """Announce the start of parsing and write the CSV header line."""
        print('--- Begin Document ---')
        self._emit('offer_id, title, mobile_url\n')

    def startElement(self, name, attrs):
        """Start collecting character data when a tracked element opens."""
        if name in self._SEPARATORS:
            self.inread = 1

    def endElement(self, name):
        """Write the collected text as a CSV field when a tracked element closes."""
        separator = self._SEPARATORS.get(name)
        if separator is None:
            return
        self.inread = 0
        self._emit(self._quote(self.string) + separator)
        if name == 'title':
            # Echo titles to stdout as a progress indicator.
            print(self.string)
        self.string = ''

    def characters(self, ch):
        """Append character data; the parser may deliver one text node in several pieces."""
        if self.inread:
            self.string += ch
if __name__ == '__main__':
    import traceback

    # Convert the sample XML export into test.csv.  QuotationHandler writes
    # GBK-encoded byte strings, so the file is opened in binary mode; the
    # ``with`` block closes it even when parsing fails.
    with open("test.csv", "wb") as out:
        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(QuotationHandler(out))
            parser.parse("i900738_090816_2.xml")
        except Exception:
            # Best effort: report the failure and keep whatever was written.
            # Ctrl-C and SystemExit are deliberately not swallowed.
            traceback.print_exc()
|
阅读(2954) | 评论(0) | 转发(0) |