此博客以后永不更新。国内唯一更新博客在此处不再更新,谢谢,请看下面的地址和邮箱! 我的博客园:http://www.cnblogs.com/mhxy13867806343/ 欢迎访问! 邮箱:mhxy13867806343@sina.cn 国内更新博客在:https://github.com/mhxy13867806343 微信公众号:pyjs
分类: Python/Ruby
2015-09-15 17:46:14
一个是提供那些固定的html标记的输出(每一个标记都有start和end),
handlers.py
#callable(object)
#中文说明:检查对象object是否可调用。如果返回True,object仍然可能调用失败;但如果返回False,调用对象object绝对不会成功。
#注意:类是可调用的,而类的实例实现了__call__()方法才可调用。
#getattr(object, name[, default]):返回对象object中名为name的属性;如果该属性不存在且提供了default,则返回default(下面的callback传入None作为default),否则抛出AttributeError。
class HandLer:
    """Dispatch markup events to optional ``start_*``/``end_*``/``sub_*`` methods.

    Subclasses define methods such as ``start_paragraph`` or ``sub_url``;
    events whose method is missing are silently ignored.
    """

    def callback(self, prefix, name, *args):
        """Call the method named ``prefix + name`` with ``args`` if it exists.

        Returns the method's result, or None when this object has no
        callable attribute of that name.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)
        return None

    def start(self, name):
        """Fire the ``start_<name>`` event."""
        self.callback('start_', name)

    def end(self, name):
        """Fire the ``end_<name>`` event."""
        # The prefix needs the trailing underscore; with a bare 'end' the
        # lookup was for e.g. 'enddocument' and never found a handler.
        self.callback('end_', name)

    def sub(self, name):
        """Return a replacement function that delegates to ``sub_<name>``.

        The returned function takes a match object and returns the
        replacement text; it is meant to be passed as the ``repl`` argument
        of ``re.sub``.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                # No handler (or it returned None): keep the matched text
                # unchanged. The original computed this value but dropped it.
                result = match.group(0)
            return result
        return substitution
class HTMLRenderer(HandLer):
    """Handler that renders markup events as HTML printed to standard output.

    The ``start_*``/``end_*`` methods print the opening and closing tags,
    the ``sub_*`` methods return replacement text for a regex match, and
    ``feed`` prints block text as-is.
    """

    # Single-argument print(...) is used throughout so the code behaves the
    # same under Python 2 (parenthesised expression) and Python 3.

    def start_document(self):
        # The original emitted a stray '/' before </head>.
        print('<html><head><title>...</title></head><body>')

    def end_document(self):
        print('</body></html>')

    def start_paragraph(self):
        print('<p>')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2>')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul>')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1>')

    def end_title(self):
        print('</h1>')

    def sub_emphasis(self, match):
        """Return the first captured group wrapped in ``<em>``."""
        # group(1) is the text between the markers; group(0) would keep the
        # surrounding '*' characters in the output.
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        """Return an anchor whose target and text are the captured URL."""
        # The attribute value is quoted so the markup stays well-formed.
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        """Return a ``mailto:`` anchor for the captured address."""
        # Use the captured group for both target and text, like sub_url.
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Print a block of text unchanged."""
        print(data)
#rules.py
class Rule:
    """Base class for markup rules.

    Subclasses provide a ``type`` attribute (the event name) and a
    ``condition(block)`` method.
    """

    def action(self, block, hendler):
        """Emit ``block`` between the start and end events of ``self.type``.

        Returns True, which the parser treats as "stop trying further rules
        for this block".
        """
        kind = self.type
        hendler.start(kind)
        hendler.feed(block)
        hendler.end(kind)
        return True
class HeadingRule(Rule):
    """Rule for headings: one line, at most 70 characters, no trailing colon."""

    type = 'heading'

    def condition(self, block):
        """Return True when ``block`` looks like a heading.

        An empty block is never a heading; the original indexed
        ``block[-1]`` and raised IndexError in that case.
        """
        return (bool(block)
                and '\n' not in block
                and len(block) <= 70
                and not block.endswith(':'))
class TitleRule(HeadingRule):
    """Rule for the document title: the first block, if it is a heading."""

    type = 'title'
    # True until the first block has been examined.
    first = True

    def condition(self, block):
        """Return True only for the first block examined, if it is a heading."""
        if not self.first:
            return False
        # Clear the flag so later blocks fall through to HeadingRule; the
        # original never cleared it, so every heading became a title.
        self.first = False
        return HeadingRule.condition(self, block)
class ListItemRule(Rule):
    """Rule for list items: blocks that begin with a hyphen."""

    type = 'listitem'

    def condition(self, block):
        """Return True when ``block`` starts with ``'-'``.

        ``startswith`` returns False for an empty block, where the original
        ``block[0]`` raised IndexError.
        """
        return block.startswith('-')

    def action(self, block, hendler):
        """Emit the block as a list item, without the hyphen and padding.

        Returns True so that no further rules are tried for this block.
        """
        hendler.start(self.type)
        hendler.feed(block[1:].strip())
        hendler.end(self.type)
        return True
class ListRule(ListItemRule):
    """Rule that opens and closes the list around runs of list items.

    ``condition`` always holds, so ``action`` sees every block. It tracks
    whether a list is open and fires the list start/end events on the
    transitions between list-item and non-list-item blocks.
    """

    type = 'list'
    # Whether a list is currently open.
    inside = False

    def condition(self, block):
        return True

    def action(self, block, hendler):
        """Fire list start/end events as needed; never consume the block.

        Returns False so the remaining rules still process ``block``.
        """
        is_item = ListItemRule.condition(self, block)
        if is_item and not self.inside:
            hendler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            hendler.end(self.type)
            self.inside = False
        return False
class ParagraphRule(Rule):
    """Catch-all rule: any block is accepted as a paragraph."""

    type = 'paragraph'

    def condition(self, block):
        """Accept every block."""
        return True
每条规则有两个方法:condition和action。前者用来判断读进来的文本块是否符合该规则,后者用来执行相应的操作。
utils.py
def line(f):
    """Yield every item of ``f`` in order, then one final ``'\\n'``.

    The trailing newline acts as a sentinel so a consumer that splits on
    blank lines also sees the end of the last block.
    """
    for text_line in f:
        yield text_line
    yield '\n'
def blocks(f):
    """Yield the blank-line-separated blocks of ``f`` as stripped strings.

    Consecutive non-blank lines are collected; when a blank line is reached
    and something has been collected, the lines are joined, stripped and
    yielded.
    """
    pending = []
    for text_line in line(f):
        if text_line.strip():
            pending.append(text_line)
            continue
        # Blank line: emit what was collected, if anything.
        if pending:
            yield ''.join(pending).strip()
            pending = []
语法分析器模块
import re,sys
from handlers import *
from utils import *
from rules import *
class Parser:
    """Read text blocks, apply filters and rules, and drive a handler.

    Filters rewrite the text of each block; rules decide which handler
    events a block produces.
    """

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, r):
        """Append rule ``r``; rules are tried in the order they were added."""
        self.rules.append(r)

    def addFilter(self, pattern, name):
        """Register a regex filter whose matches go to ``handler.sub(name)``."""
        def apply_filter(block, handler):
            replacement = handler.sub(name)
            return re.sub(pattern, replacement, block)
        self.filters.append(apply_filter)

    def parse(self, f):
        """Parse ``f`` block by block between the document start/end events."""
        self.handler.start('document')
        for block in blocks(f):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                if not rule.condition(block):
                    continue
                # A true result from action() means the block is consumed.
                if rule.action(block, self.handler):
                    break
        self.handler.end('document')
class BasicTextParser(Parser):
    """Parser preloaded with the list, title, heading and paragraph rules
    and the emphasis, URL and mail filters."""

    def __init__(self, h):
        Parser.__init__(self, h)
        # Order matters: the first rule whose action returns a true value
        # ends rule processing for a block.
        self.addRule(ListRule())
        self.addRule(ListItemRule())
        self.addRule(TitleRule())
        self.addRule(HeadingRule())
        self.addRule(ParagraphRule())
        # The filter name selects the handler's sub_<name> method. It was
        # misspelled 'embhasis', so sub_emphasis was never called.
        self.addFilter(r'\*(.+?)\*', 'emphasis')
        self.addFilter(r'(http://[\.a-z0-9A-Z/]+)', 'url')
        self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail')
# Run only when executed as a script, so that importing this module does not
# block reading standard input.
if __name__ == '__main__':
    # Render the markup read from standard input as HTML on standard output.
    handle = HTMLRenderer()
    parser = BasicTextParser(handle)
    parser.parse(sys.stdin)