1、删除
import re
# Sample text. It spans several lines, so it must be a triple-quoted literal
# (a single-quoted literal cannot contain raw newlines).
s = '''
《大辞典》:abc
'''
# Delete an id="hi-<digit>..." attribute: one digit, then a greedy `.*` that
# runs to the last double quote on the same line. The sample above contains
# no such attribute, so `f` ends up equal to `s`.
f = re.sub(r'id="hi-[0-9].*"', "", s)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(f)
2、查找
#coding:utf-8
import re
# Sample text. It spans several lines, so it must be a triple-quoted literal
# (a single-quoted literal cannot contain raw newlines).
s = '''
《中药大辞典》:鼻血雷
'''
# Delete an id="hi-<digit>..." attribute: one digit, then a greedy `.*` that
# runs to the last double quote on the same line. The sample above contains
# no such attribute, so `f` ends up equal to `s`.
f = re.sub(r'id="hi-[0-9].*"', "", s)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(f)
phone = "2004-959-559 # 这是一个国外电话号码 aa"
# Remove everything from '#' up to and including the LAST space: `.*` is
# greedy, so it backtracks only as far as the final space in the string.
# The trailing "aa" therefore survives.
num = re.sub(r'#.* ', "", phone)
# Formatting into one string keeps the output identical on Python 2 and 3
# (label, a separating space, then the number).
print("%s %s" % ("电话号码是: ", num))
# Sample text. It spans several lines, so it must be a triple-quoted literal.
s = '''
用法用量外用:捣敷。内服:泡酒。
'''
print(s)
# Delete a class="drug <token>" attribute. The pattern is a raw string so that
# `\S` reaches the regex engine without relying on an invalid string escape.
# The sample above contains no such attribute, so `f` ends up equal to `s`.
f = re.sub(r'class="drug \S*"', "", s)
print(f)
# Text enclosed in one pair of parentheses (shortest match; may span lines).
_PAREN_GROUP = re.compile(r'\((.*?)\)', re.S)
# Separators between aliases inside a parenthesised group.
_ALIAS_SEPARATOR = re.compile(r',| |、')


def getkeys(k):
    """Split a term into its main key and the aliases listed in parentheses.

    The text before the first "(" is the main key. Every parenthesised group
    is then split on commas, spaces and the enumeration comma; each
    non-empty, stripped piece is appended as an alias.

    The previous pattern also treated "," as an opening/closing delimiter,
    so a comma inside the parentheses ended one match and started the next,
    silently dropping every second alias (e.g. "x(a,b)" gave ["x", "a"]).
    Only the parentheses delimit a group now.

    Args:
        k: The term to split, e.g. "name(alias1,alias2)".

    Returns:
        A list whose first element is the main key, followed by the aliases
        in order of appearance. If ``k`` lacks either "(" or ")", the list
        contains ``k`` alone.
    """
    open_pos = k.find("(")
    if open_pos < 0 or k.find(")") < 0:
        return [k]

    keys = [k[:open_pos]]
    for group in _PAREN_GROUP.findall(k):
        for alias in _ALIAS_SEPARATOR.split(group):
            alias = alias.strip()
            if alias:
                keys.append(alias)
    return keys
# Demo: split a sample term with getkeys() and print the result.
# Alternative sample inputs:
#s = 'abe(ac,a b,你好 f)ad)'
#s = 'abe补脾益肺(培土生金)'
s = '小方脉(少小)'
r = getkeys(s)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("aaaaaaaaaaaaaaaaaaaaaa")
print(r)
print(len(r))
for vv in r:
    print(vv)
~
阅读(1126) | 评论(0) | 转发(0) |