import urllib2
import re
import sys
import string


def get(url1, boo=None):
    """Fetch ``url1`` and return the response body, or ``None`` on failure.

    The request is sent with a browser-like ``User-agent`` header.

    Args:
        url1: URL to download.
        boo: Ignored. It is accepted only so that existing two-argument
            calls keep working. Rebinding a parameter inside a function
            is never visible to the caller, so it cannot act as a
            success flag; test the return value against ``None`` instead.

    Returns:
        The raw body produced by ``read()``, or ``None`` when the request
        fails with an HTTP or URL error (a message is printed first).
    """
    request = urllib2.Request(url1)
    opener = urllib2.build_opener()
    opener.addheaders = [("User-agent", "Mozilla/5.0")]
    try:
        response = opener.open(request)
    except urllib2.HTTPError as e:
        print("http erro code %s" % (e.code,))
        return None
    except urllib2.URLError as e:
        # HTTPError is a subclass of URLError and is handled above; this
        # branch covers failures without an HTTP status (DNS, refused
        # connection, ...).
        print("url erro reason %s" % (e.reason,))
        return None
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() raises.
        response.close()
|
gethtmldata:抓取网页的函数
import urllib2
import re
import sys
import string

import gethtmldata


def _extract_degrees(weadata):
    """Return each ``<digits>°`` match in ``weadata`` with the ``°`` removed."""
    # NOTE(review): the literal degree sign is non-ASCII; Python 2 only
    # accepts it when the file declares a source encoding (PEP 263).
    # It may also be an HTML-decoded "&deg" from wherever this code was
    # copied -- confirm against the real page markup.
    return [m.replace("°", "") for m in re.findall(r"\d+\b°\b", weadata)]


def _extract_conditions(weadata):
    """Return each ``ALT...TITLE`` match stripped of ``ALT="`` and ``" TITLE``."""
    # NOTE(review): ``.*`` is greedy, so two ALT/TITLE pairs on one line are
    # returned as a single match -- confirm the page has one per line.
    return [
        m.replace('ALT="', "").replace('" TITLE', "")
        for m in re.findall(r"ALT.*TITLE", weadata)
    ]


def get(urlwea):
    """Download ``urlwea`` and print the values scraped from it.

    Prints ``data get ok`` once the page is downloaded, then every number
    that precedes a degree sign (one per line), then every text found
    between ``ALT="`` and ``" TITLE`` (one per line) -- presumably the
    temperatures and weather descriptions of a forecast page.

    Args:
        urlwea: URL of the page to scrape.

    Returns:
        None.

    Raises:
        SystemExit: with status 1 when the page could not be downloaded.
    """
    # The second argument cannot carry a result back to this caller, so
    # failure is detected from the None return value instead.
    weadata = gethtmldata.get(urlwea, True)
    if weadata is None:
        print("getdata erro")
        sys.exit(1)
    print("data get ok")

    for degree in _extract_degrees(weadata):
        print(degree)
    for condition in _extract_conditions(weadata):
        print(condition)
|
用正则表达式解析出天气预报的函数
用来抓取网页的天气预报并整理,算是自己写的第一个能用的东西
阅读(424) | 评论(0) | 转发(0) |