#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import re
import cPickle
from random import randint
from time import sleep
#from urllib2 import HTTPError
#from BeautifulSoup import BeautifulSoup
WEBSITE = ""
Yaopin = "/leibie.html"
HOST = WEBSITE.split("/")[2]
def load(url, encoding="gbk"):
    """Fetch `url` and return its body re-encoded as UTF-8, or False on failure."""
    heads = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
             'Accept-Language': 'zh-cn,zh;q=0.5',
             'Cache-Control': 'max-age=0',
             'Connection': 'keep-alive',
             'Host': HOST,
             'Keep-Alive': '115',
             'Referer': url,
             'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.14) Gecko/20110221 Ubuntu/10.10 (maverick) Firefox/3.6.14'}
    # Cookie-aware opener so any session cookies are kept across requests.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    urllib2.install_opener(opener)
    req = urllib2.Request(url, headers=heads)
    while True:
        try:
            page = opener.open(req).read()
            break
        except urllib2.HTTPError, e:
            print "Load Error, %s" % e
            return False
        except KeyboardInterrupt:
            print "Interrupted"
            return False
        except Exception, e:
            # Transient failure (timeout, connection reset, ...): back off and retry.
            print "Error, %s, retrying..." % e
            sleep(randint(1, 10))
    try:
        # The site serves GBK; normalise to UTF-8 for downstream processing.
        return unicode(page, encoding).encode('utf-8')
    except UnicodeError:
        print "Unicode Error...!!!"
        return False

def main():
    # Smoke test: fetch the drug-category index page.
    load(WEBSITE + Yaopin)
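
# Illustrative sketch (not part of the original script): cPickle is imported
# above, so crawl results could be checkpointed to disk roughly like this.
# The file name and helper names are assumptions for illustration only.
def save_pages(pages, path="pages.pkl"):
    # Persist a {url: html} dict so a crawl can resume after an interruption.
    with open(path, "wb") as f:
        cPickle.dump(pages, f, cPickle.HIGHEST_PROTOCOL)

def load_pages(path="pages.pkl"):
    # Return the previously saved dict, or an empty one if no checkpoint exists.
    try:
        with open(path, "rb") as f:
            return cPickle.load(f)
    except IOError:
        return {}
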
if __name__ == "__main__":
    main()