分类: Python/Ruby
2014-03-21 11:28:46
原文地址:python使用urllib2抓取防爬取链接 作者:chinaboywg
# Plain fetch with no custom request headers: open `url` and read the whole
# response body. The value returned by read() is not stored here.
urllib.urlopen(url).read()
def get_url_content(url):
    """Fetch ``url`` with browser-like request headers and return its body.

    The request carries a Firefox 3.5 ``User-Agent`` string and an empty
    ``Referer`` header instead of urllib2's defaults.

    Args:
        url: The URL to request.

    Returns:
        The complete response body as returned by ``read()``.

    Raises:
        Nothing is caught here, so any error raised by
        ``urllib2.urlopen`` propagates to the caller.
    """
    i_headers = {
        "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",
        "Referer": '',
    }
    req = urllib2.Request(url, headers=i_headers)
    response = urllib2.urlopen(req)
    # Python 2 response objects are not context managers, so close the
    # underlying connection explicitly even if read() fails.
    try:
        return response.read()
    finally:
        response.close()
def get_content_by_proxy(url, proxy):
    """Fetch ``url`` through an HTTP proxy and return the response body.

    The request carries a Firefox 3.5 ``User-Agent`` string and an empty
    ``Referer`` header. Only the ``http`` scheme is mapped to ``proxy``.

    Args:
        url: The URL to request.
        proxy: Proxy address passed to ``urllib2.ProxyHandler`` for the
            ``http`` scheme.

    Returns:
        The complete response body as returned by ``read()``.

    Raises:
        Nothing is caught here, so any error raised by
        ``urllib2.urlopen`` propagates to the caller.
    """
    opener = urllib2.build_opener(
        urllib2.ProxyHandler({'http': proxy}),
        # debuglevel=1 enables the HTTP handler's debug output.
        urllib2.HTTPHandler(debuglevel=1),
    )
    # NOTE: install_opener() replaces the process-wide default opener, so
    # every later urllib2.urlopen() call in this process uses it as well.
    urllib2.install_opener(opener)
    i_headers = {
        "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",
        "Referer": '',
    }
    req = urllib2.Request(url, headers=i_headers)
    response = urllib2.urlopen(req)
    # Python 2 response objects are not context managers, so close the
    # underlying connection explicitly even if read() fails.
    try:
        content = response.read()
    finally:
        response.close()
    return content