httplib2 对 cookie 的支持不是很好,虽然可以手动设定,但是不够自动化。
官方文档给出的做法是在 POST 之后设置: headers['Cookie'] = resp['set-cookie']
但是我测试发现,这样设置之后 GET 请求里的 cookie 并不是所设置的值,而是每次都在改变;也就是说,需要把 headers['Cookie']
设置成一个合法的内容才行。我是抓包之后按抓到的内容手动设置的,这样是没有问题的。
所以技术上选择 python3 自带的 urllib。
1. 代理和cookie的设置
- import urllib.request, urllib.error, urllib.parse
- import urllib.response
- import http.cookiejar
- import gzip
- import io
- # Proxy and cookie setup: build one opener that combines a proxy handler with a cookie handler, then make it the default.
- proxy_support = urllib.request.ProxyHandler({'http': ''})  # NOTE(review): the proxy URL is empty in this listing -- presumably elided; supply a real one
- cookie_support= urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar())  # cookie handling backed by a fresh CookieJar
- opener = urllib.request.build_opener(proxy_support, cookie_support, urllib.request.HTTPHandler)
- urllib.request.install_opener(opener)  # from here on urllib.request.urlopen() goes through this opener
- content = urllib.request.urlopen('').read()  # NOTE(review): the URL is empty in this listing -- supply the page to fetch
2. 自动压缩的设置
- # deflate support
def deflate(data):
    """Decompress a ``Content-Encoding: deflate`` payload.

    Servers disagree on what "deflate" means: some send a raw DEFLATE
    stream, others send the zlib-wrapped format.  Raw DEFLATE is tried
    first; if zlib rejects it, the data is decoded as a zlib stream.

    Args:
        data: The compressed body as bytes.

    Returns:
        The decompressed bytes.  An empty input yields ``b""``.

    Raises:
        zlib.error: If the data is valid in neither format.
    """
    import zlib

    # An empty body is not a valid stream in either format and would make
    # both attempts below raise; there is simply nothing to decompress.
    if not data:
        return b""

    try:
        # Negative wbits tells zlib to expect a raw stream with no header.
        return zlib.decompress(data, -zlib.MAX_WBITS)
    except zlib.error:
        return zlib.decompress(data)
class ContentEncodingProcessor(urllib.request.BaseHandler):
    """A handler that adds gzip/deflate capabilities to urllib.request.

    Outgoing requests advertise ``Accept-Encoding: gzip, deflate``; incoming
    responses whose ``Content-Encoding`` is gzip or deflate are replaced by
    a response object whose body is already decompressed.  The same hooks
    are registered for both the http and https schemes.
    """

    def http_request(self, req):
        """Add the Accept-Encoding header to the request and return it."""
        req.add_header("Accept-Encoding", "gzip, deflate")
        return req

    def http_response(self, req, resp):
        """Return a response whose body is decoded per Content-Encoding.

        Args:
            req: The request that produced ``resp`` (not used).
            resp: The response as received from the server.

        Returns:
            ``resp`` itself when the encoding is neither gzip nor deflate;
            otherwise a new ``addinfourl`` carrying the decompressed body
            together with the original headers, url, code and msg.  The
            headers (including Content-Encoding) are passed through
            unchanged.
        """
        # Content-coding names are case-insensitive, so normalise first.
        encoding = (resp.headers.get("content-encoding") or "").strip().lower()
        if encoding not in ("gzip", "deflate"):
            return resp

        compressed = resp.read()
        # The body has been fully consumed; release the original response.
        resp.close()

        if encoding == "gzip":
            body = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode="r")
        else:
            body = io.BytesIO(deflate(compressed))

        # addinfourl supplies info()/geturl() on top of the file-like body.
        decoded = urllib.response.addinfourl(body, resp.headers, resp.url, resp.code)
        decoded.msg = resp.msg
        return decoded

    # Register the same hooks for HTTPS so those responses are decoded too.
    https_request = http_request
    https_response = http_response
调用时十分方便:
# The handler class itself is passed on to build_opener below.
encoding_support = ContentEncodingProcessor
opener = urllib.request.build_opener( encoding_support, urllib.request.HTTPHandler )
# Open the page directly with the opener; if the server supports gzip/deflate the body is decompressed automatically.
# NOTE(review): the URL is empty in this listing -- presumably elided; supply the page to fetch.
content = opener.open('').read()
阅读(2611) | 评论(0) | 转发(0) |