# Category: Python/Ruby
# 2011-09-15 21:55:44
import urllib
def getWebPageContent(url):
    """Fetch a web page and return its content.

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as ``read()`` delivers it.
    """
    f = urllib.urlopen(url)
    try:
        return f.read()
    finally:
        # Release the connection even when read() raises.
        f.close()
# Demo: fetch the page with the urllib-based helper and print it.
url = 'http://blog.csdn.net'
content = getWebPageContent(url)
print(content)
# Pycurl参考地址:http://pycurl.sourceforge.net/
# Pycurl下载地址:http://pycurl.sourceforge.net/download/pycurl-7.18.1.tar.gz
# -*-coding: UTF-8 -*-
import pycurl
import StringIO


def getURLContent_pycurl(url):
    """Fetch a URL with pycurl and return the response body.

    Redirects are followed, up to a maximum of 5.

    Args:
        url: Address of the page to download.

    Returns:
        Everything pycurl wrote into the in-memory buffer.
    """
    b = StringIO.StringIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, url)
        # Received data is handed to this callback; collect it in memory.
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.MAXREDIRS, 5)
        # Proxy settings (uncomment and fill in when a proxy is needed):
        # c.setopt(pycurl.PROXY, '')
        # c.setopt(pycurl.PROXYUSERPWD, 'aaa:aaa')
        c.perform()
    finally:
        # Free the curl handle even if the transfer raises.
        c.close()
    return b.getvalue()
# Demo: fetch the page with the pycurl-based helper and print it.
url = 'http://blog.csdn.net'
content = getURLContent_pycurl(url)
print(content)
# -*-coding: UTF-8 -*-
import cPAMIE
def getURLContent_cPAMIE(url):
    """Open a page through a cPAMIE browser object and return its text.

    Args:
        url: Address of the page to navigate to.

    Returns:
        Whatever ``pageGetText()`` reports for the loaded page.
    """
    g_ie = cPAMIE.PAMIE()
    try:
        g_ie.showDebugging = False
        g_ie.frameName = None
        g_ie.navigate(url)
        return g_ie.pageGetText()
    finally:
        # Always shut the browser down, even if navigation or text
        # extraction raises, so no browser instance is left behind.
        g_ie.quit()
# Demo: fetch the page text with the cPAMIE-based helper and print it.
url = 'http://blog.csdn.net'
content = getURLContent_cPAMIE(url)
print(content)
# -*- coding: UTF-8 -*-
import urllib
# Demo: save the page directly to a local file with urlretrieve.
path = 'C://temp//csdn.net.html'
url = 'http://blog.csdn.net'
urllib.urlretrieve(url, path)
# Twisted框架下载:
# -*-coding: UTF-8 -*-
from twisted.internet import reactor
from twisted.web import client


def result(content):
    """Print the downloaded page and stop the reactor.

    Args:
        content: Value delivered by the download deferred.
    """
    print(content)
    reactor.stop()


def _download_failed(failure):
    """Print the error message of a failed download and stop the reactor.

    Args:
        failure: The Failure passed along the errback chain.
    """
    print(failure.getErrorMessage())
    reactor.stop()


# NOTE(review): getPage is deprecated in newer Twisted releases — confirm
# that the installed version still provides it.
deferred = client.getPage("http://blog.csdn.net")
deferred.addCallback(result)
# Without an errback, a failed request would never stop the reactor and
# reactor.run() below would block forever.
deferred.addErrback(_download_failed)
reactor.run()