"
,htmlSource
,re
.I
)
#
print htmlContent
htmlSourceNew
= htmlSource
.replace
("\n"," ")
htmlContent
= re
.findall
(r
"<\/td><\/tr><\/table>",htmlSourceNew,re.I)
user += '/%s' % (blogUrl[11:])
myfile = file(user,'w')
myfile.write(""+htmlContent[0]+"
")#保存有效部分
myfile.close()
urls = re.findall(r"var.*pre.*?/blog/item/.*?html",htmlSource,re.I)
if(len(urls)==1):
blogUrl = re.findall(r"/blog/item/.*?html",urls[0],re.I)
htmlUrl = '' + user + blogUrl[0]
print htmlUrl
downloadpage(blogUrl = blogUrl[0])#递归你所有的文章
else:
break
sock.close()
return
except:
nFail += 1
print 'download blog fail: %s' % (blogUrl)
# Invoke downloadpage() with no arguments; blogUrl (passed explicitly by the
# recursive call) must therefore fall back to a default declared on the
# definition. NOTE(review): presumably this is the script's starting call
# for the whole download run -- confirm against the full file.
downloadpage()