Python下载百度新歌100的代码-niustar-ChinaUnix博客

欣欣向荣

首页　| 　博文目录　| 　关于我

niustar

博客访问： 221883
博文数量： 88
博客积分： 3020
博客等级：中校
技术积分： 707
用户组：普通用户
注册时间： 2009-02-12 16:56

文章分类

全部博文（88）

未分配的博文（88）

文章存档

2010年（26）

2009年（62）

我的朋友

相关博文

Python下载百度新歌100的代码

分类： Python/Ruby

2009-03-26 14:43:34

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina
# License: GPLv2
# Author: oneleaf
# hack by ct

import httplib
import re
import urllib
import os
import locale
# Minimum size, in megabytes, a candidate file must exceed to be downloaded.
fsize = 1
# File extensions (lower-case, leading dot) that axeldownmp3 will accept.
allowext = ['.mp3', '.wma']
# Chart selector: one of the keys of _CHART_PATHS. It is replaced below by
# the request path of the selected chart page.
topid = '0'

_CHART_PATHS = {
    '0': '/list/newhits.html',         # new songs top 100
    '1': '/topso/mp3topsong.html',     # top 500
    '2': '/list/oldsong.html',         # classic oldies
    '3': '/list/movies.html',          # movie hits
    '4': '/list/tvs.html',             # TV songs
    '5': '/minge/mp3topsong.html',     # folk song selection
    '6': '/xiaoyuan/mp3topsong.html',  # campus songs
    '7': '/list/liujinsuiyue.html',    # golden years (new)
    '8': '/list/yaogun.html',          # rock
}

# An unrecognised selector is left untouched, exactly as the original
# if/elif chain without an else branch did.
topid = _CHART_PATHS.get(topid, topid)

def getdownfileurl(url):
    """Fetch a song page and extract the media URL it points to.

    Written for Python 2 (relies on ``urllib.URLopener`` and
    ``urllib.quote``).

    Args:
        url: Raw link text scraped from a result list. Everything from the
            first ``'" '`` onwards is discarded before the link is used.

    Returns:
        The extracted address, percent-quoted and prefixed with ``http://``,
        or ``''`` when the link cannot be parsed, the page cannot be fetched,
        or the page does not contain the expected marker.
    """
    # NOTE(review): the prefix literal is empty in this copy of the script;
    # presumably a scheme/host prefix was lost -- confirm against the original.
    url = "" + url
    end = url.find('" ')
    if end < 0:
        return ''
    url = url[:end]

    # Swap whatever "tn" parameter the link carries for the one below.
    tn = re.search('&tn=(.*)&word', url)
    if tn is None:
        return ''
    url = url.replace(tn.group(0), '&tn=baidusg,mp3%20%20&word')
    print(u"正在处理 %s" % url)

    try:
        fp = urllib.URLopener().open(url)
        try:
            data = fp.read()
        finally:
            fp.close()
    except IOError as errmsg:
        # Without the page body there is nothing to extract.
        print(errmsg)
        return ''

    # NOTE(review): this pattern and the [16:-9] slice look inconsistent
    # (the pattern seems truncated) -- verify against the original script.
    expression2 = '"_blank">(.*)'
    found = re.search(expression2, data)
    if found is None:
        return ''
    url = found.group(0)[16:-9]
    try:
        url = "http://" + urllib.quote(url)
    except (KeyError, TypeError):
        # Quoting failed; fall back to the unquoted text, as before.
        pass
    print(u"发现 " + url)
    return url

def getdownurl(url):
    """Scrape the candidate downloads listed on a song page.

    Written for Python 2 (relies on ``httplib``).

    Args:
        url: Request path sent to ``mp3.baidu.com``.

    Returns:
        A list of ``(download_url, size)`` tuples. ``download_url`` is the
        value ``getdownfileurl`` produced for a scraped link (``''`` when it
        produced nothing); ``size`` is the text captured in front of an
        ``M``. When the two scraped lists differ in length the shorter one is
        padded with ``None``, matching the original ``map(None, a, b)``.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        conn.close()

    # NOTE(review): both patterns look truncated in this copy of the script
    # -- verify against the original before relying on them.
    expression2 = '(.*)target'
    links = re.findall(expression2, html)   # link list
    filesize = re.findall('(.*)M', html)    # file sizes

    # Resolve every scraped link to its final download address.
    urls = [getdownfileurl(link) or '' for link in links]

    total = max(len(urls), len(filesize))
    urls = urls + [None] * (total - len(urls))
    sizes = filesize + [None] * (total - len(filesize))
    return list(zip(urls, sizes))

def downmp3(url, author, name, filelist):
    """Download a song with ``urllib`` unless it is already present.

    Written for Python 2 (relies on ``urllib.URLopener``).

    Args:
        url: Song page path handed to ``getdownurl``.
        author: Artist name; first part of the target file name.
        name: Song title; second part of the target file name.
        filelist: Names of existing files. Byte strings are decoded with the
            locale's preferred encoding before comparison.

    Returns:
        1 if a file whose name starts with ``author-name`` already exists or
        a download succeeded, 0 if no candidate could be downloaded.
    """
    filename = author + "-" + name
    encoding = locale.getpreferredencoding()
    for entry in filelist:
        if isinstance(entry, type(u"")):
            existing = entry
        else:
            existing = entry.decode(encoding)
        if existing.startswith(filename):  # already downloaded
            print(u"文件已经下载，忽略。")
            return 1

    for candidate in getdownurl(url):
        mp3url, size = candidate[0], candidate[1]
        if not mp3url:
            # No address could be resolved for this entry.
            continue
        print(u"尝试 %s" % mp3url)
        ext = mp3url[-4:]  # file extension: the last four characters
        try:
            print(size + 'M')
            if float(size) > float(fsize):  # large enough: download it
                fp = urllib.URLopener().open(mp3url)
                try:
                    data = fp.read()
                finally:
                    fp.close()
                # Build the target name locally so a failed attempt does not
                # leave an extension appended for the next candidate.
                with open(filename + ext, 'w+b') as out:
                    out.write(data)
                print(u"下载成功!")
                return 1
            # NOTE(review): the comparison was cut off in this copy of the
            # script; reconstructed as "smaller than fsize".
            elif float(size) < float(fsize):
                print(u"文件太小,忽略!")
        except (IOError, OSError, TypeError, ValueError):
            # Best effort: a bad size or a failed transfer moves on to the
            # next candidate.
            continue
    return 0

def axeldownmp3(url, author, name, filelist):
    """Download a song with the external ``axel`` program.

    ``axel`` must be installed (on Ubuntu: ``sudo apt-get install axel``).

    Args:
        url: Song page path handed to ``getdownurl``.
        author: Artist name; first part of the target file name.
        name: Song title; second part of the target file name.
        filelist: Names of existing files. Byte strings are decoded with the
            locale's preferred encoding before comparison.

    Returns:
        1 if a file whose name contains ``author-name`` already exists or
        ``axel`` exited with status 0, 0 if no candidate could be downloaded.
    """
    filename = author + "-" + name
    encoding = locale.getpreferredencoding()
    for entry in filelist:
        if isinstance(entry, type(u"")):
            existing = entry
        else:
            existing = entry.decode(encoding)
        if filename in existing:  # already downloaded
            print(u"%s 文件已经下载，忽略。" % filename)
            return 1

    print(u'获取文件列表')
    urllists = getdownurl(url)
    print(u"获得 %d 个下载地址" % len(urllists))

    for candidate in urllists:
        mp3url, size = candidate[0] or '', candidate[1]
        print(u"尝试 %s" % mp3url)
        ext = mp3url[-4:].lower()  # file extension: the last four characters
        if ext not in allowext:
            print(u"文件扩展名 %s 名不允许，忽略" % ext)
            continue
        try:
            print(size + 'M')
            if float(size) > float(fsize):  # large enough: download it
                savefilename = filename + ext
                # spawnlp's argument list starts with argv[0]; each option
                # and its value are passed as separate arguments so the
                # output name does not pick up a leading space.
                status = os.spawnlp(os.P_WAIT, 'axel', 'axel', '-q',
                                    '-n', '20', '-o', savefilename, mp3url)
                if status == 0:
                    print(u"下载成功!")
                    return 1
            # NOTE(review): the comparison was cut off in this copy of the
            # script; reconstructed as "smaller than fsize".
            elif float(size) < float(fsize):
                print(u"文件太小,忽略!")
        except (IOError, OSError, TypeError, ValueError):
            # Best effort: a bad size or a failed spawn moves on to the next
            # candidate.
            continue
    return 0

# Script entry point: fetch the chart page selected by topid, then try to
# download every listed song with axeldownmp3.
# NOTE(review): indentation and several fragments of this block appear to
# have been lost when the page was extracted; the notes below mark the spots
# that need to be restored from the original script.
if __name__ == "__main__":
conn = httplib.HTTPConnection('list.mp3.baidu.com')
conn.request("GET",topid ) # chart page path
response = conn.getresponse()
html=response.read().decode('gbk')
conn.close()
# NOTE(review): expression2 looks truncated, and expression3 (used below) is
# never assigned in this copy.
expression1='border">(.*).'
expression2='>'
listSentence1 = re.findall(expression1, html) # rank number pattern
listSentence2 = re.findall(expression2, html) # song title pattern
listSentence3 = re.findall(expression3, html) # artist name pattern
lineno=0
# NOTE(review): the loop condition is missing; presumably
# "while lineno<len(...):" -- confirm which list bounds the loop.
while lineno url=re.search('(.*)target',listSentence2[lineno])
url='/m'+url.group(0)[:-8]
idno=listSentence1[lineno]
name=re.search('blank>(.*)',listSentence2[lineno])
name=name.group(0)[6:]
# A '/' in the artist field is treated as two performers.
dirty=re.search('/',listSentence3[lineno])
if dirty is not None : # duet
# NOTE(review): the pattern on the next line is cut off mid-string and two
# statements have been fused together.
author1=re.search('>(.*)/ author1=author1.group(0)[1:-7]
author2=re.search('/',listSentence3[lineno])
author2=re.search('>(.*)<',author2.group(0))
author2=author2.group(0)[1:-1]
author=author1 + '+' + author2
elif dirty is None : # solo
# NOTE(review): the pattern on the next line is cut off mid-string and two
# statements have been fused together.
author=re.search('blank>(.*) author=author.group(0)[6:-2]
name=name.strip()
author=author.strip()
print u"开始下载",idno,name,author,u"来自",url
# Existing files in the working directory, used to skip finished downloads.
filelist=os.listdir('.');
if axeldownmp3(url,author,name,filelist)==0: # download failed
print u"下载",author,name,u'失败！'
lineno+=1

阅读(750) | 评论(0) | 转发(0) |

上一篇：InstallShield内部库函数版本检测函数

下一篇：MS Windows DCE-RPC svcctl ChangeServiceConfig2A()

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6