// [代码] 华语
package com;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import net.sf.json.JSONObject;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Batch-downloads the songs listed on the Baidu Music home page.
 *
 * <p>The page at {@link #BAI_DU_MUSIC} is fetched with Jsoup, every entry of its
 * "song-list" elements is resolved to a download link, and the response body is
 * written to a ".mp3" file named after the song title inside the configured
 * directory. Songs whose target file already exists are skipped.
 *
 * @author gehouse
 */
public class MusicSpider {

    /**
     * Base URL of the site; fetched as the song list page and prefixed to every
     * song link.
     *
     * <p>NOTE(review): the value is empty in this copy of the file - presumably the
     * original URL was lost. It has to be restored before the crawler can fetch
     * anything.
     */
    public static final String BAI_DU_MUSIC = "";

    /** HTTP status code that marks a successful download response. */
    private static final int HTTP_OK = 200;

    /** Size of the buffer used while streaming a response to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Number of characters, counted from the '?', that precede the real download
     * URL inside the link published by the page. Presumably the length of a prefix
     * such as "?link=" - TODO confirm against the live page.
     */
    private static final int LINK_PREFIX_LENGTH = 6;

    /** Characters that are replaced when a song title is used as a file name. */
    private static final String ILLEGAL_FILE_NAME_CHARS = "\\/:*?\"<>|";

    /** Directory the songs are saved to; always ends with "/". */
    private final String saveDestinationPath;

    /**
     * Creates a spider that saves into the given directory.
     *
     * @param saveDestinationPath target directory; a trailing "/" is appended when
     *        missing
     * @throws NullPointerException if {@code saveDestinationPath} is null
     */
    public MusicSpider(String saveDestinationPath) {
        if (saveDestinationPath == null) {
            throw new NullPointerException("saveDestinationPath must not be null");
        }
        if (!saveDestinationPath.endsWith("/")) {
            saveDestinationPath += "/";
        }
        this.saveDestinationPath = saveDestinationPath;
    }

    /**
     * Fetches and parses the page at {@code url}, sending browser-like headers.
     *
     * @param url address of the page to load
     * @return the parsed document, or {@code null} when the request failed with an
     *         {@link IOException}
     */
    private Document initDocument(String url) {
        try {
            return Jsoup
                    .connect(url)
                    .header("User-Agent",
                            "Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1")
                    // The original literal was the garbled "text ml,..."; "text/html" is the
                    // media type that pairs with application/xhtml+xml.
                    .header("Accept", "text/html,application/xhtml+xml")
                    .header("Accept-Language", "zh-cn,zh;q=0.5")
                    .header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7")
                    .get();
        } catch (IOException e) {
            System.err.println("Failed to fetch " + url + ": " + e);
            return null;
        }
    }

    /**
     * Resolves the download link published on a song's download page.
     *
     * <p>The page is expected to contain an element with id "form" whose first
     * {@code li} carries a JSON object in its "data-data" attribute; the "link"
     * member of that object holds the wrapped download URL.
     *
     * @param url path of the download page, relative to {@link #BAI_DU_MUSIC}
     * @return the download URL, or {@code null} when the URL points to an artist
     *         page, the page could not be loaded, or it has no usable link
     */
    private String getDownLoadLink(String url) {
        if (url == null || url.contains("artist")) {
            return null;
        }
        Document doc = initDocument(BAI_DU_MUSIC + url);
        if (doc == null) {
            return null;
        }
        Element form = doc.getElementById("form");
        if (form == null) {
            return null;
        }
        Elements linkItems = form.getElementsByTag("li");
        if (linkItems.isEmpty()) {
            return null;
        }
        String data = linkItems.get(0).attr("data-data");
        if (data.length() == 0) {
            return null;
        }
        Object linkValue = JSONObject.fromObject(data).get("link");
        if (!(linkValue instanceof String)) {
            // Missing or non-string "link" member: nothing to download.
            return null;
        }
        String link = (String) linkValue;
        int queryStart = link.indexOf('?');
        if (queryStart < 0) {
            // No wrapper to strip; assume the value already is the download URL.
            return link;
        }
        int targetStart = queryStart + LINK_PREFIX_LENGTH;
        if (targetStart >= link.length()) {
            return null;
        }
        return link.substring(targetStart);
    }

    /**
     * Downloads one song and stores it as {@code saveName + ".mp3"}.
     *
     * <p>The response is streamed straight to disk. A file left behind by a failed
     * download is deleted so that a later run does not mistake it for a finished
     * song. Failures are reported on standard error and do not propagate.
     *
     * @param downLoadlink URL of the audio file
     * @param saveName song title; characters that are not valid in file names are
     *        replaced before it is used as the file name
     * @param saveDestinationPath directory to save into; created when missing
     */
    private void saveMusic(String downLoadlink, String saveName, String saveDestinationPath) {
        System.out.print(saveName + "----> 下载中......");
        long startedAt = System.currentTimeMillis();
        File directory = new File(saveDestinationPath);
        File target = new File(directory, sanitizeFileName(saveName) + ".mp3");
        GetMethod get = null;
        boolean completed = false;
        try {
            if (!directory.exists() && !directory.mkdirs()) {
                throw new IOException("cannot create directory " + directory);
            }
            get = new GetMethod(downLoadlink);
            int status = new HttpClient().executeMethod(get);
            if (status != HTTP_OK) {
                // Do not store an error page under an .mp3 name.
                throw new IOException("unexpected HTTP status " + status);
            }
            InputStream body = get.getResponseBodyAsStream();
            if (body == null) {
                throw new IOException("response has no body");
            }
            copyToFile(body, target);
            completed = true;
        } catch (FileNotFoundException e) {
            System.err.println("Cannot write " + target + ": " + e);
        } catch (IOException e) {
            // Also covers HttpException, which is an IOException in HttpClient 3.x.
            System.err.println("Download of " + downLoadlink + " failed: " + e);
        } finally {
            if (get != null) {
                get.releaseConnection();
            }
            if (!completed && target.exists() && !target.delete()) {
                System.err.println("Could not remove incomplete file " + target);
            }
        }
        long elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000;
        System.out.println(" 下载耗时:" + elapsedSeconds + "秒");
    }

    /**
     * Streams {@code in} to {@code target} and closes both streams, also when the
     * copy fails.
     */
    private static void copyToFile(InputStream in, File target) throws IOException {
        try {
            FileOutputStream out = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                // read() signals end of stream with -1.
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Turns a scraped song title into a name that is safe to use as a single file
     * name: path separators, control characters and the characters in
     * {@link #ILLEGAL_FILE_NAME_CHARS} become '_', and surrounding whitespace is
     * trimmed.
     *
     * @param name raw title, may be null
     * @return the sanitized name, possibly empty
     */
    private static String sanitizeFileName(String name) {
        if (name == null) {
            return "";
        }
        StringBuilder safe = new StringBuilder(name.length());
        for (int i = 0; i < name.length(); i++) {
            char c = name.charAt(i);
            boolean illegal = c < ' ' || ILLEGAL_FILE_NAME_CHARS.indexOf(c) >= 0;
            safe.append(illegal ? '_' : c);
        }
        return safe.toString().trim();
    }

    /**
     * Crawls the song list page and downloads every song that is not yet present
     * in the save directory. Entries without a link, without a title, or without a
     * resolvable download URL are skipped.
     */
    public void catchMusic() {
        Document doc = initDocument(BAI_DU_MUSIC);
        if (doc == null) {
            System.err.println("Could not load the song list page; nothing was downloaded.");
            return;
        }
        Elements songLists = doc.getElementsByClass("song-list"); // all song lists on the page
        for (Element songList : songLists) {
            for (Element item : songList.getElementsByTag("li")) {
                Elements anchors = item.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    continue;
                }
                Element anchor = anchors.get(0);
                String musicName = anchor.attr("title");
                String fileName = sanitizeFileName(musicName);
                if (fileName.length() == 0) {
                    continue;
                }
                // Same file name derivation as saveMusic, so finished songs are recognised.
                File target = new File(saveDestinationPath, fileName + ".mp3");
                if (target.exists()) {
                    continue;
                }
                String href = anchor.attr("href");
                String songId = href.substring(href.lastIndexOf('/') + 1);
                String downLoadlink = getDownLoadLink(href + "/download?__o=%2Fsong%2F" + songId);
                if (downLoadlink == null) {
                    continue;
                }
                saveMusic(downLoadlink, musicName, saveDestinationPath);
            }
        }
    }

    /**
     * Runs the spider.
     *
     * @param args optional; {@code args[0]} overrides the default save directory
     *        "e:/baidu_music"
     */
    public static void main(String[] args) {
        String destination = args.length > 0 ? args[0] : "e:/baidu_music";
        new MusicSpider(destination).catchMusic();
    }
}
// 阅读(634) | 评论(0) | 转发(0) |