Chinaunix首页 | 论坛 | 博客
  • 博客访问: 189380
  • 博文数量: 92
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 1413
  • 用 户 组: 普通用户
  • 注册时间: 2013-02-04 21:12
文章分类
文章存档

2013年(92)

我的朋友

分类: 信息化

2013-02-06 00:08:22

[代码] 华语 package com; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.File import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Date; import net.sf.json.JSONObject; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.methods.GetMethod; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** * 批量下载百度音乐首页的歌曲. * @author gehouse * */ public class MusicSpider { public static final String BAI_DU_MUSIC = ""; private String saveDestinationPath;//下载保存路径 public MusicSpider(String saveDestinationPath){ if(!saveDestinationPath.endsWith("/")){ saveDestinationPath += "/"; } this.saveDestinationPath = saveDestinationPath; } private Document initDocument(String url) { try { Document doc = Jsoup .connect(url) .header("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1") .header("Accept", "text ml,application/xhtml+xml") .header("Accept-Language", "zh-cn,zh;q=0.5") .header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7") .get(); return doc; } catch (IOException e) { e.printStackTrace(); } return null; } /** * 取到下载链接. * @param url * @return */ private String getDownLoadLink(String url) { if(url.contains("artist")){ return null; } url = BAI_DU_MUSIC + url; Document doc = initDocument(url); Element form = doc.getElementById("form"); if(form == null){ return null; } Elements linkLi = form.getElementsByTag("li"); JSONObject json = JSONObject.fromObject(linkLi.get(0).attr("data-data")); String link = (String) json.get("link"); if(link != null){ link = link.substring(link.indexOf("?") + 6, link.length()); } return link; } /** * 下载. * @param downLoadlink * @param saveName * @param saveDestinationPath */ private void saveMusic(String downLoadlink, String saveName, String saveDestinationPath){ System.out.print(saveName+"----> 下载中......"); Date d1 = new Date(); try { HttpClient client = new HttpClient(); GetMethod get = new GetMethod(downLoadlink); client.executeMethod(get); File file = new File(saveDestinationPath); if(!file.exists()){ file.mkdirs(); } file = new File(saveDestinationPath + saveName + ".mp3"); FileOutputStream outputStream = new FileOutputStream(file); InputStream is = get.getResponseBodyAsStream(); ByteArrayOutputStream swapStream = new ByteArrayOutputStream(); byte[] buff = new byte[1024]; // buff用于存放循环读取的临时数据 int rc = 0; while ((rc = is.read(buff)) > 0) { swapStream.write(buff, 0, rc); } byte[] in_b = swapStream.toByteArray(); outputStream.write(in_b); outputStream.close(); } catch (HttpException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } Date d2 = new Date(); System.out.println(" 下载耗时:"+((d2.getTime()-d1.getTime()) / 1000)+"秒"); } public void catchMusic(){ Document doc = initDocument(BAI_DU_MUSIC); Elements songList = doc.getElementsByClass("song-list");//取到歌曲列表 for(int i = 0;i < songList.size();i++){ Elements labelLi = songList.get(i).getElementsByTag("li"); for(int j = 0;j < labelLi.size();j++){ Elements labelA = labelLi.get(j).getElementsByTag("a"); String musicName = labelA.get(0).attr("title"); File file = new File(saveDestinationPath + musicName + ".mp3"); if(!file.exists()){ String href = labelA.get(0).attr("href"); String downLoadlink = getDownLoadLink(href + "/download?__o=%2Fsong%2F" + href.substring(href.lastIndexOf("/") + 1, href.length())); if(downLoadlink == null){ continue; } saveMusic(downLoadlink, musicName, saveDestinationPath); } } } } /** * @param args */ public static void main(String[] args) { MusicSpider ms = new MusicSpider("e:/baidu_music"); ms.catchMusic(); } }
阅读(634) | 评论(0) | 转发(0) |
0

上一篇:没有了

下一篇:jsp中自定义标签的用途

给主人留下些什么吧!~~