分类:
2008-09-10 10:06:38
可以同时下载多个文件,不过单个文件的下载还没有使用多线程,也没有断点续传功能,后续继续完善:
view plaincopy to clipboardprint?
package com.util.file;
public class Files {
    /**
     * Returns the root directory of the application.
     *
     * @return the value of the {@code user.dir} system property
     */
    public static String getSysPath() {
        final String workingDirectory = System.getProperty("user.dir");
        return workingDirectory;
    }
}
package com.util.file;
public class Files {
/**
* Returns the root directory of the application.
* The value is read from the {@code user.dir} system property.
* @return the application root directory
*/
public static String getSysPath(){
return System.getProperty("user.dir");
}
}view plaincopy to clipboardprint?
view plaincopy to clipboardprint?
package com.core.crawl;import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;import com.core.http.Http;
/**
 * Downloads a single web resource to a local file.
 *
 * <p>Configure the instance with {@link #setWebAddress(String)} and
 * {@link #setDestFile(String)}, then either call {@link #download()} directly
 * or hand the instance to a {@link Thread}.
 */
public class WebSpider implements Runnable {
    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14";

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    // NOTE(review): Http is declared elsewhere; fileType(...) presumably maps a
    // Content-Type value to a file-name suffix -- confirm against com.core.http.Http.
    private Http http = new Http();
    private String webAddress = "";
    private String destFile = "";

    /**
     * Sets the URL to download.
     *
     * @param webAddress the address of the resource to fetch
     */
    public void setWebAddress(String webAddress) {
        this.webAddress = webAddress;
    }

    /**
     * Sets the destination path; the value returned by {@code Http.fileType}
     * is appended to it when the file is created.
     *
     * @param destFile the destination path without the type suffix
     */
    public void setDestFile(String destFile) {
        this.destFile = destFile;
    }

    /**
     * Fetches the configured address with an HTTP GET and writes the response
     * body to {@code destFile + fileType}.
     *
     * <p>Failures are reported on standard output and signalled through the
     * return value; the streams and the connection are always released.
     *
     * @return {@code true} if the whole body was written, {@code false} if any
     *         step failed
     * @throws IOException declared for compatibility with existing callers
     * @throws InterruptedException declared for compatibility with existing callers
     */
    public boolean download() throws IOException, InterruptedException {
        HttpURLConnection httpConn = null;
        InputStream in = null;
        FileOutputStream out = null;
        try {
            URL url = new URL(webAddress);
            httpConn = (HttpURLConnection) url.openConnection();
            httpConn.setRequestMethod("GET");
            httpConn.setRequestProperty("User-Agent", USER_AGENT);
            in = httpConn.getInputStream();
            String fileType = http.fileType(httpConn.getContentType());
            System.out.println(fileType);
            out = new FileOutputStream(new File(destFile + fileType));

            // Copy in chunks instead of one byte per read/write call.
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            // Close explicitly so a failure while closing is reported as a failed download.
            out.close();
            return true;
        } catch (Exception ex) {
            System.out.println(ex.toString());
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(in);
            // httpConn is still null when the URL was malformed or the connection could not be opened.
            if (httpConn != null) {
                httpConn.disconnect();
            }
        }
    }

    /**
     * Runs {@link #download()} and prints the stack trace of any exception it throws.
     */
    public void run() {
        try {
            download();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /** Closes the resource if it is non-null, ignoring any failure while closing. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: the outcome of the download has already been decided.
        }
    }
}
view plaincopy to clipboardprint?package com.core.crawl; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import com.core.http.Http; public class WebSpider implements Runnable{ private Http http = new Http(); private String webAddress = ""; private String destFile = ""; public void setWebAddress(String webAddress){ this.webAddress = webAddress; } public void setDestFile (String destFile){ this.destFile = destFile; } public boolean download() throws IOException, InterruptedException { HttpURLConnection httpConn = null; try { URL url = new URL(webAddress); httpConn = (HttpURLConnection) url.openConnection(); httpConn.setRequestMethod("GET"); httpConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14"); InputStream in = httpConn.getInputStream(); String fileType = http.fileType(httpConn.getContentType()); System.out.println(fileType); FileOutputStream out = new FileOutputStream(new File(destFile + fileType)); int chByte = in.read(); while (chByte != -1) { out.write(chByte); //System.out.println(chByte); chByte = in.read(); } } catch (Exception ex) { System.out.println(ex.toString()); } finally { httpConn.disconnect(); } return true; } public void run() { try { //System.out.println(Thread.currentThread().getName()); download(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } } package com.core.crawl;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import com.core.http.Http;
/**
 * Downloads a single web resource to a local file.
 *
 * <p>Configure the instance with {@link #setWebAddress(String)} and
 * {@link #setDestFile(String)}, then either call {@link #download()} directly
 * or hand the instance to a {@link Thread}.
 */
public class WebSpider implements Runnable {
    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14";

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    // NOTE(review): Http is declared elsewhere; fileType(...) presumably maps a
    // Content-Type value to a file-name suffix -- confirm against com.core.http.Http.
    private Http http = new Http();
    private String webAddress = "";
    private String destFile = "";

    /**
     * Sets the URL to download.
     *
     * @param webAddress the address of the resource to fetch
     */
    public void setWebAddress(String webAddress) {
        this.webAddress = webAddress;
    }

    /**
     * Sets the destination path; the value returned by {@code Http.fileType}
     * is appended to it when the file is created.
     *
     * @param destFile the destination path without the type suffix
     */
    public void setDestFile(String destFile) {
        this.destFile = destFile;
    }

    /**
     * Fetches the configured address with an HTTP GET and writes the response
     * body to {@code destFile + fileType}.
     *
     * <p>Failures are reported on standard output and signalled through the
     * return value; the streams and the connection are always released.
     *
     * @return {@code true} if the whole body was written, {@code false} if any
     *         step failed
     * @throws IOException declared for compatibility with existing callers
     * @throws InterruptedException declared for compatibility with existing callers
     */
    public boolean download() throws IOException, InterruptedException {
        HttpURLConnection httpConn = null;
        InputStream in = null;
        FileOutputStream out = null;
        try {
            URL url = new URL(webAddress);
            httpConn = (HttpURLConnection) url.openConnection();
            httpConn.setRequestMethod("GET");
            httpConn.setRequestProperty("User-Agent", USER_AGENT);
            in = httpConn.getInputStream();
            String fileType = http.fileType(httpConn.getContentType());
            System.out.println(fileType);
            out = new FileOutputStream(new File(destFile + fileType));

            // Copy in chunks instead of one byte per read/write call.
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            // Close explicitly so a failure while closing is reported as a failed download.
            out.close();
            return true;
        } catch (Exception ex) {
            System.out.println(ex.toString());
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(in);
            // httpConn is still null when the URL was malformed or the connection could not be opened.
            if (httpConn != null) {
                httpConn.disconnect();
            }
        }
    }

    /**
     * Runs {@link #download()} and prints the stack trace of any exception it throws.
     */
    public void run() {
        try {
            download();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /** Closes the resource if it is non-null, ignoring any failure while closing. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: the outcome of the download has already been decided.
        }
    }
}
view plaincopy to clipboardprint?
package com.core.crawl;import java.io.IOException;
import com.util.file.Files;
public class Crawl {
    /**
     * Starts two {@code WebSpider} downloads on separate threads, waits for
     * both to finish, then prints "the end" followed by the elapsed time in
     * milliseconds.
     *
     * @param args command-line arguments (not used)
     * @throws IOException declared by the original signature
     * @throws InterruptedException if the calling thread is interrupted while
     *         waiting for a worker thread to finish
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        final long startedAt = System.currentTimeMillis();

        WebSpider second = new WebSpider();
        WebSpider first = new WebSpider();
        configure(first, "", "spider1.");
        configure(second, "", "spider2.");

        // Start every worker before joining any so the downloads overlap.
        Thread[] workers = { new Thread(first), new Thread(second) };
        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }

        System.out.println("the end");
        System.out.println(System.currentTimeMillis() - startedAt);
    }

    /** Points a spider at an address and at a destination file under the application root. */
    private static void configure(WebSpider spider, String address, String fileName) {
        spider.setWebAddress(address);
        spider.setDestFile(Files.getSysPath() + "/" + fileName);
    }
}
测试通过: