一起学习
将Observer接口与Observable类配合使用,可以方便地实现"被观察者发生变化时自动通知观察者"的效果。
我们先看看Observer:它是一个接口,只有一个方法update(Observable o, Object arg)。只有当Observable对象先调用setChanged()、再调用notifyObservers(Object)时,已注册观察者的update方法才会被回调。
我们看例程:
//Main.java
import java.io.*;
import java.util.*;
import java.util.zip.*;
class Main implements Observer {
Main(File dir, boolean includeDirectories) {
FileWalker fw = new FileWalker();
fw.addObserver(this);
System.out.println("Main.Main()[1]");
fw.walk(dir, includeDirectories);
System.out.println("Main.Main()[2]");
}
// Simply print out the name of the file.
public void update(Observable o, Object arg) {
File f = (File)arg;
// try {
System.out.println(f.getAbsolutePath() i);
// } catch (IOException e) {
// e.printStackTrace();
// }
}
public static void main(String[] args) {
if (args.length < 1 || args.length > 2) {
System.err.println("Usage: java Main
[-d]");
System.exit(1);
}
boolean includeDirectories = args.length == 2 && args[1].equals("-d");
new Main(new File(args[0]), includeDirectories);
}
}
class FileWalker extends Observable {
// If includeDirectories is false, the walker does not notify the observers
// when it encounters a directory. Encountered files are always reported.
void walk(File dir, boolean includeDirectories) {
if (dir.isDirectory()) {
if (includeDirectories) {
System.out.println("FileWalker.walk()[1]");
setChanged();
notifyObservers(dir);
}
String[] filenames = dir.list();
// Recursively walk all subdirectories.
if (filenames != null) {
for (int i=0; i ");
} else {
new Main(args[0], Integer.parseInt(args[1]));
}
}
}
class Spider extends Observable implements Runnable {
Hashtable walked = new Hashtable();
int maxDepth;
URL homeURL;
String host;
int port;
Thread thread;
Spider(URL url, int depth) {
homeURL = url;
maxDepth = depth;
host = url.getHost();
port = getPort(url);
// Start spider thread.
thread = new Thread(this);
thread.start();
}
void walk(URL url, int curDepth) throws IOException {
Vector v = findLinks(url);
// Remove duplicates
for (int i=v.size()-1; i>=0; i--) {
try {
URL ur = new URL(url, (String)v.elementAt(i));
if (walked.get(ur) != null
|| !ur.getProtocol().equals("http")
|| !(getPort(ur) == port)
|| !ur.getHost().equals(host)) {
v.removeElementAt(i);
} else {
walked.put(ur, ur);
setChanged();
notifyObservers(new SpiderArgs(url, ur, curDepth));
}
} catch (MalformedURLException e) {
}
}
// Now walk each of the links in url.
if (curDepth < maxDepth) {
for (int i=0; i " ur);
}
}
}
}
// Finds all the links in 'url' and returns them in a vector.
Vector findLinks(URL url) throws IOException {
Vector v = new Vector();
BufferedReader in = new BufferedReader(
new InputStreamReader(url.openStream()));
String line;
String lineLC;
while ((line = in.readLine()) != null) {
while (line != null) {
int p = line.indexOf(" is on the same line.
int q = 0;
while ((q=line.indexOf(">", p)) < 0) {
String l = in.readLine();
if (l == null) { // EOF reached.
return v;
}
line = l;
}
String u = getLink(in, line, p);
if (u != null && u.length() > 0) {
v.addElement(adjustIfDir(u));
}
// Continue looking for links on the line.
line = line.substring(q 1);
}
}
in.close();
return v;
}
// Returns the port number of 'url'. If the port number is
// not defined, returns the default HTTP port number.
int getPort(URL url) {
int p = url.getPort();
if (p == -1) {
p = 80;
}
return p;
}
// This method implements a heuristic for URLs that are probably
// directories. If the last component of the URL does not contain
// a dot and does not end with a "/", then it is explicitly
// converted to a directory by appending a "/".
static String adjustIfDir(String s) {
int p = s.lastIndexOf("/") 1;
if (!s.endsWith("/") && s.indexOf(".", p) < 0) {
s = "/";
}
return s;
}
// Extracts the tag from s and then returns the remainder of
// the line.
String getLink(BufferedReader in, String s, int p)
throws IOException {
int e;
// Find the href attribute.
p = s.indexOf("href=");
if (p < 0) {
p = s.indexOf("HREF=");
if (p < 0) {
// No href so skip the tag.
return null;
}
}
// Skip the "href="
p = 5;
int q = -1;
if (s.charAt(p) == '"') {
p ;
q = s.indexOf('"', p);
} else {
q = s.indexOf(' ', p);
int q2 = s.indexOf('>', p);
if (Math.min(q, q2) < 0 && Math.max(q, q2) >= 0) {
// If one is > 0 and the other < 0, use the > 0 one.
q = Math.max(q, q2);
}
// Use the smaller of the two.
q = Math.min(q, q2);
}
// Could not complete the href tag for some reason
// so skip the tag.
if (q < 0) {
return null;
}
s = s.substring(p, q);
// Remove the reference, if any.
p = s.indexOf('#');
if (p == 0) {
return null;
} else if (p > 0) {
s = s.substring(0, p);
}
return s;
}
public void run() {
try {
walk(homeURL, 0);
} catch (IOException e) {
System.out.println("*** " homeURL);
}
}
}
/**
 * Plain value holder bundling a source URL, a destination URL and a depth.
 * It only stores the three values given to its constructor.
 */
class SpiderArgs {
    /** The source URL. */
    URL src;

    /** The destination URL. */
    URL dst;

    /** The depth associated with this pair of URLs. */
    int depth;

    /**
     * Stores the three values unchanged.
     *
     * @param src   the source URL
     * @param dst   the destination URL
     * @param depth the depth value
     */
    SpiderArgs(URL src, URL dst, int depth) {
        this.depth = depth;
        this.dst = dst;
        this.src = src;
    }
}
该程序主要利用Runnable线程技术和一些字符串判断,递归地提取一个页面中的所有链接。当然此程序还有一些小漏洞:例如用FrontPage生成的、通过area href定义的链接没有被考虑在内。其实这类链接的解析(parse)方法也是一样的。
下载本文示例代码
Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例Observer和Observable的应用实例
阅读(178) | 评论(0) | 转发(0) |