<?php
/**
 * Downloads the text of every chapter linked from a book's index page and
 * writes it to test.txt in the current working directory.
 *
 * Flow:
 *   1. Derive the book's base directory URL from the index page URL.
 *   2. Read the index page line by line, looking for chapter links of the
 *      form href="<digits>.html".
 *   3. For every chapter link, fetch the chapter page and write each line
 *      that matches the content marker (a line starting with a space) to
 *      test.txt after stripping some markup.
 *
 * Progress is echoed as "<chapter url></p>" per chapter and "完成</p>" at the end.
 * Reading remote pages with fopen() requires allow_url_fopen to be enabled.
 */

// Index page listing the chapters of the book to download.
$my_book_url = 'http://book.yunxiaoge.com/files/article/html/4/4550/index.html';

// Chapter links on the index page are relative file names, so the directory
// part of the index URL is needed as a prefix for every chapter URL.
if (!preg_match('#http://book\.yunxiaoge\.com/files/article/html/[0-9]+/[0-9]+/#', $my_book_url, $myBook)) {
    trigger_error("Unrecognised book index URL: $my_book_url", E_USER_WARNING);
    exit(1);
}
$my_book_txt = $myBook[0];

// Open the index page for reading.
$file_handle = fopen($my_book_url, "r");
if ($file_handle === false) {
    trigger_error("Unable to open book index: $my_book_url", E_USER_WARNING);
    exit(1);
}

// Open the output once for the whole run. Mode 'w' truncates any previous
// result, replacing the former unlink() + per-line fopen(..., 'a') which
// warned when the file was missing and leaked one handle per index line.
$handle = fopen("test.txt", 'w');
if ($handle === false) {
    fclose($file_handle);
    trigger_error("Unable to open test.txt for writing", E_USER_WARNING);
    exit(1);
}

// Walk the index page; fgets() returns false at end of file or on error.
while (($line = fgets($file_handle)) !== false) {
    // Look for a chapter link on this line (at most one link per line is used).
    if (!preg_match('/href="[0-9]+\.html/', $line, $reg)) {
        continue;
    }

    // Strip the attribute prefix to get the bare file name, then build the
    // absolute chapter URL.
    $my_book_txt_url = str_replace("href=\"", "", $reg[0]);
    $my_book_txt_over_url = "$my_book_txt$my_book_txt_url";
    echo "$my_book_txt_over_url</p>"; // progress output

    // Fetch the chapter page; skip the chapter if it cannot be opened rather
    // than aborting the whole download.
    $file_handle_txt = fopen($my_book_txt_over_url, "r");
    if ($file_handle_txt === false) {
        trigger_error("Unable to open chapter: $my_book_txt_over_url", E_USER_WARNING);
        continue;
    }

    while (($line_txt = fgets($file_handle_txt)) !== false) {
        // Content lines are recognised by a leading space. The `s` modifier
        // lets `.` also match the trailing newline kept by fgets(), which is
        // how the POSIX pattern previously used here behaved.
        // NOTE(review): the marker was probably "&nbsp;" before this file
        // was HTML-decoded somewhere along the way - confirm against the site.
        if (!preg_match('/^ .+/s', $line_txt, $content)) {
            continue;
        }

        $my_over_txt = $content[0];
        // NOTE(review): search and replacement look identical here, so this
        // is a no-op as written; it presumably replaced "&nbsp;" - confirm.
        $my_over_txt = str_replace(" ", " ", $my_over_txt);
        $my_over_txt = str_replace("<br />", "", $my_over_txt);
        $my_over_txt = str_replace("<script language=\"javascript\">", "", $my_over_txt);
        // The original line was a parse error (unescaped quote); removing
        // double-quote characters is the most direct reading of its intent.
        // NOTE(review): it may have targeted the "&quot;" entity - confirm.
        $my_over_txt = str_replace('"', "", $my_over_txt);

        fwrite($handle, "$my_over_txt\n"); // append the cleaned line to the output
    }

    // Close each chapter stream as soon as it has been consumed.
    fclose($file_handle_txt);
}

fclose($handle);
fclose($file_handle);
echo "完成</p>";
?>


