分类:
2008-01-15 18:02:57
#!/bin/sh
# Page through Baidu search results and collect candidate host names.
#
# Usage: ./1.sh [START_OFFSET]
#   START_OFFSET  value of the "pn" query parameter to start from (default 0).
#                 When the script stops it prints the offset to resume from.
#
# Each pass downloads one result page into baidu.html, appends the text before
# the first "/" of every line containing ".gov.cn/" to host-baidu.txt, and
# advances the offset by 100. The loop ends when the downloaded page is
# byte-identical to the previous one (kept as baidu.html.bak), or when wget
# reports a failure.
#
# Requires: wget, html2text, diff.

set -u

# NOTE(review): the published listing only contained
# "%3Acs.hit.edu.cn&rn=100&" here, which wget cannot fetch (no scheme or
# host). The Baidu search prefix below is a reconstruction -- confirm it.
# NOTE(review): the query is site:cs.hit.edu.cn, yet the filter further down
# keeps ".gov.cn/" lines and the original notes mention site:gov.cn / site:(gov.cn).
# Confirm which site the query is meant to target.
URL="http://www.baidu.com/s?wd=site%3Acs.hit.edu.cn&rn=100&"

# Start offset: first argument if given, otherwise 0. Reject anything that is
# not a plain non-negative integer so the arithmetic below cannot fail later.
a=${1:-0}
case $a in
  *[!0-9]*)
    echo "usage: $0 [START_OFFSET]  (non-negative integer)" >&2
    exit 2
    ;;
esac

while true; do
  # Keep the previous page for the "did anything change?" comparison.
  # On the very first pass there is no page yet, so there is nothing to move.
  if [ -f baidu.html ]; then
    mv -f baidu.html baidu.html.bak
  fi

  # Stop right away on a download error instead of parsing a broken page
  # and silently skipping 100 results.
  if ! wget -O baidu.html -w 5 --random-wait "${URL}pn=${a}"; then
    echo "wget failed at a=${a}" >&2
    echo "Rerun the shell:" >&2
    echo "\$./1.sh ${a}" >&2
    exit 1
  fi

  # An unchanged page means no new results were served for this offset:
  # report where to resume and finish.
  if [ -f baidu.html.bak ] && diff baidu.html baidu.html.bak > /dev/null; then
    echo "a=${a}"
    echo "Rerun the shell:"
    echo "\$./1.sh ${a}"
    exit 0
  fi

  # -F: match ".gov.cn/" literally (the dots are not regex wildcards).
  html2text baidu.html \
    | grep -F '.gov.cn/' \
    | cut -d '/' -f 1 >> host-baidu.txt

  a=$(( a + 100 ))
  sleep 1
done
#!/bin/sh
# Post-process the harvested host list: keep only entries under gov.cn,
# sort and de-duplicate them, and write the result to baidu-result.txt.
#
# Reads:   host-baidu.txt   (one candidate host per line)
# Writes:  baidu-result.txt
# Removes: baidu.html and any leftover host-baidu.tmp in the working directory

# Filter, sort and de-duplicate the hosts in host-baidu.txt.
# Outputs: a banner on stdout once baidu-result.txt has been written.
# Returns: 0 on success; 1 when host-baidu.txt is missing/unreadable or the
#          result file cannot be written.
extract_gov_hosts() {
  if [ ! -r host-baidu.txt ]; then
    echo "host-baidu.txt not found or unreadable; nothing to process" >&2
    return 1
  fi

  # The dots are escaped and the match starts on a label boundary, so
  # "www.x.gov.cn" and a bare "gov.cn" pass while "mygov.cn" or "xgovzcn"
  # do not. grep exiting 1 (no match) simply yields an empty result file.
  grep -E '(^|\.)gov\.cn$' host-baidu.txt | sort -u > baidu-result.txt || return 1

  # host-baidu.tmp is no longer created by this script; removing it only
  # clears a leftover from earlier runs.
  rm -f host-baidu.tmp baidu.html

  echo "###################################"
  echo " Result In baidu-result.txt"
  echo "###################################"
}

extract_gov_hosts