#!/usr/bin/perl
#
# Download a web page, extract dated link entries from it, report the entries
# that were not present on the previous run and optionally insert them into a
# MySQL table.
#
# Working files (relative to the current directory):
#   index.html  - raw copy of the downloaded page
#   result      - entries extracted on the previous run
#   new_result  - entries extracted on this run; replaces "result" afterwards
#
# Every entry is stored as one line:  date <TAB> title <TAB> site

use strict;
use warnings;

use LWP;
use Encode;
use DBI;

# Page to monitor. It is also prepended to every extracted link, so it acts
# as the base URL for the "site" field.
my $url        = '';
my $file       = 'index.html';
my $result     = 'result';        # the old one
my $new_result = 'new_result';

# FIXME(review): the original link-matching pattern was lost from this file
# (everything that looked like an HTML tag was stripped from the source).
# All that is known is that it captured a title and a site per line. The
# pattern below is a generic stand-in that assumes one <a href="...">text</a>
# per line -- confirm it against the real page markup before relying on it.
#   capture 1 = link target (site), capture 2 = link text (title)
my $entry_re = qr{<a\b[^>]*\bhref\s*=\s*["']([^"']*)["'][^>]*>(.*?)</a>}i;

print scalar(localtime), ":Now getting web,please wait.......\n";
fetch_page($url, $file);

print 'Done.Now,analyzing......', "\n";
extract_entries($file, $new_result);

# Compare by content rather than by file size: two different result files can
# easily have the same length, and "result" does not exist on the first run.
my %seen = map { $_ => 1 } read_lines($result);

my %web;
for my $entry (read_lines($new_result)) {
    next if $seen{$entry};
    my ($date, $title, $site) = split /\t/, $entry;
    next unless defined $site;    # incomplete line (e.g. a date with no link)
    $web{$title} = { date => $date, site => $url . $site };
}

# The fresh extraction becomes the baseline for the next run.
if (-e $result) {
    unlink($result) or die "can't remove $result:$!\n";
}
rename($new_result, $result) or die "can't rename:$!\n";

if (!%web) {
    print "Not Found\n";
    exit(0);
}

for my $title (sort keys %web) {
    print $title, "\t", $web{$title}{date}, "\t", $web{$title}{site}, "\n";
}

print 'Do you wang to write to the database:[Y/N]';
my $choose = <STDIN>;
$choose = '' unless defined $choose;    # EOF on STDIN counts as "no"
chomp $choose;
if ($choose eq 'y' || $choose eq 'Y') {
    store_entries(\%web);
}

exit;

# fetch_page($page_url, $save_as)
# Download $page_url and write the response body to the file $save_as.
# Dies if the request fails or the response is not text/html.
sub fetch_page {
    my ($page_url, $save_as) = @_;

    my $browser  = LWP::UserAgent->new;
    my $response = $browser->get($page_url, ':content_file' => $save_as);

    die "can't get $page_url -- ", $response->status_line, "\n"
        unless $response->is_success;
    die "can't get $page_url -- ", $response->content_type, "\n"
        unless $response->content_type eq 'text/html';
    return;
}

# extract_entries($html_file, $out_file)
# Scan $html_file line by line and write "date<TAB>title<TAB>site" lines to
# $out_file. A line containing a YYYY-M-D date starts an entry; the next line
# matching $entry_re completes it. Title and site are converted from GB2312
# to UTF-8. Dies if either file cannot be opened or the output cannot be
# flushed.
sub extract_entries {
    my ($html_file, $out_file) = @_;

    open(my $in, '<', $html_file)
        or die "can't open $html_file :$!\n";
    open(my $out, '>', $out_file)
        or die "can't open $out_file for write:$!\n";

    while (my $line = <$in>) {
        # Tab is the field separator of the result file, so no tab from the
        # page may survive into a field.
        $line =~ tr/\t//d;

        if ($line =~ /(\d{4}-\d{1,2}-\d{1,2})/) {
            # Digits and hyphens only: no transcoding needed.
            print {$out} $1, "\t";
            next;
        }

        if ($line =~ $entry_re) {
            my ($site, $title) = ($1, $2);
            print {$out} encode('utf8', decode('gb2312', $title)), "\t",
                         encode('utf8', decode('gb2312', $site)),  "\n";
        }
    }

    close($in);
    # Buffered write errors only surface at close.
    close($out) or die "can't write $out_file:$!\n";
    return;
}

# read_lines($path)
# Return the lines of $path with their trailing newlines removed, or an
# empty list when the file does not exist (first run). Dies if an existing
# file cannot be opened.
sub read_lines {
    my ($path) = @_;

    return () unless -e $path;

    open(my $fh, '<', $path) or die "can't open $path:$!\n";
    my @lines = <$fh>;
    close($fh);

    chomp @lines;
    return @lines;
}

# store_entries(\%entries)
# Insert every entry (title => { date, site }) into the "web" table.
# The values come from a scraped page, so they are bound through
# placeholders instead of being interpolated into the SQL text.
# Dies on any database error (RaiseError is enabled).
sub store_entries {
    my ($entries) = @_;

    # Connect to the database.
    my $database = 'DBI:mysql:database=wei;host=127.0.0.1';
    my $user     = 'root';
    my $pw       = '123456';
    my $dbh      = DBI->connect($database, $user, $pw, { 'RaiseError' => 1 })
        or die "can't connect to the database:$DBI::errstr\n";

    my $sth = $dbh->prepare('insert into web(title,date,site) values(?,?,?)');
    for my $title (sort keys %{$entries}) {
        $sth->execute($title, $entries->{$title}{date}, $entries->{$title}{site});
    }

    $dbh->disconnect();
    return;
}