001 Web::Scraper 获取广州天气
- #!/usr/bin/perl
- # Scrape a weather page and print every row of its ".yuBaoTable" table,
- # one row per output line, with the cell texts separated by spaces.
- #
- # Usage: pass the page URL as the first argument, or fill in $url below.
- use strict;
- use warnings;
- use utf8;
- use URI;
- use Web::Scraper;
- binmode( STDOUT, ':encoding(utf8)' );
- # The URL is blank in this listing; a command-line argument overrides it.
- my $url = @ARGV ? $ARGV[0] : "";
- die "Usage: $0 URL\n" if $url eq "";
- # Each <tr> directly under .yuBaoTable becomes one entry of rows[]; the
- # text of every <td> inside it is collected into that entry's cols[].
- my $proce = scraper {
-     process '.yuBaoTable > tr', 'rows[]' => scraper {
-         process 'td', 'cols[]' => 'TEXT';
-     };
- };
- my $res = $proce->scrape( URI->new($url) );
- for my $row ( @{ $res->{rows} || [] } ) {
-     # A row without any <td> has no cols key; fall back to an empty list
-     # so the dereference stays legal under strict (prints an empty line).
-     my @weather = @{ $row->{cols} || [] };
-     print "@weather\n";
- }
002 Web::Scraper 获取网页表格数据
- #!/usr/bin/perl
- # Scrape the table with id "UCITeamList" and print the first three cells
- # of every row (code, team, country) as left-aligned, "|"-separated columns.
- #
- # Usage: pass the page URL as the first argument, or fill in $url below.
- use strict;
- use warnings;
- use utf8;
- use URI;
- use Web::Scraper;
- binmode( STDOUT, ':encoding(utf8)' );
- # The URL is blank in this listing; a command-line argument overrides it.
- my $url = @ARGV ? $ARGV[0] : "";
- die "Usage: $0 URL\n" if $url eq "";
- # Each <tr> directly under table#UCITeamList becomes one entry of rows[];
- # the text of every <td> inside it is collected into that entry's cols[].
- my $proce = scraper {
-     process 'table#UCITeamList > tr', 'rows[]' => scraper {
-         process 'td', 'cols[]' => 'TEXT';
-     };
- };
- my $res = $proce->scrape( URI->new($url) );
- for my $row ( @{ $res->{rows} || [] } ) {
-     my @cells = @{ $row->{cols} || [] };
-     # Rows with fewer than three cells yield undef slots; print them as
-     # empty strings rather than triggering "uninitialized" warnings.
-     my ( $code, $team, $country ) =
-         map { defined $_ ? $_ : '' } @cells[ 0, 1, 2 ];
-     printf "%-5s | %-30s | %-2s\n", $code, $team, $country;
- }
- __END__
- #html 样式
- <table id=UCITeamList class=WithBorder>
- <tr>
- <td class=ColHeader>Code</td>
- <td class=ColHeader>Team</td>
- <td class=ColHeader>Country</td>
- </tr>
- <tr>
- <td><a href=/Modules/SUCI/TEAMS/TeamDetails.asp?id=OA&RefDate=05.06.2010&MenuId=MTU4MzI&LangId=1&BackLink=%2Ftemplates%2FUCI%2FUCI2%2Flayout%2Easp%3FMenuId%3DMTU4MzI%26LangId%3D1>ALM</a></td>
- <td><a href=/Modules/SUCI/TEAMS/TeamDetails.asp?id=OA&RefDate=05.06.2010&MenuId=MTU4MzI&LangId=1&BackLink=%2Ftemplates%2FUCI%2FUCI2%2Flayout%2Easp%3FMenuId%3DMTU4MzI%26LangId%3D1>AG2R LA MONDIALE</a>
- </td>
- <td><a href=/Modules/SUCI/TEAMS/TeamDetails.asp?id=OA&RefDate=05.06.2010&MenuId=MTU4MzI&LangId=1&BackLink=%2Ftemplates%2FUCI%2FUCI2%2Flayout%2Easp%3FMenuId%3DMTU4MzI%26LangId%3D1>FRA</a></td>
- </tr>
003 Template::Extract 获取sina新闻
- #!/usr/bin/perl
- # Download a news page and print the content of every <p> paragraph found
- # inside the <div class="blkContainerSblkCon"> block, one per line.
- #
- # Usage: pass the page URL as the first argument, or fill in $url below.
- use strict;
- use warnings;
- use Encode;
- use LWP::Simple 'get';
- use Template::Extract;
- #use Data::Dumper;
- # The URL is blank in this listing; a command-line argument overrides it.
- my $url = @ARGV ? $ARGV[0] : '';
- die "Usage: $0 URL\n" if $url eq '';
- # LWP::Simple::get returns undef when the fetch fails.
- my $html = get($url);
- die "Failed to fetch $url\n" unless defined $html;
- # Extraction template: "[% ... %]" skips arbitrary text, and each pass of
- # the FOREACH captures one paragraph body into the "content" field.
- my $rule = <<'RULE';
- <div class="blkContainerSblkCon"[% ... %]>
- [% ... %]
- [% FOREACH record %]
- <p>[% content %]</p>
- [% ... %]
- [% END %]
- RULE
- my $extract = Template::Extract->new();
- my $data = $extract->extract( $rule, T($html) );
- #print Dumper(\$data);
- # Iterate over the records themselves. The previous "0 .. @{...}" range
- # used the element count as its upper bound, so it ran one index past the
- # end of the list and printed a spurious empty line.
- my $records = ( $data && $data->{'record'} ) || [];
- for my $record ( @{$records} ) {
-     print $record->{content}, "\n";
- }
- # T($text): return $text encoded as UTF-8 bytes.
- sub T {
-     my $text = shift;
-     return encode( 'utf8', $text );
- }
- __END__
- my $rule = <<RULE;
- [% FOREACH record %]
- <h1 id="artibodyTitle"[% ... %]>[% title %]</h1>
- [% END %]
- RULE
也是很早以前放QQ空间的文章。
阅读(1728) | 评论(0) | 转发(0) |