Chinaunix首页 | 论坛 | 博客
  • 博客访问: 221841
  • 博文数量: 36
  • 博客积分: 1188
  • 博客等级: 军士长
  • 技术积分: 802
  • 用 户 组: 普通用户
  • 注册时间: 2010-04-08 21:45
文章分类

全部博文(36)

文章存档

2020年(1)

2017年(2)

2015年(1)

2014年(1)

2013年(1)

2012年(3)

2011年(27)

分类: WINDOWS

2011-04-12 09:36:51

001 Web::Scraper 获取广州天气
  1. #!/usr/bin/perl
  2. # Fetch a web page and print every row of its ".yuBaoTable" table as one
  3. # space-separated line of cell text (the post uses it for a weather forecast).
  4. #
  5. # Usage: perl weather.pl URL
  6. use strict;
  7. use warnings;
  8. use utf8;
  9. use URI;
  10. use Web::Scraper;

  11. binmode( STDOUT, ':encoding(utf8)' );

  12. # The page address comes from the command line; an empty address cannot be
  13. # scraped, so stop early with a usage message instead of a failed request.
  14. my $url = @ARGV ? $ARGV[0] : "";
  15. die "Usage: $0 URL\n" if $url eq "";

  16. # Every <tr> directly below an element of class "yuBaoTable" becomes one
  17. # entry of 'rows'; each entry stores the text of its <td> cells in 'cols'.
  18. my $proce = scraper {
  19.     process '.yuBaoTable > tr', 'rows[]' => scraper {
  20.         process 'td', 'cols[]' => 'TEXT';
  21.     };
  22. };

  23. my $res = $proce->scrape( URI->new($url) );

  24. # NOTE(review): 'rows' / 'cols' are presumably absent when a selector matched
  25. # nothing (no table, or a row without <td> cells) - fall back to an empty
  26. # list so strict refs never sees an undef array reference.
  27. for my $row ( @{ $res->{rows} || [] } ) {
  28.     my @weather = @{ $row->{cols} || [] };
  29.     print "@weather\n";
  30. }

002 Web::Scraper 获取网页表格数据

  1. #!/usr/bin/perl
  2. # Fetch a web page and print the first three cells (code, team, country) of
  3. # every row of the table with id "UCITeamList" as an aligned, '|'-separated line.
  4. #
  5. # Usage: perl teams.pl URL
  6. use strict;
  7. use warnings;
  8. use utf8;
  9. use URI;
  10. use Web::Scraper;

  11. binmode( STDOUT, ':encoding(utf8)' );

  12. # The page address comes from the command line; an empty address cannot be
  13. # scraped, so stop early with a usage message instead of a failed request.
  14. my $url = @ARGV ? $ARGV[0] : "";
  15. die "Usage: $0 URL\n" if $url eq "";

  16. # Every <tr> directly below <table id="UCITeamList"> becomes one entry of
  17. # 'rows'; each entry stores the text of its <td> cells in 'cols'.
  18. my $proce = scraper {
  19.     process 'table#UCITeamList > tr', 'rows[]' => scraper {
  20.         process 'td', 'cols[]' => 'TEXT';
  21.     };
  22. };

  23. my $res = $proce->scrape( URI->new($url) );

  24. # NOTE(review): 'rows' / 'cols' are presumably absent when a selector matched
  25. # nothing, and a short row leaves slice slots undef - substitute empty
  26. # values so printf never receives undef.
  27. for my $row ( @{ $res->{rows} || [] } ) {
  28.     my ( $code, $team, $country ) =
  29.         map { defined $_ ? $_ : '' } @{ $row->{cols} || [] }[ 0, 1, 2 ];
  30.     printf "%-5s | %-30s | %-2s\n", $code, $team, $country;
  31. }

  32. __END__
  33. # Sample of the HTML being scraped
  34. <table id=UCITeamList class=WithBorder>
  35.   <tr>
  36.     <td class=ColHeader>Code</td>
  37.     <td class=ColHeader>Team</td>
  38.     <td class=ColHeader>Country</td>
  39.   </tr>
  40.   <tr>
  41.     <td><a href=/Modules/SUCI/TEAMS/TeamDetails.asp?id=OA&RefDate=05.06.2010&MenuId=MTU4MzI&LangId=1&BackLink=%2Ftemplates%2FUCI%2FUCI2%2Flayout%2Easp%3FMenuId%3DMTU4MzI%26LangId%3D1>ALM</a></td>
  42.     <td><a href=/Modules/SUCI/TEAMS/TeamDetails.asp?id=OA&RefDate=05.06.2010&MenuId=MTU4MzI&LangId=1&BackLink=%2Ftemplates%2FUCI%2FUCI2%2Flayout%2Easp%3FMenuId%3DMTU4MzI%26LangId%3D1>AG2R LA MONDIALE</a>
  43.     </td>
  44.     <td><a href=/Modules/SUCI/TEAMS/TeamDetails.asp?id=OA&RefDate=05.06.2010&MenuId=MTU4MzI&LangId=1&BackLink=%2Ftemplates%2FUCI%2FUCI2%2Flayout%2Easp%3FMenuId%3DMTU4MzI%26LangId%3D1>FRA</a></td>
  45.   </tr>
003 Template::Extract 获取 sina 新闻
  1. #!/usr/bin/perl
  2. # Download a news page and print the text of every <p> paragraph found
  3. # inside its <div class="blkContainerSblkCon"> container, one per line.
  4. #
  5. # Usage: perl news.pl URL
  6. use strict;
  7. use warnings;
  8. use Encode;
  9. use LWP::Simple 'get';
  10. use Template::Extract;

  11. #use Data::Dumper;

  12. # The page address comes from the command line; LWP::Simple's get() returns
  13. # undef on failure, so check it before trying to extract anything.
  14. my $url = @ARGV ? $ARGV[0] : '';
  15. die "Usage: $0 URL\n" if $url eq '';
  16. my $html = get($url);
  17. die "Failed to fetch $url\n" unless defined $html;

  18. # Extraction template: "[% ... %]" skips arbitrary text, and the FOREACH
  19. # block captures each paragraph body as the 'content' field of one 'record'.
  20. # Single-quoted heredoc: the template is literal text, nothing to interpolate.
  21. my $rule = <<'RULE';
  22. <div class="blkContainerSblkCon"[% ... %]>
  23. [% ... %]
  24. [% FOREACH record %]
  25. <p>[% content %]</p>
  26. [% ... %]
  27. [% END %]
  28. RULE

  29. my $extract = Template::Extract->new();
  30. my $data = $extract->extract( $rule, T($html) );

  31. #print Dumper(\$data);

  32. # Walk the records themselves rather than the index range 0 .. count, which
  33. # ran one past the last record and printed a spurious empty line. A failed
  34. # match (no data / no 'record' list) simply prints nothing.
  35. my @records = ( $data && $data->{'record'} ) ? @{ $data->{'record'} } : ();
  36. for my $record (@records) {
  37.     print $record->{content}, "\n";
  38. }

  39. # T($text): return $text encoded as UTF-8 bytes.
  40. # NOTE(review): presumably needed because get() hands back decoded characters
  41. # while the script prints to STDOUT without an encoding layer - confirm.
  42. sub T {
  43.     my $text = shift;
  44.     return encode( 'utf8', $text );
  45. }
  46. __END__
  47. # Alternative template: captures the <h1 id="artibodyTitle"> text as 'title'.
  48. my $rule = <<RULE;
  49. [% FOREACH record %]
  50. <h1 id="artibodyTitle"[% ... %]>[% title %]</h1>
  51. [% END %]
  52. RULE
也是很早以前放QQ空间的文章。
阅读(1695) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~