[学习 Perl 练手用,请勿用于商业用途。请尊重别人的劳动成果:访问一下提供资料的网站,最好也看一下它的广告。]
取得电子节目单
#!/usr/bin/perl
use strict;
use warnings;
use LWP::Simple;
use HTML::Tree;
use URI;
# Download phase: fetch the catalogue page, then store every page it links to
# (anchors with target="nr") as "$root/<n>.html", numbered in document order.
my $caturl = '';        # catalogue page URL (blank in this listing; set before running)
my $root   = '';        # local directory for the downloaded pages (blank in this listing)
my $locdir = $root;

# mkdir fails when the directory already exists, so only create it on demand.
unless (-d $locdir) {
    mkdir $locdir
        or warn "Cannot create directory '$locdir': $!\n";
}

my $html = get($caturl);
if (defined $html) {
    my $tree = HTML::TreeBuilder->new;
    $tree->parse_content($html);

    my $i = 0;
    foreach my $link ($tree->look_down(_tag => 'a', target => 'nr')) {
        # Count every matching anchor, even skipped ones, so file numbers
        # always reflect the link's position on the catalogue page.
        $i++;

        my $href = $link->attr('href');
        unless (defined $href && length $href) {
            warn "Link #$i has no href attribute, skipping\n";
            next;
        }

        # hrefs may be relative; resolve them against the catalogue URL.
        my $url       = URI->new_abs($href, $caturl)->as_string;
        my $localfile = $root . '/' . $i . '.html';

        my $status = getstore($url, $localfile);
        is_success($status)
            or warn "Download of '$url' failed with HTTP status $status\n";
    }

    $tree->delete;      # HTML::Element trees must be freed explicitly
}
else {
    # get() returns undef on any failure; nothing to download in that case.
    warn "Cannot fetch catalogue page '$caturl', skipping download phase\n";
}
# Merge phase: feed every downloaded "*html" file to process_file() in
# numeric order (1.html, 2.html, ..., 10.html). The listing is done in Perl
# rather than through an "ls | grep | sort" shell pipeline, so directory names
# containing spaces or shell metacharacters are handled safely.
opendir my $dh, $locdir
    or die "Cannot read directory '$locdir': $!\n";
# Skip dot-files (as plain "ls" does) and keep names ending in "html".
my @pages = grep { !/\A\./ && /html\z/ } readdir $dh;
closedir $dh;

# Sort key: the leading integer of the file name, or 0 when there is none;
# ties fall back to plain string order.
my %order_of = map { $_ => (/\A(\d+)/ ? $1 : 0) } @pages;

foreach my $filename (sort { $order_of{$a} <=> $order_of{$b} or $a cmp $b } @pages) {
    print $filename, "...\r\n";
    process_file($filename);
}
# process_file($name)
#
# Parses the HTML file "$locdir/$name" and prints the text of every <p>
# element found inside a <tr> whose id attribute starts with "imgdiv".
# Each paragraph is written to STDOUT followed by CRLF.
#
# If the file cannot be opened, a warning is issued and nothing is printed.
sub process_file {
    my ($infile) = @_;
    my $path = "$locdir/$infile";

    my $tree = HTML::TreeBuilder->new;
    unless (defined $tree->parse_file($path)) {
        warn "Cannot parse '$path': $!\n";
        $tree->delete;
        return;
    }

    # A regex criterion only matches elements whose id is defined, so rows
    # without an id no longer trigger "uninitialized value" warnings.
    foreach my $tr ($tree->look_down(_tag => 'tr', id => qr/^imgdiv/)) {
        foreach my $para ($tr->look_down(_tag => 'p')) {
            my $text = $para->as_text;
            chomp $text;
            print $text, "\r\n";
        }
    }

    $tree->delete;      # HTML::Element trees must be freed explicitly
    return;
}
|
阅读(542) | 评论(0) | 转发(0) |