功能:从指定的站点目录开始,递归下载所有匹配 *.gz、*.bz2、*.zip 的文件;
缺陷:处理递归目录可能会有问题
TODO:改进,将$site改为参数;文件后缀改为参数;
#!/usr/bin/perl
# (C)icymoon
use strict;
use warnings;

# Root URL of the directory index to crawl.
# Taken from the first command-line argument when one is given; otherwise it
# stays the empty string, which was the previous hard-coded value.
my $site = @ARGV ? $ARGV[0] : "";
# get_url($url)
#
# Crawl the HTML directory index found at $url and download every linked
# *.gz, *.bz2 and *.zip file into the current working directory, descending
# into every linked sub-directory (a relative link ending in "/").
#
# Parameters:
#   $url - URL of a directory index page.
#
# Output:
#   - prints "wget <url>" to STDOUT for every index page fetched and
#     "wget <url> <status>" for every file download attempted;
#   - prints "wget <url> failed!" to the FD filehandle (opened by the
#     caller) for every fetch or download whose wget exit status is non-zero.
#
# Notes:
#   - wget is run without a shell, so URLs cannot inject shell commands.
#   - Index pages are read straight from wget's standard output instead of
#     being saved as index.html* files and located by parsing wget messages.
#   - Links that are absolute, carry a scheme, start with "?" or "#", or
#     contain a "." / ".." path segment are skipped, so the crawl never
#     climbs to a parent directory or leaves the starting tree.
#   - Each directory and each file is processed at most once per call.
sub get_url($)
{
    my ($start_url) = @_;

    if (!defined $start_url || $start_url eq '') {
        warn "get_url: no URL given, nothing to download\n";
        return;
    }

    my %visited;
    _crawl_index($start_url, \%visited);
    return;
}

# Fetch one index page, then recurse into its sub-directories and download
# its archive files. $visited is a hash ref shared by the whole crawl.
sub _crawl_index
{
    my ($url, $visited) = @_;

    # Drop trailing slashes so that "$base/$link" never produces "//".
    (my $base = $url) =~ s{/+\z}{};
    return if $visited->{$base}++;

    my $page = _fetch_page($url);
    print("wget $url\n");
    if (!defined $page) {
        print FD "wget $url failed!\n";
        return;
    }

    # Collect every href first: the recursive calls below run their own
    # matches and would otherwise clobber the capture variables.
    my @links = $page =~ m{<a\s+(?:[^>]*?\s)?href\s*=\s*"([^"]+)"}gi;

    for my $link (@links) {
        next if $link =~ m{\A[/?#]};                   # absolute path, sort query, fragment
        next if $link =~ m{\A[a-z][a-z0-9+.\-]*:}i;    # has a scheme (http:, mailto:, ...)
        next if $link =~ m{(?:\A|/)\.\.?(?:/|\z)};     # "." or ".." path segment

        my $target = "$base/$link";
        if ($link =~ m{/\z}) {
            _crawl_index($target, $visited);
        }
        elsif ($link =~ m{\.(?:gz|bz2|zip)\z}) {
            _download_file($target) unless $visited->{$target}++;
        }
    }
    return;
}

# Return the body of $url as one string, or undef when wget cannot be
# started or exits with a non-zero status.
sub _fetch_page
{
    my ($url) = @_;

    # List-form pipe open: no shell is involved; "--" ends wget's options.
    open(my $pipe, '-|', 'wget', '-q', '-O', '-', '--', $url)
        or return;
    my $html = do { local $/; <$pipe> };
    # close() on a pipe returns false when the child exited non-zero.
    close($pipe) or return;
    return defined $html ? $html : '';
}

# Download one file into the current directory and report the outcome.
sub _download_file
{
    my ($url) = @_;

    system('wget', '-q', '--', $url);
    my $status = $?;
    if ($status != 0) {
        print FD "wget $url failed!\n";
    }
    print("wget $url $status\n");
    return;
}
# Open the failure log, crawl the site, then close the log.
# The bareword handle FD is kept on purpose: get_url() writes its
# "... failed!" lines to that handle by name.
if (!open(FD, '>', 'wget_log'))
{
    print STDERR "Error in open log file 'wget_log': $!\n";
    exit(1);
}
get_url($site);
# Buffered write errors only surface at close time, so check it.
if (!close(FD))
{
    print STDERR "Error in close log file 'wget_log': $!\n";
    exit(1);
}
阅读(1287) | 评论(0) | 转发(0) |