分类:
2007-04-18 00:33:01
#!/usr/bin/env perl # #$ARGV[0]:OriginalFile #$ARGV[1]:Title #$ARGV[2]:SubURL #$ARGV[3]:start_table #$ARGV[4]:start_td #$ARGV[5]:end_td1 # # if(@ARGV != 6) { print "Usage: Extract exit(1); } my $line; my $state = 0; my $count_table=0; my $count_td=0; my $count_td1=0; my $OrigFile=$ARGV[0]; my $Title=$ARGV[1]; my $SubURL=$ARGV[2]; my $start_table = $ARGV[3]; my $start_td = $ARGV[4]; my $start_td1 = $ARGV[5];; my $Dir=$ENV{'PWD'}; open(HTML,$Dir."/".$OrigFile) or die "cannot open file:$!"; open(OUTPUT,">$Dir"."/"."out_".$OrigFile) or die "cannot open file:$!"; #Add Title print OUTPUT " while() { chomp; $line=$_; #Trans Upper to Lower $line =~ tr/[A-Z]/[a-z]/; #Delete Commend Line if( ($line=~/\<\!--/) || ($state==1) ) { $temp .= $line; if($line =~ /--\>/) { $state = 0; $temp=""; next; } else { $state = 1; next; } } #Get the need text segment if(/\
先用 wget 将指定 URL 的网页下载到当前目录,再使用上面的脚本处理(脚本需要 6 个参数,含义见脚本开头的注释)。
给主人留下些什么吧!~~
|