#!/bin/perl
use strict;
use Encode;
# Extract search keywords from a list of search-engine URLs.
#
# Usage: perl url_keyword_get.pl url_list.txt
#
# Reads the file named by the first argument line by line. For every line
# that mentions "google" the "q" query parameter is extracted; for every
# line that mentions "baidu" the "wd" parameter is extracted. Each keyword
# is URL-decoded and written, one per line, both to key_word.txt in the
# current directory and to standard output. Lines for which no keyword can
# be found are skipped.
use warnings;

# Return the decoded search keyword contained in $line as a byte string,
# or nothing (undef in scalar context) when the line is not a recognised
# search URL or does not carry the keyword parameter.
sub extract_keyword {
    my ($line) = @_;

    my ($param, $charset);
    if ($line =~ /google/) {
        $param = 'q';                 # bytes are passed through unchanged
    }
    elsif ($line =~ /baidu/) {
        $param   = 'wd';
        $charset = 'gb2312';          # keyword bytes are treated as GB2312
    }
    else {
        return;
    }

    # Require a separator in front of the name so that "oq=" is not taken
    # for "q=", and stop at the next separator or whitespace so that a
    # parameter at the very end of the URL is found as well.
    my ($raw) = $line =~ /[?&#]\Q$param\E=([^&#\s]*)/
        or return;

    # Undo form encoding: '+' stands for a space, %XX for a single byte.
    (my $bytes = $raw) =~ tr/+/ /;
    $bytes =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;

    # Re-encode to UTF-8 explicitly. Printing the decoded character string
    # through a handle without an encoding layer triggers "Wide character"
    # warnings and emits Latin-1 for strings with no code point above 0xFF.
    if (defined $charset) {
        $bytes = Encode::encode('UTF-8', Encode::decode($charset, $bytes));
    }

    return $bytes;
}

# Process the URL list at $url_path. Dies when the input file cannot be
# read or key_word.txt cannot be written.
sub main {
    my ($url_path) = @_;

    die "use perl url_keyword_get.pl url_list.txt ,no input file given\n"
        unless defined $url_path && length $url_path;

    open my $in, '<', $url_path
        or die "use perl url_keyword_get.pl url_list.txt ,$!";
    open my $out, '>', 'key_word.txt'
        or die "cannot open key_word.txt for writing: $!";

    while (my $line = <$in>) {
        my $key_word = extract_keyword($line);
        next unless defined $key_word;

        print {$out} $key_word, "\n";
        print $key_word, "\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close $out or die "cannot finish writing key_word.txt: $!";
    close $in;
    return;
}

# Run only when executed as a script, not when loaded from other code.
main(@ARGV) unless caller;
# The code below is commented out and is not used.
=pod # POD section used as a Perl block comment
my @array;
@array = split(/%/,$line);
print join(",",@array);
my $i = 1;
while($i < @array )
{
$array[$i]= hex($array[$i]);
my $data = pack("C1",$array[$i]);
print $data,"\n";
print FILE $data;
$i = $i + 1;
}
=cut