自动找proxy刷广告代码[PERL]
--------------------------------------------------------------------------------
#!/usr/bin/perl
#脚本编写:ZV(zvrop_at_163.com),部分函数参考watercloud的getproxy脚本.
#用途赚钱
#程序会自动搜索baidu上能找到的代理,测试成功之后,对广告网站提供的URL进行模拟点击
#并虚拟一个随机但是存在的原始地址.
use strict;
use Data::Dump qw(dump);
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
use URI::Escape;
use Getopt::Long;
use threads;
use threads::shared;
use Thread::Semaphore;
my $BAIDU=""; #搜索引擎,这里不要改了
my $m_key="http代理"; #搜索关键字,可以改,最好每人都不一样
my $m_much=100; #每次搜索多少页面
my $m_timeout=100; #测试代理的超时时间.一般不要改
my $m_test_site=".[马赛克之].html";#目的站点,这个需要改到具体的广告点击后的地址.
my $m_referer=".[马赛克之]/forums/index.php?showtopic=";#造假的网址
my $m_rand_max=50000; #造假的网址的随机ID范围
my @GGG; #存放广告的变量.
my $m_thread_num=5;
my $m_sem=Thread::Semaphore->new($m_thread_num);
my $m_get_num=0;
share $m_get_num;
&main();
sub main
{
my $i;
my %url_hist;
my @threads;
my $rurls;
get_gg($m_test_site);
my $m_raw_uri=$BAIDU . "s?rn=".$m_much."&wd=".uri_escape($m_key . " http");
for($i=0;;$i+=$m_much){
$rurls=get_proxy_pub_urls($m_raw_uri."&pn=".$i);
foreach (@$rurls)
{
next if exists($url_hist{$_});
my $rproxys=get_proxys($_);
my @t = &test_proxys($rproxys);
push @threads,@t;
}
foreach (@threads)
{
$_->join();
}
}
}
sub get_gg
{
my $search_uri=shift @_;
my $ua= LWP::UserAgent->new;
$ua->cookie_jar({});
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)');
$ua->timeout($m_timeout*3);
my $res_obj= $ua->get($search_uri);
exit 1 if (! $res_obj->is_success());
my $html_parse = HTML::LinkExtor->new();
$html_parse->parse($res_obj->as_string);
foreach ($html_parse->links)
{
next if $_->[0] ne "a";
push(@GGG,$_->[2]);
}
}
sub test_proxys
{
my $ref=shift @_;
my %proxy_hist;
my @threads;
foreach (@$ref)
{
next if exists($proxy_hist{$_});
$proxy_hist{$_}=1;
$m_sem->down();
my $t=threads->create("test_http_proxy",$_);
push(@threads,$t) if $t;
}
return @threads;
}
sub filter_url
{
return 1 if @_ != 2;
return 1 if $_[0] !~ /^http/;
return 1 if $_[1] !~ /^http/;
my $uri1=URI->new($_[0]);
my $uri2=URI->new($_[1]);
my $host1=$uri1->host;
my $host2=$uri2->host;
my $key1=substr($host1,index($host1,".")+1);
my $key2=substr($host2,index($host2,".")+1);
return 1 if $key1 eq $key2;
return 1 if $host1 eq $key2;
return 0;
}
sub get_proxy_pub_urls
{
my $search_uri=shift @_;
my $ua= LWP::UserAgent->new;
$ua->cookie_jar({});
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)');
$ua->timeout($m_timeout*3);
my $res_obj= $ua->get($search_uri);
exit 1 if (! $res_obj->is_success());
my $html_parse = HTML::LinkExtor->new();
$html_parse->parse($res_obj->as_string);
my @urls;
foreach ($html_parse->links)
{
next if $_->[0] ne "a";
next if filter_url($search_uri,$_->[2]);
push(@urls,$_->[2]);
}
return \@urls;
}
sub get_proxys
{
my $url=shift @_;
my @proxys;
my $ua= LWP::UserAgent->new;
$ua->cookie_jar({});
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)');
$ua->timeout($m_timeout*2);
my $res_obj= $ua->get($url);
next if (! $res_obj->is_success());
my $html=$res_obj->as_string;
while($html =~ m/(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\D+(?{
my $proxy="$1:$2";
pos($html);
push @proxys,$proxy;
}
return \@proxys;
}
sub test_http_proxy
{
return if @_ == 0;
$_=shift @_;
my ($proxy,$port)=split /:/,$_;
$_=$proxy . ":" . $port;
my $ua=LWP::UserAgent->new();
$ua->timeout($m_timeout);
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)');
$ua->proxy("http","http://" . $_);
my $count = @GGG;
my $desturl = @GGG[int(rand($count))];
my $req = HTTP::Request->new(GET => "$desturl");
$req->header('Referer' => $m_referer .int(rand($m_rand_max)));
my $res = $ua->request($req);
if($res->is_success())
{
my $html=$res->as_string;
if(length($html) > 1024 && $html =~ /html/i && \( $html =~ /table/i || $html =~ /div/i) )
{
$m_get_num++;
print "总共点击(".$m_get_num.")次,本次代理:$_\r\n";
}
}
$m_sem->up();
}
阅读(1423) | 评论(0) | 转发(0) |