全部博文(1144)
分类: LINUX
2009-03-18 23:33:12
2007-11-07, 16:36 【转帖】自动找proxy刷广告代码[PERL] |
|
#!/usr/bin/perl #脚本编写:ZV(zvrop_at_163.com),部分函数参考watercloud的getproxy脚本. #用途赚钱 #程序会自动搜索baidu上能找到的代理,测试成功之后,对广告网站提供的URL进行模拟点击 #并虚拟一个随机但是存在的原始地址. use strict; use Data::Dump qw(dump); use LWP::UserAgent; use HTML::LinkExtor; use URI::URL; use URI::Escape; use Getopt::Long; use threads; use threads::shared; use Thread::Semaphore; my $BAIDU=""; #搜索引擎,这里不要改了 my $m_key="http代理"; #搜索关键字,可以改,最好每人都不一样 my $m_much=100; #每次搜索多少页面 my $m_timeout=100; #测试代理的超时时间.一般不要改 my $m_test_site="[马赛克之].html";#目的站点,这个需要改到具体的广告点击后的地址. my $m_referer="[马赛克之]/forums/index.php?showtopic=";#造假的网址 my $m_rand_max=50000; #造假的网址的随机ID范围 my @GGG; #存放广告的变量. my $m_thread_num=5; my $m_sem=Thread::Semaphore->new($m_thread_num); my $m_get_num=0; share $m_get_num; &main(); sub main { my $i; my %url_hist; my @threads; my $rurls; get_gg($m_test_site); my $m_raw_uri=$BAIDU . "s?rn=".$m_much."&wd=".uri_escape($m_key . " http"); for($i=0;;$i+=$m_much){ $rurls=get_proxy_pub_urls($m_raw_uri."&pn=".$i); foreach (@$rurls) { next if exists($url_hist{$_}); my $rproxys=get_proxys($_); my @t = &test_proxys($rproxys); push @threads,@t; } foreach (@threads) { $_->join(); } } } sub get_gg { my $search_uri=shift @_; my $ua= LWP::UserAgent->new; $ua->cookie_jar({}); $ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)'); $ua->timeout($m_timeout*3); my $res_obj= $ua->get($search_uri); exit 1 if (! $res_obj->is_success()); my $html_parse = HTML::LinkExtor->new(); $html_parse->parse($res_obj->as_string); foreach ($html_parse->links) { next if $_->[0] ne "a"; push(@GGG,$_->[2]); } } sub test_proxys { my $ref=shift @_; my %proxy_hist; my @threads; foreach (@$ref) { next if exists($proxy_hist{$_}); $proxy_hist{$_}=1; $m_sem->down(); my $t=threads->create("test_http_proxy",$_); push(@threads,$t) if $t; } return @threads; } sub filter_url { return 1 if @_ != 2; return 1 if $_[0] !~ /^http/; return 1 if $_[1] !~ /^http/; my $uri1=URI->new($_[0]); my $uri2=URI->new($_[1]); my $host1=$uri1->host; my $host2=$uri2->host; my $key1=substr($host1,index($host1,".")+1); my $key2=substr($host2,index($host2,".")+1); return 1 if $key1 eq $key2; return 1 if $host1 eq $key2; return 0; } sub get_proxy_pub_urls { my $search_uri=shift @_; my $ua= LWP::UserAgent->new; $ua->cookie_jar({}); $ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)'); $ua->timeout($m_timeout*3); my $res_obj= $ua->get($search_uri); exit 1 if (! $res_obj->is_success()); my $html_parse = HTML::LinkExtor->new(); $html_parse->parse($res_obj->as_string); my @urls; foreach ($html_parse->links) { next if $_->[0] ne "a"; next if filter_url($search_uri,$_->[2]); push(@urls,$_->[2]); } return \@urls; } sub get_proxys { my $url=shift @_; my @proxys; my $ua= LWP::UserAgent->new; $ua->cookie_jar({}); $ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)'); $ua->timeout($m_timeout*2); my $res_obj= $ua->get($url); next if (! $res_obj->is_success()); my $html=$res_obj->as_string; while($html =~ m/(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\D+(?{ my $proxy="$1:$2"; pos($html); push @proxys,$proxy; } return \@proxys; } sub test_http_proxy { return if @_ == 0; $_=shift @_; my ($proxy,$port)=split /:/,$_; $_=$proxy . ":" . $port; my $ua=LWP::UserAgent->new(); $ua->timeout($m_timeout); $ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windws NT 5.1)'); $ua->proxy("http","http://" . $_); my $count = @GGG; my $desturl = @GGG[int(rand($count))]; my $req = HTTP::Request->new(GET => "$desturl"); $req->header('Referer' => $m_referer .int(rand($m_rand_max))); my $res = $ua->request($req); if($res->is_success()) { my $html=$res->as_string; if(length($html) > 1024 && $html =~ /html/i && \( $html =~ /table/i || $html =~ /div/i) ) { $m_get_num++; print "总共点击(".$m_get_num.")次,本次代理:$_\r\n"; } } $m_sem->up(); } |