Chinaunix首页 | 论坛 | 博客
  • 博客访问: 484884
  • 博文数量: 142
  • 博客积分: 4126
  • 博客等级: 上校
  • 技术积分: 1545
  • 用 户 组: 普通用户
  • 注册时间: 2008-02-22 10:03
文章分类

全部博文(142)

文章存档

2011年(8)

2010年(7)

2009年(64)

2008年(63)

我的朋友

分类:

2009-03-31 17:17:51

#!/usr/bin/perl -w

#--------------------------------------------------

#author :空气人儿

#blog :空气人儿

#last update :2/13/2009

#Tested in :Winxp Perl v5.10.0

#TO DO : a lot

#version :v.1.1

#--------------------------------------------------

use strict;
use warnings;
use LWP::UserAgent;
use HTML::TreeBuilder;
use HTML::Element;
use HTML::Entities;
use Encode qw(encode);
use HTTP::Cookies::Microsoft;
use Win32::TieRegistry(Delimiter => "/");

#Set default values
#--------------------------------------
# Account credentials used for the login URL below and as the blog name
# passed to Blog_categoryhash; replace the placeholders before running.
my $blog_username = "你的用户名";
my $blog_password = "你的密码";

#main program
#--------------------------------------
# Get cookies from the local IE cookie files.
# Attention!
# This part opens IE on the login URL (presumably so that IE stores the
# login cookies); it is not needed if the cookies are already saved in
# your local IE.
#-----------------------------------------------------------------------
# NOTE(review): the credentials are interpolated unescaped into a plain-http
# URL and into a shell command line; characters such as '&' or '"' in either
# value will break the query string or the command -- escape them if needed.
my $url = "http://passport.baidu.com/?login&username=$blog_username&password=$blog_password&mem_pass=on";

# Launching IE is only a convenience (see above), so a failure is reported
# but does not stop the run.
system("start iexplore.exe \"$url\"") == 0
    or warn "could not launch iexplore.exe (exit status $?)\n";
sleep 15;    # wait for IE before reading its cookie file
#-----------------------------------------------------------------------

# Locate the IE cookie folder through the registry. Without this check a
# missing value would silently produce the path "\index.dat".
my $cookies_dir = $Registry->{"CUser/Software/Microsoft/Windows/CurrentVersion/Explorer/Shell Folders/Cookies"};
defined $cookies_dir
    or die "cannot read the IE Cookies folder from the registry: $^E\n";

my $cookie_jar = HTTP::Cookies::Microsoft->new(
    File        => "$cookies_dir\\index.dat",
    'delayload' => 0,
);

# Shared user agent: both subroutines below send their requests through it.
my $ua = LWP::UserAgent->new;
$ua->timeout(60);
$ua->cookie_jar($cookie_jar);

# Build the category-name => link hash for this user's blog.
my %hash = Blog_categoryhash($blog_username);

# Back up the blog pages according to their category.
Blog_category_backup();




#subroutine

#------------------------------------------------------------------------

# Blog_category_backup()
#
# Walks the file-level %hash (category name => link, as returned by
# Blog_categoryhash), fetches each link through the shared $ua and writes
# the response body to "<category name> .html" in the current directory.
#
# Takes no arguments and returns nothing. Dies with the HTTP status line on
# the first failed request, or with the OS error if a file cannot be
# opened or closed.
sub Blog_category_backup
{
    while (my ($key, $value) = each %hash)
    {
        # Request the link stored for this category (an empty URL can never
        # be fetched).
        # NOTE(review): if the stored links are site-relative, the blog host
        # has to be prepended here -- confirm against the scraped hrefs.
        my $req = HTTP::Request->new(GET => $value);
        my $res = $ua->request($req);
        die $res->status_line unless $res->is_success;

        # Three-argument open on a lexical handle: the category name can no
        # longer be misread as an open mode, and failures are reported
        # instead of silently dropping the page.
        my $file = "$key .html";
        open my $fh, '>', $file
            or die "cannot open '$file' for writing: $!";
        print {$fh} $res->content;
        close $fh
            or die "cannot close '$file': $!";
    }
    return;
}

# Blog_categoryhash($blog_user)
#
# Fetches "$blog_user/blog" through the shared $ua, scans the
# <div class="item"> elements of the page for category links and returns a
# hash (as a flat list) mapping category name => category link. Names are
# entity-decoded and then encoded as gb2312.
#
# For a category whose post count is greater than 15, extra entries
# "<name><n>" => "<link>/index/<n>" are added for n = 1 .. int(count / 15).
#
# Dies with the HTTP status line if the page cannot be fetched.
sub Blog_categoryhash
{
    my ($blog_user) = @_;
    my %blog_category_link;

    # NOTE(review): this URL has no scheme or host, so it cannot be fetched
    # as written; presumably the blog host prefix is missing -- restore it.
    my $req = HTTP::Request->new(GET => "$blog_user/blog");
    my $res = $ua->request($req);
    die $res->status_line unless $res->is_success;

    my $content = $res->decoded_content();
    $content = '' unless defined $content;

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($content);
    $tree->eof();    # parse() must be followed by eof() before using the tree

    my @div = $tree->look_down(
        _tag  => 'div',
        class => 'item',
        sub {
            # Keep divs with no link at all (the regex below rejects them)
            # and divs whose first link points at a category page.
            my $link = $_[0]->look_down('_tag', 'a');
            return 1 unless $link;
            my $href = $link->attr('href');
            return 1 if defined $href && $href =~ m{/category/};
            return 0;
        },
    );

    foreach my $div (@div) {
        my $html = $div->as_HTML();
        next unless $html =~ m/href=\"(.*?)\" title=\"(.*?)\"\>(.*?)\<(.*?)\((.*?)\)/;

        # Copy the captures right away; later matches would overwrite them.
        my ($cate_link, $raw_name, $raw_count) = ($1, $3, $5);

        my $cate_name = encode('gb2312', decode_entities($raw_name));
        $blog_category_link{$cate_name} = $cate_link;

        # Use only the leading digits of the count so that a non-numeric
        # capture does not trigger "isn't numeric" warnings.
        my ($post_count) = $raw_count =~ /\A\s*(\d+)/;
        next unless defined $post_count && $post_count > 15;

        # One extra entry per additional index page of the category.
        for my $page (1 .. int($post_count / 15)) {
            $blog_category_link{ $cate_name . $page } = "$cate_link/index/$page";
        }
    }

    $tree->delete;    # free the parse tree explicitly
    return %blog_category_link;
}

阅读(645) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~