Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2007438
  • 博文数量: 369
  • 博客积分: 10093
  • 博客等级: 上将
  • 技术积分: 4271
  • 用 户 组: 普通用户
  • 注册时间: 2005-03-21 00:59
文章分类

全部博文(369)

文章存档

2013年(1)

2011年(2)

2010年(10)

2009年(16)

2008年(33)

2007年(146)

2006年(160)

2005年(1)

分类:

2006-05-05 22:10:57

前段时间做过百度的LRC歌词下载脚本,偶然发现也提供LRC歌词搜索,所以就又顺手写了一个脚本。使用方法和前一个完全一样,权当作练习Perl语言编程了。
#!/usr/bin/perl
#
# Copyright (C) xiaosuo
# License GPL2 or above
#
use strict;
use warnings;
use URI;
use HTML::Parser;
require LWP::UserAgent;
use Getopt::Std;

# Print the command line help text to the currently selected output handle.
# Takes no arguments.
sub usage
{
        print
                " Usage: bzmtvlrc [options] MusicName.\n",
                "         -l              get the lyrics list.\n",
                "         -p num          set the page to n, default 1.\n",
                "         -n num          set the number to n.\n",
                "         -d              download the special lyrics.\n",
                "         -x              output the content in XML format.\n",
                "         -h              show this help page.\n";
}

# Parse the command line.
#
# Recognised switches: -l, -d, -x, -h (flags) and -p, -n (take a value).
# Exactly one positional argument, the search key, is required.
# Any invalid invocation prints the usage text and exits with status 1;
# -h, or -l combined with -d, prints the usage text and exits with status 0.
my %opts;
if(!getopts("lp:n:dxh", \%opts)){
        usage();
        exit(1);
}
# && binds tighter than ||; the parentheses only make that explicit.
if($opts{"h"} || ($opts{"l"} && $opts{"d"})){
        usage();
        exit(0);
}
if(@ARGV != 1){
        usage();
        exit(1);
}
my $key = $ARGV[0];
my $page = 1;
# Test for presence, not truth: a plain truth test would treat "-p 0" (and
# an empty value) as "option not given" and silently fall back to page 1.
if(exists $opts{"p"}){
        # Only a positive decimal integer is a valid page number; this also
        # rejects values such as "2abc" that would otherwise slip past the
        # numeric comparison and be sent to the server unchanged.
        if($opts{"p"} !~ /\A[0-9]+\z/ || $opts{"p"} < 1){
                usage();
                exit(1);
        }
        $page = $opts{"p"};
}

# Parsed search results; filled by end_tag() with one hash reference
# (keys: music, actor, link) per result row.
my @lyrics;

# Request the search result page for $key / $page.
# NOTE(review): $base_url is an empty string here, so the URL built below is
# relative ("/So.asp?...") — presumably the site address is missing; confirm
# and fill it in before use.
my $base_url = "";
my $url = URI->new($base_url . "/So.asp");
$url->query_form(key => $key, y => '1', page => $page);

my $ua = LWP::UserAgent->new();
my $response = $ua->get($url->as_string);

# Abort with the HTTP status line when the request did not succeed.
die $response->status_line unless $response->is_success;

# State shared by the HTML::Parser callbacks (start_tag, text_tag, end_tag).
my ($in_td_tag, $in_b_tag) = (0, 0);        # set while inside <td> / <b>
my ($useful_tr, $count_td) = (0, 0);        # current row is a result row / cells counted in it
my ($music, $actor, $link) = ("", "", "");  # fields collected for the current row
my $pages = 0;                              # number read from a <b> cell; turned into a page count after parsing

# HTML::Parser "start" handler, registered with argspec "tagname, attr".
#
#   <td>  marks that we are inside a cell. Outside a result row, a cell whose
#         valign attribute is "top" switches the row to "useful" (that cell
#         itself is not counted); inside a result row every cell increments
#         the cell counter.
#   <a>   in the 7th counted cell of a result row, its href is stored in
#         $link, with a leading "LRC" / trailing "htm" rewritten to point
#         at "$base_url/lrc_db...lrc".
#   <b>   marks that we are inside bold text.
# Every other tag is ignored.
sub start_tag
{
        my ($tag, $attr) = @_;

        return 0 if $tag eq "tr";

        if($tag eq "b"){
                $in_b_tag = 1;
                return;
        }

        if($tag eq "a"){
                return unless $useful_tr == 1 and $count_td == 7;
                return unless exists $attr->{"href"};
                $link = $attr->{"href"};
                $link =~ s/^LRC(.*)htm$/$base_url\/lrc_db$1lrc/;
                return;
        }

        return unless $tag eq "td";

        $in_td_tag = 1;
        if($useful_tr){
                $count_td ++;
                return;
        }

        # Not in a result row yet: only a valign="top" cell starts one.
        return unless exists $attr->{"valign"};
        $useful_tr = 1 if $attr->{"valign"} eq "top";
        return;
}

# HTML::Parser "text" handler, registered with argspec "text".
#
# Text outside a <td> is ignored. Inside a result row, the text of counted
# cell 1 becomes $music and that of cell 3 becomes $actor. Outside a result
# row, all-digit text inside <b> is stored in $pages.
sub text_tag
{
        return unless $in_td_tag;

        my $text = shift;

        if($useful_tr){
                if($count_td == 1){
                        $music = $text;
                }elsif($count_td == 3){
                        $actor = $text;
                }
                return;
        }

        return unless $in_b_tag;
        $pages = int($text) if $text =~ /^[0-9]+$/;
        return;
}

# HTML::Parser "end" handler, registered with argspec "tagname".
#
#   </td>  leaves the current cell.
#   </b>   leaves bold text.
#   </tr>  if the row was a result row, appends { music, actor, link } to
#          @lyrics and clears the per-row fields; the cell counter is reset
#          for every row.
# Every other tag is ignored.
sub end_tag
{
        my ($tag) = @_;

        if($tag eq "td"){
                $in_td_tag = 0;
                return;
        }
        if($tag eq "b"){
                $in_b_tag = 0;
                return;
        }
        return unless $tag eq "tr";

        if($useful_tr){
                push @lyrics, {
                        music => $music,
                        actor => $actor,
                        link => $link
                };
                ($music, $actor, $link) = ("", "", "");
                $useful_tr = 0;
        }
        $count_td = 0;
        return;
}

# Run the fetched page through HTML::Parser; the three callbacks fill
# @lyrics and $pages as a side effect.
my $parser = HTML::Parser->new(
        api_version => 3,
        start_h => [\&start_tag, "tagname, attr"],
        text_h => [\&text_tag, "text"],
        end_h => [\&end_tag, "tagname"],
);
$parser->parse($response->content);
$parser->eof();

# $pages holds the number picked up by text_tag(); convert it to a page
# count at 20 per page, rounding up.
{
        my $full_pages = int($pages / 20);
        $pages = ($pages % 20 != 0) ? $full_pages + 1 : $full_pages;
}

# Output stage.
#   -d      fetch the link of entry -n (1-based, must be within @lyrics) and
#           print every line matching the "[...:...]" pattern.
#   -x      print the list in the "XML" layout.
#   neither print a numbered "music - actor" list plus the page total.
#
# NOTE(review): the -x branches print only empty strings and newlines — it
# looks like markup was stripped from these literals when the script was
# published; confirm against an original copy before relying on -x. The
# literals are reproduced unchanged here.
if($opts{"d"}){
        if(!exists $opts{"n"} or $opts{"n"} < 1 or $opts{"n"} > scalar(@lyrics)){
                usage();
                exit(1);
        }
        my $link = $lyrics[$opts{"n"} - 1]->{"link"};
        if($link eq ""){
                print "There is no LRC lyrics for this music, try the others.\n";
                exit(1);
        }
        my $fres = LWP::UserAgent->new->get($link) or die("get $link failed.\n");
        unless($fres->is_success){
                print "get $link failed.\n";
                exit(1);
        }
        if($opts{"x"}){
                print "\n";
                print "\n";
        }
        # Keep only lines that look like "[tag:value]..." entries.
        for my $line (grep { /^\[.*:.*\].*$/ } split("\n", $fres->content)){
                print $line . "\n";
        }
        if($opts{"x"}){
                print "
\n";
        }
}elsif($opts{"x"}){
        print "\n";
        print "\n";
        for my $item (@lyrics){
                print "\n";
                print "" . $item->{"music"} . "\n";
                print "" . $item->{"actor"} . "\n";
                print "" . $item->{"link"} . "\n";
                print "
\n";
        }
        print "$pages\n";
        print "
\n";
}else{
        my $i = 0;
        for my $item (@lyrics){
                $i ++;
                print $i . "\t " . $item->{"music"} . " - " . $item->{"actor"} . "\n";
        }
        print "Total page number: $pages.\n";
}

exit 0;

阅读(1191) | 评论(0) | 转发(0) |
0

上一篇:我的北京之行

下一篇:发布xlyrics-0.4.5

给主人留下些什么吧!~~