分类:
2006-05-05 22:10:57
#!/usr/bin/perl
#
# Copyright (C) xiaosuo
# License GPL2 or above
#
# bzmtvlrc - query a lyrics site for a song name and either list the matches
# or download the LRC lyrics of one selected match.
#
# NOTE(review): this listing was recovered from a web page on which the
# script had been collapsed onto a few physical lines and everything that
# looked like markup had been stripped.  Two things were lost and could not
# be recovered from the listing itself:
#   * the site address assigned to $base_url, and
#   * the XML tags inside the "-x" print statements.
# The XML element names used below are reconstructions; confirm them against
# whatever consumes the "-x" output before relying on them.
#
use strict;
use warnings;
use URI;
use HTML::Parser;
require LWP::UserAgent;
use Getopt::Std;

# Print the command-line help to standard output.
sub usage {
    print " Usage: bzmtvlrc [options] MusicName.\n";
    print " -l get the lyrics list.\n";
    print " -p num set the page to n, default 1.\n";
    print " -n num set the number to n.\n";
    print " -d download the special lyrics.\n";
    print " -x output the content in XML format.\n";
    print " -h show this help page.\n";
    return;
}

# Return true when $value is a string of decimal digits denoting an
# integer >= 1.  Used for -p and -n so that non-numeric input is rejected
# as a usage error instead of triggering numeric-comparison warnings.
sub is_positive_int {
    my ($value) = @_;
    return 0 unless defined $value && $value =~ /\A[0-9]+\z/;
    return $value >= 1 ? 1 : 0;
}

# Return a copy of $text with the characters that are special in XML
# character data and attribute values replaced by entity references.
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# parse the options.
my %opts;
if (!getopts("lp:n:dxh", \%opts)) {
    usage();
    exit(1);
}
if ($opts{"h"}) {
    usage();
    exit(0);
}
# -l and -d are mutually exclusive; this is a usage error, so the exit
# status is non-zero (unlike an explicit -h).
if ($opts{"l"} && $opts{"d"}) {
    usage();
    exit(1);
}
# Exactly one positional argument (the music name) is required.
if (@ARGV != 1) {
    usage();
    exit(1);
}
my $key = $ARGV[0];

# Test for the presence of -p rather than its truth so that "-p 0" is
# reported as an error instead of being silently treated as page 1.
my $page = 1;
if (exists $opts{"p"}) {
    if (!is_positive_int($opts{"p"})) {
        usage();
        exit(1);
    }
    $page = $opts{"p"};
}

# When downloading, -n must at least be a positive integer; whether it is
# within range can only be checked once the result list has been fetched.
if ($opts{"d"} && !is_positive_int($opts{"n"})) {
    usage();
    exit(1);
}

# the lyrics's structure will be saved in the array
my @lyrics;

# get the lyrics's list
# NOTE(review): the site address is missing from the recovered listing; fill
# it in (scheme and host, no trailing slash) before use.
my $base_url = "";
if ($base_url eq "") {
    die "bzmtvlrc: \$base_url is empty; set it to the lyrics site address.\n";
}

my $url = URI->new($base_url . "/So.asp");
$url->query_form(
    key  => $key,
    y    => '1',
    page => $page
);
my $ua = LWP::UserAgent->new();
my $response = $ua->get($url->as_string);
if (!$response->is_success) {
    die $response->status_line;
}

# temp variables in the page parse routines.
my $in_td_tag = 0;    # inside a <td> element
my $in_b_tag  = 0;    # inside a <b> element
my $useful_tr = 0;    # current row was opened by a <td valign="top">
my $count_td  = 0;    # <td> cells seen in the current row after that one
my $music     = "";
my $actor     = "";
my $link      = "";
my $pages     = 0;    # last all-digit <b> text seen outside a result row

# Start-tag handler; receives the tag name and a hash ref of attributes.
# Marks a row as a result row when one of its cells has valign="top",
# counts the cells that follow, and records the href of an <a> found while
# the cell counter is at 7, rewritten into an LRC download address.
sub start_tag {
    my ($tag, $attr) = @_;

    if ($tag eq "td") {
        $in_td_tag = 1;
        if (!$useful_tr) {
            return unless exists $attr->{"valign"};
            if ($attr->{"valign"} eq "top") {
                # The marker cell itself is not counted.
                $useful_tr = 1;
                return;
            }
        }
        if ($useful_tr) {
            $count_td++;
        }
    }
    elsif ($tag eq "a") {
        if ($useful_tr == 1 and $count_td == 7) {
            return unless exists $attr->{"href"};
            $link = $attr->{"href"};
            # Turn the relative "LRC....htm" page link into the address of
            # the LRC file under lrc_db; other hrefs are kept unchanged.
            $link =~ s/^LRC(.*)htm$/$base_url\/lrc_db$1lrc/;
        }
    }
    elsif ($tag eq "b") {
        $in_b_tag = 1;
    }
    return;
}

# Text handler; receives the raw text.  Inside a result row the text of
# cell 1 is stored as the music name and the text of cell 3 as the actor.
# Outside result rows, all-digit text inside <b> within a cell is stored
# in $pages.
sub text_tag {
    my ($text) = @_;

    return unless $in_td_tag;

    if ($useful_tr) {
        if ($count_td == 1) {
            $music = $text;
        }
        elsif ($count_td == 3) {
            $actor = $text;
        }
    }
    elsif ($in_b_tag) {
        if ($text =~ /^[0-9]+$/) {
            $pages = int($text);
        }
    }
    return;
}

# End-tag handler; receives the tag name.  At the end of a result row the
# collected fields are appended to @lyrics and the per-row state is reset.
sub end_tag {
    my ($tag) = @_;

    if ($tag eq "tr") {
        if ($useful_tr) {
            push @lyrics, {
                music => $music,
                actor => $actor,
                link  => $link
            };
            $useful_tr = 0;
            $music     = "";
            $actor     = "";
            $link      = "";
        }
        $count_td = 0;
    }
    elsif ($tag eq "td") {
        $in_td_tag = 0;
    }
    elsif ($tag eq "b") {
        $in_b_tag = 0;
    }
    return;
}

# start parse the page
my $parser = HTML::Parser->new(api_version => 3);
# Deliver each run of text in one piece so that a name is never split
# across two text events (text_tag keeps only the most recent event).
$parser->unbroken_text(1);
$parser->handler(start => \&start_tag, "tagname, attr");
$parser->handler(text  => \&text_tag,  "text");
$parser->handler(end   => \&end_tag,   "tagname");
$parser->parse($response->content);
$parser->eof();

# Convert the number read from the page into a page count, rounding up.
# NOTE(review): presumably that number is the total match count and 20 is
# the site's results-per-page size -- confirm against the live page.
my $items_per_page = 20;
$pages = int(($pages + $items_per_page - 1) / $items_per_page);

# output the content
if (!$opts{"d"}) {
    if ($opts{"x"}) {
        # NOTE(review): element names are reconstructed (see file header).
        # The page bytes are passed through undecoded, so no encoding is
        # declared here; add one matching the site's charset if needed.
        print "<?xml version=\"1.0\"?>\n";
        print "<lyrics>\n";
        foreach my $item (@lyrics) {
            print "<item>\n";
            print "<music>" . xml_escape($item->{"music"}) . "</music>\n";
            print "<actor>" . xml_escape($item->{"actor"}) . "</actor>\n";
            print "<link>" . xml_escape($item->{"link"}) . "</link>\n";
            print "</item>\n";
        }
        print "<pages>$pages</pages>\n";
        print "</lyrics>\n";
    }
    else {
        my $i = 1;
        foreach my $item (@lyrics) {
            print $i . "\t " . $item->{"music"} . " - " . $item->{"actor"} . "\n";
            $i++;
        }
        print "Total page number: $pages.\n";
    }
}
else {
    # -n is 1-based and must select one of the entries just listed.
    if ($opts{"n"} > scalar @lyrics) {
        usage();
        exit(1);
    }
    my $lrc_url = $lyrics[ $opts{"n"} - 1 ]->{"link"};
    if ($lrc_url eq "") {
        print "There is no LRC lyrics for this music, try the others.\n";
        exit(1);
    }

    # get() always returns a response object, so success is judged from
    # the response itself.
    my $fres = $ua->get($lrc_url);
    if (!$fres->is_success) {
        print "get $lrc_url failed.\n";
        exit(1);
    }

    if ($opts{"x"}) {
        # NOTE(review): element name is reconstructed (see file header).
        print "<?xml version=\"1.0\"?>\n";
        print "<lyric>\n";
    }
    # Keep only lines that start with a "[...:...]" tag.
    foreach my $line (split("\n", $fres->content)) {
        if ($line =~ /^\[.*:.*\].*$/) {
            print(($opts{"x"} ? xml_escape($line) : $line) . "\n");
        }
    }
    if ($opts{"x"}) {
        print "</lyric>\n";
    }
}

exit 0;