利用perl多线程提取SNP序列数据(2009-08-22 17:20:15)标签:perl 多线程 杂谈 分类:perl语言学习
用 perl 可以提取网页上的数据,但是一个一个地提取太慢;有人提到可以用多线程来提取数据,下面是我改进后的多线程脚本:
1:改进的命令:
#!/usr/bin/perl
use strict;
use warnings;
use threads;
use LWP::Simple;
# Input file of SNPs to look up.  The open is checked so that a missing or
# unreadable file stops the script instead of letting it run on nothing.
open(INFILE1, '<', "E:/5001_10000SNP.txt") or die "Cannot open the input file: $!\n";
# Results are appended, so an existing result file is kept and extended.
open(OUTFILE, '>>', "E:/5001_10000SNP_seq.txt") or die "Cannot open the newfile: $!\n";
# Number of threads started per batch.  Watch out for memory exhaustion:
# 300 threads need roughly 1.4 GB of RAM.
my $max_thread = 200;
my @my_threads;   # thread objects of the current batch, indexed by slot
my $i = 1;        # running counter handed to each worker for its progress message
# GetResult($snp, $i)
#
# Thread worker: downloads the page at $url, extracts the text enclosed in
# <pre>...</pre> and appends "$snp<TAB>text" to OUTFILE, then prints a
# progress line labelled with $i on STDOUT.
#
# The download is retried when the request fails, when the page has no
# <pre> section, or when the section holds only the empty-result marker.
# Retries are capped and spaced one second apart so that a permanently
# failing SNP cannot keep the thread spinning forever; after the last
# attempt a warning is printed and nothing is written to OUTFILE.
#
# Returns nothing.
sub GetResult {
    my ($snp, $i) = @_;

    # NOTE(review): the URL is empty in this copy of the script, so every
    # request fails.  Presumably it was built from $snp; restore the real
    # query URL before running.
    my $url = "";

    my $max_attempts = 10;

    for my $attempt (1 .. $max_attempts) {
        # Lexical instead of $_ so the global is not clobbered.
        my $page = get($url);

        # NOTE(review): the opening <pre> is assumed; the pattern in this
        # copy only kept the closing tag -- confirm against the page.
        if (defined $page && $page =~ m{<pre>(.*)</pre>}) {
            my $seq = $1;

            # NOTE(review): a capture starting with "[]" is taken to be the
            # server's empty result.  The original compared against "[]"
            # plus a newline, which "." can never capture -- confirm the
            # exact marker.
            if ($seq !~ /\A\[\]/) {
                print OUTFILE "$snp\t$seq\n";
                print "\tFinished Threads!\t$i\n";
                return;
            }
        }

        sleep 1 if $attempt < $max_attempts;
    }

    warn "\t\tGiving up on $snp after $max_attempts attempts\t$i\n";
    return;
}
# Main loop: start one GetResult thread per input line, in batches of at
# most $max_thread, waiting for a whole batch to finish before starting the
# next one.
my $current_thread = 0;   # next free slot in @my_threads == size of the running batch

# Read INFILE1 line by line; the list to iterate over was empty before, so
# the input file was never read and no thread was ever started.
while (my $x = <INFILE1>) {
    chomp $x;
    next unless length $x;   # skip blank lines rather than query an empty SNP

    if ($current_thread >= $max_thread) {
        # Batch is full: every slot holds a fresh, unjoined thread.
        foreach my $thread (@my_threads[0 .. $current_thread - 1]) {
            $thread->join();
        }
        $current_thread = 0;

        # Reopen the output file between batches, as the script always did.
        close OUTFILE;
        open(OUTFILE, '>>', "E:/5001_10000SNP_seq.txt") or die "Cannot open the newfile: $!\n";
    }

    print "Current_thread: $current_thread\n";
    $i += 1;
    $my_threads[$current_thread] = threads->new(\&GetResult, $x, $i);
    $current_thread++;
}

# Wait for the last, possibly partial, batch.  Only slots below
# $current_thread are joined: higher slots still hold threads of the
# previous batch, and joining a thread twice is an error.
foreach my $thread (@my_threads[0 .. $current_thread - 1]) {
    $thread->join();
}

close OUTFILE;
close INFILE1;
exit;
2:原命令:
#!/usr/bin/perl
use strict;
use warnings;
use threads;
use LWP::Simple;
# Input file of SNPs to look up.  The open is checked so that a missing or
# unreadable file stops the script instead of letting it run on nothing.
open(INFILE1, '<', "E:/5001_10000SNP.txt") or die "Cannot open the input file: $!\n";
# Results are appended, so an existing result file is kept and extended.
open(OUTFILE, '>>', "E:/5001_10000SNP_seq.txt") or die "Cannot open the newfile: $!\n";
# SNPs whose page could not be parsed are appended here by GetResult.
open(ERR_OUT, '>>', "E:/Err_out.txt") or die "Can't open file: $!\n";
# Number of threads started per batch.  Watch out for memory exhaustion:
# 300 threads need roughly 1.4 GB of RAM.
my $max_thread = 200;
my @my_threads;   # thread objects of the current batch, indexed by slot
my $i = 1;        # running counter handed to each worker for its progress message
# GetResult($snp, $i)
#
# Thread worker: downloads the page at $url once and extracts the text
# enclosed in <pre>...</pre>.  On success "$snp<TAB>text" is appended to
# OUTFILE; otherwise (request failed or no <pre> section found) $snp is
# appended to ERR_OUT.  Either way a progress line labelled with $i is
# printed on STDOUT.
#
# Returns nothing.
sub GetResult {
    my ($snp, $i) = @_;

    # NOTE(review): the URL is empty in this copy of the script, so every
    # request fails.  Presumably it was built from $snp; restore the real
    # query URL before running.
    my $url = "";

    # Lexical instead of $_ so the global is not clobbered; get() returns
    # undef on failure, hence the defined check before matching.
    my $page = get($url);

    # NOTE(review): the opening <pre> is assumed; the pattern in this copy
    # only kept the closing tag -- confirm against the page.
    if (defined $page && $page =~ m{<pre>(.*)</pre>}) {
        print OUTFILE "$snp\t$1\n";
        print "\tFinished Threads!\t$i\n";
    }
    else {
        print ERR_OUT "$snp\n";
        print "\t\tError for get: $snp\t$i\n";
    }

    return;
}
# Main loop: start one GetResult thread per input line, in batches of at
# most $max_thread, waiting for a whole batch to finish before starting the
# next one.
my $current_thread = 0;   # next free slot in @my_threads == size of the running batch

# Read INFILE1 line by line; the list to iterate over was empty before, so
# the input file was never read and no thread was ever started.
while (my $x = <INFILE1>) {
    chomp $x;
    next unless length $x;   # skip blank lines rather than query an empty SNP

    if ($current_thread >= $max_thread) {
        # Batch is full: every slot holds a fresh, unjoined thread.
        foreach my $thread (@my_threads[0 .. $current_thread - 1]) {
            $thread->join();
        }
        $current_thread = 0;

        # Reopen the output file between batches, as the script always did.
        close OUTFILE;
        open(OUTFILE, '>>', "E:/5001_10000SNP_seq.txt") or die "Cannot open the newfile: $!\n";
    }

    print "Current_thread: $current_thread\n";
    $i += 1;
    $my_threads[$current_thread] = threads->new(\&GetResult, $x, $i);
    $current_thread++;
}

# Wait for the last, possibly partial, batch.  Only slots below
# $current_thread are joined: higher slots still hold threads of the
# previous batch, and joining a thread twice is an error.
foreach my $thread (@my_threads[0 .. $current_thread - 1]) {
    $thread->join();
}

close OUTFILE;
close ERR_OUT;
close INFILE1;
exit;
3:关于多线程的一些资料:
阅读(2211) | 评论(0) | 转发(0) |