数据格式:
at=2010-06-16 23:59:56<|>aop=bl<|>aip=.....
代码:
cat /data/tongji/iphone_app/igame/*/*2010-06-17* | perl -nle '
# Rank users by page views (PV, one per log record), then sample up to 40
# users starting just past the 25%, 50% and 75% positions of that ranking
# and print a per-user summary line followed by the distinct actions seen.
#
# Input: one record per line, key=value fields separated by "<|>", e.g.
#   at=2010-06-16 23:59:56<|>aop=bl<|>aip=.....
# Output: tab-separated lines prefixed with "<percent>%-<rank>" so that the
# trailing sort(1) groups each sampled user together.
#
# NOTE: this program is wrapped in shell single quotes, so it must not
# contain any single-quote character. The -l switch already strips the
# trailing newline from each record, so no explicit chomp is needed.

# Parse the record into a field hash (fresh for every input line).
my %m;
for my $field (split /<\|>/) {
    $m{$1} = $2 if $field =~ /(.*?)=(.*)/;
}

# A record without a user id cannot be attributed to anyone; counting it
# would create a phantom empty-id user that pollutes the ranking.
next unless defined($m{auid}) and length($m{auid});

my $uid = $m{auid};
my $op  = defined $m{aop} ? $m{aop} : "";

# Human-readable description of the action; stays empty for unknown ops.
my $cont = "";
$cont = "$uid\t$m{at}\t登入"                   if $op eq "a";
$cont = "$uid\t$m{at}\t浏览\t$m{bn} >> $m{pn}" if $op eq "be";
$cont = "$uid\t$m{at}\t下载\t$m{bn} >> $m{pn}" if $op eq "d";
$cont = "$uid\t$m{at}\t栏目\t$m{bn}"           if $op eq "bl";
$cont = "$uid\t$m{at}\t搜索\t$m{pn}"           if $op eq "se";

# Per-user accumulators (package globals so the END block can see them).
$p{$uid}++;                      # total records (PV)
$se{$uid}++   if $op eq "se";    # searches
$cont{$uid}++ if $op eq "be";    # content views
$aa{$uid}++   if $op eq "a";     # logins
$h{$uid}{$cont}++;               # distinct action lines

# Report
END {
    my $nuu = scalar keys %p;
    my $row = 0;

    # Highest PV first; tie-break on user id so the ranking is reproducible.
    for my $k (sort { $p{$b} <=> $p{$a} or $a cmp $b } keys %p) {
        ++$row;
        for my $tt (0.25, 0.5, 0.75) {
            my $start = $nuu * $tt;

            # Inclusive upper bound: exactly 40 ranks per window even when
            # $start happens to be an integer.
            next unless $row > $start and $row <= $start + 40;

            # Data is passed as printf arguments, never as part of the
            # format, so a "%" inside logged fields is printed verbatim.
            my $tag = sprintf "%f%%-%s", $tt * 100, $row;
            printf "%s\t\t\t\t\t登入次数=%s\t搜索次数=%s\t内容次数=%s\n",
                $tag, $aa{$k} || 0, $se{$k} || 0, $cont{$k} || 0;
            for my $key (keys %{ $h{$k} }) {
                printf "%s\t%s\n", $tag, $key;
            }
        }
    }
}
' | sort > /tmp/100032.txt
阅读(1075) | 评论(0) | 转发(0) |