#!/usr/bin/perl
# Word-frequency counter.
#
# Reads a whole text file and prints every distinct word together with the
# number of times it occurs, most frequent first, one "word -> count" per line.
#
# Usage: script [FILE]
#   FILE defaults to /tmp/harry when no argument is given.
use strict;
use warnings;
use Data::Dumper;    # Not used by the code below; kept because the file loaded it.

# Return the entire contents of the file at $path as a single string.
# Dies with the system error message if the file cannot be opened.
sub slurp_file {
    my ($path) = @_;

    open my $fh, '<', $path or die "Cannot open $path: $!\n";

    # Localize $/ so slurp mode does not leak into the rest of the program.
    my $content = do { local $/; <$fh> };
    close $fh;

    # Reading an empty file in slurp mode can yield undef; normalize to ''.
    return defined $content ? $content : '';
}

# Count the words in $text, where a word is a maximal run of \w characters.
# Returns a reference to a hash mapping each word to its number of occurrences.
sub count_words {
    my ($text) = @_;
    my %count_of;

    return \%count_of unless defined $text;

    # A scalar-context //g match walks the string in place, so the remaining
    # text is never copied and neither $& nor $' is needed.
    while ($text =~ /(\w+)/g) {
        $count_of{$1}++;
    }

    return \%count_of;
}

# Return, as a list, the keys of the hash referenced by $count_of ordered by
# descending count. Words with equal counts are ordered by string comparison
# so the output is the same on every run.
sub words_by_frequency {
    my ($count_of) = @_;

    my @ordered = sort {
        $count_of->{$b} <=> $count_of->{$a}
            or $a cmp $b
    } keys %{$count_of};

    return @ordered;
}

# Entry point: count the words of the file named by the first argument
# (default /tmp/harry) and print the frequency table to standard output.
sub main {
    my @args = @_;
    my $path = @args ? $args[0] : '/tmp/harry';

    my $count_of = count_words(slurp_file($path));

    for my $word (words_by_frequency($count_of)) {
        print "$word -> $count_of->{$word}\n";
    }

    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main(@ARGV) unless caller;

1;
用它统计《哈利·波特》第一部,排名前十的高频词:
the -> 3315
to -> 1851
and -> 1812
a -> 1581
Harry -> 1326
of -> 1250
he -> 1209
was -> 1178
in -> 935
只出现过一次的单词有2989个
总共使用了6658个不同的单词
阅读(1251) | 评论(0) | 转发(0) |