From:
1 最简单的sitemap
(使用前确认目录下有html文件)
ls *.html
ls *.html | awk '{ print "<a href=" $1 ">" $1 "</a><br>" } '
2 汇总
样本文件,这里暂时称为 file1
====================
a
a
b
b
a
b
c
a
d
b
b
b
d
a
c
b
====================
逐条执行以下的命令,你会发现什么?
cat file1
cat file1 | sort
cat file1 | sort | uniq
cat file1 | sort | uniq -c
cat file1 | sort | uniq -c | sort -rn
3 还是汇总
样本文件存为 file2
====================
2001 a 1
2001 a 1
2002 b 2
2002 b 3
2003 a 4
2001 b 3
2001 c 3
2003 a 3
2004 d 3
2004 b 3
2004 b 3
2005 b 3
2005 d 3
2005 a 1
2006 c 2
2001 b 2
====================
本例假定第一列为时间,第二列为产品名,第三列为数量
先对时间进行汇总并排序
cat file2
cat file2 | cut -d" " -f1,3
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print i, a[i]; }'
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print i, a[i]; }' | sort -rn -k2
它们的执行结果是什么样的?
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print i, a[i]; }' | sort -rn
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print a[i], i; }' | sort -rn
填空,并实现上一组末尾命令的功能
cat file2 | awk '{ a[ ]+= } END { for (i in a) print a[i], i ; }' | sort -rn
那么,对产品汇总并排序呢?
cat file2 | awk '{ a[ ]+= } END { for (i in a) print a[i], i ; }' | sort -rn
偶只想要2001年的
cat file2 | grep "2001"
偶偏不要2001年的
cat file2 | grep -v "2001"
偶想看a
cat file2 | grep "a"
偶偏不要a
cat file2 | grep -v "a"
标上个序号
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print i, a[i]; }' | sort -rn -k2 | nl
倒过来,两种办法
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print i, a[i]; }' | sort -rn -k2 | tac
cat file2 | cut -d" " -f1,3 | awk '{ a[$1]+=$2 } END { for (i in a) print i, a[i]; }' | sort -n -k2
4 单词统计(居然又是汇总…)
样例,存为 file3
====================
hello world who am i
i say hello world
who is fcicq
i do not know who is fcicq
why not to find out
====================
cat file3
cat file3 | sed -e "s/ /\n/g"
cat file3 | sed -e "s/ /\n/g" | sort
cat file3 | sed -e "s/ /\n/g" | sort | uniq -c
cat file3 | sed -e "s/ /\n/g" | sort | uniq -c | sort -rn
取头尾
cat file3 | sed -e "s/ /\n/g" | sort | uniq -c | sort -rn | head
cat file3 | sed -e "s/ /\n/g" | sort | uniq -c | sort -rn | head -n5
cat file3 | sed -e "s/ /\n/g" | sort | uniq -c | sort -rn | tail
cat file3 | sed -e "s/ /\n/g" | sort | uniq -c | sort -rn | tail -n5
统计词(总数,不重复的个数)有好几种办法
cat file3 | sed -e "s/ /\n/g" | sort | uniq | wc -l
cat file3 | sed -e "s/ /\n/g" | wc -l
cat file3 | wc
cat file3 | wc | awk '{ print $2 }'
cat file3 | wc -w
cat file3 | sed -e "s/ /\n/g" | wc -l
用awk过滤查看netstat 连接数
netstat -nat | awk '{print $6}' | sort | uniq -c | sort -rn