(一) 文本间隔
  1. 在每行后追加一空行(包括空行)
  2. 双空行转换为单空行
  3. 截去HTML以角括号框起的标记标签
  4. 指定以行首abc字符为RS
(二) 选择性的显示特定行
  1. 读取文件*.12的所有文件的第五到第七行
  2. 随机打印5%的文本
  3. 查找文件内文本的方式
  4. 显示部分文本(指定行号范围,多文本)100-150行
  5. 显示两个正则表达式之间的文本(包含)
  6. 显示两个正则表达式之间的文本(不包含)
  7. 匹配多行'abc\nefg'
(三) 选择性的删除特定行
  1. 去除文件中的重复项
  2. 合并file1和file2,除去重复项
  3. 提取文件1中有,但文件2中没有
(四) 文本转换
  1. 格式转换(合并同一标签)
  2. 四栏表格调换第二和第三栏(制表符分隔)
  3. 制表符分隔转换为&
  4. 去除字符串内重复的(从左到右)
  5. 去除行内重复的列(从左到右)
  6. 将一行按照行首拆分为多行
(五) 目录相关
  1. 获取文件夹名/root/home (/root/home/tt.txt)
  2. 获取文件名tt.txt (/root/home/tt.txt)
(六) 正则表达式匹配
  1. 匹配行内有模式/regexp/
(七) 特殊应用
  1. 单词计数程序wc
  2. 计算第n栏的和(空格为分隔)
  3. 统计/pattern/在文件中出现的次数(可能pattern在多行)
sed -e 's/$/\n/' file
awk 'BEGIN{ORS="\n\n"} {print}' file
awk 'BEGIN{ORS="\n\n"} 1' file
awk '{print $0 "\n"}' file
awk '{print;print ""}' file
gawk 'BEGIN{RS="\n *\n"} 1' file
mawk 'BEGIN{ORS = "\n"; RS = "<[^<>]*>"} 1' file
BEGIN{RS="\nabc"}
5.
6.
awk 'FNR>4&&FNR<8' *.12 >urfile
awk 'rand() < 0.05' file
egrep 'pattern|pattern' files
awk '/pattern|pattern/' files
awk '/pattern|pattern/ {print FILENAME ":" FNR ":" $0 }' files #推荐
awk '(100 <= FNR) && (FNR <= 150) && /pattern/ { print FILENAME ":" FNR ":" $0 }' files
awk '/pattern1/,/pattern2/'
awk '/pattern1/{while(getline) if(match($0,"pattern2"))
break; else print;}'
’
awk 'x~/abc$/&&/^efg/{print x"\n"$0}{x=$0}'
8.
awk '!a[$0]++' file(s)
awk 'NR==FNR{a[$0]=1;print} #读取file1,建立数组a,下标为$0,并赋值为1,然后打印
NR>FNR{
#读取file2
if(!(a[$0])) {print
} #如果file2 的$0不存在于数组a中,即不存在于file1,则打印。
}' file1 file2
awk 'NR==FNR{a[$0]=1} #读取file2,建立数组a,下标为$0,并赋值为1
NR>FNR{
#读取file1
if(!(a[$0])) {print
} #如果file1 的$0不存在于数组a中,即不存在于file2,则打印。
}' file2 file1
输入文件
1.
a 45
2.
a 32
3.
a 53
4.
a 34
5.
b 41
6.
b 62
7.
b 23
8.
b 34
9.
b 65
期望输出得到的文件
a 45 H1 32 H2 53 H3 34 H4
b 41 H1 62 H2 23 H3 34 H4 65 H5
(1)
若文件a,b有序
awk '{if(v==$1){printf FS $2 FS "H"++i}else{i=0;printf (NR==1?"":RS) $0 FS "H"++i;};v=$1}' FILE
(2)
若文件a,b无序
awk '{a[$1]++;b[$1]=b[$1]$2 OFS"H"a[$1] OFS;}END{for(i in a)print i,b[i]}' file
注:这里看到输出格式,我的思想被禁锢了,都是想到的是每次输出a 45,但是因为结果需要多的一个H4,所以我应该想到每次输出可以是a 45 H1,这样最后一个就不用做特殊处理了
awk -F'\t' -v OFS='\t' '{print $1,$3,$2,$4}' old > new
awk 'BEGIN{FS=OFS="\t"}{print $1,$3,$2,$4}' old > new
sed -e 's/\t/\&/g' file
awk 'BEGIN{FS="\t";OFS="&"}{$1 = $1; print}' file #$1 = $1必须要,才能使OFS转换起作用
4.字符串 'aaabcccaaabbbccc',连续的字母作为一个子字符串,从左往右,去重复后,变为 'aaabcccbbb'
awk -F '' '{while(i++<=NF){s=s$i;if($i!=$(i+1)){if(!a[s]++)printf s;s=""}}}'
5.net123 aaa bbb net123
5.net456 ccc ddd net456
5.结果:
5.net123 aaa bbb
5.net456 ccc ddd
awk '{for(i=1;i<=NF;i++)if(a[$i]++)$i="";NF+=0;delete a}1'
awk '{for(i=2;i<=NF;i++)print $1,$i}'
原文:
1.
69 2 3 5 0
2.
70 2 3 6 0
3.
71 2 3 6 0
我想根据第一列为基础,后面有四列就输出四行,比如输出结果为
1.
69 2
2.
69 3
3.
69 5
4.
69 0
5.
70 2
6.
70 3
7.
70 6
8.
70 0
9.
71 2
10.
71 3
11.
71 6
12.
71 0
awk 'BEGIN{OFS=FS="/"}{$NF=""}1' test #$NF将最后一个字段赋值为空
tt.txt (/root/home/tt.txt)
awk -F'/' '{print $NF}'
3.
4.
$0 ~ /regexp/ #不匹配: $0 !~ /regexp/
2.
awk '{C += length($0)+1; W += NF} END{print NR,W,C}'
awk -v COLUMN=n '{sum += $COLUMN } END{print sum}' file
awk '{$0=n$0;c+=gsub(/China/,",");n=$NF}END{print c}' 1.text #忽略行后空格
awk -F '[ ]+' '{$0=n$0;c+=gsub(/China/,",");n=$NF}END{print c}' 1.text #不忽略行后空格
# gsub(/China/,",")防止出现
#ChinChina
#aa被认为是两个的情况
4.