分析wc源码-sss0213-ChinaUnix博客

对你说sunshasha.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

sss0213

博客访问： 2407293
博文数量： 473
博客积分： 12252
博客等级：上将
技术积分： 4307
用户组：普通用户
注册时间： 2007-10-12 10:02

文章分类

全部博文（473）

NoSQL（2）
c（1）
ORACLE（21）

OCI（7）

操作（11）
javascript（47）
php（37）

smarty（1）
存储（16）

USB设备（1）

RAID（1）

分区（4）

硬盘（8）
Shell（36）
心情随笔（2）
嵌入式（4）
其它（7）
C和C++（33）
SQLITE（3）

翻译（0）
网络编程（6）
进程和线程编程（4）
工作日志（2）
MYSQL（143）

memcache（0）

high performance（1）

使用心得（83）

看过的手册（2）
Linux（107）

centos（2）

程序（1）

源码分析（2）

工具（4）

内核（2）

文本编辑（6）

配置（19）

信号（2）

命令（37）
未分配的博文（2）

文章存档

2012年（8）

2011年（63）

2010年（73）

2009年（231）

2008年（98）

我的朋友

相关博文

分析wc源码

分类： LINUX

2009-02-19 16:43:11

顺便贴个wc的代码，程序不长也很好理解，但有个地方很有点意思，下面和大家一起分享下。
wc.c
//别告诉我wc是厕所：wc是Linux和Unix下一个简单的工具，用来统计一个文件的行数（lines）、字符数（chars）、单词数（words）以及最长行的长度（Length）。首先要有这个概念！：）
/* NOTE(review): the header names were lost when the page was extracted; these
 * are restored from the identifiers this file uses (errno, FILE/printf/getc,
 * EXIT_SUCCESS, strcmp, getopt/optind) -- confirm against the upstream source. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "busybox.h"

//我们把函数放在后面解释，先看看主函数，这样更好理解
/* Running totals accumulated by wc_file() across every input it processes. */
static int total_lines, total_words, total_chars, max_length;
/* Column-selection flags set in wc_main() and read by print_counts(). */
static int print_lines, print_words, print_chars, print_length;

/*
 * Prototypes for the functions defined after wc_main().
 * They must match the definitions: wfopen() takes two strings (pointers, not
 * single chars) and print_counts() takes a fifth argument, the label to print.
 */
FILE *wfopen(const char *path, const char *mode);
static void print_counts(int lines, int words, int chars, int length,
			 const char *name);
static void wc_file(FILE *file, const char *name);

/*
 * Entry point of the wc applet.
 *
 * Parses the -c, -l, -L and -w options, then counts either standard input
 * (no file operand, or a first operand of "-") or each named file in turn.
 * When more than one file operand was processed, a final "total" line is
 * printed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if any named file could not be opened.
 */
int wc_main(int argc, char **argv)
{
	FILE *file;
	unsigned int num_files_counted = 0;
	int opt, status = EXIT_SUCCESS;

	/* Reset the cross-file totals and the column-selection flags. */
	total_lines = total_words = total_chars = max_length = 0;
	print_lines = print_words = print_chars = print_length = 0;

	/* Walk the command-line options. */
	while ((opt = getopt(argc, argv, "clLw")) > 0) {
		switch (opt) {
		case 'c':	/* print the character count */
			print_chars = 1;
			break;
		case 'l':	/* print the line count */
			print_lines = 1;
			break;
		case 'L':	/* print the length of the longest line */
			print_length = 1;
			break;
		case 'w':	/* print the word count */
			print_words = 1;
			break;
		default:
			/* Unrecognised option. show_usage() is declared outside this
			 * file; presumably it does not return -- confirm. */
			show_usage();
		}
	}

	/* With no option given, print lines, words and characters. */
	if (!print_lines && !print_words && !print_chars && !print_length)
		print_lines = print_words = print_chars = 1;

	/* No file operand, or a first operand of "-": count standard input. */
	if (argv[optind] == NULL || strcmp(argv[optind], "-") == 0) {
		wc_file(stdin, "");
		return EXIT_SUCCESS;
	} else {
		/* Several files may be named; process each one in order. */
		while (optind < argc) {
			if ((file = wfopen(argv[optind], "r")) != NULL)
				wc_file(file, argv[optind]);	/* opened: count it */
			else
				status = EXIT_FAILURE;	/* remember the failure for the exit status */
			num_files_counted++;	/* incremented for every operand, opened or not */
			optind++;
		}
	}

	/* More than one file operand: also print the accumulated totals. */
	if (num_files_counted > 1)
		print_counts(total_lines, total_words, total_chars,
			     max_length, "total");

	return status;
}

//下面是函数的实现

/*
 * fopen() wrapper that reports failures.
 *
 * On success the opened stream is returned. On failure the path is passed to
 * perror_msg() (declared outside this file), errno is reset to 0, and NULL is
 * returned.
 */
FILE *wfopen(const char *path, const char *mode)
{
	FILE *stream = fopen(path, mode);

	if (stream != NULL)
		return stream;

	perror_msg("%s", path);
	errno = 0;
	return NULL;
}

/*
 * Print one line of results.
 *
 * Each count whose print_* flag is set is written right-aligned in a field
 * seven characters wide; a single space separates consecutive columns. A
 * non-empty name is appended after a space, and the line ends with '\n'.
 */
static void print_counts(int lines, int words, int chars, int length,
		 const char *name)
{
	int printed = 0;	/* becomes 1 once any column has been written */

	if (print_lines) {
		printf("%7d", lines);
		printed = 1;
	}
	if (print_words) {
		printf("%s%7d", printed ? " " : "", words);
		printed = 1;
	}
	if (print_chars) {
		printf("%s%7d", printed ? " " : "", chars);
		printed = 1;
	}
	if (print_length)
		printf("%s%7d", printed ? " " : "", length);

	if (name[0] != '\0')
		printf(" %s", name);
	putchar('\n');
}
//这是主要的函数，也是我们讨论的重点，有意思的就在里面。
/*
 * Count the lines, words, characters and longest-line length of one stream.
 *
 * Reads `file` one character at a time until EOF, prints the results through
 * print_counts() labelled with `name`, folds them into the file-scope totals,
 * then closes `file` and flushes stdout.
 */
static void wc_file(FILE * file, const char *name)
{
	int lines, words, chars, length;
	int in_word = 0, linepos = 0;	/* in_word: currently inside a word; linepos: column on the current line */
	int c;

	lines = words = chars = length = 0;
	while ((c = getc(file)) != EOF) {
		chars++;	/* every character read is counted */

		/*
		 * The switch below updates the line, length and word counts from
		 * the character just read. The cases deliberately have no break of
		 * their own; the only break on the separator path is the one after
		 * word_separator. Reading the cases from the bottom up:
		 *
		 *  1) '\v' (vertical tab) does nothing itself; it lands on
		 *     word_separator, which closes a word in progress (words++)
		 *     and then breaks.
		 *  2) ' ' advances linepos by one and falls through into
		 *     word_separator.
		 *  3) '\t' advances linepos to the next multiple of 8. Falling
		 *     through into the ' ' case would add one more column, so it
		 *     jumps straight to word_separator with goto instead.
		 *  4) '\f' (form feed) first records the line just ended if it is
		 *     the longest so far, resets linepos to 0, then jumps to
		 *     word_separator.
		 *  5) '\r' shares the '\f' handling.
		 *  6) '\n' increments lines and then falls through into the
		 *     '\r' / '\f' handling.
		 *
		 * Each case therefore adds its own work on top of the shared tail,
		 * and goto skips the intermediate cases that must not run.
		 */
		switch (c) {
		case '\n':
			lines++;
			/* fallthrough */
		case '\r':
		case '\f':
			if (linepos > length)
				length = linepos;
			linepos = 0;
			goto word_separator;
		case '\t':
			/* advance to the next tab stop (multiples of 8) */
			linepos += 8 - (linepos % 8);
			goto word_separator;
		case ' ':
			linepos++;
			/* fallthrough */
		case '\v':
		word_separator:
			/* a separator ends the word in progress, if any */
			if (in_word) {
				in_word = 0;
				words++;
			}
			break;
		default:
			/* any other character is part of a word */
			linepos++;
			in_word = 1;
			break;
		}
	}
	/* The final line may lack a terminator; still consider it for the longest line. */
	if (linepos > length)
		length = linepos;
	/* Input that ends in the middle of a word: count that last word. */
	if (in_word)
		words++;
	print_counts(lines, words, chars, length, name);
	/* Fold this stream's results into the cross-file totals. */
	total_lines += lines;
	total_words += words;
	total_chars += chars;
	if (length > max_length)	/* max_length is the longest line over all files */
		max_length = length;
	fclose(file);
	fflush(stdout);		/* push the output out immediately */
}

程序很短，也很好理解，但是这里switch语句的用法确实和我们平常的写法有点不同。其实switch语句本身的实现就隐含了goto：break实际上就是goto到switch语句块的结束处，只是这里用得更直接、更明显了。希望大家记住这种用法。

阅读(1968) | 评论(0) | 转发(0) |

上一篇：popen函数详解

下一篇：分析cat源码

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6