最近在看《程序员面试宝典》(第三版),发现第 42 页删除注释的代码有些问题,自己做了一些修改,代码如下:
-
/*
 * remove_comment - blank out C-style comments in a text buffer, in place.
 *
 * buf:  source text to scan; it does not have to be NUL-terminated.
 * size: number of valid bytes in buf. Nothing at or beyond buf + size is
 *       read or written.
 *
 * Every byte of a comment, delimiters included, is overwritten with a
 * space, so the buffer length and the line structure are unchanged:
 *   - a line comment is blanked up to, but not including, its terminating
 *     "\n" (a "\r" directly before that "\n" is kept as well), or up to
 *     the end of the buffer when the last line has no newline;
 *   - a block comment is blanked through its closing delimiter; a block
 *     comment that is never closed is left untouched.
 *
 * Comment markers inside character or string literals are ignored. Inside
 * a literal a backslash always escapes the following byte, so an escaped
 * quote never terminates the literal.
 */
void remove_comment(char buf[], int size)
{
    enum {
        IN_CODE,
        IN_CHAR_LITERAL,
        IN_STRING_LITERAL,
        IN_LINE_COMMENT,
        IN_BLOCK_COMMENT
    } state = IN_CODE;
    char *p;
    char *end;
    char *comment_start = NULL;

    if (buf == NULL || size <= 0) {
        return;
    }

    p = buf;
    end = buf + size;

    while (p < end) {
        char c = *p;
        /* Look-ahead byte, or NUL when p is the last valid byte. */
        char next = (p + 1 < end) ? p[1] : '\0';

        switch (state) {
        case IN_CODE:
            if (c == '\'') {
                state = IN_CHAR_LITERAL;
                p++;
            } else if (c == '"') {
                state = IN_STRING_LITERAL;
                p++;
            } else if (c == '/' && next == '/') {
                comment_start = p;
                state = IN_LINE_COMMENT;
                p += 2;
            } else if (c == '/' && next == '*') {
                comment_start = p;
                state = IN_BLOCK_COMMENT;
                /* Skip both delimiter bytes so that "/" followed by "*" and
                 * "/" is not mistaken for an already closed comment. */
                p += 2;
            } else {
                p++;
            }
            break;

        case IN_CHAR_LITERAL:
        case IN_STRING_LITERAL:
            if (c == '\\' && p + 1 < end) {
                /* Escape sequence: the next byte can never close the literal. */
                p += 2;
            } else {
                char closing = (state == IN_CHAR_LITERAL) ? '\'' : '"';

                if (c == closing) {
                    state = IN_CODE;
                }
                p++;
            }
            break;

        case IN_LINE_COMMENT:
            if (c == '\n') {
                /* p[-1] is valid: at least the two slashes precede p. */
                char *stop = (p[-1] == '\r') ? p - 1 : p;

                memset(comment_start, ' ', (size_t)(stop - comment_start));
                state = IN_CODE;
            }
            p++;
            break;

        case IN_BLOCK_COMMENT:
            if (c == '*' && next == '/') {
                p += 2;
                memset(comment_start, ' ', (size_t)(p - comment_start));
                state = IN_CODE;
            } else {
                p++;
            }
            break;

        default:
            p++;
            break;
        }
    }

    /* A line comment on the final line may lack a terminating newline. */
    if (state == IN_LINE_COMMENT) {
        memset(comment_start, ' ', (size_t)(end - comment_start));
    }
}
-
/*
 * Reads "comment.c" from the current directory, blanks out its comments
 * with remove_comment(), and prints the result to standard output.
 *
 * Only the first sizeof(buf) - 1 bytes of the file are processed; anything
 * beyond that is ignored.
 *
 * Returns 0 on success, -1 if the file cannot be opened or nothing could
 * be read from it.
 */
int main(void)
{
    char buf[102400];
    ssize_t n;
    int fd;

    fd = open("comment.c", O_RDONLY, 0);
    if (fd == -1) {
        return -1;
    }

    /* Reserve one byte so the NUL terminator below always fits in buf. */
    n = read(fd, buf, sizeof(buf) - 1);
    close(fd);
    if (n <= 0) {
        return -1;
    }

    remove_comment(buf, (int)n);
    buf[n] = '\0';
    printf("The file converted is:\n%s", buf);
    return 0;
}
算法思路还是很清晰的,无非就是处理会影响注释判断的符号:处理单、双引号,是因为引号内的注释符号应当忽略;处理换行符,是因为行注释以行尾作为结束。
阅读(615) | 评论(0) | 转发(0) |