glibc的 string/strstr.c作者声称其使用的算法是最高效的,打败了其他算法。
/*
* My personal strstr() implementation that beats most other algorithms.
* Until someone tells me otherwise, I assume that this is the
* fastest implementation of strstr() in C.
* I deliberately chose not to comment it. You should have at least
* as much fun trying to understand it, as I had to write it :-).
*
* Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */
#if HAVE_CONFIG_H
# include <config.h>
#endif
#if defined _LIBC || defined HAVE_STRING_H
# include <string.h>
#endif
typedef unsigned chartype;
#undef strstr
char *
strstr (phaystack, pneedle)
const char *phaystack;
const char *pneedle;
{
const unsigned char *haystack, *needle;
chartype b;
const unsigned char *rneedle;
haystack = (const unsigned char *) phaystack;
if ((b = *(needle = (const unsigned char *) pneedle)))
{
chartype c;
haystack--; /* possible ANSI violation */
/* for unify processing afterward */
{
chartype a;
do
if (!(a = *++haystack))
goto ret0; /* if haystack ends, return 0 */
while (a != b); /* find the first postion that equals */
}
/* first char equals, go on */
if (!(c = *++needle))
goto foundneedle; /* only one char in needle, found it */
++needle; /* needle go to 3rd char */
goto jin;
for (;;)
{
{
chartype a;
if (0)
jin:{
if ((a = *++haystack) == c) /* check if the second char in needle equals*/
goto crest;
}
else
a = *++haystack; /* set a = next char to be compared */
do
{
for (; a != b; a = *++haystack) /* search haystack to find a char equals the 1st char of needle */
{
if (!a)
goto ret0;
if ((a = *++haystack) == b) /* one loop compare 2 chars */
break;
if (!a)
goto ret0;
}
}
while ((a = *++haystack) != c); /* until second char equals */
}
crest: /* 2 char equals, go on */
{
chartype a;
{
const unsigned char *rhaystack;
/* rhaystack =next char, haystack-- set haystack point at the postion to be returned */
if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle))) /* go on comparing from 3rd char of needle */
do
{
if (!a)
goto foundneedle;
if (*++rhaystack != (a = *++needle))
break;
if (!a)
goto foundneedle;
}
while (*++rhaystack == (a = *++needle)); /* until reach a char not equal */
needle = rneedle; /* took the register-poor aproach, needle point to 3rd char */
}
if (!a)
break;
}
}
}
foundneedle:
return (char *) haystack;
ret0:
return 0;
}
libc_hidden_builtin_def (strstr)
|
代码读起来真费劲,里面我添了部分注释。
思路是这样的:
如果needle字符串为空,则认为找到;如果不为空,
大循环
在haystack串里找needle第一个字符的位置,然后比较第二个字符;
这时会有两个循环
1。如果第二字符不等,那么继续从needle的第一个字符开始,在haystack串里往下搜索,每次比较两个字符,直到找到前两个字符相等的位置
2。如果第二个字符相等,那么比较needle的第三个字符开始,继续比较,每次两个字符,直到遇到不相等的位置
这里应该主要从概率方面的考虑。
我没发现这个实现与lynx里的strstr.c有什么本质区别。难道我还是没理解上面的代码?
/* Written by Philippe De Muyter . */
/*
* NAME
*
* strstr -- locate first occurrence of a substring
*
* SYNOPSIS
*
* char *strstr (char *s1, char *s2)
*
* DESCRIPTION
*
* Locates the first occurrence in the string pointed to by S1 of the string
* pointed to by S2. Returns a pointer to the substring found, or a NULL
* pointer if not found. If S2 points to a string with zero length, the
* function returns S1.
*
* BUGS
*
*/
char *strstr(char *buf, char *sub)
{
register char *bp;
register char *sp;
if (!*sub)
return buf;
while (*buf) {
bp = buf;
sp = sub;
do {
if (!*sp)
return buf;
} while (*bp++ == *sp++);
buf += 1;
}
return 0;
}
|
阅读(3191) | 评论(0) | 转发(0) |