下面是我的理解,一次性计算出输入字符串的位置,没有重复值.复杂的说就是够造一个有限自动机了.
gperf 可以帮助我们做这件事情,第一是用这个比较简单,第二呢就是这个比较酷!
程序员总喜欢弄些奇奇怪怪的东西在代码里显示自己的卓尔不凡,虽然这个被现代软件工程所鄙视,但是自由软件要的就是
自由!在什么地方使用完美hash?命令行解析,前面的链接已经详细说明了,个人感觉在速度要求高,输入集合确定,查询效率要求高的地方使用完美hash比较合适,毕竟天下没有白吃的午餐,总是空间换速度!
用文字表达思想总是那么的不擅长,还是写个例子先,一切尽在不言中啊!
%{
/* C code that goes verbatim in output */
#include
#include
#include
%}
struct tl{ const char* name ; const char* s2;};
%%
881050971,"aaaaa"
881123701,"bbbbb"
881122057,"ccccc"
881044216,"ddddd"
881052006,"eeeee"
881160639,"fffff"
881120640,"ggggg"
%%
int main(int argc,char **argv)
{
char str[32];
const struct tl * str2;
while(scanf("%s",str))
{
str2=in_word_set(str,strlen(str));
printf("%s ",str2->s2);
}
return 0;
}
运行命令:
gperf -t -L C tlh.gperf >tl.c
那来see see tl.c
/* C code produced by gperf version 3.0.3 */
/* Command-line: gperf -t -L C tlh.gperf */
/* Computed positions: -k'4,8' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35)
&& ('%' == 37) && ('&' == 38) && (''' == 39) && ('(' == 40)
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44)
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48)
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52)
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56)
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60)
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65)
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69)
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73)
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77)
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81)
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85)
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89)
&& ('Z' == 90) && ('[' == 91) && ('\' == 92) && (']' == 93)
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98)
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102)
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106)
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110)
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114)
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118)
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122)
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
error "gperf generated tables don't work with this execution character set. Please report a bug to <>."
#endif
#line 1 "tlh.gperf"
/* C code that goes verbatim in output */
#include
#include
#include
#line 8 "tlh.gperf"
struct tl{ const char* name ; const char* s2;};
#define TOTAL_KEYWORDS 7
#define MIN_WORD_LENGTH 9
#define MAX_WORD_LENGTH 9
#define MIN_HASH_VALUE 9
#define MAX_HASH_VALUE 21
/* maximum key range = 13, duplicates = 0 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
register const char *str;
register unsigned int len;
{
static unsigned char asso_values[] =
{
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 0, 5,
2, 22, 4, 7, 2, 22, 0, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22
};
return len + asso_values[(unsigned char)str[7]+1] + asso_values[(unsigned char)str[3]];
}
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
struct tl *
in_word_set (str, len)
register const char *str;
register unsigned int len;
{
static struct tl wordlist[] =
{
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
#line 10 "tlh.gperf"
{"881050971","aaaaa"},
{""},
#line 13 "tlh.gperf"
{"881044216","ddddd"},
{""}, {""},
#line 14 "tlh.gperf"
{"881052006","eeeee"},
{""},
#line 12 "tlh.gperf"
{"881122057","ccccc"},
{""},
#line 15 "tlh.gperf"
{"881160639","fffff"},
#line 11 "tlh.gperf"
{"881123701","bbbbb"},
{""},
#line 16 "tlh.gperf"
{"881120640","ggggg"}
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
{
register int key = hash (str, len);
if (key <= MAX_HASH_VALUE && key >= 0)
{
register const char *s = wordlist[key].name;
if (*str == *s && !strcmp (str + 1, s + 1))
return &wordlist[key];
}
}
return 0;
}
#line 17 "tlh.gperf"
int main(int argc,char **argv)
{
char str[32];
const struct tl * str2;
while(scanf("%s",str))
{
str2=in_word_set(str,strlen(str));
printf("%s ",str2->s2);
}
return 0;
}
生成代码没有太多出彩的地方,主要就是构建了asso_values 这个静态结构,以什么原则生成这个结构就要看gperf的代码了.
我们注意到,gperf 自动生成了两个函数hash 和 in_word_set ,hash 函数返回一个int值 对应每个key 返回值唯一,
in_word_set 返回key 对应的结构指针.
看gperf的代码的时候注意到它使用的一个hash函数
#include
/*
Some useful hash function.
It's not a particularly good hash function (<< 5 would be better than << 4),
but people believe in it because it comes from Dragon book.
*/
unsigned int
hashpjw (const unsigned char *x, unsigned int len) // From Dragon book, p436
{
unsigned int h = 0;
unsigned int g;
for (; len > 0; len--)
{
h = (h << 4) + *x++;
if ((g = h & 0xf0000000) != 0)
h = (h ^ (g >> 24)) ^ g;
}
return h;
}
有意思的是函数头的注释,不知道 Dragon book 是什么,在google上也没有看出什么来好像很有名的样子.
不行!以后要多写字 少贴些代码.