Chinaunix首页 | 论坛 | 博客
  • 博客访问: 4825922
  • 博文数量: 930
  • 博客积分: 12070
  • 博客等级: 上将
  • 技术积分: 11448
  • 用 户 组: 普通用户
  • 注册时间: 2008-08-15 16:57
文章分类

全部博文(930)

文章存档

2011年(60)

2010年(220)

2009年(371)

2008年(279)

分类: LINUX

2009-08-30 19:47:58

下面是我对完美hash的理解:对输入的字符串只需计算一次就能得到它在表中的位置,而且不同的关键字不会得到重复的值(没有冲突)。复杂点说,就是构造一个有限自动机了。
gperf 可以帮助我们做这件事情,第一是用这个比较简单,第二呢就是这个比较酷!
程序员总喜欢弄些奇奇怪怪的东西在代码里显示自己的卓尔不凡,虽然这个被现代软件工程所鄙视,但是自由软件要的就是
自由!在什么地方使用完美hash?命令行解析,前面的链接已经详细说明了,个人感觉在速度要求高,输入集合确定,查询效率要求高的地方使用完美hash比较合适,毕竟天下没有白吃的午餐,总是空间换速度!
用文字表达思想总是那么的不擅长,还是写个例子先,一切尽在不言中啊!

%{
/* C code that goes verbatim in output */
#include <stdio.h>   /* scanf, printf */
#include <stdlib.h>  /* general utilities (not required by the code shown) */
#include <string.h>  /* strlen, strcmp */
%}
struct tl{ const char* name ; const char* s2;};
%%
881050971,"aaaaa"
881123701,"bbbbb"
881122057,"ccccc"
881044216,"ddddd"
881052006,"eeeee"
881160639,"fffff"
881120640,"ggggg"
%%
/*
 * Read whitespace-separated tokens from standard input and, for each one
 * that is a known keyword, print the string stored in its s2 field followed
 * by two spaces.  Tokens that are not keywords are reported on stderr.
 * Returns 0 once the input is exhausted.
 */
int main(int argc,char **argv)
{
    char str[32];
    const struct tl *str2;

    (void)argc;
    (void)argv;

    /*
     * "%31s" keeps the token inside str (31 characters + terminator).
     * Comparing against 1 also ends the loop at end of input: scanf then
     * returns EOF, which is nonzero and would otherwise loop forever.
     */
    while (scanf("%31s", str) == 1) {
        /* in_word_set takes an unsigned int length, so convert explicitly. */
        str2 = in_word_set(str, (unsigned int)strlen(str));
        if (!str2) {
            /* in_word_set returns a null pointer for unknown tokens. */
            fprintf(stderr, "%s: not found\n", str);
            continue;
        }
        printf("%s  ", str2->s2);
    }
    return 0;
}

运行命令:

gperf -t  -L C tlh.gperf >tl.c
那来see see tl.c

/* C code produced by gperf version 3.0.3 */
/* Command-line: gperf -t -L C tlh.gperf  */
/* Computed positions: -k'4,8' */
/*
 * Compile-time guard: stop the build with an error unless every character
 * listed below has its ISO-646 (ASCII) code in the execution character set.
 */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646.  */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
#line 1 "tlh.gperf"
/* C code that goes verbatim in output */
#include <stdio.h>   /* scanf, printf */
#include <stdlib.h>  /* general utilities (not required by the code shown) */
#include <string.h>  /* strlen, strcmp */
/* One keyword entry: name is the key that gets hashed and compared,
   s2 is the string associated with that key. */
#line 8 "tlh.gperf"
struct tl{ const char* name ; const char* s2;};
/* Sizes of the keyword set and of the hash range, as emitted by gperf. */
#define TOTAL_KEYWORDS 7
#define MIN_WORD_LENGTH 9
#define MAX_WORD_LENGTH 9
#define MIN_HASH_VALUE 9
#define MAX_HASH_VALUE 21
/* maximum key range = 13, duplicates = 0 */
/*
 * Hash function for the keyword table.
 *
 * str: candidate key; the bytes at index 3 and index 7 are read
 *      unconditionally, so at least 8 bytes must be readable.
 * len: length of the key; it is added to the result as-is.
 *
 * Returns len + asso_values[str[7] + 1] + asso_values[str[3]].
 * Every table slot other than those for the digits '0'..'8' holds 22,
 * which is larger than MAX_HASH_VALUE.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (const char *str, unsigned int len)
{
  /* 257 entries: index (unsigned char)str[7] + 1 can reach 256. */
  static const unsigned char asso_values[] =
    {
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22,  0,  5,
       2, 22,  4,  7,  2, 22,  0, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 22, 22, 22, 22
    };
  return len + asso_values[(unsigned char)str[7]+1] + asso_values[(unsigned char)str[3]];
}
/*
 * Look up a key in the keyword table.
 *
 * str: NUL-terminated candidate key.
 * len: length of str; only lengths between MIN_WORD_LENGTH and
 *      MAX_WORD_LENGTH are hashed, everything else is rejected up front.
 *
 * Returns a pointer to the matching wordlist entry, or 0 when str is not
 * one of the keywords.
 *
 * The definition uses a prototype so that callers passing strlen(str)
 * get the size_t converted to unsigned int instead of being passed as-is.
 */
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
struct tl *
in_word_set (const char *str, unsigned int len)
{
  /* Indexed by hash value; slots with no keyword hold an empty name. */
  static struct tl wordlist[] =
    {
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
#line 10 "tlh.gperf"
      {"881050971","aaaaa"},
      {""},
#line 13 "tlh.gperf"
      {"881044216","ddddd"},
      {""}, {""},
#line 14 "tlh.gperf"
      {"881052006","eeeee"},
      {""},
#line 12 "tlh.gperf"
      {"881122057","ccccc"},
      {""},
#line 15 "tlh.gperf"
      {"881160639","fffff"},
#line 11 "tlh.gperf"
      {"881123701","bbbbb"},
      {""},
#line 16 "tlh.gperf"
      {"881120640","ggggg"}
    };
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash (str, len);
      /* Hash values beyond the table belong to no keyword. */
      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key].name;
          /* Compare the first character before paying for strcmp. */
          if (*str == *s && !strcmp (str + 1, s + 1))
            return &wordlist[key];
        }
    }
  return 0;
}
#line 17 "tlh.gperf"

/*
 * Read whitespace-separated tokens from standard input and, for each one
 * that is a known keyword, print the string stored in its s2 field followed
 * by two spaces.  Tokens that are not keywords are reported on stderr.
 * Returns 0 once the input is exhausted.
 */
int main(int argc,char **argv)
{
    char str[32];
    const struct tl *str2;

    (void)argc;
    (void)argv;

    /*
     * "%31s" keeps the token inside str (31 characters + terminator).
     * Comparing against 1 also ends the loop at end of input: scanf then
     * returns EOF, which is nonzero and would otherwise loop forever.
     */
    while (scanf("%31s", str) == 1) {
        /* in_word_set takes an unsigned int length, so convert explicitly. */
        str2 = in_word_set(str, (unsigned int)strlen(str));
        if (!str2) {
            /* in_word_set returns a null pointer for unknown tokens. */
            fprintf(stderr, "%s: not found\n", str);
            continue;
        }
        printf("%s  ", str2->s2);
    }
    return 0;
}

生成的代码没有太多出彩的地方,主要就是构建了 asso_values 这个静态数组,至于以什么原则生成这个数组,就要看 gperf 的代码了。
我们注意到,gperf 自动生成了两个函数 hash 和 in_word_set:hash 函数返回一个 unsigned int 值,每个 key 对应的返回值唯一;
in_word_set 返回 key 对应的结构指针,找不到时返回 0。
看gperf的代码的时候注意到它使用的一个hash函数

#include
/*
 Some useful hash function.
 It's not a particularly good hash function (<< 5 would be better than << 4),
 but people believe in it because it comes from Dragon book.
*/
/*
 * PJW string hash (Dragon book, p436).
 *
 * x:   bytes to hash.
 * len: number of bytes of x to consume.
 *
 * Each byte is folded in after shifting the running value left by four
 * bits; whenever any of bits 28..31 become set, they are XOR-ed back into
 * the low byte region (shifted right by 24) and then cleared.
 * Returns the accumulated value, 0 for len == 0.
 */
unsigned int
hashpjw (const unsigned char *x, unsigned int len)
{
  unsigned int acc = 0;
  unsigned int i;

  for (i = 0; i < len; i++)
    {
      unsigned int top;

      acc = (acc << 4) + x[i];
      top = acc & 0xf0000000;
      if (top != 0)
        {
          acc ^= top >> 24;
          acc ^= top;
        }
    }
  return acc;
}
有意思的是函数头的注释:不知道 Dragon book 是什么,在 google 上也没有看出什么来,好像很有名的样子。
不行!以后要多写字 少贴些代码.
 
阅读(2795) | 评论(0) | 转发(0) |
0

上一篇:最近学习小结2009-8-30

下一篇:抛玻璃算法

给主人留下些什么吧!~~