Chinaunix首页 | 论坛 | 博客
  • 博客访问: 379968
  • 博文数量: 56
  • 博客积分: 1449
  • 博客等级: 中尉
  • 技术积分: 822
  • 用 户 组: 普通用户
  • 注册时间: 2010-10-08 10:24
文章分类

全部博文(56)

文章存档

2014年(7)

2012年(13)

2011年(10)

2010年(26)

分类: C/C++

2011-05-15 15:46:49

  关于TRIE的说明就不罗嗦了.网上很多.这里仅实现了最基本的TRIE。其他TRIE的变种未实现。代码正在编写中。
写完之后测试发现的确很耗内存。BURST TRIE以及HAT TRIE的论文正在研读。搞明白之后再实现一下。


  TRIE的原理相当简单。直接贴代码好了.
代码一: TRIE的数据结构声明以及各种操作集合函数原型声明
代码文件名: simple_trie.h

  1. #ifndef SIMPLE_TRIE_H
  2. #define SIMPLE_TRIE_H
  3. #include <assert.h>

  4. typedef void (*put_value_fn) (void **, void *);
  5. typedef void (*del_value_fn) (void **);

  6. typedef void (*value_print_fn) (void *);

  7. #define ALPHABET_SIZE 26
  8. typedef struct _simple_trie_node {
  9.     char key;
  10.     void *value;
  11.     struct _simple_trie_node *child[ALPHABET_SIZE];
  12. } simple_trie_node;


  13. typedef struct _simple_trie_root {
  14.     void *value;
  15.     put_value_fn put_value;
  16.     del_value_fn del_value;
  17.     struct _simple_trie_node *child[ALPHABET_SIZE];
  18. } simple_trie_root;


  19. static int cal_idx(char key)
  20. {
  21.     assert(key >= 'a' && key <= 'z' && "key string must be lower!");
  22.     return ( key - 'a' );
  23. }

  24. /* create a new simple_trie_root */
  25. simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del);

  26. /* add a key-value pair to trie tree */
  27. int simple_trie_add_pair(simple_trie_root *,const char *, void *);

  28. /* delete a *key*-value mapping in trie tree */
  29. int simple_trie_delete_key(simple_trie_root *, const char *);

  30. /* destroy trie tree */
  31. void simple_trie_destroy(simple_trie_root **);

  32. /* get value mapped by *key* */
  33. void *simple_trie_get_value(simple_trie_root *, const char *);

  34. /* test if trie tree has *key* */
  35. int simple_trie_has_key(simple_trie_root *, const char *);

  36. void simple_trie_dump(simple_trie_root *, value_print_fn print);
  37. #endif
代码二: TRIE的具体操作实现代码
代码文件名: simple_trie.c

  1. /*
     * a simple implementation of Trie tree.
     * any suggestions are welcomed
     *        liangtao90s@gmail.com
     */

  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <stdlib.h>
  5. #include <assert.h>
  6. #include "simple_trie.h"
  7. #include "wrap_malloc.h"

  1. simple_trie_node *simple_trie_node_new(char key, void *value)
  2. {
  3.     simple_trie_node *node;
  4.     node = WRAP_calloc(1, sizeof(simple_trie_node));
  5.     if (node) {
  6.         node->key = key;
  7.         node->value = value;
  8.     } else {
  9.         OOM();
  10.     }
  11.     return node;
  12. }

  13. simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del)
  14. {
  15.     simple_trie_root *root;
  16.     root = WRAP_calloc(1, sizeof(simple_trie_root));
  17.     if (root) {
  18.         root->put_value = put;
  19.         root->del_value = del;
  20.     } else {
  21.         OOM();
  22.     }

  23.     return root;
  24. }


  25. static simple_trie_node **simple_trie_find_last_match_node(simple_trie_root *root,const char **__key)
  26. {
  27.     const char *key = *__key;
  28.     simple_trie_node **node = &root->child[cal_idx(*key)];
  29.     while (*node) {
  30.         key++;
  31.         if (!*key)
  32.             break;
  33.         node = &(*node)->child[cal_idx(*key)];
  34.     }

  35.     *__key = key;
  36.     return node;
  37. }

  38. int simple_trie_add_pair(simple_trie_root *root, const char *key, void *value)
  39. {
  40.     if (!root || !key )
  41.         return -1;

  42.     if (!*key) { /* try to map a empty string to a value */
  43.         root->del_value(&root->value);
  44.         root->put_value(&root->value, value);
  45.         return 0;
  46.     }

  47.     simple_trie_node **node;
  48.     node = simple_trie_find_last_match_node(root, &key);

  49.     if (!*key) { /* simplely replace */
  50.         root->del_value(&(*node)->value);
  51.         root->put_value(&(*node)->value, value);
  52.     } else { /* start construct nodes */
  53.         simple_trie_node *father;
  54.         simple_trie_node *child;
  55.         father = *node = simple_trie_node_new(*key++, NULL);
  56.         while (*key) {
  57.             child = simple_trie_node_new(*key, NULL);
  58.             father->child[cal_idx(*key)] = child;
  59.             father = child;
  60.             key++;
  61.         }

  62.         root->put_value(&father->value, value);
  63.     }

  64.     return 0;
  65. }

  66. static int simple_trie_node_has_child(simple_trie_node *node)
  67. {
  68.     int has_child = 0;
  69.     int i;
  70.     for (i = 0; i < ALPHABET_SIZE; i++) {
  71.         if (node->child[i]) {
  72.             has_child = 1;
  73.             break;
  74.         }
  75.     }

  76.     return has_child;
  77. }
  78.     
  79. static int simple_trie_node_has_only_child(simple_trie_node *node)
  80. {
  81.     int has_only_child = 1;
  82.     int child = 0;
  83.     int i;
  84.     for (i = 0; i < ALPHABET_SIZE; i++) {
  85.         if (node->child[i])
  86.             child++;
  87.         if (child == 2) {
  88.             has_only_child = 0;
  89.             break;
  90.         }
  91.     }
  92.     return has_only_child;
  93. }


  94. static simple_trie_node *simple_trie_find_trie_node(simple_trie_node *start, \
  95.                 const char **__key, simple_trie_node **__extra)
  96. {
  97.     if (!start || !__key || !*__key)
  98.         return NULL;

  99.     int is_leaf = 1;
  100.     const char *key = *__key;
  101.     if (__extra)
  102.         *__extra = start;
  103.     while (*key) {
  104.         /* record the nearest branch above the to-be-found node */
  105.         if (__extra && !simple_trie_node_has_only_child(start)) {
  106.             *__extra = start;
  107.             *__key = key;
  108.         }
  109.         start = start->child[cal_idx(*key++)];
  110.         if (!start)
  111.             return NULL;
  112.     }

  113.     if (simple_trie_node_has_child(start))
  114.         is_leaf = 0;

  115.     /* search along the *__extra* to find the last key-value pair */
  116.     if (is_leaf && __extra) {
  117.         simple_trie_node *tmp = *__extra;
  118.         simple_trie_node *tmp_extra = *__extra;
  119.         const char *tmp_key = *__key;
  120.         key = *__key;
  121.         while (*key) {
  122.             tmp = tmp->child[cal_idx(*key++)];
  123.             if (tmp->value && simple_trie_node_has_child(tmp)) {
  124.                 *__extra = tmp;
  125.                 *__key = key;
  126.             }
  127.         }
  128.         /* the *_extra* branch has only one path leading to start. fix it */
  129.         if (*__extra == tmp) {
  130.             *__extra = tmp_extra;
  131.             *__key = tmp_key;
  132.         }
  133.     }
  134.     
  135.     return start;
  136. }


  137. static void simple_trie_delete_trie_node(simple_trie_root *root, \
  138.                 simple_trie_node *delete_from, const char *key)
  139. {
  140.     if (!delete_from || !key)
  141.         return;
  142.     simple_trie_node *node;
  143.     simple_trie_node *tmp = delete_from->child[cal_idx(*key)];
  144.     delete_from->child[cal_idx(*key++)] = NULL;

  145.     while (*key && tmp) {
  146.         node = tmp;

  147.         tmp = tmp->child[cal_idx(*key++)];
  148.         WRAP_free(node);
  149.     }
  150.     /* don't */
  151.     if (tmp) {
  152.         WRAP_free(tmp);
  153.     }

  154.     /* maybe delete_from is one of the roots in the forests */
  155.     if (!delete_from->value && !simple_trie_node_has_child(delete_from)) {
  156.         root->child[cal_idx(delete_from->key)] = NULL;
  157.         WRAP_free(delete_from);

  158.     }

  159. }


  160. int simple_trie_delete_key(simple_trie_root *root, const char *key)
  161. {
  162.     if (!root || !key)
  163.         return 0;

  164.     simple_trie_node *node;
  165.     simple_trie_node *deleting_node;
  166.     if (!*key) {    /* delete empty key */
  167.         if (root->value)
  168.             root->del_value(&root->value);
  169.         else    /* empty key maps no value */
  170.             return -1;
  171.         return 0;
  172.     }

  173.     node = root->child[cal_idx(*key++)];
  174.     if (!node)
  175.         return -1;

  176.     /* delete one of the roots in forests */
  177.     if (!*key) {
  178.         if (node->value)
  179.             root->del_value(&node->value);
  180.         if (!simple_trie_node_has_child(node)) {
  181.             WRAP_free(node);
  182.             key--;
  183.             root->child[cal_idx(*key)] = NULL;
  184.         }
  185.         return 0;
  186.     }

  187.     node = simple_trie_find_trie_node(node, &key, &deleting_node);

  188.     if (!node) /* no such key */
  189.         return -1;

  190.     root->del_value(&node->value);

  191.     /*
  192.      * this means the node coressponding to the key is a leaf in trie tree
  193.      * so delete all extra nodes below deleting_node.
  194.      */
  195.     if (!simple_trie_node_has_child(node) && deleting_node)
  196.         simple_trie_delete_trie_node(root, deleting_node, key);

  197.     return 0;
  198. }


  199. int simple_trie_has_key(simple_trie_root *root, const char *key)
  200. {
  201.     if (!root || !key)
  202.         return 0;

  203.     if (!*key) { /* empty string mappings */
  204.         if (root->value)
  205.             return 1;
  206.         else
  207.             return 0;
  208.     }

  209.     simple_trie_node *node;

  210.     node = root->child[cal_idx(*key++)];

  211.     if (!node)
  212.         return 0;
  213.     else {
  214.         if (*key)
  215.             node = simple_trie_find_trie_node(node, &key, NULL);
  216.         
  217.         if (node->value)
  218.             return 1;
  219.         else
  220.             return 0;
  221.     }
  222. }


  223. void *simple_trie_get_value(simple_trie_root *root, const char *key)
  224. {
  225.     if (!root || !key)
  226.         return NULL;

  227.     if (!*key) /* empty string mappings */
  228.         return root->value;

  229.     simple_trie_node *node;
  230.     node = root->child[cal_idx(*key++)];

  231.     if (*key)
  232.         node = simple_trie_find_trie_node(node, &key, NULL);


  233.     if (node)
  234.         return node->value;
  235.     else
  236.         return NULL;
  237. }


  238. /* recusively free trie nodes. Hopes the stack won't get broken */

  239. static void simple_trie_destroy_node(simple_trie_root *root, simple_trie_node *node)
  240. {
  241.     int i;
  242.     for (i = 0; i < ALPHABET_SIZE; i++) {
  243.         if (node->child[i])
  244.             simple_trie_destroy_node(root, node->child[i]);
  245.     }

  246.     if (node->value)
  247.         root->del_value(&node->value);
  248.     WRAP_free(node);
  249. }



  250. void simple_trie_destroy(simple_trie_root **__root)
  251. {
  252.     if (!__root)
  253.         return;

  254.     simple_trie_root *root = *__root;
  255.     int i;
  256.     for (i = 0; i < ALPHABET_SIZE; i++) {
  257.         if (root->child[i])
  258.             simple_trie_destroy_node(root, root->child[i]);
  259.     }
  260.     if (root->value)
  261.         root->del_value(&root->value);
  262.     WRAP_free(root);
  263.     *__root = NULL;
  264. }

  265. #define MAX_KEY_LEN 512
  266. struct _prefix {
  267.     char prefix[MAX_KEY_LEN];
  268.     int n_prefix;
  269. } prefix;

  270. static void append_prefix(struct _prefix *prefix, char key)
  271. {
  272.     prefix->prefix[prefix->n_prefix] = key;
  273.     prefix->n_prefix++;
  274.     if (prefix->n_prefix >= MAX_KEY_LEN) {
  275.         fprintf(stderr, "prefix too long!\n");
  276.         exit(-1);
  277.     }
  278. }

  279. static void deappend_prefix(struct _prefix *prefix)
  280. {
  281.     prefix->n_prefix--;
  282. }

  283. static void simple_trie_node_dump(simple_trie_node *node, struct _prefix *prefix, value_print_fn print)
  284. {
  285.     int i;
  286.     append_prefix(prefix, node->key);
  287.     for (i = 0; i < ALPHABET_SIZE; i++) {
  288.         if (node->child[i])
  289.             simple_trie_node_dump(node->child[i], prefix, print);
  290.     }

  291.     if (node->value) {
  292.         printf("[%.*s] => ", prefix->n_prefix, prefix->prefix);
  293.         print(node->value);
  294.     }

  295.     deappend_prefix(prefix);
  296. }
  297.         




  298. void simple_trie_dump(simple_trie_root *root, value_print_fn print)
  299. {
  300.     if (root->value) {
  301.          printf("[empty string] => ");
  302.          print(root->value);
  303.     }

  304.     int i;
  305.     for (i = 0; i < ALPHABET_SIZE; i++) {
  306.         if (root->child[i]) {
  307.             prefix.n_prefix = 0;
  308.             simple_trie_node_dump(root->child[i], &prefix, print);
  309.         }
  310.     }
  311. }

以上实现了基本的TRIE结构的操作.
阅读(3141) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~