Chinaunix首页 | 论坛 | 博客
  • 博客访问: 378023
  • 博文数量: 56
  • 博客积分: 1449
  • 博客等级: 中尉
  • 技术积分: 822
  • 用 户 组: 普通用户
  • 注册时间: 2010-10-08 10:24
文章分类

全部博文(56)

文章存档

2014年(7)

2012年(13)

2011年(10)

2010年(26)

分类: C/C++

2011-05-16 16:55:54

使用数组来保存链接子节点的的trie树的确非常的耗费内存.比如我用脚本随机生成的长度均匀分配在3-30的
字符串,共28W条.480W个字符.建立起trie树耗费的内存为
  1. [Total]:Total memory consumed 397.1 MB
而使用链表耗费的内存为
  1. [Total]:Total memory consumed 56.7 MB

修改的地方不多.原理一样.不罗嗦了.代码如下

头文件
  1. #ifndef SIMPLE_TRIE_H
  2. #define SIMPLE_TRIE_H
  3. #include <assert.h>

  4. typedef void (*put_value_fn) (void **, void *);
  5. typedef void (*del_value_fn) (void **);

  6. typedef void (*value_print_fn) (void *);


  7. typedef struct _simple_trie_node {
  8.     char key;
  9.     void *value;
  10.     struct _simple_trie_node *silbling;
  11.     struct _simple_trie_node *first_child;
  12. } simple_trie_node;


  13. typedef struct _simple_trie_root {
  14.     void *value;
  15.     put_value_fn put_value;
  16.     del_value_fn del_value;
  17.     struct _simple_trie_node *first_child;
  18. } simple_trie_root;




  19. /* create a new simple_trie_root */
  20. simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del);

  21. /* add a key-value pair to trie tree */
  22. int simple_trie_add_pair(simple_trie_root *,const char *, void *);

  23. /* delete a *key*-value mapping in trie tree */
  24. int simple_trie_delete_key(simple_trie_root *, const char *);

  25. /* destroy trie tree */
  26. void simple_trie_destroy(simple_trie_root **);

  27. /* get value mapped by *key* */
  28. void *simple_trie_get_value(simple_trie_root *, const char *);

  29. /* test if trie tree has *key* */
  30. int simple_trie_has_key(simple_trie_root *, const char *);

  31. void simple_trie_dump(simple_trie_root *, value_print_fn print);
  32. #endif


实现文件
  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4. #include <assert.h>
  5. #include "simple_trie.h"
  6. #include "wrap_malloc.h"

  7. void default_del_value(void **value)
  8. {
  9.     if (value)
  10.         *value = NULL;
  11. }

  12. void default_put_value(void **value, void *assign_value)
  13. {
  14.     if (value)
  15.         *value = assign_value;
  16. }

  17. simple_trie_node *simple_trie_node_new(char key, void *value)
  18. {
  19.     simple_trie_node *node;
  20.     node = WRAP_calloc(1, sizeof(simple_trie_node));
  21.     if (node) {
  22.         node->key = key;
  23.         node->value = value;
  24.     } else {
  25.         OOM();
  26.     }
  27.     return node;
  28. }

  29. simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del)
  30. {
  31.     simple_trie_root *root;
  32.     root = WRAP_calloc(1, sizeof(simple_trie_root));
  33.     if (root) {
  34.         if (put)
  35.             root->put_value = put;
  36.         else
  37.             root->put_value = default_put_value;

  38.         if (del)
  39.             root->del_value = del;
  40.         else
  41.             root->del_value = default_del_value;
  42.     } else {
  43.         OOM();
  44.     }

  45.     return root;
  46. }

  47. static simple_trie_node *simple_trie_search_silbling(simple_trie_node *node ,simple_trie_node **prev, const char key)
  48. {
  49.     if (prev)
  50.         *prev = NULL;

  51.     while (node) {
  52.         if (node->key == key)
  53.             break;
  54.         if (prev)
  55.             *prev = node;
  56.         node = node->silbling;
  57.     }
  58.     return node;
  59. }

  60. static simple_trie_node *simple_trie_find_last_match_node(simple_trie_root *root,const char **__key)
  61. {
  62.     const char *key = *__key;
  63.     int matched_even_once = 0;
  64.     simple_trie_node *last;
  65.     simple_trie_node *node = root->first_child;
  66.     last = node;

  67.     while (*key) {
  68.         node = simple_trie_search_silbling(node, NULL, *key);
  69.         if (!node)
  70.             break;
  71.         matched_even_once = 1;
  72.         last = node;
  73.         node = node->first_child;    
  74.         key++;
  75.     }
  76.     
  77.     if (matched_even_once)
  78.         *__key = key;
  79.     else
  80.         last = NULL;

  81.     return last;
  82. }




  83. static void simple_trie_add_child(simple_trie_node **first_child, simple_trie_node *c)
  84. {

  85.     if (!*first_child)
  86.         *first_child = c;
  87.     else {
  88. #if 0        /* add at the front of the silbling chain */
  89.         simple_trie_node *youngest_silbling;
  90.         youngest_silbling = *first_child;
  91.         while (youngest_silbling->silbling)
  92.             youngest_silbling = youngest_silbling->silbling;
  93.         youngest_silbling->silbling = c;
  94. #else        /* add at the end of the silbling chain */
  95.         c->silbling = *first_child;
  96.         *first_child = c;
  97. #endif    
  98.     }
  99. }

  100. int simple_trie_add_pair(simple_trie_root *root, const char *key, void *value)
  101. {
  102.     if (!root || !key )
  103.         return -1;

  104.     if (!*key) { /* try to map a empty string to a value */
  105.         if (root->value)
  106.             root->del_value(&root->value);
  107.             root->put_value(&root->value, value);
  108.         return 0;
  109.     }

  110.     simple_trie_node *node;
  111.     simple_trie_node *child;
  112.     node = simple_trie_find_last_match_node(root, &key);


  113.     if (!*key) { /* simplely replace */
  114.         root->del_value(&node->value);
  115.         root->put_value(&node->value, value);
  116.     } else { /* start construct nodes */
  117.         if (!node) { /* construct from root */
  118.             node = simple_trie_node_new(*key++, NULL);
  119.             simple_trie_add_child(&root->first_child, node);
  120.         }

  121.         while (*key) {
  122.             child = simple_trie_node_new(*key++, NULL);
  123.             simple_trie_add_child(&node->first_child, child);
  124.             node = child;
  125.         }

  126.         root->put_value(&node->value, value);
  127.     }

  128.     return 0;
  129. }


  130. static int simple_trie_node_has_child(simple_trie_node *node)
  131. {
  132.     return node->first_child != NULL;
  133. }
  134.     
  135. static int simple_trie_node_has_only_child(simple_trie_node *node)
  136. {
  137.     return ( node->first_child && !node->first_child->silbling );
  138. }


  139. /*
  140.  * if called from delete-key function. then the caller provide __extra to get
  141.  * extra returned results,also this means this function must do some extra work
  142.  * instead of only finding the last node coresspoding to the *__key.
  143.  * it puts the nearest root of the branch from to-be-found node in *__extra.
  144.  * thus, the delete-key function can easily decide where to start deleting nodes
  145.  */
  146. static simple_trie_node *simple_trie_find_trie_node(simple_trie_node *start, \
  147.                 const char **__key, simple_trie_node **__extra)
  148. {
  149.     if (!start || !__key || !*__key)
  150.         return NULL;

  151.     int is_leaf = 1;
  152.     const char *key = *__key;
  153.     if (__extra)
  154.         *__extra = start;
  155.     while (*key) {
  156.         /* record the nearest branch above the to-be-found node */
  157.         if (__extra && !simple_trie_node_has_only_child(start)) {
  158.             *__extra = start;
  159.             *__key = key;
  160.         }
  161.         start = simple_trie_search_silbling(start->first_child, NULL, *key++);
  162.         if (!start)
  163.             return NULL;
  164.     }

  165.     if (simple_trie_node_has_child(start))
  166.         is_leaf = 0;

  167.     /* search along the *__extra* to find the last key-value pair */
  168.     if (is_leaf && __extra) {
  169.         simple_trie_node *tmp = *__extra;
  170.         simple_trie_node *tmp_extra = *__extra;
  171.         const char *tmp_key = *__key;
  172.         key = *__key;
  173.         while (*key) {
  174.             tmp = simple_trie_search_silbling(tmp->first_child, NULL, *key++);
  175.             if (tmp->value && simple_trie_node_has_child(tmp)) {
  176.                 *__extra = tmp;
  177.                 *__key = key;
  178.             }
  179.         }
  180.         /* the *_extra* branch has only one path leading to start. fix it */
  181.         if (*__extra == tmp) {
  182.             *__extra = tmp_extra;
  183.             *__key = tmp_key;
  184.         }
  185.     }
  186.     
  187.     return start;
  188. }

  189. static void simple_trie_delete_child(simple_trie_node **first_child, simple_trie_node *delete)
  190. {
  191.     simple_trie_node *tmp = *first_child;
  192.     simple_trie_node *prev = tmp;

  193.     if (*first_child = delete) {
  194.         *first_child = delete->silbling;
  195.         return;
  196.     }
  197.     
  198.     while (tmp) {
  199.         if (prev == delete)
  200.             break;
  201.         prev = tmp;
  202.         tmp = tmp->silbling;
  203.     }
  204.     
  205.     if (tmp)
  206.         prev->silbling = delete->silbling;
  207. }

  208. static void simple_trie_delete_trie_node(simple_trie_root *root, \
  209.                 simple_trie_node *delete_from, const char *key)
  210. {
  211.     if (!delete_from || !key)
  212.         return;
  213.     simple_trie_node *node;
  214.     simple_trie_node *tmp = simple_trie_search_silbling(delete_from->first_child, &node, *key);
  215.     if (node)
  216.         node->silbling = tmp->silbling;
  217.     else
  218.         delete_from->first_child = tmp->silbling;

  219.     while (*key && tmp) {
  220.         node = tmp;

  221.         tmp = simple_trie_search_silbling(tmp->first_child, NULL, *++key);
  222.         WRAP_free(node);
  223.     }
  224.     /* don't */
  225.     if (tmp) {
  226.         WRAP_free(tmp);
  227.     }

  228.     /* maybe delete_from is one of the roots in the forests */
  229.     if (!delete_from->value && !simple_trie_node_has_child(delete_from)) {
  230.         simple_trie_delete_child(&root->first_child, delete_from);
  231.         WRAP_free(delete_from);

  232.     }

  233. }


  234. int simple_trie_delete_key(simple_trie_root *root, const char *key)
  235. {
  236.     if (!root || !key)
  237.         return 0;

  238.     simple_trie_node *node;
  239.     simple_trie_node *prev;
  240.     simple_trie_node *deleting_node;
  241.     if (!*key) {    /* delete empty key */
  242.         if (root->value)
  243.             root->del_value(&root->value);
  244.         else    /* empty key maps no value */
  245.             return -1;
  246.         return 0;
  247.     }

  248.     node = simple_trie_search_silbling(root->first_child, NULL, *key++);
  249.     if (!node)
  250.         return -1;

  251.     /* delete one of the roots in forests */
  252.     if (!*key) {
  253.         if (node->value)
  254.             root->del_value(&node->value);
  255.         if (!simple_trie_node_has_child(node)) {
  256.             WRAP_free(node);
  257.             key--;
  258.             simple_trie_delete_child(&root->first_child, node);
  259.         }
  260.         return 0;
  261.     }

  262.     node = simple_trie_find_trie_node(node, &key, &deleting_node);

  263.     if (!node) /* no such key */
  264.         return -1;

  265.     root->del_value(&node->value);

  266.     /*
  267.      * this means the node coressponding to the key is a leaf in trie tree
  268.      * so delete all extra nodes below deleting_node.
  269.      */
  270.     if (!simple_trie_node_has_child(node) && deleting_node)
  271.         simple_trie_delete_trie_node(root, deleting_node, key);

  272.     return 0;
  273. }



  274. int simple_trie_has_key(simple_trie_root *root, const char *key)
  275. {
  276.     if (!root || !key)
  277.         return 0;

  278.     if (!*key) { /* empty string mappings */
  279.         if (root->value)
  280.             return 1;
  281.         else
  282.             return 0;
  283.     }

  284.     simple_trie_node *node;

  285.     node = simple_trie_search_silbling(root->first_child, NULL, *key++);
  286.     if (!node)
  287.         return 0;
  288.     else {
  289.         if (*key)
  290.             node = simple_trie_find_trie_node(node, &key, NULL);
  291.         
  292.         if (node->value)
  293.             return 1;
  294.         else
  295.             return 0;
  296.     }
  297. }


  298. void *simple_trie_get_value(simple_trie_root *root, const char *key)
  299. {
  300.     if (!root || !key)
  301.         return NULL;

  302.     if (!*key) /* empty string mappings */
  303.         return root->value;

  304.     simple_trie_node *node;
  305.     node = simple_trie_search_silbling(root->first_child, NULL, *key++);
  306.     if (*key)
  307.         node = simple_trie_find_trie_node(node, &key, NULL);


  308.     if (node)
  309.         return node->value;
  310.     else
  311.         return NULL;
  312. }


  313. /* recusively free trie nodes. Hopes the stack won't get broken */

  314. static void simple_trie_destroy_node(simple_trie_root *root, simple_trie_node *node)
  315. {
  316.     simple_trie_node *child = node->first_child;
  317.     while(child) {    
  318.         simple_trie_destroy_node(root, child);
  319.         child = child->silbling;
  320.     }

  321.     if (node->value) {
  322.         root->del_value(&node->value);
  323.     }
  324.     WRAP_free(node);
  325. }



  326. void simple_trie_destroy(simple_trie_root **__root)
  327. {
  328.     if (!__root)
  329.         return;

  330.     simple_trie_root *root = *__root;
  331.     simple_trie_node *node = root->first_child;
  332.     while(node) {
  333.         simple_trie_destroy_node(root, node);
  334.         node = node->silbling;
  335.         
  336.     }
  337.     if (root->value)
  338.         root->del_value(&root->value);
  339.     WRAP_free(root);
  340.     *__root = NULL;
  341. }



  342. #define MAX_KEY_LEN 512
  343. struct _prefix {
  344.     char prefix[MAX_KEY_LEN];
  345.     int n_prefix;
  346. } prefix;

  347. static void append_prefix(struct _prefix *prefix, char key)
  348. {
  349.     prefix->prefix[prefix->n_prefix] = key;
  350.     prefix->n_prefix++;
  351.     if (prefix->n_prefix >= MAX_KEY_LEN) {
  352.         fprintf(stderr, "prefix too long!\n");
  353.         exit(-1);
  354.     }
  355. }

  356. static void deappend_prefix(struct _prefix *prefix)
  357. {
  358.     prefix->n_prefix--;
  359. }

  360. static void simple_trie_node_dump(simple_trie_node *node, struct _prefix *prefix, value_print_fn print)
  361. {
  362.     simple_trie_node *child = node->first_child;
  363.     append_prefix(prefix, node->key);
  364.     while (child) {
  365.         simple_trie_node_dump(child, prefix, print);
  366.         child = child->silbling;
  367.     }

  368.     if (node->value) {
  369.         printf("[%.*s] => ", prefix->n_prefix, prefix->prefix);
  370.         print(node->value);
  371.     }

  372.     deappend_prefix(prefix);
  373. }
  374.         




  375. void simple_trie_dump(simple_trie_root *root, value_print_fn print)
  376. {
  377.     if (root->value) {
  378.          printf("[empty string] => ");
  379.          print(root->value);
  380.     }

  381.     simple_trie_node *node = root->first_child;
  382.     while (node) {
  383.         prefix.n_prefix = 0;
  384.         simple_trie_node_dump(node, &prefix, print);
  385.         node = node->silbling;
  386.     }
  387. }
阅读(2665) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~