使用数组来保存链接子节点的的trie树的确非常的耗费内存.比如我用脚本随机生成的长度均匀分配在3-30的
字符串,共28W条.480W个字符.建立起trie树耗费的内存为
- [Total]:Total memory consumed 397.1 MB
而使用链表耗费的内存为
- [Total]:Total memory consumed 56.7 MB
修改的地方不多.原理一样.不罗嗦了.代码如下
头文件
- #ifndef SIMPLE_TRIE_H
-
#define SIMPLE_TRIE_H
-
#include <assert.h>
-
-
typedef void (*put_value_fn) (void **, void *);
-
typedef void (*del_value_fn) (void **);
-
-
typedef void (*value_print_fn) (void *);
-
-
-
typedef struct _simple_trie_node {
-
char key;
-
void *value;
-
struct _simple_trie_node *silbling;
-
struct _simple_trie_node *first_child;
-
} simple_trie_node;
-
-
-
typedef struct _simple_trie_root {
-
void *value;
-
put_value_fn put_value;
-
del_value_fn del_value;
-
struct _simple_trie_node *first_child;
-
} simple_trie_root;
-
-
-
-
-
/* create a new simple_trie_root */
-
simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del);
-
-
/* add a key-value pair to trie tree */
-
int simple_trie_add_pair(simple_trie_root *,const char *, void *);
-
-
/* delete a *key*-value mapping in trie tree */
-
int simple_trie_delete_key(simple_trie_root *, const char *);
-
-
/* destroy trie tree */
-
void simple_trie_destroy(simple_trie_root **);
-
-
/* get value mapped by *key* */
-
void *simple_trie_get_value(simple_trie_root *, const char *);
-
-
/* test if trie tree has *key* */
-
int simple_trie_has_key(simple_trie_root *, const char *);
-
-
void simple_trie_dump(simple_trie_root *, value_print_fn print);
-
#endif
实现文件
- #include <stdio.h>
-
#include <string.h>
-
#include <stdlib.h>
-
#include <assert.h>
-
#include "simple_trie.h"
-
#include "wrap_malloc.h"
-
-
void default_del_value(void **value)
-
{
-
if (value)
-
*value = NULL;
-
}
-
-
void default_put_value(void **value, void *assign_value)
-
{
-
if (value)
-
*value = assign_value;
-
}
-
-
simple_trie_node *simple_trie_node_new(char key, void *value)
-
{
-
simple_trie_node *node;
-
node = WRAP_calloc(1, sizeof(simple_trie_node));
-
if (node) {
-
node->key = key;
-
node->value = value;
-
} else {
-
OOM();
-
}
-
return node;
-
}
-
-
simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del)
-
{
-
simple_trie_root *root;
-
root = WRAP_calloc(1, sizeof(simple_trie_root));
-
if (root) {
-
if (put)
-
root->put_value = put;
-
else
-
root->put_value = default_put_value;
-
-
if (del)
-
root->del_value = del;
-
else
-
root->del_value = default_del_value;
-
} else {
-
OOM();
-
}
-
-
return root;
-
}
-
-
static simple_trie_node *simple_trie_search_silbling(simple_trie_node *node ,simple_trie_node **prev, const char key)
-
{
-
if (prev)
-
*prev = NULL;
-
-
while (node) {
-
if (node->key == key)
-
break;
-
if (prev)
-
*prev = node;
-
node = node->silbling;
-
}
-
return node;
-
}
-
-
static simple_trie_node *simple_trie_find_last_match_node(simple_trie_root *root,const char **__key)
-
{
-
const char *key = *__key;
-
int matched_even_once = 0;
-
simple_trie_node *last;
-
simple_trie_node *node = root->first_child;
-
last = node;
-
-
while (*key) {
-
node = simple_trie_search_silbling(node, NULL, *key);
-
if (!node)
-
break;
-
matched_even_once = 1;
-
last = node;
-
node = node->first_child;
-
key++;
-
}
-
-
if (matched_even_once)
-
*__key = key;
-
else
-
last = NULL;
-
-
return last;
-
}
-
-
-
-
-
static void simple_trie_add_child(simple_trie_node **first_child, simple_trie_node *c)
-
{
-
-
if (!*first_child)
-
*first_child = c;
-
else {
-
#if 0 /* add at the front of the silbling chain */
-
simple_trie_node *youngest_silbling;
-
youngest_silbling = *first_child;
-
while (youngest_silbling->silbling)
-
youngest_silbling = youngest_silbling->silbling;
-
youngest_silbling->silbling = c;
-
#else /* add at the end of the silbling chain */
-
c->silbling = *first_child;
-
*first_child = c;
-
#endif
-
}
-
}
-
-
int simple_trie_add_pair(simple_trie_root *root, const char *key, void *value)
-
{
-
if (!root || !key )
-
return -1;
-
-
if (!*key) { /* try to map a empty string to a value */
-
if (root->value)
-
root->del_value(&root->value);
-
root->put_value(&root->value, value);
-
return 0;
-
}
-
-
simple_trie_node *node;
-
simple_trie_node *child;
-
node = simple_trie_find_last_match_node(root, &key);
-
-
-
if (!*key) { /* simplely replace */
-
root->del_value(&node->value);
-
root->put_value(&node->value, value);
-
} else { /* start construct nodes */
-
if (!node) { /* construct from root */
-
node = simple_trie_node_new(*key++, NULL);
-
simple_trie_add_child(&root->first_child, node);
-
}
-
-
while (*key) {
-
child = simple_trie_node_new(*key++, NULL);
-
simple_trie_add_child(&node->first_child, child);
-
node = child;
-
}
-
-
root->put_value(&node->value, value);
-
}
-
-
return 0;
-
}
-
-
-
static int simple_trie_node_has_child(simple_trie_node *node)
-
{
-
return node->first_child != NULL;
-
}
-
-
static int simple_trie_node_has_only_child(simple_trie_node *node)
-
{
-
return ( node->first_child && !node->first_child->silbling );
-
}
-
-
-
/*
-
* if called from delete-key function. then the caller provide __extra to get
-
* extra returned results,also this means this function must do some extra work
-
* instead of only finding the last node coresspoding to the *__key.
-
* it puts the nearest root of the branch from to-be-found node in *__extra.
-
* thus, the delete-key function can easily decide where to start deleting nodes
-
*/
-
static simple_trie_node *simple_trie_find_trie_node(simple_trie_node *start, \
-
const char **__key, simple_trie_node **__extra)
-
{
-
if (!start || !__key || !*__key)
-
return NULL;
-
-
int is_leaf = 1;
-
const char *key = *__key;
-
if (__extra)
-
*__extra = start;
-
while (*key) {
-
/* record the nearest branch above the to-be-found node */
-
if (__extra && !simple_trie_node_has_only_child(start)) {
-
*__extra = start;
-
*__key = key;
-
}
-
start = simple_trie_search_silbling(start->first_child, NULL, *key++);
-
if (!start)
-
return NULL;
-
}
-
-
if (simple_trie_node_has_child(start))
-
is_leaf = 0;
-
-
/* search along the *__extra* to find the last key-value pair */
-
if (is_leaf && __extra) {
-
simple_trie_node *tmp = *__extra;
-
simple_trie_node *tmp_extra = *__extra;
-
const char *tmp_key = *__key;
-
key = *__key;
-
while (*key) {
-
tmp = simple_trie_search_silbling(tmp->first_child, NULL, *key++);
-
if (tmp->value && simple_trie_node_has_child(tmp)) {
-
*__extra = tmp;
-
*__key = key;
-
}
-
}
-
/* the *_extra* branch has only one path leading to start. fix it */
-
if (*__extra == tmp) {
-
*__extra = tmp_extra;
-
*__key = tmp_key;
-
}
-
}
-
-
return start;
-
}
-
-
static void simple_trie_delete_child(simple_trie_node **first_child, simple_trie_node *delete)
-
{
-
simple_trie_node *tmp = *first_child;
-
simple_trie_node *prev = tmp;
-
-
if (*first_child = delete) {
-
*first_child = delete->silbling;
-
return;
-
}
-
-
while (tmp) {
-
if (prev == delete)
-
break;
-
prev = tmp;
-
tmp = tmp->silbling;
-
}
-
-
if (tmp)
-
prev->silbling = delete->silbling;
-
}
-
-
static void simple_trie_delete_trie_node(simple_trie_root *root, \
-
simple_trie_node *delete_from, const char *key)
-
{
-
if (!delete_from || !key)
-
return;
-
simple_trie_node *node;
-
simple_trie_node *tmp = simple_trie_search_silbling(delete_from->first_child, &node, *key);
-
if (node)
-
node->silbling = tmp->silbling;
-
else
-
delete_from->first_child = tmp->silbling;
-
-
while (*key && tmp) {
-
node = tmp;
-
-
tmp = simple_trie_search_silbling(tmp->first_child, NULL, *++key);
-
WRAP_free(node);
-
}
-
/* don't */
-
if (tmp) {
-
WRAP_free(tmp);
-
}
-
-
/* maybe delete_from is one of the roots in the forests */
-
if (!delete_from->value && !simple_trie_node_has_child(delete_from)) {
-
simple_trie_delete_child(&root->first_child, delete_from);
-
WRAP_free(delete_from);
-
-
}
-
-
}
-
-
-
int simple_trie_delete_key(simple_trie_root *root, const char *key)
-
{
-
if (!root || !key)
-
return 0;
-
-
simple_trie_node *node;
-
simple_trie_node *prev;
-
simple_trie_node *deleting_node;
-
if (!*key) { /* delete empty key */
-
if (root->value)
-
root->del_value(&root->value);
-
else /* empty key maps no value */
-
return -1;
-
return 0;
-
}
-
-
node = simple_trie_search_silbling(root->first_child, NULL, *key++);
-
if (!node)
-
return -1;
-
-
/* delete one of the roots in forests */
-
if (!*key) {
-
if (node->value)
-
root->del_value(&node->value);
-
if (!simple_trie_node_has_child(node)) {
-
WRAP_free(node);
-
key--;
-
simple_trie_delete_child(&root->first_child, node);
-
}
-
return 0;
-
}
-
-
node = simple_trie_find_trie_node(node, &key, &deleting_node);
-
-
if (!node) /* no such key */
-
return -1;
-
-
root->del_value(&node->value);
-
-
/*
-
* this means the node coressponding to the key is a leaf in trie tree
-
* so delete all extra nodes below deleting_node.
-
*/
-
if (!simple_trie_node_has_child(node) && deleting_node)
-
simple_trie_delete_trie_node(root, deleting_node, key);
-
-
return 0;
-
}
-
-
-
-
int simple_trie_has_key(simple_trie_root *root, const char *key)
-
{
-
if (!root || !key)
-
return 0;
-
-
if (!*key) { /* empty string mappings */
-
if (root->value)
-
return 1;
-
else
-
return 0;
-
}
-
-
simple_trie_node *node;
-
-
node = simple_trie_search_silbling(root->first_child, NULL, *key++);
-
if (!node)
-
return 0;
-
else {
-
if (*key)
-
node = simple_trie_find_trie_node(node, &key, NULL);
-
-
if (node->value)
-
return 1;
-
else
-
return 0;
-
}
-
}
-
-
-
void *simple_trie_get_value(simple_trie_root *root, const char *key)
-
{
-
if (!root || !key)
-
return NULL;
-
-
if (!*key) /* empty string mappings */
-
return root->value;
-
-
simple_trie_node *node;
-
node = simple_trie_search_silbling(root->first_child, NULL, *key++);
-
if (*key)
-
node = simple_trie_find_trie_node(node, &key, NULL);
-
-
-
if (node)
-
return node->value;
-
else
-
return NULL;
-
}
-
-
-
/* recusively free trie nodes. Hopes the stack won't get broken */
-
-
static void simple_trie_destroy_node(simple_trie_root *root, simple_trie_node *node)
-
{
-
simple_trie_node *child = node->first_child;
-
while(child) {
-
simple_trie_destroy_node(root, child);
-
child = child->silbling;
-
}
-
-
if (node->value) {
-
root->del_value(&node->value);
-
}
-
WRAP_free(node);
-
}
-
-
-
-
void simple_trie_destroy(simple_trie_root **__root)
-
{
-
if (!__root)
-
return;
-
-
simple_trie_root *root = *__root;
-
simple_trie_node *node = root->first_child;
-
while(node) {
-
simple_trie_destroy_node(root, node);
-
node = node->silbling;
-
-
}
-
if (root->value)
-
root->del_value(&root->value);
-
WRAP_free(root);
-
*__root = NULL;
-
}
-
-
-
-
#define MAX_KEY_LEN 512
-
struct _prefix {
-
char prefix[MAX_KEY_LEN];
-
int n_prefix;
-
} prefix;
-
-
static void append_prefix(struct _prefix *prefix, char key)
-
{
-
prefix->prefix[prefix->n_prefix] = key;
-
prefix->n_prefix++;
-
if (prefix->n_prefix >= MAX_KEY_LEN) {
-
fprintf(stderr, "prefix too long!\n");
-
exit(-1);
-
}
-
}
-
-
static void deappend_prefix(struct _prefix *prefix)
-
{
-
prefix->n_prefix--;
-
}
-
-
static void simple_trie_node_dump(simple_trie_node *node, struct _prefix *prefix, value_print_fn print)
-
{
-
simple_trie_node *child = node->first_child;
-
append_prefix(prefix, node->key);
-
while (child) {
-
simple_trie_node_dump(child, prefix, print);
-
child = child->silbling;
-
}
-
-
if (node->value) {
-
printf("[%.*s] => ", prefix->n_prefix, prefix->prefix);
-
print(node->value);
-
}
-
-
deappend_prefix(prefix);
-
}
-
-
-
-
-
-
void simple_trie_dump(simple_trie_root *root, value_print_fn print)
-
{
-
if (root->value) {
-
printf("[empty string] => ");
-
print(root->value);
-
}
-
-
simple_trie_node *node = root->first_child;
-
while (node) {
-
prefix.n_prefix = 0;
-
simple_trie_node_dump(node, &prefix, print);
-
node = node->silbling;
-
}
-
}
阅读(2665) | 评论(0) | 转发(0) |