关于TRIE的说明就不罗嗦了.网上很多.这里仅实现了最基本的TRIE。其他TRIE的变种未实现。代码正在编写中。
写完之后测试发现的确很耗内存。BURST TRIE以及HAT TRIE的论文正在研读。搞明白之后再实现一下。
TRIE的原理相当简单。直接贴代码好了.
代码一: TRIE的数据结构声明以及各种操作集合函数原型声明
代码文件名: simple_trie.h
- #ifndef SIMPLE_TRIE_H
-
#define SIMPLE_TRIE_H
-
#include <assert.h>
-
-
typedef void (*put_value_fn) (void **, void *);
-
typedef void (*del_value_fn) (void **);
-
-
typedef void (*value_print_fn) (void *);
-
-
#define ALPHABET_SIZE 26
-
typedef struct _simple_trie_node {
-
char key;
-
void *value;
-
struct _simple_trie_node *child[ALPHABET_SIZE];
-
} simple_trie_node;
-
-
-
typedef struct _simple_trie_root {
-
void *value;
-
put_value_fn put_value;
-
del_value_fn del_value;
-
struct _simple_trie_node *child[ALPHABET_SIZE];
-
} simple_trie_root;
-
-
-
static int cal_idx(char key)
-
{
-
assert(key >= 'a' && key <= 'z' && "key string must be lower!");
-
return ( key - 'a' );
-
}
-
-
/* create a new simple_trie_root */
-
simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del);
-
-
/* add a key-value pair to trie tree */
-
int simple_trie_add_pair(simple_trie_root *,const char *, void *);
-
-
/* delete a *key*-value mapping in trie tree */
-
int simple_trie_delete_key(simple_trie_root *, const char *);
-
-
/* destroy trie tree */
-
void simple_trie_destroy(simple_trie_root **);
-
-
/* get value mapped by *key* */
-
void *simple_trie_get_value(simple_trie_root *, const char *);
-
-
/* test if trie tree has *key* */
-
int simple_trie_has_key(simple_trie_root *, const char *);
-
-
void simple_trie_dump(simple_trie_root *, value_print_fn print);
-
#endif
代码二: TRIE的具体操作实现代码
代码文件名: simple_trie.c
/*
* a simple implementation of Trie tree.
* any suggestions are welcomed
* liangtao90s@gmail.com
*/
- #include <stdio.h>
-
#include <string.h>
-
#include <stdlib.h>
-
#include <assert.h>
-
#include "simple_trie.h"
-
#include "wrap_malloc.h"
-
-
simple_trie_node *simple_trie_node_new(char key, void *value)
-
{
-
simple_trie_node *node;
-
node = WRAP_calloc(1, sizeof(simple_trie_node));
-
if (node) {
-
node->key = key;
-
node->value = value;
-
} else {
-
OOM();
-
}
-
return node;
-
}
-
-
simple_trie_root *simple_trie_root_new(put_value_fn put, del_value_fn del)
-
{
-
simple_trie_root *root;
-
root = WRAP_calloc(1, sizeof(simple_trie_root));
-
if (root) {
-
root->put_value = put;
-
root->del_value = del;
-
} else {
-
OOM();
-
}
-
-
return root;
-
}
-
-
-
static simple_trie_node **simple_trie_find_last_match_node(simple_trie_root *root,const char **__key)
-
{
-
const char *key = *__key;
-
simple_trie_node **node = &root->child[cal_idx(*key)];
-
while (*node) {
-
key++;
-
if (!*key)
-
break;
-
node = &(*node)->child[cal_idx(*key)];
-
}
-
-
*__key = key;
-
return node;
-
}
-
-
int simple_trie_add_pair(simple_trie_root *root, const char *key, void *value)
-
{
-
if (!root || !key )
-
return -1;
-
-
if (!*key) { /* try to map a empty string to a value */
-
root->del_value(&root->value);
-
root->put_value(&root->value, value);
-
return 0;
-
}
-
-
simple_trie_node **node;
-
node = simple_trie_find_last_match_node(root, &key);
-
-
if (!*key) { /* simplely replace */
-
root->del_value(&(*node)->value);
-
root->put_value(&(*node)->value, value);
-
} else { /* start construct nodes */
-
simple_trie_node *father;
-
simple_trie_node *child;
-
father = *node = simple_trie_node_new(*key++, NULL);
-
while (*key) {
-
child = simple_trie_node_new(*key, NULL);
-
father->child[cal_idx(*key)] = child;
-
father = child;
-
key++;
-
}
-
-
root->put_value(&father->value, value);
-
}
-
-
return 0;
-
}
-
-
static int simple_trie_node_has_child(simple_trie_node *node)
-
{
-
int has_child = 0;
-
int i;
-
for (i = 0; i < ALPHABET_SIZE; i++) {
-
if (node->child[i]) {
-
has_child = 1;
-
break;
-
}
-
}
-
-
return has_child;
-
}
-
-
static int simple_trie_node_has_only_child(simple_trie_node *node)
-
{
-
int has_only_child = 1;
-
int child = 0;
-
int i;
-
for (i = 0; i < ALPHABET_SIZE; i++) {
-
if (node->child[i])
-
child++;
-
if (child == 2) {
-
has_only_child = 0;
-
break;
-
}
-
}
-
return has_only_child;
-
}
-
-
-
static simple_trie_node *simple_trie_find_trie_node(simple_trie_node *start, \
-
const char **__key, simple_trie_node **__extra)
-
{
-
if (!start || !__key || !*__key)
-
return NULL;
-
-
int is_leaf = 1;
-
const char *key = *__key;
-
if (__extra)
-
*__extra = start;
-
while (*key) {
-
/* record the nearest branch above the to-be-found node */
-
if (__extra && !simple_trie_node_has_only_child(start)) {
-
*__extra = start;
-
*__key = key;
-
}
-
start = start->child[cal_idx(*key++)];
-
if (!start)
-
return NULL;
-
}
-
-
if (simple_trie_node_has_child(start))
-
is_leaf = 0;
-
-
/* search along the *__extra* to find the last key-value pair */
-
if (is_leaf && __extra) {
-
simple_trie_node *tmp = *__extra;
-
simple_trie_node *tmp_extra = *__extra;
-
const char *tmp_key = *__key;
-
key = *__key;
-
while (*key) {
-
tmp = tmp->child[cal_idx(*key++)];
-
if (tmp->value && simple_trie_node_has_child(tmp)) {
-
*__extra = tmp;
-
*__key = key;
-
}
-
}
-
/* the *_extra* branch has only one path leading to start. fix it */
-
if (*__extra == tmp) {
-
*__extra = tmp_extra;
-
*__key = tmp_key;
-
}
-
}
-
-
return start;
-
}
-
-
-
static void simple_trie_delete_trie_node(simple_trie_root *root, \
-
simple_trie_node *delete_from, const char *key)
-
{
-
if (!delete_from || !key)
-
return;
-
simple_trie_node *node;
-
simple_trie_node *tmp = delete_from->child[cal_idx(*key)];
-
delete_from->child[cal_idx(*key++)] = NULL;
-
-
while (*key && tmp) {
-
node = tmp;
-
-
tmp = tmp->child[cal_idx(*key++)];
-
WRAP_free(node);
-
}
-
/* don't */
-
if (tmp) {
-
WRAP_free(tmp);
-
}
-
-
/* maybe delete_from is one of the roots in the forests */
-
if (!delete_from->value && !simple_trie_node_has_child(delete_from)) {
-
root->child[cal_idx(delete_from->key)] = NULL;
-
WRAP_free(delete_from);
-
-
}
-
-
}
-
-
-
int simple_trie_delete_key(simple_trie_root *root, const char *key)
-
{
-
if (!root || !key)
-
return 0;
-
-
simple_trie_node *node;
-
simple_trie_node *deleting_node;
-
if (!*key) { /* delete empty key */
-
if (root->value)
-
root->del_value(&root->value);
-
else /* empty key maps no value */
-
return -1;
-
return 0;
-
}
-
-
node = root->child[cal_idx(*key++)];
-
if (!node)
-
return -1;
-
-
/* delete one of the roots in forests */
-
if (!*key) {
-
if (node->value)
-
root->del_value(&node->value);
-
if (!simple_trie_node_has_child(node)) {
-
WRAP_free(node);
-
key--;
-
root->child[cal_idx(*key)] = NULL;
-
}
-
return 0;
-
}
-
-
node = simple_trie_find_trie_node(node, &key, &deleting_node);
-
-
if (!node) /* no such key */
-
return -1;
-
-
root->del_value(&node->value);
-
-
/*
-
* this means the node coressponding to the key is a leaf in trie tree
-
* so delete all extra nodes below deleting_node.
-
*/
-
if (!simple_trie_node_has_child(node) && deleting_node)
-
simple_trie_delete_trie_node(root, deleting_node, key);
-
-
return 0;
-
}
-
-
-
int simple_trie_has_key(simple_trie_root *root, const char *key)
-
{
-
if (!root || !key)
-
return 0;
-
-
if (!*key) { /* empty string mappings */
-
if (root->value)
-
return 1;
-
else
-
return 0;
-
}
-
-
simple_trie_node *node;
-
-
node = root->child[cal_idx(*key++)];
-
-
if (!node)
-
return 0;
-
else {
-
if (*key)
-
node = simple_trie_find_trie_node(node, &key, NULL);
-
-
if (node->value)
-
return 1;
-
else
-
return 0;
-
}
-
}
-
-
-
void *simple_trie_get_value(simple_trie_root *root, const char *key)
-
{
-
if (!root || !key)
-
return NULL;
-
-
if (!*key) /* empty string mappings */
-
return root->value;
-
-
simple_trie_node *node;
-
node = root->child[cal_idx(*key++)];
-
-
if (*key)
-
node = simple_trie_find_trie_node(node, &key, NULL);
-
-
-
if (node)
-
return node->value;
-
else
-
return NULL;
-
}
-
-
-
/* recusively free trie nodes. Hopes the stack won't get broken */
-
-
static void simple_trie_destroy_node(simple_trie_root *root, simple_trie_node *node)
-
{
-
int i;
-
for (i = 0; i < ALPHABET_SIZE; i++) {
-
if (node->child[i])
-
simple_trie_destroy_node(root, node->child[i]);
-
}
-
-
if (node->value)
-
root->del_value(&node->value);
-
WRAP_free(node);
-
}
-
-
-
-
void simple_trie_destroy(simple_trie_root **__root)
-
{
-
if (!__root)
-
return;
-
-
simple_trie_root *root = *__root;
-
int i;
-
for (i = 0; i < ALPHABET_SIZE; i++) {
-
if (root->child[i])
-
simple_trie_destroy_node(root, root->child[i]);
-
}
-
if (root->value)
-
root->del_value(&root->value);
-
WRAP_free(root);
-
*__root = NULL;
-
}
-
-
#define MAX_KEY_LEN 512
-
struct _prefix {
-
char prefix[MAX_KEY_LEN];
-
int n_prefix;
-
} prefix;
-
-
static void append_prefix(struct _prefix *prefix, char key)
-
{
-
prefix->prefix[prefix->n_prefix] = key;
-
prefix->n_prefix++;
-
if (prefix->n_prefix >= MAX_KEY_LEN) {
-
fprintf(stderr, "prefix too long!\n");
-
exit(-1);
-
}
-
}
-
-
static void deappend_prefix(struct _prefix *prefix)
-
{
-
prefix->n_prefix--;
-
}
-
-
static void simple_trie_node_dump(simple_trie_node *node, struct _prefix *prefix, value_print_fn print)
-
{
-
int i;
-
append_prefix(prefix, node->key);
-
for (i = 0; i < ALPHABET_SIZE; i++) {
-
if (node->child[i])
-
simple_trie_node_dump(node->child[i], prefix, print);
-
}
-
-
if (node->value) {
-
printf("[%.*s] => ", prefix->n_prefix, prefix->prefix);
-
print(node->value);
-
}
-
-
deappend_prefix(prefix);
-
}
-
-
-
-
-
-
void simple_trie_dump(simple_trie_root *root, value_print_fn print)
-
{
-
if (root->value) {
-
printf("[empty string] => ");
-
print(root->value);
-
}
-
-
int i;
-
for (i = 0; i < ALPHABET_SIZE; i++) {
-
if (root->child[i]) {
-
prefix.n_prefix = 0;
-
simple_trie_node_dump(root->child[i], &prefix, print);
-
}
-
}
-
}
以上实现了基本的TRIE结构的操作.
阅读(3136) | 评论(0) | 转发(0) |