是一种特别的树结构,一般用于存放字符串。Critbit tree是一种,其树的深度为O(longest-length),有点像二叉树,不过对于字符串做分支检测的时候代价很小。
Crit-bit快速高效的支持下面的一些操作:
-
插入一个字符串
-
测试一个字符串是否在树里
-
删除一个字符串
-
查找出树中所有以某个字符串开始的所有字符串
在插入过程
然后再继续插入后的结构变化是:
代码基于varnish, 并添加列出相同prefix字符串功能
-
#include <stdio.h>
-
#include <stdlib.h>
-
-
#include <stdio.h>
-
#include <stdlib.h>
-
#include <sys/types.h>
-
#include <sys/time.h>
-
#include <unistd.h>
-
#include <unistd.h>
-
#include <inttypes.h>
-
-
/*---------------------------------------------------------------------
-
* Table for finding out how many bits two bytes have in common sequentially,
-
* counting from the MSB towards the LSB.
-
* ie:
-
* hcb_bittbl[0x01 ^ 0x22] == 2
-
* hcb_bittbl[0x10 ^ 0x0b] == 3
-
*
-
*/
-
-
#define DIGEST_LEN 32
-
static unsigned char hcb_bittbl[256];
-
-
static unsigned char hcb_bits(unsigned char x, unsigned char y)
-
{
-
return hcb_bittbl[x ^ y];
-
}
-
-
static void hcb_build_bittbl(void)
-
{
-
unsigned char x;
-
unsigned y;
-
-
y = 0;
-
for (x = 0; x < 8; x++)
-
for (; y < (1U << x); y++)
-
hcb_bittbl[y] = 8 - x;
-
}
-
-
/*---------------------------------------------------------------------
-
* For space reasons we overload the two pointers with two different
-
* kinds of of pointers. We cast them to uintptr_t's and abuse the
-
* low two bits to tell them apart, assuming that Varnish will never
-
* run on machines with less than 32bit alignment.
-
*
-
* Asserts will explode if these assumptions are not met.
-
*/
-
-
struct hcb_y {
-
unsigned short critbit;
-
unsigned char ptr;
-
unsigned char bitmask;
-
volatile uintptr_t leaf[2];
-
};
-
-
#define HCB_BIT_NODE (1<<0)
-
#define HCB_BIT_Y (1<<1)
-
-
struct hcb_root {
-
volatile uintptr_t origo;
-
};
-
-
static struct hcb_root hcb_root;
-
-
/*---------------------------------------------------------------------
-
* Pointer accessor functions
-
*/
-
static int hcb_is_node(uintptr_t u)
-
{
-
return (u & HCB_BIT_NODE);
-
}
-
-
static int hcb_is_y(uintptr_t u)
-
{
-
return (u & HCB_BIT_Y);
-
}
-
-
static uintptr_t hcb_r_node(uintptr_t n)
-
{
-
return (HCB_BIT_NODE | (uintptr_t)n);
-
}
-
-
static uintptr_t hcb_l_node(uintptr_t u)
-
{
-
return (u & ~HCB_BIT_NODE);
-
}
-
-
static uintptr_t hcb_r_y(struct hcb_y *y)
-
{
-
return (HCB_BIT_Y | (uintptr_t)y);
-
}
-
-
static struct hcb_y *hcb_l_y(uintptr_t u)
-
{
-
return ((struct hcb_y *)(u & ~HCB_BIT_Y));
-
}
-
-
/*---------------------------------------------------------------------
-
* Find the "critical" bit that separates these two digests
-
*/
-
-
static unsigned hcb_crit_bit(const char *oh1, const char *oh2, struct hcb_y *y)
-
{
-
unsigned char u, r;
-
for (u = 0; u < DIGEST_LEN && oh1[u] == oh2[u]; u++)
-
;
-
-
r = hcb_bits(oh1[u], oh2[u]);
-
y->ptr = u;
-
y->bitmask = 0x80 >> r;
-
y->critbit = u * 8 + r;
-
return (y->critbit);
-
}
-
-
/*---------------------------------------------------------------------
-
* Unless we have the lock, we need to be very careful about pointer
-
* references into the tree, we cannot trust things to be the same
-
* in two consequtive memory accesses.
-
*/
-
-
static char *hcb_insert(struct hcb_root *root, char *oh)
-
{
-
volatile uintptr_t *p;
-
uintptr_t pp;
-
struct hcb_y *y, *y2;
-
char *oh2;
-
unsigned s, s2;
-
-
p = &root->origo;
-
pp = *p;
-
if (pp == 0) {
-
*p = hcb_r_node(oh);
-
return (oh);
-
}
-
-
while(hcb_is_y(pp)) {
-
y = hcb_l_y(pp);
-
s = (oh[y->ptr] & y->bitmask) != 0;
-
p = &y->leaf[s];
-
pp = *p;
-
}
-
-
if (pp == 0) {
-
/* We raced hcb_delete and got a NULL pointer */
-
return (NULL);
-
}
-
-
/* We found a node, does it match ? */
-
oh2 = hcb_l_node(pp);
-
if (!memcmp(oh2, oh, DIGEST_LEN))
-
return (oh2);
-
-
/* Insert */
-
y2 = (struct hcb_y *) malloc(sizeof(struct hcb_y));
-
if(y2==NULL)
-
return NULL;
-
(void)hcb_crit_bit(oh, oh2, y2);
-
s2 = (oh[y2->ptr] & y2->bitmask) != 0;
-
y2->leaf[s2] = hcb_r_node(oh);
-
s2 = 1-s2;
-
-
p = &root->origo;
-
-
while(hcb_is_y(*p)) {
-
y = hcb_l_y(*p);
-
if (y->critbit > y2->critbit)
-
break;
-
s = (oh[y->ptr] & y->bitmask) != 0;
-
p = &y->leaf[s];
-
}
-
y2->leaf[s2] = *p;
-
*p = hcb_r_y(y2);
-
return(oh);
-
}
-
-
static void hcb_delete(struct hcb_root *r, char *oh)
-
{
-
struct hcb_y *y;
-
volatile uintptr_t *p;
-
unsigned s;
-
-
-
if (r->origo == hcb_r_node(oh)) {
-
r->origo = 0;
-
return;
-
}
-
-
-
p = &r->origo;
-
y = NULL;
-
while(1) {
-
y = hcb_l_y(*p);
-
s = (oh[y->ptr] & y->bitmask) != 0;
-
if (y->leaf[s] == hcb_r_node(oh)) {
-
*p = y->leaf[1 - s];
-
free(y);
-
return;
-
}
-
p = &y->leaf[s];
-
}
-
-
}
-
-
static void hcb_start(void)
-
{
-
memset(&hcb_root, 0, sizeof hcb_root);
-
hcb_build_bittbl();
-
}
-
-
static char *hcb_lookup(struct hcb_root *root, char *noh)
-
{
-
volatile uintptr_t *p;
-
uintptr_t pp;
-
struct hcb_y *y, *y2;
-
char *oh2;
-
unsigned s, s2;
-
-
p = &root->origo;
-
pp = *p;
-
if (pp == 0) {
-
return NULL;
-
}
-
-
while(hcb_is_y(pp)) {
-
y = hcb_l_y(pp);
-
s = (noh[y->ptr] & y->bitmask) != 0;
-
p = &y->leaf[s];
-
pp = *p;
-
}
-
-
if (pp == 0) {
-
/* We raced hcb_delete and got a NULL pointer */
-
return (NULL);
-
}
-
-
/* We found a node, does it match ? */
-
oh2 = hcb_l_node(pp);
-
if (!memcmp(oh2, noh, DIGEST_LEN))
-
return (oh2);
-
-
return(NULL);
-
}
-
-
-
-
static void recur_print(uintptr_t pp)
-
{
-
if(pp==0)
-
return;
-
-
if( hcb_is_node(pp) )
-
{
-
char *oh2 = hcb_l_node(pp);
-
printf("%sn", oh2);
-
return;
-
}
-
else
-
{
-
struct hcb_y *y = hcb_l_y(pp);
-
recur_print(y->leaf[0]);
-
recur_print(y->leaf[1]);
-
}
-
}
-
-
static char *hcb_get_prefix(struct hcb_root *root, char *prefix)
-
{
-
int prefix_len = strlen(prefix);
-
if(prefix_len ==0)
-
return NULL;
-
-
volatile uintptr_t *p;
-
uintptr_t pp;
-
struct hcb_y *y, *y2;
-
char *oh2;
-
unsigned s, s2;
-
-
p = &root->origo;
-
pp = *p;
-
if (pp == 0) {
-
return NULL;
-
}
-
-
while(hcb_is_y(pp)) {
-
y = hcb_l_y(pp);
-
if(y->ptr >= prefix_len)
-
break;
-
-
s = (prefix[y->ptr] & y->bitmask) != 0;
-
p = &y->leaf[s];
-
pp = *p;
-
}
-
-
if (pp == 0) {
-
/* We raced hcb_delete and got a NULL pointer */
-
return (NULL);
-
}
-
-
recur_print(pp);
-
-
return(NULL);
-
}
-
-
-
int main()
-
{
-
hcb_start();
-
-
char t1[DIGEST_LEN] = "test";
-
hcb_insert(&hcb_root, t1);
-
-
char t2[DIGEST_LEN] = "test1";
-
hcb_insert(&hcb_root, t2);
-
-
char t3[DIGEST_LEN] = "test2";
-
hcb_insert(&hcb_root, t3);
-
-
int i = 0;
-
while(i<100)
-
{
-
char *t4 = (char *)malloc(DIGEST_LEN);
-
memset(t4, 0, DIGEST_LEN);
-
sprintf(t4, "test%d", i);
-
hcb_insert(&hcb_root, t4);
-
i++;
-
}
-
-
if( hcb_lookup(&hcb_root, t1) )
-
{
-
printf("find itn");
-
}
-
else
-
{
-
printf("can not find itn");
-
}
-
-
printf("begin to deleten");
-
hcb_delete(&hcb_root, t1);
-
-
if( hcb_lookup(&hcb_root, t1) )
-
{
-
printf("find itn");
-
}
-
else
-
{
-
printf("can not find itn");
-
}
-
-
hcb_get_prefix(&hcb_root, "test");
-
-
return 0;
-
}
阅读(9489) | 评论(0) | 转发(1) |