Chinaunix首页 | 论坛 | 博客
  • 博客访问: 79686
  • 博文数量: 83
  • 博客积分: 10
  • 博客等级: 民兵
  • 技术积分: 20
  • 用 户 组: 普通用户
  • 注册时间: 2012-03-30 00:36
文章分类

全部博文(83)

文章存档

2014年(83)

我的朋友

分类: C/C++

2014-07-17 14:42:50

原文地址:Crit-bit Tree概述和实现 作者:bjpiao


是一种特别的树结构,一般用于存放字符串。Critbit tree是一种,其树的深度为O(longest-length),有点像二叉树,不过对于字符串做分支检测的时候代价很小。

Crit-bit快速高效的支持下面的一些操作:

  • 插入一个字符串
  • 测试一个字符串是否在树里
  • 删除一个字符串
  • 查找出树中所有以某个字符串开始的所有字符串

在插入过程

critbit

然后再继续插入后的结构变化是:critbit


代码基于varnish, 并添加列出相同prefix字符串功能

点击(此处)折叠或打开

  1. #include <stdio.h>
  2. #include <stdlib.h>

  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <sys/types.h>
  6. #include <sys/time.h>
  7. #include <unistd.h>
  8. #include <unistd.h>
  9. #include <inttypes.h>

  10. /*---------------------------------------------------------------------
  11.  * Table for finding out how many bits two bytes have in common sequentially,
  12.  * counting from the MSB towards the LSB.
  13.  * ie:
  14.  * hcb_bittbl[0x01 ^ 0x22] == 2
  15.  * hcb_bittbl[0x10 ^ 0x0b] == 3
  16.  *
  17.  */

  18. #define DIGEST_LEN 32
  19. static unsigned char hcb_bittbl[256];

  20. static unsigned char hcb_bits(unsigned char x, unsigned char y)
  21. {
  22.         return hcb_bittbl[x ^ y];
  23. }

  24. static void hcb_build_bittbl(void)
  25. {
  26.         unsigned char x;
  27.         unsigned y;

  28.         y = 0;
  29.         for (x = 0; x < 8; x++)
  30.                 for (; y < (1U << x); y++)
  31.                         hcb_bittbl[y] = 8 - x;
  32. }

  33. /*---------------------------------------------------------------------
  34.  * For space reasons we overload the two pointers with two different
  35.  * kinds of of pointers. We cast them to uintptr_t's and abuse the
  36.  * low two bits to tell them apart, assuming that Varnish will never
  37.  * run on machines with less than 32bit alignment.
  38.  *
  39.  * Asserts will explode if these assumptions are not met.
  40.  */

  41. struct hcb_y {
  42.         unsigned short critbit;
  43.         unsigned char ptr;
  44.         unsigned char bitmask;
  45.         volatile uintptr_t leaf[2];
  46. };

  47. #define HCB_BIT_NODE (1<<0)
  48. #define HCB_BIT_Y (1<<1)

  49. struct hcb_root {
  50.         volatile uintptr_t origo;
  51. };

  52. static struct hcb_root hcb_root;

  53. /*---------------------------------------------------------------------
  54.  * Pointer accessor functions
  55.  */
  56. static int hcb_is_node(uintptr_t u)
  57. {
  58.         return (u & HCB_BIT_NODE);
  59. }

  60. static int hcb_is_y(uintptr_t u)
  61. {
  62.         return (u & HCB_BIT_Y);
  63. }

  64. static uintptr_t hcb_r_node(uintptr_t n)
  65. {
  66.         return (HCB_BIT_NODE | (uintptr_t)n);
  67. }

  68. static uintptr_t hcb_l_node(uintptr_t u)
  69. {
  70.         return (u & ~HCB_BIT_NODE);
  71. }

  72. static uintptr_t hcb_r_y(struct hcb_y *y)
  73. {
  74.         return (HCB_BIT_Y | (uintptr_t)y);
  75. }

  76. static struct hcb_y *hcb_l_y(uintptr_t u)
  77. {
  78.         return ((struct hcb_y *)(u & ~HCB_BIT_Y));
  79. }

  80. /*---------------------------------------------------------------------
  81.  * Find the "critical" bit that separates these two digests
  82.  */

  83. static unsigned hcb_crit_bit(const char *oh1, const char *oh2, struct hcb_y *y)
  84. {
  85.         unsigned char u, r;
  86.         for (u = 0; u < DIGEST_LEN && oh1[u] == oh2[u]; u++)
  87.                 ;

  88.         r = hcb_bits(oh1[u], oh2[u]);
  89.         y->ptr = u;
  90.         y->bitmask = 0x80 >> r;
  91.         y->critbit = u * 8 + r;
  92.         return (y->critbit);
  93. }

  94. /*---------------------------------------------------------------------
  95.  * Unless we have the lock, we need to be very careful about pointer
  96.  * references into the tree, we cannot trust things to be the same
  97.  * in two consequtive memory accesses.
  98.  */

  99. static char *hcb_insert(struct hcb_root *root, char *oh)
  100. {
  101.         volatile uintptr_t *p;
  102.         uintptr_t pp;
  103.         struct hcb_y *y, *y2;
  104.         char *oh2;
  105.         unsigned s, s2;

  106.         p = &root->origo;
  107.         pp = *p;
  108.         if (pp == 0) {
  109.                 *p = hcb_r_node(oh);
  110.                 return (oh);
  111.         }

  112.         while(hcb_is_y(pp)) {
  113.                 y = hcb_l_y(pp);
  114.                 s = (oh[y->ptr] & y->bitmask) != 0;
  115.                 p = &y->leaf[s];
  116.                 pp = *p;
  117.         }

  118.         if (pp == 0) {
  119.                 /* We raced hcb_delete and got a NULL pointer */
  120.                 return (NULL);
  121.         }

  122.         /* We found a node, does it match ? */
  123.         oh2 = hcb_l_node(pp);
  124.         if (!memcmp(oh2, oh, DIGEST_LEN))
  125.                 return (oh2);

  126.         /* Insert */
  127.         y2 = (struct hcb_y *) malloc(sizeof(struct hcb_y));
  128.         if(y2==NULL)
  129.                 return NULL;
  130.         (void)hcb_crit_bit(oh, oh2, y2);
  131.         s2 = (oh[y2->ptr] & y2->bitmask) != 0;
  132.         y2->leaf[s2] = hcb_r_node(oh);
  133.         s2 = 1-s2;

  134.         p = &root->origo;

  135.         while(hcb_is_y(*p)) {
  136.                 y = hcb_l_y(*p);
  137.                 if (y->critbit > y2->critbit)
  138.                         break;
  139.                 s = (oh[y->ptr] & y->bitmask) != 0;
  140.                 p = &y->leaf[s];
  141.         }
  142.         y2->leaf[s2] = *p;
  143.         *p = hcb_r_y(y2);
  144.         return(oh);
  145. }

  146. static void hcb_delete(struct hcb_root *r, char *oh)
  147. {
  148.         struct hcb_y *y;
  149.         volatile uintptr_t *p;
  150.         unsigned s;


  151.         if (r->origo == hcb_r_node(oh)) {
  152.                 r->origo = 0;
  153.                 return;
  154.         }


  155.         p = &r->origo;
  156.         y = NULL;
  157.         while(1) {
  158.                 y = hcb_l_y(*p);
  159.                 s = (oh[y->ptr] & y->bitmask) != 0;
  160.                 if (y->leaf[s] == hcb_r_node(oh)) {
  161.                         *p = y->leaf[1 - s];
  162.                         free(y);
  163.                         return;
  164.                 }
  165.                 p = &y->leaf[s];
  166.         }

  167. }

  168. static void hcb_start(void)
  169. {
  170.         memset(&hcb_root, 0, sizeof hcb_root);
  171.         hcb_build_bittbl();
  172. }

  173. static char *hcb_lookup(struct hcb_root *root, char *noh)
  174. {
  175.         volatile uintptr_t *p;
  176.         uintptr_t pp;
  177.         struct hcb_y *y, *y2;
  178.         char *oh2;
  179.         unsigned s, s2;

  180.         p = &root->origo;
  181.         pp = *p;
  182.         if (pp == 0) {
  183.                 return NULL;
  184.         }

  185.         while(hcb_is_y(pp)) {
  186.                 y = hcb_l_y(pp);
  187.                 s = (noh[y->ptr] & y->bitmask) != 0;
  188.                 p = &y->leaf[s];
  189.                 pp = *p;
  190.         }

  191.         if (pp == 0) {
  192.                 /* We raced hcb_delete and got a NULL pointer */
  193.                 return (NULL);
  194.         }

  195.         /* We found a node, does it match ? */
  196.         oh2 = hcb_l_node(pp);
  197.         if (!memcmp(oh2, noh, DIGEST_LEN))
  198.                 return (oh2);

  199.         return(NULL);
  200. }



  201. static void recur_print(uintptr_t pp)
  202. {
  203.         if(pp==0)
  204.                 return;

  205.         if( hcb_is_node(pp) )
  206.         {
  207.                 char *oh2 = hcb_l_node(pp);
  208.                 printf("%sn", oh2);
  209.                 return;
  210.         }
  211.         else
  212.         {
  213.                 struct hcb_y *y = hcb_l_y(pp);
  214.                 recur_print(y->leaf[0]);
  215.                 recur_print(y->leaf[1]);
  216.         }
  217. }

  218. static char *hcb_get_prefix(struct hcb_root *root, char *prefix)
  219. {
  220.         int prefix_len = strlen(prefix);
  221.         if(prefix_len ==0)
  222.                 return NULL;

  223.         volatile uintptr_t *p;
  224.         uintptr_t pp;
  225.         struct hcb_y *y, *y2;
  226.         char *oh2;
  227.         unsigned s, s2;

  228.         p = &root->origo;
  229.         pp = *p;
  230.         if (pp == 0) {
  231.                         return NULL;
  232.         }

  233.         while(hcb_is_y(pp)) {
  234.                 y = hcb_l_y(pp);
  235.                 if(y->ptr >= prefix_len)
  236.                         break;

  237.                 s = (prefix[y->ptr] & y->bitmask) != 0;
  238.                 p = &y->leaf[s];
  239.                 pp = *p;
  240.         }

  241.         if (pp == 0) {
  242.                         /* We raced hcb_delete and got a NULL pointer */
  243.                         return (NULL);
  244.         }

  245.         recur_print(pp);

  246.         return(NULL);
  247. }


  248. int main()
  249. {
  250.         hcb_start();

  251.         char t1[DIGEST_LEN] = "test";
  252.         hcb_insert(&hcb_root, t1);

  253.         char t2[DIGEST_LEN] = "test1";
  254.         hcb_insert(&hcb_root, t2);

  255.         char t3[DIGEST_LEN] = "test2";
  256.         hcb_insert(&hcb_root, t3);

  257.         int i = 0;
  258.         while(i<100)
  259.         {
  260.                 char *t4 = (char *)malloc(DIGEST_LEN);
  261.                 memset(t4, 0, DIGEST_LEN);
  262.                 sprintf(t4, "test%d", i);
  263.                 hcb_insert(&hcb_root, t4);
  264.                 i++;
  265.         }

  266.         if( hcb_lookup(&hcb_root, t1) )
  267.         {
  268.                 printf("find itn");
  269.         }
  270.         else
  271.         {
  272.                 printf("can not find itn");
  273.         }

  274.         printf("begin to deleten");
  275.         hcb_delete(&hcb_root, t1);

  276.         if( hcb_lookup(&hcb_root, t1) )
  277.         {
  278.                 printf("find itn");
  279.         }
  280.         else
  281.         {
  282.                 printf("can not find itn");
  283.         }

  284.         hcb_get_prefix(&hcb_root, "test");

  285.         return 0;
  286. }

阅读(1624) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~