Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1876433
  • 博文数量: 184
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 2388
  • 用 户 组: 普通用户
  • 注册时间: 2016-12-21 22:26
个人简介

90后空巢老码农

文章分类

全部博文(184)

文章存档

2021年(26)

2020年(56)

2019年(54)

2018年(47)

2017年(1)

我的朋友

分类: NOSQL

2019-01-07 20:34:50

dict这两个文件是用来实现哈希表功能的,其原文是这样说的

点击(此处)折叠或打开

  1. /* This file implements in-memory hash tables with insert/del/replace/find/
  2.  * get-random-element operations. Hash tables will auto-resize if needed
  3.  * tables of power of two in size are used, collisions are handled by
  4.  * chaining. See the source code for more information... :)*/

然后是定义了两个宏来表示dict的状态

点击(此处)折叠或打开

  1. #define DICT_OK 0
  2. #define DICT_ERR 1



之后的话就是定义了一堆结构体来表征dict,源代码里面是通过自底向上的方式进行罗列,但我还是喜欢自顶向下来描述一下,orz~~
首先是dict,其中存放了字典类型dictType(哈希函数、键值复制函数、键值销毁函数、键的比较)信息,两个哈希表dictht,这里之所以用两个,是为了在字典扩展或者缩容时将旧表rehash到新表时中间寄存数据所用,导完数据之后,会释放旧表。


点击(此处)折叠或打开

  1. typedef struct dict {
  2.     dictType *type;
  3.     void *privdata;
  4.     dictht ht[2];
  5.     long rehashidx; /* rehashing not in progress if rehashidx == -1,用于在rehash的时候,遍历每一个桶*/
  6.     unsigned long iterators; /* number of iterators currently running */
  7. } dict;
接着是dictType和dictht

点击(此处)折叠或打开

  1. typedef struct dictType {
  2.     uint64_t (*hashFunction)(const void *key);
  3.     void *(*keyDup)(void *privdata, const void *key);
  4.     void *(*valDup)(void *privdata, const void *obj);
  5.     int (*keyCompare)(void *privdata, const void *key1, const void *key2);
  6.     void (*keyDestructor)(void *privdata, void *key);
  7.     void (*valDestructor)(void *privdata, void *obj);
  8. } dictType;
  9. typedef struct dictht {
  10.     dictEntry **table;
  11.     unsigned long size;
  12.     unsigned long sizemask;
  13.     unsigned long used;
  14. } dictht;
最后就是dict当中的基本单元dictEntry和iterator了


点击(此处)折叠或打开

  1. typedef struct dictEntry {
  2.     void *key;
  3.     union {
  4.         void *val;
  5.         uint64_t u64;
  6.         int64_t s64;
  7.         double d;
  8.     } v;
  9.     struct dictEntry *next;
  10. } dictEntry;

  11. /* If safe is set to 1 this is a safe iterator, that means, you can call
  12.  * dictAdd, dictFind, and other functions against the dictionary even while
  13.  * iterating. Otherwise it is a non safe iterator, and only dictNext()
  14.  * should be called while iterating. */
  15. typedef struct dictIterator {
  16.     dict *d;
  17.     long index;
  18.     int table, safe;
  19.     dictEntry *entry, *nextEntry;
  20.     /* unsafe iterator fingerprint for misuse detection. */
  21.     long long fingerprint;
  22. } dictIterator;


由以上结构我们也可以猜测一下,该哈希表解决hash冲突的途径是使用链表放进桶里面: )

接下来是定义了几个私有函数:


点击(此处)折叠或打开

  1. static void _dictReset(dictht *ht)
  2. {
  3.     ht->table = NULL;
  4.     ht->size = 0;
  5.     ht->sizemask = 0;
  6.     ht->used = 0;
  7. }
  8. /* Initialize the hash table */
  9. int _dictInit(dict *d, dictType *type,
  10.         void *privDataPtr)
  11. {
  12.     _dictReset(&d->ht[0]);
  13.     _dictReset(&d->ht[1]);
  14.     d->type = type;
  15.     d->privdata = privDataPtr;
  16.     d->rehashidx = -1;
  17.     d->iterators = 0;
  18.     return DICT_OK;
  19. }
  20. /* Expand the hash table if needed */
  21. static int _dictExpandIfNeeded(dict *d)
  22. {
  23.     /* Incremental rehashing already in progress. Return. */
  24.     if (dictIsRehashing(d)) return DICT_OK;

  25.     /* If the hash table is empty expand it to the initial size. */
  26.     if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);// 4

  27.     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
  28.      * table (global setting) or we should avoid it but the ratio between
  29.      * elements/buckets is over the "safe" threshold, we resize doubling
  30.      * the number of buckets. */
  31.     if (d->ht[0].used >= d->ht[0].size &&
  32.         (dict_can_resize ||
  33.          d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))//5
  34.     {
  35.         return dictExpand(d, d->ht[0].used*2);
  36.     }
  37.     return DICT_OK;
  38. }
  39. /* Our hash table capability is a power of two */
  40. static unsigned long _dictNextPower(unsigned long size)
  41. {
  42.     unsigned long i = DICT_HT_INITIAL_SIZE;

  43.     if (size >= LONG_MAX) return LONG_MAX + 1LU;
  44.     while(1) {
  45.         if (i >= size)
  46.             return i;
  47.         i *= 2;
  48.     }
  49. }
  50. // 用于检测当前key是否已经存在于表中,-1表示出错或者已经存在(此时可能返回已经存在的元素的指针),另外,如果当前正在rehashing,则返回的就是新的哈希表的index
  51. static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
  52. {
  53.     unsigned long idx, table;
  54.     dictEntry *he;
  55.     if (existing) *existing = NULL;

  56.     /* Expand the hash table if needed */
  57.     if (_dictExpandIfNeeded(d) == DICT_ERR)
  58.         return -1;
  59.     for (table = 0; table <= 1; table++) {
  60.         idx = hash & d->ht[table].sizemask;
  61.         /* Search if this slot does not already contain the given key */
  62.         he = d->ht[table].table[idx];
  63.         while(he) {
  64.             if (key==he->key || dictCompareKeys(d, key, he->key)) {
  65.                 if (existing) *existing = he;
  66.                 return -1;
  67.             }
  68.             he = he->next;
  69.         }
  70.         if (!dictIsRehashing(d)) break;
  71.     }
  72.     return idx;
  73. }
其中其哈希算法用的是siphash,它的输入是一个变长字符串以及一个128位的key,输出就是哈希值了,在redis的dict当中的定义如下,其中两个sip函数的具体实现参见源码的siphash.c文件:

点击(此处)折叠或打开

  1. static uint8_t dict_hash_function_seed[16]; // key
  2. uint64_t dictGenHashFunction(const void *key, int len) {
  3.     return siphash(key,len,dict_hash_function_seed);
  4. }

  5. uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
  6.     return siphash_nocase(buf,len,dict_hash_function_seed);
  7. }
下面简单说一下字典的扩展和rehash。扩展的话就是相同size不扩展,如果ht[0]为空就直接链过去,否则存在ht[1]备用

点击(此处)折叠或打开

  1. int dictExpand(dict *d, unsigned long size)
  2. {
  3.     dictht n; /* the new hash table */
  4.     unsigned long realsize = _dictNextPower(size);

  5.     /* the size is invalid if it is smaller than the number of
  6.      * elements already inside the hash table */
  7.     if (dictIsRehashing(d) || d->ht[0].used > size)
  8.         return DICT_ERR;

  9.     /* Rehashing to the same table size is not useful. */
  10.     if (realsize == d->ht[0].size) return DICT_ERR;

  11.     /* Allocate the new hash table and initialize all pointers to NULL */
  12.     n.size = realsize;
  13.     n.sizemask = realsize-1;
  14.     n.table = zcalloc(realsize*sizeof(dictEntry*));
  15.     n.used = 0;

  16.     /* Is this the first initialization? If so it's not really a rehashing
  17.      * we just set the first hash table so that it can accept keys. */
  18.     if (d->ht[0].table == NULL) {
  19.         d->ht[0] = n;
  20.         return DICT_OK;
  21.     }

  22.     /* Prepare a second hash table for incremental rehashing */
  23.     d->ht[1] = n;
  24.     d->rehashidx = 0;
  25.     return DICT_OK;
  26. }
rehash,为了防止在redis存储大量数据时进行rehash而block在rehash,其定义了最多访问的空桶数目n*10,其采取了渐进式的rehash来处理,rehashidx由0一直增到ht[0].size,期间如果达到了最多访问的空桶数目,就要停止,等待下一次rehash


点击(此处)折叠或打开

  1. int dictRehash(dict *d, int n) {
  2.     int empty_visits = n*10; /* Max number of empty buckets to visit. */
  3.     if (!dictIsRehashing(d)) return 0;

  4.     while(n-- && d->ht[0].used != 0) {
  5.         dictEntry *de, *nextde;

  6.         /* Note that rehashidx can't overflow as we are sure there are more
  7.          * elements because ht[0].used != 0 */
  8.         assert(d->ht[0].size > (unsigned long)d->rehashidx);
  9.         while(d->ht[0].table[d->rehashidx] == NULL) {
  10.             d->rehashidx++;
  11.             if (--empty_visits == 0) return 1;
  12.         }
  13.         de = d->ht[0].table[d->rehashidx];
  14.         /* Move all the keys in this bucket from the old to the new hash HT */
  15.         while(de) {
  16.             uint64_t h;

  17.             nextde = de->next;
  18.             /* Get the index in the new hash table */
  19.             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
  20.             de->next = d->ht[1].table[h];
  21.             d->ht[1].table[h] = de;
  22.             d->ht[0].used--;
  23.             d->ht[1].used++;
  24.             de = nextde;
  25.         }
  26.         d->ht[0].table[d->rehashidx] = NULL;
  27.         d->rehashidx++;
  28.     }

  29.     /* Check if we already rehashed the whole table... */
  30.     if (d->ht[0].used == 0) {
  31.         zfree(d->ht[0].table);
  32.         d->ht[0] = d->ht[1];
  33.         _dictReset(&d->ht[1]);
  34.         d->rehashidx = -1;
  35.         return 0;
  36.     }

  37.     /* More to rehash... */
  38.     return 1;
  39. }
添加元素,如果正在rehash,则插入在ht[1]里面,确保插入正确;删除元素,如果正在rehash,需要遍历两个表

点击(此处)折叠或打开

  1. dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
  2. {
  3.     long index;
  4.     dictEntry *entry;
  5.     dictht *ht;

  6.     if (dictIsRehashing(d)) _dictRehashStep(d);

  7.     /* Get the index of the new element, or -1 if
  8.      * the element already exists. */
  9.     if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
  10.         return NULL;

  11.     /* Allocate the memory and store the new entry.
  12.      * Insert the element in top, with the assumption that in a database
  13.      * system it is more likely that recently added entries are accessed
  14.      * more frequently. */
  15.     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
  16.     entry = zmalloc(sizeof(*entry));
  17.     entry->next = ht->table[index];
  18.     ht->table[index] = entry;
  19.     ht->used++;

  20.     /* Set the hash entry fields. */
  21.     dictSetKey(d, entry, key);
  22.     return entry;
  23. }
  24. static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
  25.     uint64_t h, idx;
  26.     dictEntry *he, *prevHe;
  27.     int table;

  28.     if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

  29.     if (dictIsRehashing(d)) _dictRehashStep(d);
  30.     h = dictHashKey(d, key);

  31.     for (table = 0; table <= 1; table++) {
  32.         idx = h & d->ht[table].sizemask;
  33.         he = d->ht[table].table[idx];
  34.         prevHe = NULL;
  35.         while(he) {
  36.             if (key==he->key || dictCompareKeys(d, key, he->key)) {
  37.                 /* Unlink the element from the list */
  38.                 if (prevHe)
  39.                     prevHe->next = he->next;
  40.                 else
  41.                     d->ht[table].table[idx] = he->next;
  42.                 if (!nofree) {
  43.                     dictFreeKey(d, he);
  44.                     dictFreeVal(d, he);
  45.                     zfree(he);
  46.                 }
  47.                 d->ht[table].used--;
  48.                 return he;
  49.             }
  50.             prevHe = he;
  51.             he = he->next;
  52.         }
  53.         if (!dictIsRehashing(d)) break;
  54.     }
  55.     return NULL; /* not found */
  56. }
删除操作代码


点击(此处)折叠或打开

  1. /* Destroy an entire dictionary */
  2. int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
  3.     unsigned long i;

  4.     /* Free all the elements */
  5.     for (i = 0; i < ht->size && ht->used > 0; i++) {
  6.         dictEntry *he, *nextHe;

  7.         if (callback && (i & 65535) == 0) callback(d->privdata);

  8.         if ((he = ht->table[i]) == NULL) continue;
  9.         while(he) {
  10.             nextHe = he->next;
  11.             dictFreeKey(d, he);
  12.             dictFreeVal(d, he);
  13.             zfree(he);
  14.             ht->used--;
  15.             he = nextHe;
  16.         }
  17.     }
  18.     /* Free the table and the allocated cache structure */
  19.     zfree(ht->table);
  20.     /* Re-initialize the table */
  21.     _dictReset(ht);
  22.     return DICT_OK; /* never fails */
  23. }

  24. /* Clear & Release the hash table */
  25. void dictRelease(dict *d)
  26. {
  27.     _dictClear(d,&d->ht[0],NULL);
  28.     _dictClear(d,&d->ht[1],NULL);
  29.     zfree(d);
  30. }
它还使用了一个指纹信息用于确定在一个iterator的生命周期内,有没有对哈希表进行非法操作

点击(此处)折叠或打开

  1. /* A fingerprint is a 64 bit number that represents the state of the dictionary
  2.  * at a given time, it's just a few dict properties xored together.
  3.  * When an unsafe iterator is initialized, we get the dict fingerprint, and check
  4.  * the fingerprint again when the iterator is released.
  5.  * If the two fingerprints are different it means that the user of the iterator
  6.  * performed forbidden operations against the dictionary while iterating. */
  7. long long dictFingerprint(dict *d) {
  8.     long long integers[6], hash = 0;
  9.     int j;

  10.     integers[0] = (long) d->ht[0].table;
  11.     integers[1] = d->ht[0].size;
  12.     integers[2] = d->ht[0].used;
  13.     integers[3] = (long) d->ht[1].table;
  14.     integers[4] = d->ht[1].size;
  15.     integers[5] = d->ht[1].used;

  16.     /* We hash N integers by summing every successive integer with the integer
  17.      * hashing of the previous sum. Basically:
  18.      *
  19.      * Result = hash(hash(hash(int1)+int2)+int3) ...
  20.      *
  21.      * This way the same set of integers in a different order will (likely) hash
  22.      * to a different number. */
  23.     for (j = 0; j < 6; j++) {
  24.         hash += integers[j];
  25.         /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
  26.         hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
  27.         hash = hash ^ (hash >> 24);
  28.         hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
  29.         hash = hash ^ (hash >> 14);
  30.         hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
  31.         hash = hash ^ (hash >> 28);
  32.         hash = hash + (hash << 31);
  33.     }
  34.     return hash;
  35. }
看看源代码还是挺有帮助的:)




阅读(4930) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~