dict这两个文件是用来实现哈希表功能的,其原文是这样说的
-
/* This file implements in-memory hash tables with insert/del/replace/find/
-
* get-random-element operations. Hash tables will auto-resize if needed
-
* tables of power of two in size are used, collisions are handled by
-
* chaining. See the source code for more information... :)*/
然后是定义了两个宏来表示dict的状态
-
#define DICT_OK 0
-
#define DICT_ERR 1
之后的话就是定义了一堆结构体来表征dict,源代码里面是通过自底向上的方式进行罗列,但我还是喜欢自顶向下来描述一下,orz~~
首先是dict,其中存放了字典类型dictType(哈希函数、键值复制函数、键值销毁函数、键的比较)信息,两个哈希表dictht,这里之所以用两个,是为了在字典扩展或者缩容时将旧表rehash到新表时中间寄存数据所用,导完数据之后,会释放旧表。
-
typedef struct dict {
-
dictType *type;
-
void *privdata;
-
dictht ht[2];
-
long rehashidx; /* rehashing not in progress if rehashidx == -1,用于在rehash的时候,遍历每一个桶*/
-
unsigned long iterators; /* number of iterators currently running */
-
} dict;
接着是dictType和dictht
-
typedef struct dictType {
-
uint64_t (*hashFunction)(const void *key);
-
void *(*keyDup)(void *privdata, const void *key);
-
void *(*valDup)(void *privdata, const void *obj);
-
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
-
void (*keyDestructor)(void *privdata, void *key);
-
void (*valDestructor)(void *privdata, void *obj);
-
} dictType;
-
typedef struct dictht {
-
dictEntry **table;
-
unsigned long size;
-
unsigned long sizemask;
-
unsigned long used;
-
} dictht;
最后就是dict当中的基本单元dictEntry和iterator了
-
typedef struct dictEntry {
-
void *key;
-
union {
-
void *val;
-
uint64_t u64;
-
int64_t s64;
-
double d;
-
} v;
-
struct dictEntry *next;
-
} dictEntry;
-
-
/* If safe is set to 1 this is a safe iterator, that means, you can call
-
* dictAdd, dictFind, and other functions against the dictionary even while
-
* iterating. Otherwise it is a non safe iterator, and only dictNext()
-
* should be called while iterating. */
-
typedef struct dictIterator {
-
dict *d;
-
long index;
-
int table, safe;
-
dictEntry *entry, *nextEntry;
-
/* unsafe iterator fingerprint for misuse detection. */
-
long long fingerprint;
-
} dictIterator;
由以上结构我们也可以猜测一下,该哈希表解决hash冲突的途径是使用链表放进桶里面: )
接下来是定义了几个私有函数:
-
static void _dictReset(dictht *ht)
-
{
-
ht->table = NULL;
-
ht->size = 0;
-
ht->sizemask = 0;
-
ht->used = 0;
-
}
-
/* Initialize the hash table */
-
int _dictInit(dict *d, dictType *type,
-
void *privDataPtr)
-
{
-
_dictReset(&d->ht[0]);
-
_dictReset(&d->ht[1]);
-
d->type = type;
-
d->privdata = privDataPtr;
-
d->rehashidx = -1;
-
d->iterators = 0;
-
return DICT_OK;
-
}
-
/* Expand the hash table if needed */
-
static int _dictExpandIfNeeded(dict *d)
-
{
-
/* Incremental rehashing already in progress. Return. */
-
if (dictIsRehashing(d)) return DICT_OK;
-
-
/* If the hash table is empty expand it to the initial size. */
-
if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);// 4
-
-
/* If we reached the 1:1 ratio, and we are allowed to resize the hash
-
* table (global setting) or we should avoid it but the ratio between
-
* elements/buckets is over the "safe" threshold, we resize doubling
-
* the number of buckets. */
-
if (d->ht[0].used >= d->ht[0].size &&
-
(dict_can_resize ||
-
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))//5
-
{
-
return dictExpand(d, d->ht[0].used*2);
-
}
-
return DICT_OK;
-
}
-
/* Our hash table capability is a power of two */
-
static unsigned long _dictNextPower(unsigned long size)
-
{
-
unsigned long i = DICT_HT_INITIAL_SIZE;
-
-
if (size >= LONG_MAX) return LONG_MAX + 1LU;
-
while(1) {
-
if (i >= size)
-
return i;
-
i *= 2;
-
}
-
}
-
// 用于检测当前key是否已经存在于表中,-1表示出错或者已经存在(此时可能返回已经存在的元素的指针),另外,如果当前正在rehashing,则返回的就是新的哈希表的index
-
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
-
{
-
unsigned long idx, table;
-
dictEntry *he;
-
if (existing) *existing = NULL;
-
-
/* Expand the hash table if needed */
-
if (_dictExpandIfNeeded(d) == DICT_ERR)
-
return -1;
-
for (table = 0; table <= 1; table++) {
-
idx = hash & d->ht[table].sizemask;
-
/* Search if this slot does not already contain the given key */
-
he = d->ht[table].table[idx];
-
while(he) {
-
if (key==he->key || dictCompareKeys(d, key, he->key)) {
-
if (existing) *existing = he;
-
return -1;
-
}
-
he = he->next;
-
}
-
if (!dictIsRehashing(d)) break;
-
}
-
return idx;
-
}
其中其哈希算法用的是siphash,它的输入是一个变长字符串以及一个128位的key,输出就是哈希值了,在redis的dict当中的定义如下,其中两个sip函数的具体实现参见源码的siphash.c文件:
-
static uint8_t dict_hash_function_seed[16]; // key
-
uint64_t dictGenHashFunction(const void *key, int len) {
-
return siphash(key,len,dict_hash_function_seed);
-
}
-
-
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
-
return siphash_nocase(buf,len,dict_hash_function_seed);
-
}
下面简单说一下字典的扩展和rehash。扩展的话就是相同size不扩展,如果ht[0]为空就直接链过去,否则存在ht[1]备用
-
int dictExpand(dict *d, unsigned long size)
-
{
-
dictht n; /* the new hash table */
-
unsigned long realsize = _dictNextPower(size);
-
-
/* the size is invalid if it is smaller than the number of
-
* elements already inside the hash table */
-
if (dictIsRehashing(d) || d->ht[0].used > size)
-
return DICT_ERR;
-
-
/* Rehashing to the same table size is not useful. */
-
if (realsize == d->ht[0].size) return DICT_ERR;
-
-
/* Allocate the new hash table and initialize all pointers to NULL */
-
n.size = realsize;
-
n.sizemask = realsize-1;
-
n.table = zcalloc(realsize*sizeof(dictEntry*));
-
n.used = 0;
-
-
/* Is this the first initialization? If so it's not really a rehashing
-
* we just set the first hash table so that it can accept keys. */
-
if (d->ht[0].table == NULL) {
-
d->ht[0] = n;
-
return DICT_OK;
-
}
-
-
/* Prepare a second hash table for incremental rehashing */
-
d->ht[1] = n;
-
d->rehashidx = 0;
-
return DICT_OK;
-
}
rehash,为了防止在redis存储大量数据时进行rehash而block在rehash,其定义了最多访问的空桶数目n*10,其采取了渐进式的rehash来处理,rehashidx由0一直增到ht[0].size,期间如果达到了最多访问的空桶数目,就要停止,等待下一次rehash
-
int dictRehash(dict *d, int n) {
-
int empty_visits = n*10; /* Max number of empty buckets to visit. */
-
if (!dictIsRehashing(d)) return 0;
-
-
while(n-- && d->ht[0].used != 0) {
-
dictEntry *de, *nextde;
-
-
/* Note that rehashidx can't overflow as we are sure there are more
-
* elements because ht[0].used != 0 */
-
assert(d->ht[0].size > (unsigned long)d->rehashidx);
-
while(d->ht[0].table[d->rehashidx] == NULL) {
-
d->rehashidx++;
-
if (--empty_visits == 0) return 1;
-
}
-
de = d->ht[0].table[d->rehashidx];
-
/* Move all the keys in this bucket from the old to the new hash HT */
-
while(de) {
-
uint64_t h;
-
-
nextde = de->next;
-
/* Get the index in the new hash table */
-
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
-
de->next = d->ht[1].table[h];
-
d->ht[1].table[h] = de;
-
d->ht[0].used--;
-
d->ht[1].used++;
-
de = nextde;
-
}
-
d->ht[0].table[d->rehashidx] = NULL;
-
d->rehashidx++;
-
}
-
-
/* Check if we already rehashed the whole table... */
-
if (d->ht[0].used == 0) {
-
zfree(d->ht[0].table);
-
d->ht[0] = d->ht[1];
-
_dictReset(&d->ht[1]);
-
d->rehashidx = -1;
-
return 0;
-
}
-
-
/* More to rehash... */
-
return 1;
-
}
添加元素,如果正在rehash,则插入在ht[1]里面,确保插入正确;删除元素,如果正在rehash,需要遍历两个表
-
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
-
{
-
long index;
-
dictEntry *entry;
-
dictht *ht;
-
-
if (dictIsRehashing(d)) _dictRehashStep(d);
-
-
/* Get the index of the new element, or -1 if
-
* the element already exists. */
-
if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
-
return NULL;
-
-
/* Allocate the memory and store the new entry.
-
* Insert the element in top, with the assumption that in a database
-
* system it is more likely that recently added entries are accessed
-
* more frequently. */
-
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
-
entry = zmalloc(sizeof(*entry));
-
entry->next = ht->table[index];
-
ht->table[index] = entry;
-
ht->used++;
-
-
/* Set the hash entry fields. */
-
dictSetKey(d, entry, key);
-
return entry;
-
}
-
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
-
uint64_t h, idx;
-
dictEntry *he, *prevHe;
-
int table;
-
-
if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
-
-
if (dictIsRehashing(d)) _dictRehashStep(d);
-
h = dictHashKey(d, key);
-
-
for (table = 0; table <= 1; table++) {
-
idx = h & d->ht[table].sizemask;
-
he = d->ht[table].table[idx];
-
prevHe = NULL;
-
while(he) {
-
if (key==he->key || dictCompareKeys(d, key, he->key)) {
-
/* Unlink the element from the list */
-
if (prevHe)
-
prevHe->next = he->next;
-
else
-
d->ht[table].table[idx] = he->next;
-
if (!nofree) {
-
dictFreeKey(d, he);
-
dictFreeVal(d, he);
-
zfree(he);
-
}
-
d->ht[table].used--;
-
return he;
-
}
-
prevHe = he;
-
he = he->next;
-
}
-
if (!dictIsRehashing(d)) break;
-
}
-
return NULL; /* not found */
-
}
删除操作代码
-
/* Destroy an entire dictionary */
-
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
-
unsigned long i;
-
-
/* Free all the elements */
-
for (i = 0; i < ht->size && ht->used > 0; i++) {
-
dictEntry *he, *nextHe;
-
-
if (callback && (i & 65535) == 0) callback(d->privdata);
-
-
if ((he = ht->table[i]) == NULL) continue;
-
while(he) {
-
nextHe = he->next;
-
dictFreeKey(d, he);
-
dictFreeVal(d, he);
-
zfree(he);
-
ht->used--;
-
he = nextHe;
-
}
-
}
-
/* Free the table and the allocated cache structure */
-
zfree(ht->table);
-
/* Re-initialize the table */
-
_dictReset(ht);
-
return DICT_OK; /* never fails */
-
}
-
-
/* Clear & Release the hash table */
-
void dictRelease(dict *d)
-
{
-
_dictClear(d,&d->ht[0],NULL);
-
_dictClear(d,&d->ht[1],NULL);
-
zfree(d);
-
}
它还使用了一个指纹信息用于确定在一个iterator的生命周期内,有没有对哈希表进行非法操作
-
/* A fingerprint is a 64 bit number that represents the state of the dictionary
-
* at a given time, it's just a few dict properties xored together.
-
* When an unsafe iterator is initialized, we get the dict fingerprint, and check
-
* the fingerprint again when the iterator is released.
-
* If the two fingerprints are different it means that the user of the iterator
-
* performed forbidden operations against the dictionary while iterating. */
-
long long dictFingerprint(dict *d) {
-
long long integers[6], hash = 0;
-
int j;
-
-
integers[0] = (long) d->ht[0].table;
-
integers[1] = d->ht[0].size;
-
integers[2] = d->ht[0].used;
-
integers[3] = (long) d->ht[1].table;
-
integers[4] = d->ht[1].size;
-
integers[5] = d->ht[1].used;
-
-
/* We hash N integers by summing every successive integer with the integer
-
* hashing of the previous sum. Basically:
-
*
-
* Result = hash(hash(hash(int1)+int2)+int3) ...
-
*
-
* This way the same set of integers in a different order will (likely) hash
-
* to a different number. */
-
for (j = 0; j < 6; j++) {
-
hash += integers[j];
-
/* For the hashing step we use Tomas Wang's 64 bit integer hash. */
-
hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
-
hash = hash ^ (hash >> 24);
-
hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
-
hash = hash ^ (hash >> 14);
-
hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
-
hash = hash ^ (hash >> 28);
-
hash = hash + (hash << 31);
-
}
-
return hash;
-
}
看看源代码还是挺有帮助的:)
阅读(4930) | 评论(0) | 转发(0) |