1. 概述

nginx 中的hash表主要是用处理静态key/value的查询，也就是根据静态数据初始化hash表后，不再增加删除数据到hash表中，用于根据key获得value的查询。这与redis等NoSQL数据库中动态hash表不同，nginx初始化静hash表一次，就把所有的静态数据初始化到hash表中，供后面查询使用，所以初始化的速度影响不大，只要保证后面查询效率高即可。

另外，hash表还有一个碰撞问题。解决碰撞问题，主要有三种：

线性探测；
二次哈希；
开放链表。

nginx的hash表解决碰撞问题使用的第三种方式，但不是链表，而是一个数组。

2. 数据结构

点击(此处)折叠或打开

typedef struct {
// 指向用户数据，必须保证value为该结构的第一个成员，后面的初始化及查找需要value作为第一个结构成员
void *value;
// 与name的长度
u_short len;
// 用户数据的key保存在name中，不定长
u_char name[1];
} ngx_hash_elt_t;
typedef struct {
// ngx_hash_elt_t 数组的首址
ngx_hash_elt_t **buckets;
// 数组的大小
ngx_uint_t size;
} ngx_hash_t;
// hash表初始化时的辅助数据结构
typedef struct {
// key->data 所指向的数据保存到 ngx_hash_elt_t 的 name 中；
// key->len 保存到 ngx_hash_elt_t 的 len 中
ngx_str_t key;
// 根据 ngx_hash_key_pt 哈希函数计算出的值
ngx_uint_t key_hash;
// 指向 key对应的数据
void *value;
} ngx_hash_key_t;
// 哈希函数
typedef ngx_uint_t (*ngx_hash_key_pt) (u_char *data, size_t len);
// 初始化一个 hash 表的相关参数的数据结构
typedef struct {
// 需要初始化的hash表，调用 ngx_hash_init 后，即可以完成对hash表的初始化
ngx_hash_t *hash;
// 计算 key 的 key_hash 的函数指针，不是很准确，应该 key_hash % hash->size
// 得到真正的hash值
ngx_hash_key_pt key;
// 初始化限制因子，即 hash->size 应 <= max_size;
ngx_uint_t max_size;
// 初始化限制因子, 即 hash 表中每个桶的大小不超过bucket_size,
// 即hash表的每个元素不能大于bucket_size - sizeof(void *)
// ( 桶的结束符还占用 sizeof(void *) 大小
ngx_uint_t bucket_size;
// 桶的名字，主要用于记录日志，方便阅读
char *name;
// 如果 hash 指针为空，则 hash 表结构从该内存池分配内存；
// 所有hash 元素的内存从该内存池pool分配内存
ngx_pool_t *pool;
// 分配临时内存用的内存池
ngx_pool_t *temp_pool;
} ngx_hash_init_t;

3. 接口

点击(此处)折叠或打开

// hash表初始化
ngx_int_t ngx_hash_init(ngx_hash_init_t *hinit, ngx_hash_key_t *names,
ngx_uint_t nelts);
// 根据key从hash表查询value
void *ngx_hash_find(ngx_hash_t *hash, ngx_uint_t key, u_char *name, size_t len);
// 计算 ngx_hash_elt_t 所占用的内存的大小，并要求对齐按照 sizeof(*) 对齐
// 其中第一个 sizeof（void *) 是 value 指针所占的内存大小
// name->key.len 是 ngx_hash_elt_t的数组name所占用的内存大小，
// 2 表示 ngx_hash_elt_t的len成员占用的字节
// 并按照 sizeof(void *) 对齐
#define NGX_HASH_ELT_SIZE(name) \
(sizeof(void *) + ngx_align((name)->key.len + 2, sizeof(void *)))

4. 接口解析

4.1 ngx_hash_init hash表初始化解析

点击(此处)折叠或打开

ngx_int_t

ngx_hash_init(ngx_hash_init_t hinit, ngx_hash_key_t names, ngx_uint_t nelts)

{

    u_char elts;

    size_t len;

    u_short test;

    ngx_uint_t i, n, key, size, start, bucket_size;

    ngx_hash_elt_t *elt, **buckets;

    for (n = 0; n < nelts; n++) {

        if (hinit->bucket_size < NGX_HASH_ELT_SIZE(&names[n]) + sizeof(void ))

        {

            // names[n] 表示的 ngx_hash_elt_t 占用的空间 + sizeof(void ) 大于 bucket_size;

            // 加 sizeof(void ) 是因为每个桶的最后用一个空指针表示该桶结束

            ngx_log_error(NGX_LOG_EMERG, hinit->pool->log, 0,

                          "could not build the %s, you should "

                          "increase %s_bucket_size: %i",

                          hinit->name, hinit->name, hinit->bucket_size);

            return NGX_ERROR;

        }

    }

    // 用于统计每个桶的大小，感觉此处使用 sizeof(ngx_uint_t) 更合适

    // 猜测：可能作者认为不会有超过u_short范围的需求？

    test = ngx_alloc(hinit->max_size sizeof(u_short), hinit->pool->log);

    if (test == NULL) {

        return NGX_ERROR;

    }

    // 去掉桶的结尾大小，每个桶用一个NULL指针表示结束

    bucket_size = hinit->bucket_size - sizeof(void );

    // ngx_hash_elt_t 对齐后算占用的最小空间为 min_memof(ngx_hash_elt_t) = 2 sizeof(void )

    // max_bucket_elt_num = bucket_size / min_memof(ngx_hash_elt_t) 为一个桶的最多元素数目

    // min_bucket_num = nelts / max_bucket_elt_num

    start = nelts / (bucket_size / (2 sizeof(void )));

    start = start ? start : 1;

    // 该部分不懂。猜测：

    // max_size 不算小，且 max_size / nelts < 100 不是非常非常稀疏，

    // 为了效率，直接从 max_size - 1000 开始探测，也就是最多探测1000次

    if (hinit->max_size > 10000 && nelts && hinit->max_size / nelts < 100) {

        start = hinit->max_size - 1000;

    }

    // 开始探测调整size的大小，以找最小的满足要求的size

    for (size = start; size < hinit->max_size; size++) {

        ngx_memzero(test, size sizeof(u_short));

        for (n = 0; n < nelts; n++) {

            if (names[n].key.data == NULL) {

                continue;

            }

            key = names[n].key_hash % size;

            // 累计计算出每个桶的大小

            test[key] = (u_short) (test[key] + NGX_HASH_ELT_SIZE(&names[n]));

#if 0

            ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,

                          "%ui: %ui %ui \"%V\"",

                          size, key, test[key], &names[n].key);

#endif

            // 如果已经大于 bucket_size ,从下个size再试探

            // 此处 bucket_size 已经减去 sizeof(void )

            if (test[key] > (u_short) bucket_size) {

                goto next;

            }

        }

        goto found;

    next:

        continue;

    }

    ngx_log_error(NGX_LOG_EMERG, hinit->pool->log, 0,

                  "could not build the %s, you should increase "

                  "either %s_max_size: %i or %s_bucket_size: %i",

                  hinit->name, hinit->name, hinit->max_size,

                  hinit->name, hinit->bucket_size);

    ngx_free(test);

    return NGX_ERROR;

found:

    // 已经找到合适的size

    for (i = 0; i < size; i++) {

        test[i] = sizeof(void ); // 每个桶加上结尾NULL指针的大小

    }

    for (n = 0; n < nelts; n++) {

        if (names[n].key.data == NULL) {

            continue;

        }

        key = names[n].key_hash % size;

        test[key] = (u_short) (test[key] + NGX_HASH_ELT_SIZE(&names[n]));

    }

    len = 0;

    for (i = 0; i < size; i++) {

        if (test[i] == sizeof(void )) {

            // 该桶没有元素

            continue;

        }

        // 为了效率，按照 cacheline 对齐

        test[i] = (u_short) (ngx_align(test[i], ngx_cacheline_size));

        len += test[i];

    }

    if (hinit->hash == NULL) {

        hinit->hash = ngx_pcalloc(hinit->pool, sizeof(ngx_hash_wildcard_t)

                                             + size sizeof(ngx_hash_elt_t *));

        if (hinit->hash == NULL) {

            ngx_free(test);

            return NGX_ERROR;

        }

        buckets = (ngx_hash_elt_t **)

                      ((u_char ) hinit->hash + sizeof(ngx_hash_wildcard_t));

    } else {

        buckets = ngx_pcalloc(hinit->pool, size sizeof(ngx_hash_elt_t ));

        if (buckets == NULL) {

            ngx_free(test);

            return NGX_ERROR;

        }

    }

    // 所有元素都分配在一块连续内存内，保持数据局部性，提高性能

    // 加 ngx_cacheline_size 的防止在按照 ngx_cacheline_size 调整内存指针 elts 时

    // 防止内存越界

    elts = ngx_palloc(hinit->pool, len + ngx_cacheline_size);

    if (elts == NULL) {

        ngx_free(test);

        return NGX_ERROR;

    }

    // 按照 ngx_cacheline_size 调整内存指针 elts

    // 还是为了提高性能

    elts = ngx_align_ptr(elts, ngx_cacheline_size);

    // 设置每个桶的起始位置

    for (i = 0; i < size; i++) {

        if (test[i] == sizeof(void )) {

            continue;

        }

        buckets[i] = (ngx_hash_elt_t ) elts;

        elts += test[i];

    }

    // 设置桶的当前可用内存位置

    for (i = 0; i < size; i++) {

        test[i] = 0;

    }

    // 把每个元素放进相应的桶内的相应位置

    for (n = 0; n < nelts; n++) {

        if (names[n].key.data == NULL) {

            continue;

        }

        // 找到对应的桶

        key = names[n].key_hash % size;

        // 找到空位置

        elt = (ngx_hash_elt_t ) ((u_char ) buckets[key] + test[key]);

        // 设置 elt 的值

        elt->value = names[n].value;

        elt->len = (u_short) names[n].key.len;

        ngx_strlow(elt->name, names[n].key.data, names[n].key.len);

        // 记录下一个空位置的地址

        test[key] = (u_short) (test[key] + NGX_HASH_ELT_SIZE(&names[n]));

    }

    for (i = 0; i < size; i++) {

        if (buckets[i] == NULL) {

            continue;

        }

        elt = (ngx_hash_elt_t ) ((u_char ) buckets[i] + test[i]);

        // 虽然 elt 指向 bucket[i] 的最后4个字节，还是强制转为 (ngx_hash_elt_t ) 类型

        // 而 ngx_hash_elt_t 的成员 void value 恰好处于首部，故赋值 value为null没问题；

        // 如 ngx_hash_elt_t 的成员 void value 不处于首部，则有问题，导致内存越界

        // 查询时就是利用 elt->value == NULL 条件来结束对该桶的查询

        elt->value = NULL;

    }

    // 释放辅助内存

    ngx_free(test);

    hinit->hash->buckets = buckets;

    hinit->hash->size = size;

#if 0

    for (i = 0; i < size; i++) {

        ngx_str_t val;

        ngx_uint_t key;

        elt = buckets[i];

        if (elt == NULL) {

            ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,

                          "%ui: NULL", i);

            continue;

        }

        while (elt->value) {

            val.len = elt->len;

            val.data = &elt->name[0];

            key = hinit->key(val.data, val.len);

            ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,

                          "%ui: %p \"%V\" %ui", i, elt, &val, key);

            elt = (ngx_hash_elt_t ) ngx_align_ptr(&elt->name[0] + elt->len,

                                                   sizeof(void ));

        }

    }

#endif

    return NGX_OK;

}

初始化完成后，大体的内存布局如下图： ngx_hast_t内存布局示意图

4.2 ngx_hsh_find 查询方法解析

点击(此处)折叠或打开

void *
ngx_hash_find(ngx_hash_t *hash, ngx_uint_t key, u_char *name, size_t len)
{
ngx_uint_t i;
ngx_hash_elt_t *elt;
#if 0
ngx_log_error(NGX_LOG_ALERT, ngx_cycle->log, 0, "hf:\"%*s\"", len, name);
#endif
elt = hash->buckets[key % hash->size];
// 该桶为null
if (elt == NULL) {
return NULL;
}
// 找到桶的第一个元素的地址且不为空
// elt->value = NULL 为该桶的结束标志，
// 到达该结束标志未找到，即不再hash表中
while (elt->value) {
// 先匹配长度
if (len != (size_t) elt->len) {
goto next;
}
// 再检查每个字符
for (i = 0; i < len; i++) {
if (name[i] != elt->name[i]) {
goto next;
}
}
return elt->value;
next:
elt = (ngx_hash_elt_t *) ngx_align_ptr(&elt->name[0] + elt->len,
sizeof(void *));
continue;
}
// 未找到
return NULL;
}

5. 其他

nginx的hash表还用来处理key带通配符的，主要用来location的查找，key的通配符又分前向、后向，即头部带通配符、尾部通配符。基本原理相似，但更复杂一些。

阅读(1601) | 评论(0) | 转发(0) |

上一篇：4. nginx基本数据结构之链表ngx_list_t

下一篇：7. nginx基本数据结构之基数树ngx_radix_tree_t

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6