哈希桶的分析与实现-郝姬友-ChinaUnix博客

郝姬友的ChinaUnix博客

首页　| 　博文目录　| 　关于我

郝姬友

博客访问： 210257
博文数量： 65
博客积分： 0
博客等级：民兵
技术积分： 91
用户组：普通用户
注册时间： 2015-04-10 09:41

文章分类

全部博文（65）

未分配的博文（65）

文章存档

2020年（1）

2018年（1）

2017年（30）

2016年（30）

2015年（3）

我的朋友

相关博文

哈希桶的分析与实现

分类： C/C++

2016-04-23 14:20:56

原文地址：哈希桶的分析与实现作者：幸运啤酒盖

哈希表的链地址法的实现大学里已经教过，今天来分析另一种解决哈希冲突的做法：为每个Hash值建立一个Hash桶(Bucket)，桶的容量是固定的，也就是只能处理固定次数的冲突。例如有1048576个Hash桶，每个桶中有4个表项(Entry)，总计4M个表项。其实这两种做法的实现思路类似，都是为Hash表中的每个Hash值建立一个冲突表，即将冲突的几个记录以表的形式存储在其中。

完整的代码请参见一位圣安德鲁斯大学讲师的博客。

这里截取几个主要的片段：

主要的数据结构：

/* One key-value entry stored in a bucket. sm_put fills both members with
 * malloc'ed copies of the caller's strings, so they are NUL-terminated
 * heap strings rather than borrowed pointers. */
struct Pair {

    char *key;   /* heap copy of the key string */

    char *value; /* heap copy of the value string; sm_put may realloc it on update */
};

/* All entries whose keys hash to the same bucket index. */
struct Bucket {
    unsigned int count; /* number of entries currently in pairs */
    Pair *pairs;        /* heap array of count entries; sm_put allocates it on the
                         * first insertion and grows it by one entry per new key */
};

/* The hash map itself: a table of buckets indexed by hash(key) % count. */
struct StrMap {
    unsigned int count; /* number of buckets; used as the modulus when hashing */
    Bucket *buckets;    /* bucket table; presumably holds count elements -
                         * the allocation is not shown in this excerpt */
};

主要的函数：

put：

/*
 * Associates value with key in map, replacing the value of an existing entry
 * with the same key or appending a new entry to the bucket the key hashes to.
 *
 * The map stores its own heap copies of key and value; the caller keeps
 * ownership of the strings passed in.
 *
 * Parameters:
 *   map   - the map to modify
 *   key   - NUL-terminated key string
 *   value - NUL-terminated value string
 *
 * Returns 1 on success. Returns 0 if any argument is NULL, if the map has no
 * buckets, or if a memory allocation fails; on failure the map is unchanged.
 */
int sm_put(StrMap *map, const char *key, const char *value)
{
    size_t key_size, value_size;
    unsigned int index;
    Bucket *bucket;
    Pair *tmp_pairs, *pair;
    char *tmp_value;
    char *new_key, *new_value;

    if (map == NULL || key == NULL || value == NULL) {
        return 0;
    }
    /* With zero buckets the modulo below would divide by zero. */
    if (map->count == 0) {
        return 0;
    }

    /* Sizes include the terminating NUL and stay in size_t so that very
     * long strings are not truncated before they size an allocation. */
    key_size = strlen(key) + 1;
    value_size = strlen(value) + 1;

    /* Get a pointer to the bucket the key string hashes to */
    index = hash(key) % map->count;
    bucket = &(map->buckets[index]);

    /* Check if we can handle insertion by simply replacing an existing
     * value in a key-value pair in the bucket. */
    pair = get_pair(bucket, key);
    if (pair != NULL) {
        /* Only grow the stored buffer; a shorter value reuses it as is. */
        if (strlen(pair->value) + 1 < value_size) {
            tmp_value = realloc(pair->value, value_size);
            if (tmp_value == NULL) {
                return 0;
            }
            pair->value = tmp_value;
        }
        /* Copy the new value into the pair that matches the key */
        memcpy(pair->value, value, value_size);
        return 1;
    }

    /* Allocate space for a new key and value */
    new_key = malloc(key_size);
    if (new_key == NULL) {
        return 0;
    }
    new_value = malloc(value_size);
    if (new_value == NULL) {
        free(new_key);
        return 0;
    }

    /* Make room for one more pair in the bucket */
    if (bucket->count == 0) {
        /* The bucket is empty, lazily allocate space for a single
         * key-value pair. */
        tmp_pairs = malloc(sizeof *tmp_pairs);
    } else {
        /* The bucket wasn't empty but no pair matched the provided key, so
         * extend the array by one. The count is widened first so that the
         * +1 cannot wrap in unsigned int before the multiplication. */
        tmp_pairs = realloc(bucket->pairs,
                            ((size_t)bucket->count + 1) * sizeof *tmp_pairs);
    }
    if (tmp_pairs == NULL) {
        free(new_key);
        free(new_value);
        return 0;
    }
    bucket->pairs = tmp_pairs;
    bucket->count++;

    /* Fill in the last pair in the chain for the bucket */
    pair = &(bucket->pairs[bucket->count - 1]);
    pair->key = new_key;
    pair->value = new_value;

    /* Copy the key and its value into the key-value pair */
    memcpy(pair->key, key, key_size);
    memcpy(pair->value, value, value_size);

    return 1;
}

get：

/*
 * Looks up key in map and copies the associated value into out_buf.
 *
 * Parameters:
 *   map       - the map to search
 *   key       - NUL-terminated key string
 *   out_buf   - destination buffer, or NULL to query the required size
 *   n_out_buf - capacity of out_buf in bytes
 *
 * Returns:
 *   - 0 if map or key is NULL, the map has no buckets, or the key is absent;
 *   - the number of bytes needed to hold the value including its NUL
 *     terminator when out_buf is NULL and n_out_buf is 0;
 *   - 0 if out_buf is NULL with a non-zero n_out_buf, or if out_buf is too
 *     small to hold the value and its terminator;
 *   - 1 after the value has been copied into out_buf.
 */
int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf)
{
    unsigned int index;
    Bucket *bucket;
    Pair *pair;
    size_t value_size;

    if (map == NULL || key == NULL) {
        return 0;
    }
    /* With zero buckets the modulo below would divide by zero. */
    if (map->count == 0) {
        return 0;
    }

    index = hash(key) % map->count;
    bucket = &(map->buckets[index]);
    pair = get_pair(bucket, key);
    if (pair == NULL) {
        return 0;
    }

    /* Measure the stored value once; the size includes the terminating NUL. */
    value_size = strlen(pair->value) + 1;

    if (out_buf == NULL) {
        /* Size query only when the caller also passed a zero capacity. */
        return (n_out_buf == 0) ? (int)value_size : 0;
    }
    if (value_size > n_out_buf) {
        return 0;
    }
    memcpy(out_buf, pair->value, value_size);

    return 1;
}

哈希函数：

/*
 * Returns a hash code for the provided string.
 *
 * Starts from the seed 5381 and, for every character up to (not including)
 * the terminating NUL, multiplies the running code by 33 and adds the
 * character value. Arithmetic is done in unsigned long, so overflow wraps.
 */
static unsigned long hash(const char *str)
{
    unsigned long code = 5381;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        /* Widen through int, as plain char may be signed on this platform. */
        int ch = *cursor;

        code = code * 33 + ch;
    }

    return code;
}

大致的思路是这样的：

首先哈希桶的个数是固定的，由用户在构建的时候输入，一旦构建，个数就已经固定；查找的时候首先将key值通过哈希函数获取哈希值，根据哈希值获取到对应的哈希桶，然后遍历哈希桶内的pairs数组获取对应的值。

这两种实现方法看似比较类似，但也有差异：

基于哈希桶的情况下，由于Hash桶容量的限制，所以，有可能发生Hash表填不满的情况，也就是，虽然Hash表里面还有空位，但是新建的表项由于冲突过多，而不能装入Hash表中。不过，这样的实现也有其好处，就是查表的最大开销是可以确定的，因为最多处理的冲突数是确定的，所以算法的时间复杂度为O(1)+O(m)，其中m为Hash桶容量。（需要注意的是，上面截取的代码中，桶内的pairs数组是通过realloc按需增长的，并没有固定的容量上限；这里讨论的是桶容量固定的一般情形。）

而另一种通过链表的实现，由于Hash桶的容量是无限的，因此，只要没有超出Hash表的最大容量，就能够容纳新建的表项。但是，一旦发生了Hash冲突严重的情况，就会造成Hash桶的链表过长，大大降低查找效率。在最坏的情况下，时间复杂度退化为O(n)，其中n为Hash表的总容量。当然，这种情况的概率小之又小，几乎是可以忽略的。

阅读(2909) | 评论(0) | 转发(0) |

上一篇：滑动窗口与拥塞窗口

下一篇：C语言typeof详解

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6