一个hash算法的实现-CUDev-ChinaUnix博客

CUDevcudev.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

CUDev

博客访问： 5799804
博文数量： 675
博客积分： 20301
博客等级：上将
技术积分： 7671
用户组：普通用户
注册时间： 2005-12-31 16:15

文章分类

全部博文（675）

Web架构（4）
Thinking（1）
SF（2）
Kernel and Drive（70）
perl（2）
QT4学习笔记（9）
网络编程（52）
嵌入式Linux（4）
服务器管理（64）
操作系统研究（11）
Linux深入学习（38）
算法研究（29）
网络安全（34）
python（19）
心情日记（6）
程序设计（127）
Linux应用（134）
Shell（64）
未分配的博文（5）

文章存档

2012年（1）

2011年（20）

2010年（14）

2009年（63）

2008年（118）

2007年（141）

2006年（318）

我的朋友

相关博文

一个hash算法的实现

分类： C/C++

2007-05-08 14:53:49

/* Copyright 2006 David Crawshaw, released under the new BSD license.
* Version 2, from */

#include
#include
#include
#include "hash.h"

/* Table is sized by primes to minimise clustering.
   See: */
static const unsigned int sizes[] = {
    53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
    196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
    50331653, 100663319, 201326611, 402653189, 805306457, 1610612741
};
static const int sizes_count = sizeof(sizes) / sizeof(sizes[0]);
/*hash表上限因子，当大于这个因子时，需要增长hash表*/
static const float load_factor = 0.65;

struct record {
    unsigned int hash;
    const char *key;
    void *value;
};

struct hash {
    struct record *records;
    unsigned int records_count;
    unsigned int size_index;
};

/*增长hash表，用于扩展hash表容量*/
static int hash_grow(hash *h)
{
    int i;
    struct record *old_recs;
    unsigned int old_recs_length;

    old_recs_length = sizes[h->size_index];
    old_recs = h->records;

/*本来hash表已经是最大的了，无法再增长了*/
    if (h->size_index == sizes_count - 1) return -1;
   /*取下一个hash表大小*/
    if ((h->records = calloc(sizes[++h->size_index],
                    sizeof(struct record))) == NULL) {
        h->records = old_recs;
        return -1;
    }

    h->records_count = 0;

/*将原来旧的hash表中的数据，添加到新建的hash表中*/
    // rehash table
    for (i=0; i < old_recs_length; i++)
        if (old_recs[i].hash && old_recs[i].key)
            hash_add(h, old_recs[i].key, old_recs[i].value);

    free(old_recs);

    return 0;
}

/* algorithm djb2 */
static unsigned int strhash(const char *str)
{
    int c;
    int hash = 5381;
    while (c = *str++)
        hash = hash * 33 + c; //(hash<<5)+hash+c
    return hash == 0 ? 1 : hash;
}

hash * hash_new(unsigned int capacity) {
    struct hash *h;
    int i, sind;

    capacity /= load_factor;

    for (i=0; i < sizes_count; i++)
        if (sizes[i] > capacity) { sind = i; break; }

    if ((h = malloc(sizeof(struct hash))) == NULL) return NULL;
    if ((h->records = calloc(sizes[sind], sizeof(struct record))) == NULL) {
        free(h);
        return NULL;
    }

    h->records_count = 0;
    h->size_index = sind;

    return h;
}

void hash_destroy(hash *h)
{
    free(h->records);
    free(h);
}

int hash_add(hash *h, const char *key, void *value)
{
    struct record *recs;
    int rc;
    unsigned int off, ind, size, code;

    if (key == NULL || *key == '\0') return -2;
   /*当hash表中的元素个数达到一定的限制之后，就应该增长hash表，使其能够容纳更多的数据*/
    if (h->records_count > sizes[h->size_index] * load_factor) {
        rc = hash_grow(h);
        if (rc) return rc;
    }

    code = strhash(key);
    recs = h->records;
    size = sizes[h->size_index];

    ind = code % size;
    off = 0;
/*查看ind内是否有数据，如果没有就可以加入新的节点*/
    while (recs[ind].key)
        ind = (code + (int)pow(++off,2)) % size;

    recs[ind].hash = code;
    recs[ind].key = key;
    recs[ind].value = value;

    h->records_count++;

    return 0;
}

void * hash_get(hash *h, const char *key)
{
    struct record *recs;
    unsigned int off, ind, size;
    unsigned int code = strhash(key);

    recs = h->records;
    size = sizes[h->size_index];
    ind = code % size;
    off = 0;

    // search on hash which remains even if a record has been removed,
    // so hash_remove() does not need to move any collision records
    while (recs[ind].hash) {
        if ((code == recs[ind].hash) && recs[ind].key &&
                strcmp(key, recs[ind].key) == 0)
            return recs[ind].value;
        ind = (code + (int)pow(++off,2)) % size;
    }

    return NULL;
}

void * hash_remove(hash *h, const char *key)
{
    unsigned int code = strhash(key);
    struct record *recs;
    void * value;
    unsigned int off, ind, size;

    recs = h->records;
    size = sizes[h->size_index];
    ind = code % size;
    off = 0;

    while (recs[ind].hash) {
        if ((code == recs[ind].hash) && recs[ind].key &&
                strcmp(key, recs[ind].key) == 0) {
            // do not erase hash, so probes for collisions succeed
            value = recs[ind].value;
            recs[ind].key = 0;
            recs[ind].value = 0;
            h->records_count--;
            return value;
        }
        ind = (code + (int)pow(++off, 2)) % size;
    }

    return NULL;
}

unsigned int hash_size(hash *h)
{
    return h->records_count;
}

阅读(4116) | 评论(0) | 转发(0) |

上一篇：Hash Functions

下一篇：inet_ntoa函数的问题

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6