散列是数组存储方式的一种发展,相比数组,散列的数据访问速度要高于数组,因为可以依据存储数据的部分内容找到数据在数组中的存储位置,进而能够快速实现数据的访问,理想的散列访问速度是非常迅速的,而不像在数组中的遍历过程,采用存储数组中内容的部分元素作为映射函数的输入,映射函数的输出就是存储数据的位置,这样的访问速度就省去了遍历数组的实现,因此时间复杂度可以认为为O(1),而数组遍历的时间复杂度为O(n)。
散列是能一种快速实现访问的存储方式。通常作为检索部分的数据项是整形或者字符串,当是字符串时,字符串的数量要远远大于数组的长度,这时候就会有多个字符串映射到一个存储位置的情况,这就是所谓的冲突问题,而且冲突时肯定存在的,这时候如何实现数据的存储又是需要解决的。
目前主要的解决方式有两大类,第一种采用链表的形式,将所有冲突的数据项采用链表的形式链接起来,这样搜索数据的复杂度就包含了链表的遍历问题,特别是当所有的项都链接到一个链表下时,这时候实际上就是遍历链表,复杂度并不一定有很大的进步,但是这种链表链接的方式有很高的填充率。第二种就是充分利用没有实现的存储空间,利用探测法探测空闲的空间,进而实现数据的存储,目前有三种探测方式:线性探测法、平方探测法,以及双散列法,三种方式中平方探测法运用比较多,但是都存在各种各样的优缺点,这时候的散列搜索优势就没有理想情况下那么明显。有时候甚至比遍历数组更加的慢。但是确实不失为一种处理方式。
映射函数可选择的比较多,其实完全可以定义自己的映射函数,但是有时候为了降低冲突的概率设置了一些比较好的映射函数,比如求和取余,或者乘以一定的系数再求和取余等。
本文采用平方探测法解决了冲突问题,具体的实现如下所示:
1、结构体定义
- #ifndef __HASHMAP_H_H_
- #define __HASHMAP_H_H_
- #include "list.h"
- #define TABSIZE 101
- /*状态变量*/
- typedef enum STATE{EMPTY = 0, ACTIVE = 1, DELETED = 2} State;
- /*键值结构体*/
- typedef struct _pair
- {
- char *key;
- char *value;
- }Pair_t, *Pair_handle_t;
- /*每一个实际的存储对象*/
- typedef struct _hashEntry
- {
- Pair_handle_t pair;
- State state;
- }HashEntry_t, *HashEntry_handle_t;
- /*哈希表结构体,便于创建*/
- typedef struct _hashmap
- {
- HashEntry_t *map;
- /*存储实际的存储量*/
- int size;
- /*容量*/
- int capacity;
- }Hashmap_t, *Hashmap_handle_t;
- /*隐射函数类型定义*/
- typedef int(*hashfunc)(const char *, int);
- #ifdef __cplusplus
- extern "C"
- {
- #endif
- bool alloc_hashmap(Hashmap_handle_t *hashmap, int capacity);
- bool init_hashmap(Hashmap_handle_t hashmap, int capacity);
- bool insert_hashnode(Hashmap_handle_t hashmap, const char *key, const char *value);
- Pair_handle_t search_hashnode(Hashmap_handle_t hashmap, const char *key);
- char *GetValue(Hashmap_handle_t hashmap, const char *key);
- bool delete_hashnode(Hashmap_handle_t hashmap, const char *key);
- int Length(Hashmap_handle_t hashmap);
- int Capacity(Hashmap_handle_t hashmap);
- void delete_hashmap(Hashmap_handle_t hashmap);
- void free_hashmap(Hashmap_handle_t *hashmap);
- char *key_pair(Pair_handle_t pair);
- char *value_pair(Pair_handle_t pair);
- Hashmap_handle_t copy_hashmap(Hashmap_handle_t hashmap);
- bool resize(Hashmap_handle_t hashmap);
- #ifdef __cplusplus
- }
- #endif
- #endif
实现表的分配和创建,采用了动态分配的方式实现,这样可能在性能上比不上静态数据,但是为了实现数组大小的调整,我选择了动态分配的实现方式。
- /*分配一个新的对象,可以实现自动分配*/
- bool alloc_hashmap(Hashmap_handle_t *hashmap, int capacity)
- {
- HashEntry_handle_t temp = NULL;
- Hashmap_t * map = NULL;
- if(*hashmap == NULL)
- {
- /*分配一个散列对象*/
- map = (Hashmap_handle_t)malloc(sizeof(Hashmap_t));
- if(map == NULL)
- return false;
- /*指针指向当前对象*/
- *hashmap = map;
- map = NULL;
- /*分配一个数组空间,大小可以控制*/
- temp = (HashEntry_handle_t)malloc(
- sizeof(HashEntry_t)*capacity);
- if(temp != NULL)
- {
- /*散列对象的指针指向数组*/
- (*hashmap)->map = temp;
- temp = NULL;
- /*设置参数*/
- (*hashmap)->capacity = capacity;
- (*hashmap)->size = 0;
- /*初始化分配的数组空间*/
- Tabinital((*hashmap)->map,capacity);
- return true;
- }
- }
- return false;
- }
- /*初始化一个新的对象,这个对象已经创建,只是没有初始化而已*/
- bool init_hashmap(Hashmap_handle_t hashmap, int capacity)
- {
- HashEntry_handle_t temp = NULL;
-
- if(hashmap != NULL)
- {
- /*分配数组空间*/
- temp = (HashEntry_handle_t)malloc(
- sizeof(HashEntry_t)*capacity);
- if(temp != NULL)
- {
- /*完成对象的填充操作*/
- hashmap->map = temp;
- temp = NULL;
- hashmap->capacity = capacity;
- hashmap->size = 0;
- /*初始化数组对象*/
- Tabinital(hashmap->map,capacity);
- return true;
- }
- }
- return false;
- }
关于数组中对象的创建,和释放操作,如下所示:
- /*分配一个pair对象*/
- static bool make_pair(Pair_handle_t *pair, const char *key, const char *value)
- {
- Pair_handle_t newpair =(Pair_handle_t)malloc(sizeof(Pair_t));
- char *newstr = NULL;
- if(newpair == NULL)
- return false;
-
- newstr = (char *)malloc(strlen(key) + 1);
- if(newstr == NULL)
- return false;
- strcpy(newstr, key);
- newstr[strlen(key)] = '\0';
- newpair->key = newstr;
- newstr = NULL;
- newstr = (char *)malloc(strlen(value) + 1);
- if(newstr == NULL)
- return false;
- strcpy(newstr, value);
- newstr[strlen(value)] = '\0';
- newpair->value = newstr;
- newstr = NULL;
-
- (*pair) = newpair;
- return true;
- }
- /*释放一个对象pair*/
- static void delete_pair(Pair_handle_t *pair)
- {
- Pair_handle_t temp = NULL;
- if(*pair == NULL)
- return ;
- temp = *pair;
- free(temp->key);
- temp->key = NULL;
- free(temp->value);
- temp->value = NULL;
- free(temp);
- temp = NULL;
- *pair = NULL;
- }
数组元素的基本操作:
- /*完成数组对象的初始化操作*/
- static void Tabinital(HashEntry_t *tab, int size)
- {
- int i = 0;
- for(; i < size; ++ i)
- {
- tab[i].pair = NULL;
- tab[i].state = EMPTY;
- }
- }
- static void delete_array(HashEntry_handle_t *array, int size)
- {
- int i = 0;
- if(*array != NULL)
- {
- for(i = 0; i < size; ++ i)
- {
- if((*array)[i].state == ACTIVE)
- {
- delete_pair(&((*array)[i].pair));
- (*array)[i].state = DELETED;
- }
- }
- free(*array);
- *array = NULL;
- }
- }
插入元素的操作、有两个函数的创建,其中一个为了便于后期大小的调整操作。
- /*插入数据到散列中,采用了二次探测的实现方式,并设置了退出条件*/
- static bool insert_data(Hashmap_handle_t hashmap,
- const char *key, const char *value, hashfunc func)
- {
- int hashval = func(key,hashmap->capacity);
- int index = 0;
- char * newstr = NULL;
- Pair_handle_t newpair = NULL;
- while(hashmap->map[hashval].state != EMPTY)
- {
- if((hashmap->map[hashval].state == ACTIVE)
- && (strcmp(hashmap->map[hashval].pair->key,key) == 0))
- break;
- index ++;
- hashval += index * index;
- hashval %= hashmap->capacity;
- if(index == 200)
- break;
- }
-
- if(hashmap->map[hashval].state == EMPTY)
- {
- if(make_pair(&newpair,key,value))
- {
- hashmap->map[hashval].state = ACTIVE;
- hashmap->map[hashval].pair = newpair;
- newpair = NULL;
- hashmap->size ++;
- return true;
- }
- }
- else if((hashmap->map[hashval].state == ACTIVE)
- && (strcmp(hashmap->map[hashval].pair->key, key) == 0))
- {
- newstr = (char *)malloc(strlen(value) + 1);
- if(newstr != NULL)
- {
- strcpy(newstr,value);
- newstr[strlen(value)] = '\0';
- free(hashmap->map[hashval].pair->value);
- hashmap->map[hashval].pair->value = newstr;
- newstr = NULL;
- return true;
- }
- }
- return false;
- }
- static bool insert2map(HashEntry_handle_t map,
- const char *key, const char *value,
- hashfunc func, int size)
- {
- int hashval = func(key,size);
- int index = 0;
- char *newstr = NULL;
- Pair_handle_t newpair = NULL;
- if(map != NULL)
- {
- while(map[hashval].state != EMPTY)
- {
- if((map[hashval].state == ACTIVE)
- && (strcmp(map[hashval].pair->key, key) == 0))
- break;
-
- index ++;
- hashval += index * index;
- hashval %= size;
- /*防止死循环*/
- if(index == 200)
- break;
- }
-
- if(map[hashval].state == EMPTY)
- {
- if(!make_pair(&newpair,key,value))
- return false;
- map[hashval].pair = newpair;
- map[hashval].state = ACTIVE;
- newpair = NULL;
- return true;
- }
- else if((map[hashval].state == ACTIVE) &&
- (strcmp(map[hashval].pair->key, key) == 0))
- {
- newstr = (char *)malloc(strlen(value) +1);
- if(newstr != NULL)
- {
- free(map[hashval].pair->value);
- map[hashval].pair->value = NULL;
- strcpy(newstr, value);
- newstr[strlen(value)] = '\0';
- map[hashval].pair->value = newstr;
- return true;
- }
- }
- }
- return false;
- }
调整大小和插入的实际操作。
- bool resize(Hashmap_handle_t hashmap)
- {
- int i = 0;
- HashEntry_handle_t newarray = NULL;
- int size = next_prime(2*hashmap->capacity);
- if(hashmap != NULL)
- {
- newarray = (HashEntry_handle_t)malloc(sizeof(HashEntry_t)
- *size);
- if(newarray == NULL)
- return false;
- /*这一步至关重要*/
- Tabinital(newarray, size);
- for(; i < hashmap->capacity ; ++ i)
- {
- if(hashmap->map[i].state == ACTIVE)
- {
- if(!insert2map(newarray,
- hashmap->map[i].pair->key,
- hashmap->map[i].pair->value,
- HashFunc, size))
- return false;
- }
- }
- delete_array(&hashmap->map,hashmap->capacity);
- hashmap->map = newarray;
- hashmap->capacity = size;
- return true;
- }
- return false;
- }
- bool insert_hashnode(Hashmap_handle_t hashmap,
- const char *key, const char *value)
- {
- if(hashmap->size < hashmap->capacity)
- {
- if(insert_data(hashmap,key,value,HashFunc))
- {
- //hashmap->size ++;
- }
-
- return true;
- }
- else /*增加容量*/
- {
- if(!resize(hashmap))
- return false;
- if(insert_data(hashmap,key,value,HashFunc))
- {
- //hashmap->size ++;
- }
- return true;
- }
- return false;
- }
搜索操作
- static Pair_handle_t search_data(Hashmap_handle_t hashmap, const char *key, hashfunc func)
- {
- int hashval = func(key, hashmap->capacity);
- int index = 0;
-
- while(hashmap->map[hashval].state != EMPTY)
- {
- if((hashmap->map[hashval].state == ACTIVE)
- && (strcmp(hashmap->map[hashval].pair->key,key) == 0))
- break;
- index ++;
- hashval += index * index;
- hashval %= hashmap->capacity;
- if(index == 200)
- break;
- }
- if((hashmap->map[hashval].state == ACTIVE)
- && (strcmp(hashmap->map[hashval].pair->key, key) == 0))
- {
- return hashmap->map[hashval].pair;
- }
-
- return NULL;
- }
- Pair_handle_t search_hashnode(Hashmap_handle_t hashmap, const char * key)
- {
- return search_data(hashmap,key,HashFunc);
- }
删除操作
- static bool delete_node(Hashmap_handle_t hashmap,const char *key,hashfunc func)
- {
- int hashval = func(key, hashmap->capacity);
- int index = 0;
-
- /**********************************************
- *退出循环的条件是找到空闲的单元,或者关键字匹配
- *********************************************/
- while(hashmap->map[hashval].state != EMPTY)
- {
- if((hashmap->map[hashval].state == ACTIVE)
- && strcmp(hashmap->map[hashval].pair->key,key) == 0)
- break;
- index ++;
- hashval += index * index;
- hashval %= hashmap->capacity;
-
- if(index == 200)
- break;
- }
- /*判断删除条件*/
- if((hashmap->map[hashval].state == ACTIVE) &&
- (strcmp(hashmap->map[hashval].pair->key, key) == 0))
- {
- hashmap->map[hashval].state = DELETED;
- delete_pair(&(hashmap->map[hashval].pair));
- hashmap->map[hashval].pair = NULL;
-
- return true;
- }
- return false;
- }
- bool delete_hashnode(Hashmap_handle_t hashmap, const char *key)
- {
- if(delete_node(hashmap, key, HashFunc))
- {
- hashmap->size --;
- return true;
- }
- return false;
- }
参数获取函数;
- int Length(Hashmap_t *map)
- {
- return map->size;
- }
- int Capacity(Hashmap_t *map)
- {
- return map->capacity;
- }
- void delete_hashmap(Hashmap_handle_t hashmap)
- {
- int i = 0;
- int size = hashmap->capacity;
- if(hashmap != NULL)
- {
- for(; i < size; ++ i)
- {
- if(hashmap->map[i].state == ACTIVE)
- {
- delete_pair(&(hashmap->map[i].pair));
- hashmap->map[i].state = DELETED;
- hashmap->map[i].pair = NULL;
- hashmap->size --;
- }
- }
- free(hashmap->map);
- hashmap->map = NULL;
- }
- }
- void free_hashmap(Hashmap_handle_t *hashmap)
- {
- delete_hashmap(*hashmap);
- free(*hashmap);
- *hashmap = NULL;
- }
- char *key_pair(Pair_handle_t pair)
- {
- if(pair != NULL)
- {
- return pair->key;
- }
- return NULL;
- }
- char *value_pair(Pair_handle_t pair)
- {
- if(pair != NULL)
- {
- return pair->value;
- }
- return NULL;
- }
- /*复制散列操作,相当于创建了一个新的散列对象,而不是指针复制*/
- Hashmap_handle_t copy_hashmap(Hashmap_handle_t hashmap)
- {
- Hashmap_handle_t newhashmap = NULL;
- int i = 0;
- if(hashmap == NULL)
- return NULL;
-
- /*采用动态分配的方式实现*/
- if(!alloc_hashmap(&newhashmap,hashmap->capacity))
- return NULL;
-
- for(; i < hashmap->capacity ; ++ i)
- {
- if(hashmap->map[i].state == ACTIVE)
- {
- /*得到当前中的值实现插入操作*/
- insert_hashnode(newhashmap,
- hashmap->map[i].pair->key,
- hashmap->map[i].pair->value);
- }
- else if(hashmap->map[i].state == DELETED)
- {
- newhashmap->map[i].state = DELETED;
- }
- }
- return newhashmap;
- }
测试函数:
- #include "hashmap.h"
- #include <time.h>
- #define CAPACITY 13
- char *keys[] = {
- "abcd",
- "defh",
- "abcd",
- "12345",
- "a1b2c3",
- "12345",
- "a1b2c3",
- "23456",
- "hijhk",
- "test1",
- "test1",
- "789123",
- "Input",
- };
- char *values[] = {
- "abcd",
- "defh",
- "abcd",
- "12345",
- "a1b2c3",
- "12345",
- "a1b2c3",
- "23456",
- "hijhk",
- "test1",
- "test1",
- "789123",
- "Input",
- };
- int myhashFunc(const char *key, int Tabsize)
- {
- int hashVal = 0;
- int i = 0;
- int len = strlen(key);
- for(; i < len ; ++ i )
- hashVal += 37 *hashVal + key[i];
-
- hashVal %= Tabsize;
- if(hashVal < 0)
- hashVal += Tabsize;
- return hashVal;
- }
-
- int main()
- {
- int i = 0;
- char str1[13];
- char str2[13];
- Hashmap_t mymap ;
- Hashmap_handle_t map = NULL;
- Hashmap_handle_t doubmap = NULL;
-
- Pair_handle_t pair;
- /*静态分配*/
- srand((int)time(0));
- printf("init and alloc test:\n");
- if(!init_hashmap(&mymap,13))
- {
- return false;
- }
- /*动态分配*/
- if(!alloc_hashmap(&map,13))
- {
- return false;
- }
- printf("Sucessed!\n");
- /*插入测试*/
- printf("insert test:\n");
- for(i = 0; i < CAPACITY + 10; ++ i)
- {
- sprintf(str1,"%d%d",rand()%10+1,rand()%10+1);
- sprintf(str2,"%d%d",rand()%10+1,rand()%10+1);
-
- printf("%s->-f(%s)->%d->%s\n",str1,str1,
- myhashFunc(str1,CAPACITY),str2);
- // sprintf(str1,"%s",keys[i]);
- // sprintf(str2,"%s",values[i]);
- if(!insert_hashnode(&mymap,str1,str2))
- {
- printf("i = %d, insert to mymap failed\n", i);
- break;
- }
- if(!insert_hashnode(map,str1,str2))
- {
- printf("i = %d, insert to map failed\n", i);
- break;
- }
- }
- printf("Sucessed!\n");
- /*查找测试*/
- printf("search test:\n");
- if((pair = search_hashnode(&mymap,str1)) != NULL)
- {
- printf("%s->%s\n",key_pair(pair),value_pair(pair));
- }
- if((pair = search_hashnode(map,str1)) != NULL)
- {
- printf("%s->%s\n",key_pair(pair),value_pair(pair));
- }
- printf("Sucessed!\n");
-
- /*delete*/
- printf("delete test:\n");
- if(delete_hashnode(&mymap,str1))
- {
- printf("Deleted success!!\n");
- }
- else
- {
- printf("Sorry, Failed!!\n");
- }
- if(delete_hashnode(map,str1))
- {
- printf("Deleted success!!\n");
- }
- else
- {
- printf("Sorry, Failed!!\n");
- }
-
- printf("Valid length : %d, Capacity : %d\n",
- Length(&mymap),Capacity(&mymap));
- printf("Valid length : %d, Capacity : %d\n",
- Length(map),Capacity(map));
-
- /*改变长度*/
- printf("resize test:\n");
- if(resize(&mymap))
- printf("Sucessed!\n");
- if(resize(map))
- printf("Sucessed!\n");
- /*长度*/
- printf("Valid length : %d, Capacity : %d\n",
- Length(&mymap),Capacity(&mymap));
- printf("Valid length : %d, Capacity : %d\n",
- Length(map),Capacity(map));
- printf("Valid length : %d, Capacity : %d\n",
- Length(&mymap),Capacity(&mymap));
- printf("Valid length : %d, Capacity : %d\n",
- Length(map),Capacity(map));
-
- printf("copy test:\n");
- doubmap = copy_hashmap(&mymap);
- printf("Valid length : %d, Capacity : %d\n",
- Length(doubmap),Capacity(doubmap));
- printf("Sucessed!\n");
-
- /*释放内存*/
- printf("free test:\n");
- delete_hashmap(&mymap);
- free_hashmap(&map);
- free_hashmap(&doubmap);
- printf("Valid length : %d, Capacity : %d\n",
- Length(&mymap),Capacity(&mymap));
- printf("Sucessed!\n");
-
- return 0;
- }
测试结果:
- [gong@Gong-Computer newversion]$ ./main
- init and alloc test:
- insert test:
- 48->-f(48)->4->49
- 108->-f(108)->5->910
- 78->-f(78)->1->98
- 87->-f(87)->12->73
- 36->-f(36)->3->109
- 59->-f(59)->4->98
- 32->-f(32)->12->48
- 210->-f(210)->10->91
- 105->-f(105)->2->22
- 41->-f(41)->10->82
- 1010->-f(1010)->11->69
- 19->-f(19)->8->64
- 25->-f(25)->3->45
- 28->-f(28)->6->104
- 16->-f(16)->5->83
- 44->-f(44)->0->86
- 85->-f(85)->10->72
- 51->-f(51)->9->27
- 54->-f(54)->12->57
- 107->-f(107)->4->210
- 73->-f(73)->9->27
- 1010->-f(1010)->11->61
- 63->-f(63)->10->63
- search test:
- 63->63
- 63->63
- delete test:
- Deleted
- Deleted
- Valid length : 21, Capacity : 29
- Valid length : 21, Capacity : 29
- resize test:
- Valid length : 21, Capacity : 59
- Valid length : 21, Capacity : 59
- Valid length : 21, Capacity : 59
- Valid length : 21, Capacity : 59
- copy test:
- Valid length : 21, Capacity : 59
- free test:
- Valid length : 0, Capacity : 59
从实验效果可知,基本上实现了散列的基本操作。
阅读(4668) | 评论(0) | 转发(3) |