哈希表

Youtube上看到一个小哥讲的Hash Table,讲清它的原理和结构之后又带着从零开始实现了构造哈希函数、创建哈希表、增加/删除数据等一系列系统的操作,讲解和示例清晰完整,丝毫不拖泥带水,大赞。

链接

Introduction to Hash Tables

需要科学上网,有强烈欲望想要看的同学可以参考之前这篇ubuntu下服务器搭建的博客,自己配置外网服务器

哈希表

hash
哈希表(Hash Table),是一种数据结构。它通过计算一个关于键值的函数,将所需查询的数据映射到表中的一个位置来访问,这加快了查找的速度。STL中的unordered_map就是哈希表的一种实现(注意:std::map的底层是红黑树,属于有序容器,并不是哈希表)。
键值可以是字符串或者其它较为复杂的类型,下面以字符串为例。若将它们直接存储在数组里,当想要访问它们的时候只能O(n)复杂度的遍历一遍。但是通过构造哈希函数,我们可以首先将这个字符串映射为一个整数类型,将这个整数作为它们所在数组位置的下标,这样当我们想要访问某一个字符串,只需要把它送到哈希函数里得到一个返回的哈希值,也就是它所在数组位置的下标,就能以O(1)的复杂度访问它。
哈希函数的构造方法有很多,但一般我们得不到一个完美的哈希函数,即可能会存在两个字符串它们对应的哈希值相同。为了应对这种冲突,我们可以把数组的每个位置当做链表,存在多值时就链接到链表后面。如上图所示。

代码

边听着小哥的讲解边写的代码,包括了构造哈希函数、创建哈希表、增加/删除数据等操作。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#include <bits/stdc++.h>

using namespace std;

// Number of buckets in the hash table; Hash() reduces every key modulo this
// value, and Init()/PrintTable() iterate over exactly this many slots.
const int tablesize = 40;
// File-scope scratch string. Within this file it is only mentioned inside the
// commented-out interactive loop in main().
string s;
// One node of a bucket's singly linked chain: a (name, drink) record plus a
// link to the next node that hashed to the same bucket.
//
// A default-constructed node carries the sentinel text "empty" in both fields;
// the rest of the file treats a bucket whose head node is named "empty" as
// holding no entries.
struct item{
    item() : name("empty"), drink("empty"), next(NULL) {}

    string name;   // lookup key
    string drink;  // value stored for that key
    item* next;    // following node in the same bucket, or NULL at the tail
};
// The table itself: one pointer per bucket, each pointing at the head node of
// that bucket's singly linked chain. Init() allocates a sentinel head node for
// every slot; most functions below dereference the head without a null check,
// so Init() has to run before they are used.
item* hashTable[tablesize];

// Forward declarations; the definitions follow below in a different order.

// Allocates a sentinel head node for every bucket of hashTable.
void Init();
// Maps a key string to a bucket index.
int Hash(string s);
// Stores a (name, drink) record in the bucket selected by Hash(name).
void AddItem(string name, string drink);
// Returns how many records are stored in bucket idx (0 for an empty bucket).
int NumberOfItemsIndex(int idx);
// Prints, for every bucket, its head record and its record count.
void PrintTable();
// Prints every record stored in bucket idx.
void PrintItemInIndex(int idx);
// Prints the drink stored for name, or a not-found message.
void FindDrink(string name);
// Removes the record stored for name and prints the outcome.
void RemoveItem(string name);

void RemoveItem(string name){
int idx = Hash(name);
item* ptr = hashTable[idx];

//Case 0 - bucket is empty
if(ptr->name == "empty"){
cout << name << " was not found in the hashTable" << endl;
}

//Case 1 - only 1 item contained in bucket and that item has matching name
else if(ptr->name == name && ptr->next == NULL){
ptr->name = "empty";
ptr->drink = "empty";
cout << name << "was removed from the Hash Table." << endl;
}

//Case 2 - match is located in the first item int the bucket but there are
//more items in the bucket
else if(ptr->name == name && ptr->next != NULL){
hashTable[idx] = ptr->next;
delete ptr;
cout << name << " was removed from the Hash Table." << endl;
}

//Case3 - bucket contains item but first item is not a match
else{
item* p1;
item* p2;
p1 = ptr->next;
p2 = ptr;
while(p1 != NULL && p1->name != name){
p1 = p1->next;
p2 = p2->next;
}
//Case 3.1 - no match
if(p1 == NULL){
cout << name << " was not found in the hashTable" << endl;
}
//Case 3.2 - match is found
else{
p2->next = p1->next;
delete p1;
cout << name << " was removed from the Hash Table." << endl;
}
}
}

void FindDrink(string name){
int idx = Hash(name);
bool foundName = false;
string drink;

item* ptr = hashTable[idx];
while(ptr){
if(ptr->name == name){
foundName = true;
drink = ptr->drink;
break;
}
ptr = ptr->next;
}
if(foundName){
cout << name << "'s favorite drink = " << drink << endl;
}
else{
cout << name << "'s info was not found in the hashTable." << endl;
}
}

void PrintItemInIndex(int idx){
item* ptr = hashTable[idx];
if(ptr->name == "empty"){
cout << "index " << idx << " is empty" << endl;
}
else{
cout << "index " << idx << " contains the following item" << endl;
while(ptr){
cout << "--------------------\n";
cout << ptr->name << endl;
cout << ptr->drink << endl;
cout << "--------------------\n";
ptr = ptr->next;
}
}
}

void PrintTable(){
int number;
for(int i = 0;i < tablesize; i++){
number = NumberOfItemsIndex(i);
cout << "--------------------\n";
cout << "index = " << i << endl;
cout << hashTable[i]->name << endl;
cout << hashTable[i]->drink << endl;
cout << "number of items = " << number << endl;
cout << "--------------------\n";

}
}

int NumberOfItemsIndex(int idx){
int count = 0;
if(hashTable[idx]->name == "empty"){
return 0;
}
else{
item* ptr = hashTable[idx];
while(ptr){
count++;
ptr = ptr->next;
}
return count;
}
}

// Maps the key `s` to a bucket index in the range [0, tablesize).
//
// The running value is folded as (value + character) * 17 for every character
// and finally reduced modulo tablesize.
//
// The accumulation is done in unsigned arithmetic on purpose: the value grows
// by a factor of 17 per character, so it exceeds the range of int after only a
// handful of characters. With a signed accumulator that overflow is undefined
// behaviour and in practice produces a negative remainder, i.e. an
// out-of-bounds bucket index. Unsigned arithmetic wraps around in a
// well-defined way and always leaves a non-negative remainder.
// Each character is read as unsigned char so that bytes >= 0x80 are not added
// as negative numbers on platforms where plain char is signed.
// For keys that never overflowed the old signed computation, the returned
// index is unchanged.
int Hash(string s){
    unsigned int hashVal = 0;
    for(string::size_type i = 0; i < s.length(); i++){
        hashVal = (hashVal + static_cast<unsigned char>(s[i])) * 17u;
    }
    return static_cast<int>(hashVal % static_cast<unsigned int>(tablesize));
}

void AddItem(string name, string drink){
int idx = Hash(name);
if(hashTable[idx]->name == "empty"){
hashTable[idx]->name = name;
hashTable[idx]->drink = drink;
}
else{
item* newItem = new item();
newItem->name = name;
newItem->drink = drink;
newItem->next = NULL;

item* ptr = hashTable[idx];
while(ptr->next != NULL){
ptr = ptr->next;
}
ptr->next = newItem;
}
}

void Init(){
for(int i = 0; i < tablesize; i++){
hashTable[i] = new item();
}
}

// Demo driver: initialises the table and inserts a fixed list of
// (name, favourite drink) records. The printing, lookup and removal demos are
// left commented out.
int main(){
#ifndef ONLINE_JUDGE
    //freopen("in.txt", "r", stdin);
#endif
    Init();

    // Records are inserted in exactly this order.
    static const char* const kEntries[][2] = {
        {"Paul", "Locha"},
        {"Kim", "Iced Mocha"},
        {"Emma", "Strawberry Smoothy"},
        {"Annie", "Hot Chocolate"},
        {"Sarah", "Passion Tea"},
        {"Pepper", "Caramel Mocha"},
        {"Mike", "Chai Tea"},
        {"Steve", "Apple Cider"},
        {"Sill", "Root Beer"},
        {"Marie", "Skinny Latte"},
        {"Susan", "Water"},
        {"Joe", "Green Tea"},
    };
    const int entryCount = sizeof(kEntries) / sizeof(kEntries[0]);
    for(int i = 0; i < entryCount; ++i){
        AddItem(kEntries[i][0], kEntries[i][1]);
    }

    //PrintTable();
    //PrintItemInIndex(2);
    //FindDrink("Joe");
    /*
    Interactive removal demo: keeps asking for a name to remove until the
    user types "exit".

    string name;
    while(name != "exit"){
        cout << "Remove ";
        cin >> name;
        if(name != "exit")
            RemoveItem(name);
    }
    */

    return 0;
}