2 % (c) The AQUA Project, Glasgow University, 1995
4 %************************************************************************
6 \section[Hash.lc]{Dynamic Hash Tables}
8 %************************************************************************
10 Dynamically expanding linear hash tables, as described in
11 Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
15 #ifdef PAR /* whole file */
/* Tuning constants for the dynamically expanding linear hash table. */
#define HSEGSIZE 1024 /* Size of a single hash table segment */
/* Also the minimum size of a hash table */
#define HDIRSIZE 1024 /* Size of the segment directory */
/* Maximum hash table size is HSEGSIZE * HDIRSIZE */
#define HLOAD 5 /* Maximum average load of a single hash bucket */
#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
/* Number of HashList cells to allocate in one go */
/* (sized so that one chunk of cells occupies roughly 1024 machine words) */
/* Linked list of (key, data) pairs for separate chaining */
struct hashlist *next; /* Next cell in bucket chain (same hash value) */
/* State of the linear hash table (Larson's scheme): the table currently
   holds max + split buckets.  Buckets numbered below 'split' have already
   been split and are addressed with the larger mask (mask2); the rest are
   addressed with the smaller mask (mask1).  NOTE(review): the enclosing
   struct declarations are partially elided in this view. */
int split; /* Next bucket to split when expanding */
int max; /* Max bucket of smaller table */
int mask1; /* Mask for doing the mod of h_1 (smaller table) */
int mask2; /* Mask for doing the mod of h_2 (larger table) */
int kcount; /* Number of keys */
int bcount; /* Number of buckets */
HashList **dir[HDIRSIZE]; /* Directory of segments */
Hash first using the smaller table. If the bucket is less than the
next bucket to be split, re-hash using the larger table.
/* Strip the boring zero bits */
/* (key is presumably a word-aligned address, so its low bits carry no
   information — TODO confirm against callers) */
key /= sizeof(StgWord);
/* Mod the size of the hash table (a power of 2) */
bucket = key & table->mask1;
/* A bucket below 'split' has already been split in two; its keys are
   distributed under the larger table's mask. */
if (bucket < table->split) {
/* Mod the size of the expanded hash table (also a power of 2) */
bucket = key & table->mask2;
Allocate a new segment of the dynamically growing hash table.
allocSegment(table, segment)
/* malloc one segment of HSEGSIZE bucket-chain head pointers and install
   it in the directory; on failure report and bail out.  NOTE(review): the
   declaration lines and the post-fprintf error exit are elided here —
   presumably the process exits after reporting. */
if ((table->dir[segment] = (HashList **) malloc(HSEGSIZE * sizeof(HashList *))) == NULL) {
fprintf(stderr, "VM exhausted\n");
Expand the larger hash table by one bucket, and split one bucket
from the smaller table into two parts. Only the bucket referenced
by @table->split@ is affected by the expansion.
/* Refuse to grow past the directory's capacity (HDIRSIZE * HSEGSIZE
   buckets); the early return after this guard is elided in this view. */
if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
/* Wow! That's big. Too big, so don't expand. */
/* Calculate indices of bucket to split */
oldsegment = table->split / HSEGSIZE;
oldindex = table->split % HSEGSIZE;
/* The new bucket lives at the end of the current table */
newbucket = table->max + table->split;
/* And the indices of the new bucket */
newsegment = newbucket / HSEGSIZE;
newindex = newbucket % HSEGSIZE;
/* NOTE(review): presumably guarded by (newindex == 0) so a segment is
   only allocated when the new bucket starts a fresh segment — the guard
   line is elided here; confirm before editing. */
allocSegment(table, newsegment);
/* Once every bucket of the smaller table has been split, the larger
   table becomes the new smaller table: promote the masks (each mask is
   2^k - 1, so doubling appends a 1 bit).  NOTE(review): the reset of
   'split' and doubling of 'max' are elided in this view. */
if (++table->split == table->max) {
table->mask1 = table->mask2;
table->mask2 = table->mask2 << 1 | 1;
/* Split the bucket, paying no attention to the original order */
/* Each cell re-hashes either to its old bucket or to 'newbucket';
   partition the chain accordingly.  NOTE(review): the chain-relinking
   statements inside the loop are elided here. */
for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
if (hash(table, hl->key) == newbucket) {
table->dir[oldsegment][oldindex] = old;
table->dir[newsegment][newindex] = new;
lookupHashTable(table, key)
/* Find the cell with the given key, if any: hash to a bucket, locate
   the bucket's chain through the segment directory, and walk it.
   NOTE(review): declarations, the key-comparison loop body, and the
   return statements are elided in this view. */
bucket = hash(table, key);
segment = bucket / HSEGSIZE;
index = bucket % HSEGSIZE;
for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
We allocate the hashlist cells in large chunks to cut down on malloc
overhead. Although we keep a free list of hashlist cells, we make
no effort to actually return the space to the malloc arena.
static HashList *freeList = NULL;
allocHashList(STG_NO_ARGS)
/* Pop a cell off the free list if one is available... */
if ((hl = freeList) != NULL) {
/* ...otherwise malloc a whole chunk of HCHUNK cells; the first cell is
   handed to the caller and the remainder are threaded onto the free
   list.  NOTE(review): the lines that advance freeList and link p->next
   are elided here; the error exit after fprintf is likewise elided. */
} else if ((hl = (HashList *) malloc(HCHUNK * sizeof(HashList))) != NULL) {
for (p = freeList; p < hl + HCHUNK - 1; p++)
fprintf(stderr, "VM exhausted\n");
insertHashTable(table, key, data)
/* We want no duplicates */
ASSERT(lookupHashTable(table, key) == NULL);
/* When the average load gets too high, we expand the table */
/* NOTE(review): the expansion call guarded by this test (presumably
   expandHashTable(table)) is elided in this view. */
if (++table->kcount >= HLOAD * table->bcount)
bucket = hash(table, key);
segment = bucket / HSEGSIZE;
index = bucket % HSEGSIZE;
hl = allocHashList();
/* Prepend the new cell to the bucket's chain.  NOTE(review): the stores
   of hl->key and hl->data are elided here. */
hl->next = table->dir[segment][index];
table->dir[segment][index] = hl;
removeHashTable(table, key, data)
HashList *prev = NULL;
bucket = hash(table, key);
segment = bucket / HSEGSIZE;
index = bucket % HSEGSIZE;
/* Walk the bucket chain looking for the cell to unlink.  A NULL 'data'
   matches any cell with this key; otherwise both key and data must
   match. */
for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
if (hl->key == key && (data == NULL || hl->data == data)) {
/* Unlink: from the chain head when prev is still NULL, otherwise past
   the predecessor.  NOTE(review): the if/else around these two stores,
   the recycling of the cell, and the returns are elided here; 'prev'
   is presumably updated at the bottom of the loop. */
table->dir[segment][index] = hl->next;
prev->next = hl->next;
/* Fell off the chain without a match: only tolerated when the caller
   asked for any cell with this key (data == NULL). */
ASSERT(data == NULL);
When we free a hash table, we are also good enough to free the
data part of each (key, data) pair, as long as our caller can tell
freeHashTable(table, freeDataFun)
void (*freeDataFun) PROTO((void *));
/* The last bucket with something in it is table->max + table->split - 1 */
segment = (table->max + table->split - 1) / HSEGSIZE;
index = (table->max + table->split - 1) % HSEGSIZE;
/* Walk the buckets backwards, segment by segment: free each cell's data
   through the caller's callback (when supplied), then free the segment
   itself.  NOTE(review): the per-cell free/recycle lines, the index
   decrement, and the final free of the table struct are elided here. */
while (segment >= 0) {
for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
if (freeDataFun != NULL)
(*freeDataFun)(hl->data);
free(table->dir[segment]);
/* Restart at the top bucket of the previous segment; the decrement of
   'segment' is elided in this view. */
index = HSEGSIZE - 1;
When we initialize a hash table, we set up the first segment as well,
initializing all of the first segment's hash buckets to NULL.
allocHashTable(STG_NO_ARGS)
if ((table = (HashTable *) malloc(sizeof(HashTable))) == NULL) {
fprintf(stderr, "VM exhausted\n");
allocSegment(table, 0);
/* Empty every bucket chain in the first segment.  NOTE(review): the
   loop body (*hb = NULL) is elided here. */
for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
/* Start as a table of HSEGSIZE buckets: mask1 addresses the current
   table, mask2 the doubled table used for buckets already split.
   NOTE(review): initialization of split and kcount (presumably 0) and
   the return are elided in this view. */
table->max = HSEGSIZE;
table->mask1 = HSEGSIZE - 1;
table->mask2 = 2 * HSEGSIZE - 1;
table->bcount = HSEGSIZE;
368 #endif /* PAR -- whole file */