2 % (c) The AQUA Project, Glasgow University, 1995
4 %************************************************************************
6 \section[Hash.lc]{Dynamic Hash Tables}
8 %************************************************************************
10 Dynamically expanding linear hash tables, as described in
11 Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
/* Tunable constants for the dynamically growing hash table.
 * The table is a directory of up to HDIRSIZE segments, each holding
 * HSEGSIZE bucket-chain head pointers, so capacity tops out at
 * HSEGSIZE * HDIRSIZE buckets. */
15 #ifdef PAR /* whole file */
19 #define HSEGSIZE 1024 /* Size of a single hash table segment */
20 /* Also the minimum size of a hash table */
21 #define HDIRSIZE 1024 /* Size of the segment directory */
22 /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
23 #define HLOAD 5 /* Maximum average load of a single hash bucket */
25 #define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
26 /* Number of HashList cells to allocate in one go */
34 /* Linked list of (key, data) pairs for separate chaining */
38 struct hashlist *next; /* Next cell in bucket chain (same hash value) */
/* Linear-hashing bookkeeping (Larson, CACM 31(4), 1988):
 * buckets [0, split) have already been rehashed into the larger
 * table; `max` is the size of the smaller table, and the two masks
 * select the low bits for the smaller/larger power-of-2 table sizes. */
42 int split; /* Next bucket to split when expanding */
43 int max; /* Max bucket of smaller table */
44 int mask1; /* Mask for doing the mod of h_1 (smaller table) */
45 int mask2; /* Mask for doing the mod of h_2 (larger table) */
46 int kcount; /* Number of keys */
47 int bcount; /* Number of buckets */
48 HashList **dir[HDIRSIZE]; /* Directory of segments */
53 Hash first using the smaller table. If the bucket is less than the
54 next bucket to be split, re-hash using the larger table.
/* Map a word-sized key to its current bucket index.
 * First hash with mask1 (the smaller table); if that lands below the
 * split pointer, the bucket has already been divided, so rehash with
 * mask2 (the larger table). */
59 hash(HashTable *table, W_ key)
/* Keys are word-aligned addresses, so the low bits carry no
 * information — discard them before masking. */
63 /* Strip the boring zero bits */
64 key /= sizeof(StgWord);
66 /* Mod the size of the hash table (a power of 2) */
67 bucket = key & table->mask1;
69 if (bucket < table->split) {
70 /* Mod the size of the expanded hash table (also a power of 2) */
71 bucket = key & table->mask2;
78 Allocate a new segment of the dynamically growing hash table.
/* Allocate (uninitialized) storage for one directory segment of
 * HSEGSIZE bucket pointers; stgMallocBytes aborts on failure rather
 * than returning NULL, so no error check is needed here. */
83 allocSegment(HashTable *table, int segment)
85 table->dir[segment] = (HashList **) stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment");
90 Expand the larger hash table by one bucket, and split one bucket
91 from the smaller table into two parts. Only the bucket referenced
92 by @table->split@ is affected by the expansion.
/* Grow the table by one bucket: split the bucket at table->split,
 * rehashing its chain between the old slot and the new bucket
 * (old index + table->max). Only this one chain is disturbed. */
97 expand(HashTable *table)
/* Refuse to grow past the directory's fixed capacity. */
108 if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
109 /* Wow! That's big. Too big, so don't expand. */
112 /* Calculate indices of bucket to split */
113 oldsegment = table->split / HSEGSIZE;
114 oldindex = table->split % HSEGSIZE;
116 newbucket = table->max + table->split;
118 /* And the indices of the new bucket */
119 newsegment = newbucket / HSEGSIZE;
120 newindex = newbucket % HSEGSIZE;
123 allocSegment(table, newsegment);
/* When the split pointer reaches the end of the smaller table,
 * the doubling is complete: promote mask2 to mask1 and widen
 * mask2 by one more bit for the next round. */
125 if (++table->split == table->max) {
128 table->mask1 = table->mask2;
129 table->mask2 = table->mask2 << 1 | 1;
133 /* Split the bucket, paying no attention to the original order */
/* Walk the old chain; each cell re-hashes either to its old bucket
 * or to the single new bucket — rebuild both chains in place. */
136 for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
138 if (hash(table, hl->key) == newbucket) {
146 table->dir[oldsegment][oldindex] = old;
147 table->dir[newsegment][newindex] = new;
/* Look up `key`: hash to a bucket, locate its segment/slot in the
 * directory, then scan the separate chain linearly. */
157 lookupHashTable(table, key)
166 bucket = hash(table, key);
167 segment = bucket / HSEGSIZE;
168 index = bucket % HSEGSIZE;
170 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
180 We allocate the hashlist cells in large chunks to cut down on malloc
181 overhead. Although we keep a free list of hashlist cells, we make
182 no effort to actually return the space to the malloc arena.
/* Head of the free list of recycled HashList cells (see note above:
 * cells are chunk-allocated and never returned to the malloc arena). */
186 static HashList *freeList = NULL;
/* Hand out one HashList cell: pop the free list if non-empty,
 * otherwise malloc a chunk of HCHUNK cells, keep the first, and
 * thread the remaining HCHUNK-1 cells onto the free list. */
189 allocHashList(STG_NO_ARGS)
193 if ((hl = freeList) != NULL) {
196 hl = (HashList *) stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
199 for (p = freeList; p < hl + HCHUNK - 1; p++)
/* Return a single cell to the free list (not to the malloc arena). */
207 freeHashList(HashList *hl)
/* Insert a (key, data) pair, prepending a fresh cell to the bucket
 * chain. The caller must ensure the key is not already present
 * (checked only under ASSERT). */
218 insertHashTable(table, key, data)
229 /* We want no duplicates */
230 ASSERT(lookupHashTable(table, key) == NULL);
233 /* When the average load gets too high, we expand the table */
/* Note: expansion is decided before hashing, so the new pair is
 * placed using the post-expansion bucket layout. */
234 if (++table->kcount >= HLOAD * table->bcount)
237 bucket = hash(table, key);
238 segment = bucket / HSEGSIZE;
239 index = bucket % HSEGSIZE;
241 hl = allocHashList();
/* Push onto the head of the chain — order within a bucket is
 * irrelevant to lookup. */
245 hl->next = table->dir[segment][index];
246 table->dir[segment][index] = hl;
/* Remove the pair matching `key` (and `data`, unless data is NULL,
 * in which case any pair with that key matches). Unlinks the cell
 * from its chain, handling both head and interior positions. */
255 removeHashTable(table, key, data)
264 HashList *prev = NULL;
266 bucket = hash(table, key);
267 segment = bucket / HSEGSIZE;
268 index = bucket % HSEGSIZE;
270 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
271 if (hl->key == key && (data == NULL || hl->data == data)) {
/* prev == NULL means the match is the chain head. */
273 table->dir[segment][index] = hl->next;
275 prev->next = hl->next;
/* Falling out of the loop: a specific (key, data) pair may
 * legitimately be absent, but a bare key lookup should not be. */
283 ASSERT(data == NULL);
289 When we free a hash table, we are also good enough to free the
290 data part of each (key, data) pair, as long as our caller can tell
us how to free each data value.
/* Destroy the table: walk every occupied bucket from the highest one
 * down, release each chain cell, optionally applying freeDataFun to
 * each cell's data, then free each segment's bucket array. */
296 freeHashTable(table, freeDataFun)
298 void (*freeDataFun) PROTO((void *));
305 /* The last bucket with something in it is table->max + table->split - 1 */
306 segment = (table->max + table->split - 1) / HSEGSIZE;
307 index = (table->max + table->split - 1) % HSEGSIZE;
/* Iterate segments from last to first; within each, buckets from
 * `index` down to 0. */
309 while (segment >= 0) {
311 for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
313 if (freeDataFun != NULL)
314 (*freeDataFun)(hl->data);
/* Segment arrays came straight from malloc, so free() them;
 * chain cells go back to the cell free list instead. */
319 free(table->dir[segment]);
321 index = HSEGSIZE - 1;
327 When we initialize a hash table, we set up the first segment as well,
328 initializing all of the first segment's hash buckets to NULL.
/* Create a fresh table of HSEGSIZE buckets: allocate the header and
 * segment 0, NULL out every bucket chain, and initialize the
 * linear-hashing state (masks for table sizes HSEGSIZE and
 * 2*HSEGSIZE, zero keys, HSEGSIZE buckets). */
333 allocHashTable(STG_NO_ARGS)
338 table = (HashTable *) stgMallocBytes(sizeof(HashTable),"allocHashTable");
340 allocSegment(table, 0);
/* Empty all bucket chains in the first (only) segment. */
342 for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
346 table->max = HSEGSIZE;
347 table->mask1 = HSEGSIZE - 1;
348 table->mask2 = 2 * HSEGSIZE - 1;
350 table->bcount = HSEGSIZE;
355 #endif /* PAR -- whole file */