1 /*-----------------------------------------------------------------------------
3 * (c) The AQUA Project, Glasgow University, 1995-1998
4 * (c) The GHC Team, 1999
6 * Dynamically expanding linear hash tables, as described in
7 * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
9 * -------------------------------------------------------------------------- */
11 #include "PosixSource.h"
19 #define HSEGSIZE 1024 /* Size of a single hash table segment */
20 /* Also the minimum size of a hash table */
21 #define HDIRSIZE 1024 /* Size of the segment directory */
22 /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
23 #define HLOAD 5 /* Maximum average load of a single hash bucket */
25 #define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
26 /* Number of HashList cells to allocate in one go */
29 /* Linked list of (key, data) pairs for separate chaining */
30 typedef struct hashlist {
33 struct hashlist *next; /* Next cell in bucket chain (same hash value) */
36 typedef struct chunklist {
38 struct chunklist *next;
42 int split; /* Next bucket to split when expanding */
43 int max; /* Max bucket of smaller table */
44 int mask1; /* Mask for doing the mod of h_1 (smaller table) */
45 int mask2; /* Mask for doing the mod of h_2 (larger table) */
46 int kcount; /* Number of keys */
47 int bcount; /* Number of buckets */
48 HashList **dir[HDIRSIZE]; /* Directory of segments */
49 HashList *freeList; /* free list of HashLists */
50 HashListChunk *chunks;
51 HashFunction *hash; /* hash function */
52 CompareFunction *compare; /* key comparison function */
55 /* -----------------------------------------------------------------------------
56 * Hash first using the smaller table. If the bucket is less than the
57 * next bucket to be split, re-hash using the larger table.
58 * -------------------------------------------------------------------------- */
61 hashWord(HashTable *table, StgWord key)
65 /* Strip the boring zero bits */
66 key /= sizeof(StgWord);
68 /* Mod the size of the hash table (a power of 2) */
69 bucket = key & table->mask1;
71 if (bucket < table->split) {
72 /* Mod the size of the expanded hash table (also a power of 2) */
73 bucket = key & table->mask2;
79 hashStr(HashTable *table, char *key)
88 h = h % 1048583; /* some random large prime */
91 /* Mod the size of the hash table (a power of 2) */
92 bucket = h & table->mask1;
94 if (bucket < table->split) {
95 /* Mod the size of the expanded hash table (also a power of 2) */
96 bucket = h & table->mask2;
103 compareWord(StgWord key1, StgWord key2)
105 return (key1 == key2);
109 compareStr(StgWord key1, StgWord key2)
111 return (strcmp((char *)key1, (char *)key2) == 0);
115 /* -----------------------------------------------------------------------------
116 * Allocate a new segment of the dynamically growing hash table.
117 * -------------------------------------------------------------------------- */
120 allocSegment(HashTable *table, int segment)
122 table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
127 /* -----------------------------------------------------------------------------
128 * Expand the larger hash table by one bucket, and split one bucket
129 * from the smaller table into two parts. Only the bucket referenced
130 * by @table->split@ is affected by the expansion.
131 * -------------------------------------------------------------------------- */
134 expand(HashTable *table)
145 if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
146 /* Wow! That's big. Too big, so don't expand. */
149 /* Calculate indices of bucket to split */
150 oldsegment = table->split / HSEGSIZE;
151 oldindex = table->split % HSEGSIZE;
153 newbucket = table->max + table->split;
155 /* And the indices of the new bucket */
156 newsegment = newbucket / HSEGSIZE;
157 newindex = newbucket % HSEGSIZE;
160 allocSegment(table, newsegment);
162 if (++table->split == table->max) {
165 table->mask1 = table->mask2;
166 table->mask2 = table->mask2 << 1 | 1;
170 /* Split the bucket, paying no attention to the original order */
173 for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
175 if (table->hash(table, hl->key) == newbucket) {
183 table->dir[oldsegment][oldindex] = old;
184 table->dir[newsegment][newindex] = new;
190 lookupHashTable(HashTable *table, StgWord key)
197 bucket = table->hash(table, key);
198 segment = bucket / HSEGSIZE;
199 index = bucket % HSEGSIZE;
201 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
202 if (table->compare(hl->key, key))
209 /* -----------------------------------------------------------------------------
210 * We allocate the hashlist cells in large chunks to cut down on malloc
211 * overhead. Although we keep a free list of hashlist cells, we make
212 * no effort to actually return the space to the malloc arena.
213 * -------------------------------------------------------------------------- */
216 allocHashList (HashTable *table)
221 if ((hl = table->freeList) != NULL) {
222 table->freeList = hl->next;
224 hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
225 cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
227 cl->next = table->chunks;
230 table->freeList = hl + 1;
231 for (p = table->freeList; p < hl + HCHUNK - 1; p++)
239 freeHashList (HashTable *table, HashList *hl)
241 hl->next = table->freeList;
242 table->freeList = hl;
246 insertHashTable(HashTable *table, StgWord key, void *data)
253 // Disable this assert; sometimes it's useful to be able to
254 // overwrite entries in the hash table.
255 // ASSERT(lookupHashTable(table, key) == NULL);
257 /* When the average load gets too high, we expand the table */
258 if (++table->kcount >= HLOAD * table->bcount)
261 bucket = table->hash(table, key);
262 segment = bucket / HSEGSIZE;
263 index = bucket % HSEGSIZE;
265 hl = allocHashList(table);
269 hl->next = table->dir[segment][index];
270 table->dir[segment][index] = hl;
275 removeHashTable(HashTable *table, StgWord key, void *data)
281 HashList *prev = NULL;
283 bucket = table->hash(table, key);
284 segment = bucket / HSEGSIZE;
285 index = bucket % HSEGSIZE;
287 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
288 if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
290 table->dir[segment][index] = hl->next;
292 prev->next = hl->next;
293 freeHashList(table,hl);
301 ASSERT(data == NULL);
305 /* -----------------------------------------------------------------------------
306 * When we free a hash table, we are also good enough to free the
307 * data part of each (key, data) pair, as long as our caller can tell
309 * -------------------------------------------------------------------------- */
312 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
318 HashListChunk *cl, *cl_next;
320 /* The last bucket with something in it is table->max + table->split - 1 */
321 segment = (table->max + table->split - 1) / HSEGSIZE;
322 index = (table->max + table->split - 1) % HSEGSIZE;
324 while (segment >= 0) {
326 for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
328 if (freeDataFun != NULL)
329 (*freeDataFun)(hl->data);
333 stgFree(table->dir[segment]);
335 index = HSEGSIZE - 1;
337 for (cl = table->chunks; cl != NULL; cl = cl_next) {
345 /* -----------------------------------------------------------------------------
346 * When we initialize a hash table, we set up the first segment as well,
347 * initializing all of the first segment's hash buckets to NULL.
348 * -------------------------------------------------------------------------- */
351 allocHashTable_(HashFunction *hash, CompareFunction *compare)
356 table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
358 allocSegment(table, 0);
360 for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
364 table->max = HSEGSIZE;
365 table->mask1 = HSEGSIZE - 1;
366 table->mask2 = 2 * HSEGSIZE - 1;
368 table->bcount = HSEGSIZE;
369 table->freeList = NULL;
370 table->chunks = NULL;
372 table->compare = compare;
380 return allocHashTable_(hashWord, compareWord);
384 allocStrHashTable(void)
386 return allocHashTable_((HashFunction *)hashStr,
387 (CompareFunction *)compareStr);