1 /*-----------------------------------------------------------------------------
2 * $Id: Hash.c,v 1.9 2002/07/17 09:21:49 simonmar Exp $
4 * (c) The AQUA Project, Glasgow University, 1995-1998
5 * (c) The GHC Team, 1999
7 * Dynamically expanding linear hash tables, as described in
 * Per-Åke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
10 * -------------------------------------------------------------------------- */
12 #include "PosixSource.h"
/* Tunable parameters of the table geometry and growth policy. */
#define HSEGSIZE 1024 /* Size of a single hash table segment */
/* Also the minimum size of a hash table */
#define HDIRSIZE 1024 /* Size of the segment directory */
/* Maximum hash table size is HSEGSIZE * HDIRSIZE */
#define HLOAD 5 /* Maximum average load of a single hash bucket */
/* Cells are carved HCHUNK at a time out of one malloc'd block of about
 * 1024 words, amortizing allocator overhead (see allocHashList below). */
#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
/* Number of HashList cells to allocate in one go */
/* Linked list of (key, data) pairs for separate chaining */
/* NOTE(review): the "struct hashlist {" opening line and the key/data
 * members are not visible in this excerpt; only the chain link is shown. */
struct hashlist *next; /* Next cell in bucket chain (same hash value) */
typedef struct hashlist HashList;

/* Pluggable per-table operations: a HashFunction maps a key to a bucket
 * index for the given table; a CompareFunction returns non-zero when two
 * keys are equal.  (The typedef of HashTable itself is outside this
 * excerpt.) */
typedef int HashFunction(HashTable *table, StgWord key);
typedef int CompareFunction(StgWord key1, StgWord key2);
/* NOTE(review): the enclosing "struct hashtable {" line is not visible in
 * this excerpt; the fields below are its members.  Together they implement
 * Larson's linear hashing: buckets below @split@ have already been split
 * and are addressed with @mask2@, the rest with @mask1@. */
int split; /* Next bucket to split when expanding */
int max; /* Max bucket of smaller table */
int mask1; /* Mask for doing the mod of h_1 (smaller table) */
int mask2; /* Mask for doing the mod of h_2 (larger table) */
int kcount; /* Number of keys */
int bcount; /* Number of buckets */
HashList **dir[HDIRSIZE]; /* Directory of segments */
HashFunction *hash; /* hash function */
CompareFunction *compare; /* key comparison function */
/* -----------------------------------------------------------------------------
 * Hash first using the smaller table. If the bucket is less than the
 * next bucket to be split, re-hash using the larger table.
 * -------------------------------------------------------------------------- */

/* Hash a word-sized key to a bucket index.
 * NOTE(review): the return type, braces, the declaration of @bucket@ and
 * the final "return bucket;" are not visible in this excerpt. */
hashWord(HashTable *table, StgWord key)
/* Strip the boring zero bits */
/* Keys are presumably word-aligned values (e.g. pointers), so the low
 * bits are always zero -- divide them away before masking.  TODO confirm. */
key /= sizeof(StgWord);
/* Mod the size of the hash table (a power of 2) */
bucket = key & table->mask1;
/* Buckets below table->split have already been split, so their entries
 * must be re-addressed with the larger table's mask. */
if (bucket < table->split) {
/* Mod the size of the expanded hash table (also a power of 2) */
bucket = key & table->mask2;
/* Hash a NUL-terminated string key to a bucket index.
 * NOTE(review): the character-mixing loop that computes @h@, plus the
 * braces and "return bucket;", are not visible in this excerpt. */
hashStr(HashTable *table, char *key)
h = h % 1048583; /* some random large prime */
/* Mod the size of the hash table (a power of 2) */
bucket = h & table->mask1;
/* Same two-level bucket selection as hashWord: already-split buckets
 * are re-addressed with the larger table's mask. */
if (bucket < table->split) {
/* Mod the size of the expanded hash table (also a power of 2) */
bucket = h & table->mask2;
/* Key equality for word keys: non-zero iff the two words are identical.
 * (Return type and braces are outside this excerpt.) */
compareWord(StgWord key1, StgWord key2)
return (key1 == key2);
/* Key equality for string keys: the StgWords actually hold char* values,
 * compared by content with strcmp.  (Return type and braces are outside
 * this excerpt.) */
compareStr(StgWord key1, StgWord key2)
return (strcmp((char *)key1, (char *)key2) == 0);
/* -----------------------------------------------------------------------------
 * Allocate a new segment of the dynamically growing hash table.
 * -------------------------------------------------------------------------- */

/* Allocate storage for directory slot @segment@: HSEGSIZE bucket-chain
 * head pointers.  NOTE(review): stgMallocBytes presumably aborts rather
 * than returning NULL on OOM -- confirm in RtsUtils; the call's second
 * argument and closing lines are outside this excerpt. */
allocSegment(HashTable *table, int segment)
table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
/* -----------------------------------------------------------------------------
 * Expand the larger hash table by one bucket, and split one bucket
 * from the smaller table into two parts. Only the bucket referenced
 * by @table->split@ is affected by the expansion.
 * -------------------------------------------------------------------------- */

/* NOTE(review): local declarations, the early "return", the guard around
 * allocSegment, the split/max reset, and the chain-relinking statements
 * that build the @old@ and @new@ lists are not visible in this excerpt. */
expand(HashTable *table)
/* Refuse to grow beyond the directory's maximum capacity. */
if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
/* Wow! That's big. Too big, so don't expand. */
/* Calculate indices of bucket to split */
oldsegment = table->split / HSEGSIZE;
oldindex = table->split % HSEGSIZE;
/* The new bucket sits one "smaller table" beyond the one being split. */
newbucket = table->max + table->split;
/* And the indices of the new bucket */
newsegment = newbucket / HSEGSIZE;
newindex = newbucket % HSEGSIZE;
allocSegment(table, newsegment);
/* Once every bucket of the smaller table has been split, the larger
 * table takes its place: promote mask2 to mask1 and widen mask2 by one
 * bit.  (The accompanying updates to split/max are outside this excerpt.) */
if (++table->split == table->max) {
table->mask1 = table->mask2;
table->mask2 = table->mask2 << 1 | 1;
/* Split the bucket, paying no attention to the original order */
/* Re-hash every cell of the old chain; each lands either back in the
 * old bucket or in the freshly created one. */
for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
if (table->hash(table, hl->key) == newbucket) {
table->dir[oldsegment][oldindex] = old;
table->dir[newsegment][newindex] = new;
/* Look up @key@ in the table.
 * NOTE(review): the return statements -- presumably hl->data on a hit and
 * NULL on a miss -- are not visible in this excerpt; confirm against the
 * full source. */
lookupHashTable(HashTable *table, StgWord key)
bucket = table->hash(table, key);
/* A bucket index decomposes into a directory segment and an offset. */
segment = bucket / HSEGSIZE;
index = bucket % HSEGSIZE;
/* Walk this bucket's separate-chaining list looking for an equal key. */
for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
if (table->compare(hl->key, key))
/* -----------------------------------------------------------------------------
 * We allocate the hashlist cells in large chunks to cut down on malloc
 * overhead. Although we keep a free list of hashlist cells, we make
 * no effort to actually return the space to the malloc arena.
 * -------------------------------------------------------------------------- */

/* Head of the free list of recycled HashList cells. */
static HashList *freeList = NULL;

/* allocHashList: pop a cell from the free list if one is available;
 * otherwise malloc a chunk of HCHUNK cells, return the first, and thread
 * the remainder onto the free list.  NOTE(review): the function header,
 * the freeList updates and the loop body are not visible in this excerpt. */
if ((hl = freeList) != NULL) {
hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
/* Link the surplus cells of the new chunk into a chain. */
for (p = freeList; p < hl + HCHUNK - 1; p++)
/* Return a cell to the free list for reuse; per the note above, the
 * memory is never handed back to malloc.  (Body outside this excerpt.) */
freeHashList(HashList *hl)
/* Insert a (key, data) pair.  The key must not already be present
 * (debug-asserted below). */
insertHashTable(HashTable *table, StgWord key, void *data)
/* We want no duplicates */
ASSERT(lookupHashTable(table, key) == NULL);
/* When the average load gets too high, we expand the table */
if (++table->kcount >= HLOAD * table->bcount)
bucket = table->hash(table, key);
segment = bucket / HSEGSIZE;
index = bucket % HSEGSIZE;
hl = allocHashList();
/* Prepend the new cell to the bucket's chain.  NOTE(review): the
 * assignments of hl->key and hl->data are not visible in this excerpt. */
hl->next = table->dir[segment][index];
table->dir[segment][index] = hl;
/* Remove the cell matching @key@ -- and, when @data@ is non-NULL, whose
 * data pointer matches @data@ as well -- unlinking it from its chain.
 * NOTE(review): the return statements, the "prev = hl" update and the
 * freeing of the removed cell are not visible in this excerpt. */
removeHashTable(HashTable *table, StgWord key, void *data)
HashList *prev = NULL;
bucket = table->hash(table, key);
segment = bucket / HSEGSIZE;
index = bucket % HSEGSIZE;
for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
/* NULL data acts as a wildcard: match on key alone. */
if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
/* Unlink: either the chain head or an interior cell. */
table->dir[segment][index] = hl->next;
prev->next = hl->next;
/* Fell off the chain without a match; only a data-qualified removal is
 * allowed to miss. */
ASSERT(data == NULL);
/* -----------------------------------------------------------------------------
 * When we free a hash table, we are also good enough to free the
 * data part of each (key, data) pair, as long as our caller can tell
 * -------------------------------------------------------------------------- */

/* Free every chain cell and every segment, invoking @freeDataFun@ (when
 * non-NULL) on each cell's data pointer.  NOTE(review): local
 * declarations, the inner index loop control, and the final free of the
 * table struct itself are not visible in this excerpt. */
freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
/* The last bucket with something in it is table->max + table->split - 1 */
segment = (table->max + table->split - 1) / HSEGSIZE;
index = (table->max + table->split - 1) % HSEGSIZE;
/* Walk segments from the last occupied one back down to segment 0. */
while (segment >= 0) {
for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
if (freeDataFun != NULL)
(*freeDataFun)(hl->data);
/* All chains in this segment are gone; release its bucket array. */
free(table->dir[segment]);
/* Resume at the top bucket of the next (lower) segment. */
index = HSEGSIZE - 1;
/* -----------------------------------------------------------------------------
 * When we initialize a hash table, we set up the first segment as well,
 * initializing all of the first segment's hash buckets to NULL.
 * -------------------------------------------------------------------------- */

/* Create a table that uses the given hash/compare functions, starting
 * with a single segment of HSEGSIZE empty buckets.  NOTE(review): local
 * declarations, the loop body clearing the buckets, the initialization of
 * split/kcount/hash, and the "return table;" are not visible in this
 * excerpt. */
allocHashTable_(HashFunction *hash, CompareFunction *compare)
table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
allocSegment(table, 0);
/* Clear every bucket-chain head in segment 0. */
for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
/* Initially the "smaller" table is the entire (one-segment) table. */
table->max = HSEGSIZE;
table->mask1 = HSEGSIZE - 1;
table->mask2 = 2 * HSEGSIZE - 1;
table->bcount = HSEGSIZE;
table->compare = compare;
/* allocHashTable(): the default table is keyed by StgWords, using
 * hashWord/compareWord.  (The surrounding function header and braces are
 * outside this excerpt.) */
return allocHashTable_(hashWord, compareWord);
/* Create a table keyed by NUL-terminated strings.  The casts adapt the
 * char*-taking hashStr and the StgWord-taking compareStr to the generic
 * function-pointer types.  (Return type and braces are outside this
 * excerpt.) */
allocStrHashTable(void)
return allocHashTable_((HashFunction *)hashStr,
(CompareFunction *)compareStr);