% % (c) The AQUA Project, Glasgow University, 1995 % %************************************************************************ %* * \section[Hash.lc]{Dynamic Hash Tables} %* * %************************************************************************ Dynamically expanding linear hash tables, as described in Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988, pp. 446 -- 457. \begin{code} #ifdef PAR /* whole file */ #include "rtsdefs.h" #define HSEGSIZE 1024 /* Size of a single hash table segment */ /* Also the minimum size of a hash table */ #define HDIRSIZE 1024 /* Size of the segment directory */ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */ #define HLOAD 5 /* Maximum average load of a single hash bucket */ #define HCHUNK (1024 * sizeof(W_) / sizeof(HashList)) /* Number of HashList cells to allocate in one go */ \end{code} Fill in the ADTs. \begin{code} /* Linked list of (key, data) pairs for separate chaining */ struct hashlist { StgWord key; void *data; struct hashlist *next; /* Next cell in bucket chain (same hash value) */ }; struct hashtable { int split; /* Next bucket to split when expanding */ int max; /* Max bucket of smaller table */ int mask1; /* Mask for doing the mod of h_1 (smaller table) */ int mask2; /* Mask for doing the mod of h_2 (larger table) */ int kcount; /* Number of keys */ int bcount; /* Number of buckets */ HashList **dir[HDIRSIZE]; /* Directory of segments */ }; \end{code} Hash first using the smaller table. If the bucket is less than the next bucket to be split, re-hash using the larger table. \begin{code} static int hash(HashTable *table, W_ key) { int bucket; /* Strip the boring zero bits */ key /= sizeof(StgWord); /* Mod the size of the hash table (a power of 2) */ bucket = key & table->mask1; if (bucket < table->split) { /* Mod the size of the expanded hash table (also a power of 2) */ bucket = key & table->mask2; } return bucket; } \end{code} Allocate a new segment of the dynamically growing hash table. \begin{code} static void allocSegment(HashTable *table, int segment) { table->dir[segment] = (HashList **) stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment"); } \end{code} Expand the larger hash table by one bucket, and split one bucket from the smaller table into two parts. Only the bucket referenced by @table->split@ is affected by the expansion. \begin{code} static void expand(HashTable *table) { int oldsegment; int oldindex; int newbucket; int newsegment; int newindex; HashList *hl; HashList *next; HashList *old, *new; if (table->split + table->max >= HDIRSIZE * HSEGSIZE) /* Wow! That's big. Too big, so don't expand. */ return; /* Calculate indices of bucket to split */ oldsegment = table->split / HSEGSIZE; oldindex = table->split % HSEGSIZE; newbucket = table->max + table->split; /* And the indices of the new bucket */ newsegment = newbucket / HSEGSIZE; newindex = newbucket % HSEGSIZE; if (newindex == 0) allocSegment(table, newsegment); if (++table->split == table->max) { table->split = 0; table->max *= 2; table->mask1 = table->mask2; table->mask2 = table->mask2 << 1 | 1; } table->bcount++; /* Split the bucket, paying no attention to the original order */ old = new = NULL; for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) { next = hl->next; if (hash(table, hl->key) == newbucket) { hl->next = new; new = hl; } else { hl->next = old; old = hl; } } table->dir[oldsegment][oldindex] = old; table->dir[newsegment][newindex] = new; return; } \end{code} \begin{code} void * lookupHashTable(table, key) HashTable *table; StgWord key; { int bucket; int segment; int index; HashList *hl; bucket = hash(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) if (hl->key == key) return hl->data; /* It's not there */ return NULL; } \end{code} We allocate the hashlist cells in large chunks to cut down on malloc overhead. Although we keep a free list of hashlist cells, we make no effort to actually return the space to the malloc arena. \begin{code} static HashList *freeList = NULL; static HashList * allocHashList(STG_NO_ARGS) { HashList *hl, *p; if ((hl = freeList) != NULL) { freeList = hl->next; } else { hl = (HashList *) stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList"); freeList = hl + 1; for (p = freeList; p < hl + HCHUNK - 1; p++) p->next = p + 1; p->next = NULL; } return hl; } static void freeHashList(HashList *hl) { hl->next = freeList; freeList = hl; } \end{code} \begin{code} void insertHashTable(table, key, data) HashTable *table; StgWord key; void *data; { int bucket; int segment; int index; HashList *hl; #if 0 /* We want no duplicates */ ASSERT(lookupHashTable(table, key) == NULL); #endif /* When the average load gets too high, we expand the table */ if (++table->kcount >= HLOAD * table->bcount) expand(table); bucket = hash(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; hl = allocHashList(); hl->key = key; hl->data = data; hl->next = table->dir[segment][index]; table->dir[segment][index] = hl; } \end{code} \begin{code} void * removeHashTable(table, key, data) HashTable *table; StgWord key; void *data; { int bucket; int segment; int index; HashList *hl; HashList *prev = NULL; bucket = hash(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { if (hl->key == key && (data == NULL || hl->data == data)) { if (prev == NULL) table->dir[segment][index] = hl->next; else prev->next = hl->next; table->kcount--; return hl->data; } prev = hl; } /* It's not there */ ASSERT(data == NULL); return NULL; } \end{code} When we free a hash table, we are also good enough to free the data part of each (key, data) pair, as long as our caller can tell us how to do it. \begin{code} void freeHashTable(table, freeDataFun) HashTable *table; void (*freeDataFun) PROTO((void *)); { long segment; long index; HashList *hl; HashList *next; /* The last bucket with something in it is table->max + table->split - 1 */ segment = (table->max + table->split - 1) / HSEGSIZE; index = (table->max + table->split - 1) % HSEGSIZE; while (segment >= 0) { while (index >= 0) { for (hl = table->dir[segment][index]; hl != NULL; hl = next) { next = hl->next; if (freeDataFun != NULL) (*freeDataFun)(hl->data); freeHashList(hl); } index--; } free(table->dir[segment]); segment--; index = HSEGSIZE - 1; } free(table); } \end{code} When we initialize a hash table, we set up the first segment as well, initializing all of the first segment's hash buckets to NULL. \begin{code} HashTable * allocHashTable(STG_NO_ARGS) { HashTable *table; HashList **hb; table = (HashTable *) stgMallocBytes(sizeof(HashTable),"allocHashTable"); allocSegment(table, 0); for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++) *hb = NULL; table->split = 0; table->max = HSEGSIZE; table->mask1 = HSEGSIZE - 1; table->mask2 = 2 * HSEGSIZE - 1; table->kcount = 0; table->bcount = HSEGSIZE; return table; } #endif /* PAR -- whole file */ \end{code}