--- /dev/null
+%
+% (c) The AQUA Project, Glasgow University, 1995
+%
+%************************************************************************
+%* *
+\section[Hash.lc]{Dynamic Hash Tables}
+%* *
+%************************************************************************
+
+Dynamically expanding linear hash tables, as described in
+Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
+pp. 446 -- 457.
+
+\begin{code}
+#ifdef PAR /* whole file */
+
+#include "rtsdefs.h"
+
+#define HSEGSIZE 1024 /* Size of a single hash table segment */
+ /* Also the minimum size of a hash table */
+#define HDIRSIZE 1024 /* Size of the segment directory */
+ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
+#define HLOAD 5 /* Maximum average load of a single hash bucket */
+
+#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
+ /* Number of HashList cells to allocate in one go */
+
+\end{code}
+
+Fill in the ADTs.
+
+\begin{code}
+
+/* Linked list of (key, data) pairs for separate chaining */
+struct hashlist {
+    StgWord key; /* Search key; word-aligned values assumed (see hash()) */
+    void *data; /* Caller-owned datum; the table never inspects it */
+    struct hashlist *next; /* Next cell in bucket chain (same hash value) */
+};
+
+/* Larson dynamic hash table: buckets live in HSEGSIZE-sized segments
+   reached through a fixed directory, so the table grows one bucket at
+   a time without ever rehashing everything at once */
+struct hashtable {
+    int split; /* Next bucket to split when expanding */
+    int max; /* Max bucket of smaller table */
+    int mask1; /* Mask for doing the mod of h_1 (smaller table) */
+    int mask2; /* Mask for doing the mod of h_2 (larger table) */
+    int kcount; /* Number of keys */
+    int bcount; /* Number of buckets */
+    HashList **dir[HDIRSIZE]; /* Directory of segments */
+};
+
+\end{code}
+
+Hash first using the smaller table. If the bucket is less than the
+next bucket to be split, re-hash using the larger table.
+
+\begin{code}
+
+/* Map a key to its current bucket number.  h_1 (the smaller table's
+   mask) is tried first; buckets below the split point have already
+   been rehashed into the larger table, so those use h_2 instead. */
+static int
+hash(table, key)
+HashTable *table;
+StgWord key;
+{
+    int index;
+
+    /* Discard the always-zero low bits of a word-aligned key */
+    key /= sizeof(StgWord);
+
+    /* h_1: reduce modulo the smaller table size (a power of 2) */
+    index = key & table->mask1;
+
+    /* Already-split buckets are addressed with the larger mask */
+    return (index < table->split) ? (int) (key & table->mask2) : index;
+}
+
+\end{code}
+
+Allocate a new segment of the dynamically growing hash table.
+
+\begin{code}
+
+/* Allocate one directory slot's worth of bucket pointers.
+   Aborts the program if the allocation fails. */
+static void
+allocSegment(table, segment)
+HashTable *table;
+int segment;
+{
+    HashList **seg;
+
+    seg = (HashList **) malloc(HSEGSIZE * sizeof(HashList *));
+    if (seg == NULL) {
+        fflush(stdout);
+        fprintf(stderr, "VM exhausted\n");
+        EXIT(EXIT_FAILURE);
+    }
+    table->dir[segment] = seg;
+}
+
+\end{code}
+
+Expand the larger hash table by one bucket, and split one bucket
+from the smaller table into two parts. Only the bucket referenced
+by @table->split@ is affected by the expansion.
+
+\begin{code}
+
+/* Grow the table by one bucket: bucket table->split of the smaller
+   table is rehashed into itself and one new bucket at the end of the
+   larger table.  Note the order is critical -- split/masks are
+   advanced BEFORE the chain is rehashed, so hash() routes each key
+   either back to the old bucket or to the new one. */
+static void
+expand(table)
+HashTable *table;
+{
+    int oldsegment;
+    int oldindex;
+    int newbucket;
+    int newsegment;
+    int newindex;
+    HashList *hl;
+    HashList *next;
+    HashList *old, *new;
+
+    /* Refuse to grow past the fixed-size segment directory */
+    if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
+        /* Wow! That's big. Too big, so don't expand. */
+        return;
+
+    /* Calculate indices of bucket to split */
+    oldsegment = table->split / HSEGSIZE;
+    oldindex = table->split % HSEGSIZE;
+
+    newbucket = table->max + table->split;
+
+    /* And the indices of the new bucket */
+    newsegment = newbucket / HSEGSIZE;
+    newindex = newbucket % HSEGSIZE;
+
+    /* First bucket of a fresh segment: allocate the segment now */
+    if (newindex == 0)
+        allocSegment(table, newsegment);
+
+    /* Once every bucket of the smaller table has been split, the
+       larger table becomes the smaller one and the masks shift up */
+    if (++table->split == table->max) {
+        table->split = 0;
+        table->max *= 2;
+        table->mask1 = table->mask2;
+        table->mask2 = table->mask2 << 1 | 1;
+    }
+    table->bcount++;
+
+    /* Split the bucket, paying no attention to the original order */
+
+    old = new = NULL;
+    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
+        next = hl->next;
+        if (hash(table, hl->key) == newbucket) {
+            hl->next = new;
+            new = hl;
+        } else {
+            hl->next = old;
+            old = hl;
+        }
+    }
+    table->dir[oldsegment][oldindex] = old;
+    table->dir[newsegment][newindex] = new;
+
+    return;
+}
+
+\end{code}
+
+\begin{code}
+
+/* Look up a key; returns the stored datum, or NULL when the key is
+   absent.  (NULL is therefore not distinguishable as a stored datum.) */
+void *
+lookupHashTable(table, key)
+HashTable *table;
+StgWord key;
+{
+    int bucket;
+    HashList *hl;
+
+    /* Find the head of the chain for this key's bucket */
+    bucket = hash(table, key);
+    hl = table->dir[bucket / HSEGSIZE][bucket % HSEGSIZE];
+
+    /* Walk the chain until the key matches or the chain runs out */
+    while (hl != NULL) {
+        if (hl->key == key)
+            return hl->data;
+        hl = hl->next;
+    }
+
+    /* It's not there */
+    return NULL;
+}
+
+\end{code}
+
+We allocate the hashlist cells in large chunks to cut down on malloc
+overhead. Although we keep a free list of hashlist cells, we make
+no effort to actually return the space to the malloc arena.
+
+\begin{code}
+
+/* Head of the free list of recycled HashList cells */
+static HashList *freeList = NULL;
+
+/* Hand out one HashList cell: pop the free list if it is non-empty,
+   otherwise malloc a chunk of HCHUNK cells, return cell 0 and thread
+   cells 1..HCHUNK-1 onto the free list.  Aborts on malloc failure. */
+static HashList *
+allocHashList(STG_NO_ARGS)
+{
+    HashList *hl, *p;
+
+    if ((hl = freeList) != NULL) {
+        /* Reuse a previously freed cell */
+        freeList = hl->next;
+    } else if ((hl = (HashList *) malloc(HCHUNK * sizeof(HashList))) != NULL) {
+        /* Chain the remainder of the fresh chunk onto the free list */
+        freeList = hl + 1;
+        for (p = freeList; p < hl + HCHUNK - 1; p++)
+            p->next = p + 1;
+        p->next = NULL;
+    } else {
+        fflush(stdout);
+        fprintf(stderr, "VM exhausted\n");
+        EXIT(EXIT_FAILURE);
+    }
+    return hl;
+}
+
+/* Recycle a cell by pushing it onto the free list; its storage is
+   never returned to the malloc arena (see the note above). */
+static void
+freeHashList(hl)
+HashList *hl;
+{
+    hl->next = freeList;
+    freeList = hl;
+}
+
+\end{code}
+
+\begin{code}
+
+/* Insert (key, data).  Duplicate keys are NOT detected (note the
+   disabled ASSERT); the new cell simply goes on the front of its
+   bucket chain.  The table is expanded first whenever the average
+   chain length would exceed HLOAD. */
+void
+insertHashTable(table, key, data)
+HashTable *table;
+StgWord key;
+void *data;
+{
+    int bucket;
+    int segment;
+    int index;
+    HashList *hl;
+
+#if 0
+    /* We want no duplicates */
+    ASSERT(lookupHashTable(table, key) == NULL);
+#endif
+
+    /* When the average load gets too high, we expand the table */
+    if (++table->kcount >= HLOAD * table->bcount)
+        expand(table);
+
+    /* Hash AFTER the possible expansion, which may relocate buckets */
+    bucket = hash(table, key);
+    segment = bucket / HSEGSIZE;
+    index = bucket % HSEGSIZE;
+
+    hl = allocHashList();
+
+    /* Link the new cell onto the front of its chain */
+    hl->key = key;
+    hl->data = data;
+    hl->next = table->dir[segment][index];
+    table->dir[segment][index] = hl;
+
+}
+
+\end{code}
+
+\begin{code}
+
+/* Remove a (key, data) pair and return the removed datum, or NULL if
+   no pair matches.  A NULL data argument matches any datum filed
+   under the key; a non-NULL data must match exactly (and is asserted
+   to be found).  The unlinked cell is recycled onto the free list --
+   previously it was simply leaked, costing one HashList cell per
+   removal. */
+void *
+removeHashTable(table, key, data)
+HashTable *table;
+StgWord key;
+void *data;
+{
+    int bucket;
+    int segment;
+    int index;
+    HashList *hl;
+    HashList *prev = NULL;
+    void *result;
+
+    bucket = hash(table, key);
+    segment = bucket / HSEGSIZE;
+    index = bucket % HSEGSIZE;
+
+    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
+        if (hl->key == key && (data == NULL || hl->data == data)) {
+            /* Unlink the cell from its chain */
+            if (prev == NULL)
+                table->dir[segment][index] = hl->next;
+            else
+                prev->next = hl->next;
+            table->kcount--;
+            /* Save the datum before recycling the cell, since the
+               free list reuses the cell's storage */
+            result = hl->data;
+            freeHashList(hl);
+            return result;
+        }
+        prev = hl;
+    }
+
+    /* It's not there */
+    ASSERT(data == NULL);
+    return NULL;
+}
+
+\end{code}
+
+When we free a hash table, we are also good enough to free the
+data part of each (key, data) pair, as long as our caller can tell
+us how to do it.
+
+\begin{code}
+
+/* Free the whole table: every chain cell goes back onto the cell
+   free list, every segment and the table struct back to malloc.  If
+   freeDataFun is non-NULL it is applied to each stored datum first. */
+void
+freeHashTable(table, freeDataFun)
+HashTable *table;
+void (*freeDataFun) PROTO((void *));
+{
+    long segment;
+    long index;
+    HashList *hl;
+    HashList *next;
+
+    /* The last bucket with something in it is table->max + table->split - 1 */
+    segment = (table->max + table->split - 1) / HSEGSIZE;
+    index = (table->max + table->split - 1) % HSEGSIZE;
+
+    /* Walk buckets from the last down to bucket 0, freeing each
+       segment once all of its buckets have been emptied */
+    while (segment >= 0) {
+        while (index >= 0) {
+            for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
+                next = hl->next;
+                if (freeDataFun != NULL)
+                    (*freeDataFun)(hl->data);
+                freeHashList(hl);
+            }
+            index--;
+        }
+        free(table->dir[segment]);
+        segment--;
+        index = HSEGSIZE - 1;
+    }
+    free(table);
+}
+\end{code}
+
+When we initialize a hash table, we set up the first segment as well,
+initializing all of the first segment's hash buckets to NULL.
+
+\begin{code}
+
+/* Create a table with one segment of HSEGSIZE empty buckets, with
+   the masks set up for an h_1 of HSEGSIZE buckets and an h_2 of
+   twice that.  Aborts the program if malloc fails. */
+HashTable *
+allocHashTable(STG_NO_ARGS)
+{
+    HashTable *table;
+    HashList **hb;
+
+    if ((table = (HashTable *) malloc(sizeof(HashTable))) == NULL) {
+        fflush(stdout);
+        fprintf(stderr, "VM exhausted\n");
+        EXIT(EXIT_FAILURE);
+    }
+    /* Set up the first segment with every bucket chain empty */
+    allocSegment(table, 0);
+    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
+        *hb = NULL;
+    table->split = 0;
+    table->max = HSEGSIZE;
+    table->mask1 = HSEGSIZE - 1;
+    table->mask2 = 2 * HSEGSIZE - 1;
+    table->kcount = 0;
+    table->bcount = HSEGSIZE;
+
+    return table;
+}
+
+#endif /* PAR -- whole file */
+\end{code}