X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fruntime%2Fgum%2FHash.lc;fp=ghc%2Fruntime%2Fgum%2FHash.lc;h=0000000000000000000000000000000000000000;hb=438596897ebbe25a07e1c82085cfbc5bdb00f09e;hp=dfd328cda2867c54e372b2df7fadc0e796a2efbe;hpb=967cc47f37cb93a5e2b6df7822c9a646f0428247;p=ghc-hetmet.git

diff --git a/ghc/runtime/gum/Hash.lc b/ghc/runtime/gum/Hash.lc
deleted file mode 100644
index dfd328c..0000000
--- a/ghc/runtime/gum/Hash.lc
+++ /dev/null
@@ -1,356 +0,0 @@
-%
-% (c) The AQUA Project, Glasgow University, 1995
-%
-%************************************************************************
-%*                                                                      *
-\section[Hash.lc]{Dynamic Hash Tables}
-%*                                                                      *
-%************************************************************************
-
-Dynamically expanding linear hash tables, as described in
-Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
-pp. 446 -- 457.
-
-\begin{code}
-#ifdef PAR /* whole file */
-
-#include "rtsdefs.h"
-
-#define HSEGSIZE    1024    /* Size of a single hash table segment */
-                            /* Also the minimum size of a hash table */
-#define HDIRSIZE    1024    /* Size of the segment directory */
-                            /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
-#define HLOAD       5       /* Maximum average load of a single hash bucket */
-
-#define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
-                            /* Number of HashList cells to allocate in one go */
-
-\end{code}
-
-Fill in the ADTs.
-
-\begin{code}
-
-/* Linked list of (key, data) pairs for separate chaining */
-struct hashlist {
-    StgWord key;
-    void *data;
-    struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
-};
-
-struct hashtable {
-    int split;              /* Next bucket to split when expanding */
-    int max;                /* Max bucket of smaller table */
-    int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
-    int mask2;              /* Mask for doing the mod of h_2 (larger table) */
-    int kcount;             /* Number of keys */
-    int bcount;             /* Number of buckets */
-    HashList **dir[HDIRSIZE];   /* Directory of segments */
-};
-
-\end{code}
-
-Hash first using the smaller table.  If the bucket is less than the
-next bucket to be split, re-hash using the larger table.
-
-\begin{code}
-
-static int
-hash(HashTable *table, W_ key)
-{
-    int bucket;
-
-    /* Strip the boring zero bits */
-    key /= sizeof(StgWord);
-
-    /* Mod the size of the hash table (a power of 2) */
-    bucket = key & table->mask1;
-
-    if (bucket < table->split) {
-        /* Mod the size of the expanded hash table (also a power of 2) */
-        bucket = key & table->mask2;
-    }
-    return bucket;
-}
-
-\end{code}
-
-Allocate a new segment of the dynamically growing hash table.
-
-\begin{code}
-
-static void
-allocSegment(HashTable *table, int segment)
-{
-    table->dir[segment] = (HashList **) stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment");
-}
-
-\end{code}
-
-Expand the larger hash table by one bucket, and split one bucket
-from the smaller table into two parts.  Only the bucket referenced
-by @table->split@ is affected by the expansion.
-
-\begin{code}
-
-static void
-expand(HashTable *table)
-{
-    int oldsegment;
-    int oldindex;
-    int newbucket;
-    int newsegment;
-    int newindex;
-    HashList *hl;
-    HashList *next;
-    HashList *old, *new;
-
-    if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
-        /* Wow!  That's big.  Too big, so don't expand. */
-        return;
-
-    /* Calculate indices of bucket to split */
-    oldsegment = table->split / HSEGSIZE;
-    oldindex = table->split % HSEGSIZE;
-
-    newbucket = table->max + table->split;
-
-    /* And the indices of the new bucket */
-    newsegment = newbucket / HSEGSIZE;
-    newindex = newbucket % HSEGSIZE;
-
-    if (newindex == 0)
-        allocSegment(table, newsegment);
-
-    if (++table->split == table->max) {
-        table->split = 0;
-        table->max *= 2;
-        table->mask1 = table->mask2;
-        table->mask2 = table->mask2 << 1 | 1;
-    }
-    table->bcount++;
-
-    /* Split the bucket, paying no attention to the original order */
-
-    old = new = NULL;
-    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
-        next = hl->next;
-        if (hash(table, hl->key) == newbucket) {
-            hl->next = new;
-            new = hl;
-        } else {
-            hl->next = old;
-            old = hl;
-        }
-    }
-    table->dir[oldsegment][oldindex] = old;
-    table->dir[newsegment][newindex] = new;
-
-    return;
-}
-
-\end{code}
-
-\begin{code}
-
-void *
-lookupHashTable(table, key)
-HashTable *table;
-StgWord key;
-{
-    int bucket;
-    int segment;
-    int index;
-    HashList *hl;
-
-    bucket = hash(table, key);
-    segment = bucket / HSEGSIZE;
-    index = bucket % HSEGSIZE;
-
-    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
-        if (hl->key == key)
-            return hl->data;
-
-    /* It's not there */
-    return NULL;
-}
-
-\end{code}
-
-We allocate the hashlist cells in large chunks to cut down on malloc
-overhead.  Although we keep a free list of hashlist cells, we make
-no effort to actually return the space to the malloc arena.
-
-\begin{code}
-
-static HashList *freeList = NULL;
-
-static HashList *
-allocHashList(STG_NO_ARGS)
-{
-    HashList *hl, *p;
-
-    if ((hl = freeList) != NULL) {
-        freeList = hl->next;
-    } else {
-        hl = (HashList *) stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
-
-        freeList = hl + 1;
-        for (p = freeList; p < hl + HCHUNK - 1; p++)
-            p->next = p + 1;
-        p->next = NULL;
-    }
-    return hl;
-}
-
-static void
-freeHashList(HashList *hl)
-{
-    hl->next = freeList;
-    freeList = hl;
-}
-
-\end{code}
-
-\begin{code}
-
-void
-insertHashTable(table, key, data)
-HashTable *table;
-StgWord key;
-void *data;
-{
-    int bucket;
-    int segment;
-    int index;
-    HashList *hl;
-
-#if 0
-    /* We want no duplicates */
-    ASSERT(lookupHashTable(table, key) == NULL);
-#endif
-
-    /* When the average load gets too high, we expand the table */
-    if (++table->kcount >= HLOAD * table->bcount)
-        expand(table);
-
-    bucket = hash(table, key);
-    segment = bucket / HSEGSIZE;
-    index = bucket % HSEGSIZE;
-
-    hl = allocHashList();
-
-    hl->key = key;
-    hl->data = data;
-    hl->next = table->dir[segment][index];
-    table->dir[segment][index] = hl;
-
-}
-
-\end{code}
-
-\begin{code}
-
-void *
-removeHashTable(table, key, data)
-HashTable *table;
-StgWord key;
-void *data;
-{
-    int bucket;
-    int segment;
-    int index;
-    HashList *hl;
-    HashList *prev = NULL;
-
-    bucket = hash(table, key);
-    segment = bucket / HSEGSIZE;
-    index = bucket % HSEGSIZE;
-
-    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
-        if (hl->key == key && (data == NULL || hl->data == data)) {
-            if (prev == NULL)
-                table->dir[segment][index] = hl->next;
-            else
-                prev->next = hl->next;
-            table->kcount--;
-            return hl->data;
-        }
-        prev = hl;
-    }
-
-    /* It's not there */
-    ASSERT(data == NULL);
-    return NULL;
-}
-
-\end{code}
-
-When we free a hash table, we are also good enough to free the
-data part of each (key, data) pair, as long as our caller can tell
-us how to do it.
-
-\begin{code}
-
-void
-freeHashTable(table, freeDataFun)
-  HashTable *table;
-  void (*freeDataFun) PROTO((void *));
-{
-    long segment;
-    long index;
-    HashList *hl;
-    HashList *next;
-
-    /* The last bucket with something in it is table->max + table->split - 1 */
-    segment = (table->max + table->split - 1) / HSEGSIZE;
-    index = (table->max + table->split - 1) % HSEGSIZE;
-
-    while (segment >= 0) {
-        while (index >= 0) {
-            for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
-                next = hl->next;
-                if (freeDataFun != NULL)
-                    (*freeDataFun)(hl->data);
-                freeHashList(hl);
-            }
-            index--;
-        }
-        free(table->dir[segment]);
-        segment--;
-        index = HSEGSIZE - 1;
-    }
-    free(table);
-}
-\end{code}
-
-When we initialize a hash table, we set up the first segment as well,
-initializing all of the first segment's hash buckets to NULL.
-
-\begin{code}
-
-HashTable *
-allocHashTable(STG_NO_ARGS)
-{
-    HashTable *table;
-    HashList **hb;
-
-    table = (HashTable *) stgMallocBytes(sizeof(HashTable),"allocHashTable");
-
-    allocSegment(table, 0);
-
-    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
-        *hb = NULL;
-
-    table->split = 0;
-    table->max = HSEGSIZE;
-    table->mask1 = HSEGSIZE - 1;
-    table->mask2 = 2 * HSEGSIZE - 1;
-    table->kcount = 0;
-    table->bcount = HSEGSIZE;
-
-    return table;
-}
-
-#endif /* PAR -- whole file */
-\end{code}
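
The heart of the deleted file is hash(): a key is first reduced with the
smaller table's mask (h_1), and re-hashed with the larger mask (h_2) only
when its bucket lies below @table->split@, i.e. has already been split.
The standalone C sketch below is an editorial addition, not part of the
file above: the minitable struct, bucket_of(), and all sizes and keys are
invented for illustration, with the RTS types replaced by plain C ones.

\begin{code}
#include <stdio.h>
#include <stddef.h>

/* Invented stand-in for struct hashtable: only the fields that
   drive bucket selection. */
struct minitable {
    unsigned split;   /* next bucket to split */
    unsigned max;     /* number of buckets in the smaller table */
    unsigned mask1;   /* max - 1, for h_1 */
    unsigned mask2;   /* 2 * max - 1, for h_2 */
};

/* Same logic as hash() in the file above, minus the RTS types. */
static unsigned
bucket_of(const struct minitable *t, size_t key)
{
    unsigned bucket;

    key /= sizeof(size_t);          /* strip the boring zero bits */
    bucket = key & t->mask1;        /* h_1: mod the smaller table size */
    if (bucket < t->split)
        bucket = key & t->mask2;    /* h_2: this bucket was already split */
    return bucket;
}

int main(void)
{
    /* An 8-bucket table whose first two buckets have been split. */
    struct minitable t = { 2, 8, 7, 15 };
    size_t keys[] = { 0x1000, 0x1008, 0x1010, 0x1048 };
    int i;

    for (i = 0; i < 4; i++)
        printf("key %#lx -> bucket %u\n",
               (unsigned long) keys[i], bucket_of(&t, keys[i]));
    return 0;
}
\end{code}

On a 64-bit machine this prints buckets 0, 1, 2 and 9: the last key's h_1
bucket (1) has already been split, so h_2 sends it to bucket max + 1 = 9,
exactly the bucket that expand() would have populated.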
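
For context on how this interface was driven, the sketch below exercises
the five entry points the file exported. It is likewise an editorial
illustration: exampleUsage(), the key value, and the payload are
hypothetical, and the code only makes sense inside the old PAR build of
the runtime, where rtsdefs.h supplies HashTable, StgWord, STG_NO_ARGS,
and stgMallocBytes.

\begin{code}
#ifdef PAR
/* Hypothetical caller; not part of the deleted file. */
static void
exampleUsage(STG_NO_ARGS)
{
    HashTable *table;
    int *found;
    static int payload = 42;        /* any word-sized datum */

    table = allocHashTable();
    insertHashTable(table, (StgWord) 0x5008, &payload);

    found = (int *) lookupHashTable(table, (StgWord) 0x5008);
    /* found == &payload; looking up an absent key yields NULL */

    (void) removeHashTable(table, (StgWord) 0x5008, NULL);
    /* NULL data argument: remove whatever entry matches the key */

    freeHashTable(table, NULL);     /* NULL: don't free the data parts */
    (void) found;
}
#endif /* PAR */
\end{code}

Note that removeHashTable returns the evicted data pointer, so a caller
that owns the payloads can reclaim them one at a time on removal, while
freeHashTable's freeDataFun argument covers the bulk case at teardown.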