/*-----------------------------------------------------------------------------
 * (c) The AQUA Project, Glasgow University, 1995-1998
 * (c) The GHC Team, 1999
 *
 * Dynamically expanding linear hash tables, as described in
 * Per-Åke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
 * -------------------------------------------------------------------------- */
/* Replaced with ghc/rts/Hash.c in the new RTS */
#define HSEGSIZE    1024    /* Size of a single hash table segment */
                            /* Also the minimum size of a hash table */
#define HDIRSIZE    1024    /* Size of the segment directory */
                            /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
#define HLOAD       5       /* Maximum average load of a single hash bucket */

#define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
                            /* Number of HashList cells to allocate in one go */
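/* For instance, on a platform where sizeof(W_) == 8 and a HashList cell
 * (key, data, next) occupies 24 bytes, HCHUNK works out to 341 cells, i.e.
 * a chunk of roughly 8KB.  These figures are illustrative only and depend
 * on the platform's word and pointer sizes.
 */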
/* Linked list of (key, data) pairs for separate chaining */
    struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
typedef struct hashlist HashList;
    int split;                  /* Next bucket to split when expanding */
    int max;                    /* Max bucket of smaller table */
    int mask1;                  /* Mask for doing the mod of h_1 (smaller table) */
    int mask2;                  /* Mask for doing the mod of h_2 (larger table) */
    int kcount;                 /* Number of keys */
    int bcount;                 /* Number of buckets */
    HashList **dir[HDIRSIZE];   /* Directory of segments */
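    /* Bucket b lives at dir[b / HSEGSIZE][b % HSEGSIZE].  Segments of
     * HSEGSIZE buckets are allocated lazily as the table expands, so only
     * the directory itself has a fixed size.
     */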
/* -----------------------------------------------------------------------------
 * Hash first using the smaller table.  If the bucket is less than the
 * next bucket to be split, re-hash using the larger table.
 * -------------------------------------------------------------------------- */
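/* Worked example (figures are illustrative): with mask1 == 1023,
 * mask2 == 2047 and split == 5, a key that falls into bucket 3 under mask1
 * belongs to a bucket that has already been split, so it is re-hashed with
 * mask2 and ends up in either bucket 3 or bucket 3 + 1024; a key that falls
 * into bucket 700 has not been split yet and stays put.
 */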
hash(HashTable *table, W_ key)
    /* Strip the boring zero bits */
    key /= sizeof(StgWord);

    /* Mod the size of the hash table (a power of 2) */
    bucket = key & table->mask1;

    if (bucket < table->split) {
        /* Mod the size of the expanded hash table (also a power of 2) */
        bucket = key & table->mask2;
/* -----------------------------------------------------------------------------
 * Allocate a new segment of the dynamically growing hash table.
 * -------------------------------------------------------------------------- */
allocSegment(HashTable *table, int segment)
    table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
/* -----------------------------------------------------------------------------
 * Expand the larger hash table by one bucket, and split one bucket
 * from the smaller table into two parts.  Only the bucket referenced
 * by @table->split@ is affected by the expansion.
 * -------------------------------------------------------------------------- */
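/* Illustrative note: while max == 1024 (so mask2 == 2047), a key sitting in
 * bucket `split' stays there after the split if bit 10 of the reduced key
 * (key / sizeof(StgWord)) is clear, and moves to bucket split + 1024 if it
 * is set.  The same pattern holds after every doubling, one bit higher up.
 */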
expand(HashTable *table)
    if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
        /* Wow! That's big. Too big, so don't expand. */

    /* Calculate indices of bucket to split */
    oldsegment = table->split / HSEGSIZE;
    oldindex   = table->split % HSEGSIZE;

    newbucket  = table->max + table->split;

    /* And the indices of the new bucket */
    newsegment = newbucket / HSEGSIZE;
    newindex   = newbucket % HSEGSIZE;

    allocSegment(table, newsegment);

    if (++table->split == table->max) {
        table->mask1 = table->mask2;
        table->mask2 = table->mask2 << 1 | 1;

    /* Split the bucket, paying no attention to the original order */
    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
        if (hash(table, hl->key) == newbucket) {

    table->dir[oldsegment][oldindex] = old;
    table->dir[newsegment][newindex] = new;
lookupHashTable(HashTable *table, StgWord key)
    bucket  = hash(table, key);
    segment = bucket / HSEGSIZE;
    index   = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
/* -----------------------------------------------------------------------------
 * We allocate the hashlist cells in large chunks to cut down on malloc
 * overhead.  Although we keep a free list of hashlist cells, we make
 * no effort to actually return the space to the malloc arena.
 * -------------------------------------------------------------------------- */
static HashList *freeList = NULL;

    if ((hl = freeList) != NULL) {
        hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
        for (p = freeList; p < hl + HCHUNK - 1; p++)
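/* A minimal sketch of the chunk-carving idiom used by allocHashList above,
 * kept out of the build with #if 0.  The Cell type, CHUNKSIZE and allocCell
 * are made-up names for illustration; the real code uses HashList, HCHUNK
 * and the file-local freeList.
 */
#if 0
typedef struct cell { struct cell *next; } Cell;
#define CHUNKSIZE 256

static Cell *cellFreeList = NULL;

static Cell *allocCell(void)
{
    Cell *c, *p;

    if ((c = cellFreeList) != NULL) {
        /* Reuse the head of the free list */
        cellFreeList = c->next;
    } else {
        /* Grab a whole chunk, hand out its first cell and thread the
         * remaining cells onto the free list. */
        c = stgMallocBytes(CHUNKSIZE * sizeof(Cell), "allocCell");
        cellFreeList = c + 1;
        for (p = cellFreeList; p < c + CHUNKSIZE - 1; p++)
            p->next = p + 1;
        p->next = NULL;
    }
    return c;
}
#endif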
freeHashList(HashList *hl)

insertHashTable(HashTable *table, StgWord key, void *data)
    /* We want no duplicates */
    ASSERT(lookupHashTable(table, key) == NULL);

    /* When the average load gets too high, we expand the table */
    if (++table->kcount >= HLOAD * table->bcount)

    bucket  = hash(table, key);
    segment = bucket / HSEGSIZE;
    index   = bucket % HSEGSIZE;

    hl = allocHashList();

    hl->next = table->dir[segment][index];
    table->dir[segment][index] = hl;
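    /* Illustrative note on the growth policy above: with HLOAD == 5, a table
     * with 1024 buckets expands once roughly 5120 keys are present, and from
     * then on about one new bucket is added for every HLOAD further
     * insertions, keeping the average chain length near HLOAD.
     */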
removeHashTable(HashTable *table, StgWord key, void *data)
    HashList *prev = NULL;

    bucket  = hash(table, key);
    segment = bucket / HSEGSIZE;
    index   = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
        if (hl->key == key && (data == NULL || hl->data == data)) {
            table->dir[segment][index] = hl->next;
            prev->next = hl->next;

    ASSERT(data == NULL);
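    /* Note on the semantics: a NULL `data' argument removes whichever pair
     * has the given key, while a non-NULL `data' removes only the pair whose
     * data pointer matches.  The ASSERT above fails only when nothing was
     * removed even though the caller named a specific (key, data) pair.
     */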
/* -----------------------------------------------------------------------------
 * When we free a hash table, we are also good enough to free the
 * data part of each (key, data) pair, as long as our caller can tell
 * us how to free the data (by supplying a non-NULL freeDataFun).
 * -------------------------------------------------------------------------- */
freeHashTable(HashTable *table, void (*freeDataFun)(void *))
    /* The last bucket with something in it is table->max + table->split - 1 */
    segment = (table->max + table->split - 1) / HSEGSIZE;
    index   = (table->max + table->split - 1) % HSEGSIZE;

    while (segment >= 0) {
        for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
            if (freeDataFun != NULL)
                (*freeDataFun)(hl->data);

        free(table->dir[segment]);
        index = HSEGSIZE - 1;
/* -----------------------------------------------------------------------------
 * When we initialize a hash table, we set up the first segment as well,
 * initializing all of the first segment's hash buckets to NULL.
 * -------------------------------------------------------------------------- */
    table = stgMallocBytes(sizeof(HashTable), "allocHashTable");

    allocSegment(table, 0);

    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)

    table->max    = HSEGSIZE;
    table->mask1  = HSEGSIZE - 1;
    table->mask2  = 2 * HSEGSIZE - 1;
    table->bcount = HSEGSIZE;
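/* A hedged usage sketch of the interface implemented in this file, kept out
 * of the build with #if 0.  The exact prototypes are assumed to match the
 * accompanying header (allocHashTable taking no arguments, lookupHashTable
 * and removeHashTable returning the stored data pointer); the key choice and
 * freeing policy below are made up for illustration.
 */
#if 0
static void exampleHashTableUsage(void)
{
    HashTable *t = allocHashTable();
    int *payload = stgMallocBytes(sizeof(int), "exampleHashTableUsage");

    *payload = 42;

    /* Keys are word-sized values, typically addresses, which is why hash()
     * divides them by sizeof(StgWord) first. */
    insertHashTable(t, (StgWord)payload, payload);
    ASSERT(lookupHashTable(t, (StgWord)payload) == payload);

    /* A NULL data argument removes whichever pair has this key. */
    removeHashTable(t, (StgWord)payload, NULL);

    /* A non-NULL function here would also be applied to every data pointer
     * still in the table; NULL frees the table structure only. */
    freeHashTable(t, NULL);
    free(payload);
}
#endif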