rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  *
   3  * (c) The AQUA Project, Glasgow University, 1995-1998
   4  * (c) The GHC Team, 1999
   5  *
   6  * Dynamically expanding linear hash tables, as described in
   7  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   8  * pp. 446 -- 457.
   9  * -------------------------------------------------------------------------- */
  10
  11 #include "PosixSource.h"
  12 #include "Rts.h"
  13 #include "Hash.h"
  14 #include "RtsUtils.h"
  15
  16 #include <stdlib.h>
  17 #include <string.h>
  18
  19 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  20                             /* Also the minimum size of a hash table */
  21 #define HDIRSIZE    1024    /* Size of the segment directory */
  22                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  23 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  24
  25 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  26                             /* Number of HashList cells to allocate in one go */
  27
  28
  29 /* Linked list of (key, data) pairs for separate chaining */
  30 struct hashlist {
  31     StgWord key;
  32     void *data;
  33     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  34 };
  35
  36 typedef struct hashlist HashList;
  37
  38 typedef int HashFunction(HashTable *table, StgWord key);
  39 typedef int CompareFunction(StgWord key1, StgWord key2);
  40
  41 struct hashtable {
  42     int split;              /* Next bucket to split when expanding */
  43     int max;                /* Max bucket of smaller table */
  44     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  45     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  46     int kcount;             /* Number of keys */
  47     int bcount;             /* Number of buckets */
  48     HashList **dir[HDIRSIZE];   /* Directory of segments */
  49     HashFunction *hash;         /* hash function */
  50     CompareFunction *compare;   /* key comparison function */
  51 };
  52
  53 /* -----------------------------------------------------------------------------
  54  * Hash first using the smaller table.  If the bucket is less than the
  55  * next bucket to be split, re-hash using the larger table.
  56  * -------------------------------------------------------------------------- */
  57
  58 static int
  59 hashWord(HashTable *table, StgWord key)
  60 {
  61     int bucket;
  62
  63     /* Strip the boring zero bits */
  64     key /= sizeof(StgWord);
  65
  66     /* Mod the size of the hash table (a power of 2) */
  67     bucket = key & table->mask1;
  68
  69     if (bucket < table->split) {
  70         /* Mod the size of the expanded hash table (also a power of 2) */
  71         bucket = key & table->mask2;
  72     }
  73     return bucket;
  74 }
  75
  76 static int
  77 hashStr(HashTable *table, char *key)
  78 {
  79     int h, bucket;
  80     char *s;
  81
  82     s = key;
  83     for (h=0; *s; s++) {
  84         h *= 128;
  85         h += *s;
  86         h = h % 1048583;        /* some random large prime */
  87     }
  88
  89     /* Mod the size of the hash table (a power of 2) */
  90     bucket = h & table->mask1;
  91
  92     if (bucket < table->split) {
  93         /* Mod the size of the expanded hash table (also a power of 2) */
  94         bucket = h & table->mask2;
  95     }
  96
  97     return bucket;
  98 }
  99
 100 static int
 101 compareWord(StgWord key1, StgWord key2)
 102 {
 103     return (key1 == key2);
 104 }
 105
 106 static int
 107 compareStr(StgWord key1, StgWord key2)
 108 {
 109     return (strcmp((char *)key1, (char *)key2) == 0);
 110 }
 111
 112
 113 /* -----------------------------------------------------------------------------
 114  * Allocate a new segment of the dynamically growing hash table.
 115  * -------------------------------------------------------------------------- */
 116
 117 static void
 118 allocSegment(HashTable *table, int segment)
 119 {
 120     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
 121                                          "allocSegment");
 122 }
 123
 124
 125 /* -----------------------------------------------------------------------------
 126  * Expand the larger hash table by one bucket, and split one bucket
 127  * from the smaller table into two parts.  Only the bucket referenced
 128  * by @table->split@ is affected by the expansion.
 129  * -------------------------------------------------------------------------- */
 130
 131 static void
 132 expand(HashTable *table)
 133 {
 134     int oldsegment;
 135     int oldindex;
 136     int newbucket;
 137     int newsegment;
 138     int newindex;
 139     HashList *hl;
 140     HashList *next;
 141     HashList *old, *new;
 142
 143     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 144         /* Wow!  That's big.  Too big, so don't expand. */
 145         return;
 146
 147     /* Calculate indices of bucket to split */
 148     oldsegment = table->split / HSEGSIZE;
 149     oldindex = table->split % HSEGSIZE;
 150
 151     newbucket = table->max + table->split;
 152
 153     /* And the indices of the new bucket */
 154     newsegment = newbucket / HSEGSIZE;
 155     newindex = newbucket % HSEGSIZE;
 156
 157     if (newindex == 0)
 158         allocSegment(table, newsegment);
 159
 160     if (++table->split == table->max) {
 161         table->split = 0;
 162         table->max *= 2;
 163         table->mask1 = table->mask2;
 164         table->mask2 = table->mask2 << 1 | 1;
 165     }
 166     table->bcount++;
 167
 168     /* Split the bucket, paying no attention to the original order */
 169
 170     old = new = NULL;
 171     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 172         next = hl->next;
 173         if (table->hash(table, hl->key) == newbucket) {
 174             hl->next = new;
 175             new = hl;
 176         } else {
 177             hl->next = old;
 178             old = hl;
 179         }
 180     }
 181     table->dir[oldsegment][oldindex] = old;
 182     table->dir[newsegment][newindex] = new;
 183
 184     return;
 185 }
 186
 187 void *
 188 lookupHashTable(HashTable *table, StgWord key)
 189 {
 190     int bucket;
 191     int segment;
 192     int index;
 193     HashList *hl;
 194
 195     bucket = table->hash(table, key);
 196     segment = bucket / HSEGSIZE;
 197     index = bucket % HSEGSIZE;
 198
 199     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 200         if (table->compare(hl->key, key))
 201             return hl->data;
 202
 203     /* It's not there */
 204     return NULL;
 205 }
 206
 207 /* -----------------------------------------------------------------------------
 208  * We allocate the hashlist cells in large chunks to cut down on malloc
 209  * overhead.  Although we keep a free list of hashlist cells, we make
 210  * no effort to actually return the space to the malloc arena.
 211  * -------------------------------------------------------------------------- */
 212
 213 static HashList *freeList = NULL;
 214
 215 static struct chunkList {
 216   void *chunk;
 217   struct chunkList *next;
 218 } *chunks;
 219
 220 static HashList *
 221 allocHashList(void)
 222 {
 223     HashList *hl, *p;
 224     struct chunkList *cl;
 225
 226     if ((hl = freeList) != NULL) {
 227         freeList = hl->next;
 228     } else {
 229         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 230         cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
 231         cl->chunk = hl;
 232         cl->next = chunks;
 233         chunks = cl;
 234
 235         freeList = hl + 1;
 236         for (p = freeList; p < hl + HCHUNK - 1; p++)
 237             p->next = p + 1;
 238         p->next = NULL;
 239     }
 240     return hl;
 241 }
 242
 243 static void
 244 freeHashList(HashList *hl)
 245 {
 246     hl->next = freeList;
 247     freeList = hl;
 248 }
 249
 250 void
 251 insertHashTable(HashTable *table, StgWord key, void *data)
 252 {
 253     int bucket;
 254     int segment;
 255     int index;
 256     HashList *hl;
 257
 258     // Disable this assert; sometimes it's useful to be able to
 259     // overwrite entries in the hash table.
 260     // ASSERT(lookupHashTable(table, key) == NULL);
 261
 262     /* When the average load gets too high, we expand the table */
 263     if (++table->kcount >= HLOAD * table->bcount)
 264         expand(table);
 265
 266     bucket = table->hash(table, key);
 267     segment = bucket / HSEGSIZE;
 268     index = bucket % HSEGSIZE;
 269
 270     hl = allocHashList();
 271
 272     hl->key = key;
 273     hl->data = data;
 274     hl->next = table->dir[segment][index];
 275     table->dir[segment][index] = hl;
 276
 277 }
 278
 279 void *
 280 removeHashTable(HashTable *table, StgWord key, void *data)
 281 {
 282     int bucket;
 283     int segment;
 284     int index;
 285     HashList *hl;
 286     HashList *prev = NULL;
 287
 288     bucket = table->hash(table, key);
 289     segment = bucket / HSEGSIZE;
 290     index = bucket % HSEGSIZE;
 291
 292     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 293         if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
 294             if (prev == NULL)
 295                 table->dir[segment][index] = hl->next;
 296             else
 297                 prev->next = hl->next;
 298             freeHashList(hl);
 299             table->kcount--;
 300             return hl->data;
 301         }
 302         prev = hl;
 303     }
 304
 305     /* It's not there */
 306     ASSERT(data == NULL);
 307     return NULL;
 308 }
 309
 310 /* -----------------------------------------------------------------------------
 311  * When we free a hash table, we are also good enough to free the
 312  * data part of each (key, data) pair, as long as our caller can tell
 313  * us how to do it.
 314  * -------------------------------------------------------------------------- */
 315
 316 void
 317 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 318 {
 319     long segment;
 320     long index;
 321     HashList *hl;
 322     HashList *next;
 323
 324     /* The last bucket with something in it is table->max + table->split - 1 */
 325     segment = (table->max + table->split - 1) / HSEGSIZE;
 326     index = (table->max + table->split - 1) % HSEGSIZE;
 327
 328     while (segment >= 0) {
 329         while (index >= 0) {
 330             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 331                 next = hl->next;
 332                 if (freeDataFun != NULL)
 333                     (*freeDataFun)(hl->data);
 334                 freeHashList(hl);
 335             }
 336             index--;
 337         }
 338         stgFree(table->dir[segment]);
 339         segment--;
 340         index = HSEGSIZE - 1;
 341     }
 342     stgFree(table);
 343 }
 344
 345 /* -----------------------------------------------------------------------------
 346  * When we initialize a hash table, we set up the first segment as well,
 347  * initializing all of the first segment's hash buckets to NULL.
 348  * -------------------------------------------------------------------------- */
 349
 350 static HashTable *
 351 allocHashTable_(HashFunction *hash, CompareFunction *compare)
 352 {
 353     HashTable *table;
 354     HashList **hb;
 355
 356     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 357
 358     allocSegment(table, 0);
 359
 360     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 361         *hb = NULL;
 362
 363     table->split = 0;
 364     table->max = HSEGSIZE;
 365     table->mask1 = HSEGSIZE - 1;
 366     table->mask2 = 2 * HSEGSIZE - 1;
 367     table->kcount = 0;
 368     table->bcount = HSEGSIZE;
 369     table->hash = hash;
 370     table->compare = compare;
 371
 372     return table;
 373 }
 374
 375 HashTable *
 376 allocHashTable(void)
 377 {
 378     return allocHashTable_(hashWord, compareWord);
 379 }
 380
 381 HashTable *
 382 allocStrHashTable(void)
 383 {
 384     return allocHashTable_((HashFunction *)hashStr,
 385                            (CompareFunction *)compareStr);
 386 }
 387
 388 void
 389 exitHashTable(void)
 390 {
 391   struct chunkList *cl;
 392
 393   while ((cl = chunks) != NULL) {
 394     chunks = cl->next;
 395     stgFree(cl->chunk);
 396     stgFree(cl);
 397   }
 398 }