rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  *
   3  * (c) The AQUA Project, Glasgow University, 1995-1998
   4  * (c) The GHC Team, 1999
   5  *
   6  * Dynamically expanding linear hash tables, as described in
   7  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   8  * pp. 446 -- 457.
   9  * -------------------------------------------------------------------------- */
  10
  11 #include "PosixSource.h"
  12 #include "Rts.h"
  13 #include "Hash.h"
  14 #include "RtsUtils.h"
  15
  16 #include <stdlib.h>
  17 #include <string.h>
  18
  19 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  20                             /* Also the minimum size of a hash table */
  21 #define HDIRSIZE    1024    /* Size of the segment directory */
  22                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  23 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  24
  25 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  26                             /* Number of HashList cells to allocate in one go */
  27
  28
  29 /* Linked list of (key, data) pairs for separate chaining */
  30 struct hashlist {
  31     StgWord key;
  32     void *data;
  33     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  34 };
  35
  36 typedef struct hashlist HashList;
  37
  38 struct hashtable {
  39     int split;              /* Next bucket to split when expanding */
  40     int max;                /* Max bucket of smaller table */
  41     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  42     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  43     int kcount;             /* Number of keys */
  44     int bcount;             /* Number of buckets */
  45     HashList **dir[HDIRSIZE];   /* Directory of segments */
  46     HashFunction *hash;         /* hash function */
  47     CompareFunction *compare;   /* key comparison function */
  48 };
  49
  50 /* -----------------------------------------------------------------------------
  51  * Hash first using the smaller table.  If the bucket is less than the
  52  * next bucket to be split, re-hash using the larger table.
  53  * -------------------------------------------------------------------------- */
  54
  55 int
  56 hashWord(HashTable *table, StgWord key)
  57 {
  58     int bucket;
  59
  60     /* Strip the boring zero bits */
  61     key /= sizeof(StgWord);
  62
  63     /* Mod the size of the hash table (a power of 2) */
  64     bucket = key & table->mask1;
  65
  66     if (bucket < table->split) {
  67         /* Mod the size of the expanded hash table (also a power of 2) */
  68         bucket = key & table->mask2;
  69     }
  70     return bucket;
  71 }
  72
  73 int
  74 hashStr(HashTable *table, char *key)
  75 {
  76     int h, bucket;
  77     char *s;
  78
  79     s = key;
  80     for (h=0; *s; s++) {
  81         h *= 128;
  82         h += *s;
  83         h = h % 1048583;        /* some random large prime */
  84     }
  85
  86     /* Mod the size of the hash table (a power of 2) */
  87     bucket = h & table->mask1;
  88
  89     if (bucket < table->split) {
  90         /* Mod the size of the expanded hash table (also a power of 2) */
  91         bucket = h & table->mask2;
  92     }
  93
  94     return bucket;
  95 }
  96
  97 static int
  98 compareWord(StgWord key1, StgWord key2)
  99 {
 100     return (key1 == key2);
 101 }
 102
 103 static int
 104 compareStr(StgWord key1, StgWord key2)
 105 {
 106     return (strcmp((char *)key1, (char *)key2) == 0);
 107 }
 108
 109
 110 /* -----------------------------------------------------------------------------
 111  * Allocate a new segment of the dynamically growing hash table.
 112  * -------------------------------------------------------------------------- */
 113
 114 static void
 115 allocSegment(HashTable *table, int segment)
 116 {
 117     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
 118                                          "allocSegment");
 119 }
 120
 121
 122 /* -----------------------------------------------------------------------------
 123  * Expand the larger hash table by one bucket, and split one bucket
 124  * from the smaller table into two parts.  Only the bucket referenced
 125  * by @table->split@ is affected by the expansion.
 126  * -------------------------------------------------------------------------- */
 127
 128 static void
 129 expand(HashTable *table)
 130 {
 131     int oldsegment;
 132     int oldindex;
 133     int newbucket;
 134     int newsegment;
 135     int newindex;
 136     HashList *hl;
 137     HashList *next;
 138     HashList *old, *new;
 139
 140     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 141         /* Wow!  That's big.  Too big, so don't expand. */
 142         return;
 143
 144     /* Calculate indices of bucket to split */
 145     oldsegment = table->split / HSEGSIZE;
 146     oldindex = table->split % HSEGSIZE;
 147
 148     newbucket = table->max + table->split;
 149
 150     /* And the indices of the new bucket */
 151     newsegment = newbucket / HSEGSIZE;
 152     newindex = newbucket % HSEGSIZE;
 153
 154     if (newindex == 0)
 155         allocSegment(table, newsegment);
 156
 157     if (++table->split == table->max) {
 158         table->split = 0;
 159         table->max *= 2;
 160         table->mask1 = table->mask2;
 161         table->mask2 = table->mask2 << 1 | 1;
 162     }
 163     table->bcount++;
 164
 165     /* Split the bucket, paying no attention to the original order */
 166
 167     old = new = NULL;
 168     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 169         next = hl->next;
 170         if (table->hash(table, hl->key) == newbucket) {
 171             hl->next = new;
 172             new = hl;
 173         } else {
 174             hl->next = old;
 175             old = hl;
 176         }
 177     }
 178     table->dir[oldsegment][oldindex] = old;
 179     table->dir[newsegment][newindex] = new;
 180
 181     return;
 182 }
 183
 184 void *
 185 lookupHashTable(HashTable *table, StgWord key)
 186 {
 187     int bucket;
 188     int segment;
 189     int index;
 190     HashList *hl;
 191
 192     bucket = table->hash(table, key);
 193     segment = bucket / HSEGSIZE;
 194     index = bucket % HSEGSIZE;
 195
 196     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 197         if (table->compare(hl->key, key))
 198             return hl->data;
 199
 200     /* It's not there */
 201     return NULL;
 202 }
 203
 204 /* -----------------------------------------------------------------------------
 205  * We allocate the hashlist cells in large chunks to cut down on malloc
 206  * overhead.  Although we keep a free list of hashlist cells, we make
 207  * no effort to actually return the space to the malloc arena.
 208  * -------------------------------------------------------------------------- */
 209
 210 static HashList *freeList = NULL;
 211
 212 static struct chunkList {
 213   void *chunk;
 214   struct chunkList *next;
 215 } *chunks;
 216
 217 static HashList *
 218 allocHashList(void)
 219 {
 220     HashList *hl, *p;
 221     struct chunkList *cl;
 222
 223     if ((hl = freeList) != NULL) {
 224         freeList = hl->next;
 225     } else {
 226         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 227         cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
 228         cl->chunk = hl;
 229         cl->next = chunks;
 230         chunks = cl;
 231
 232         freeList = hl + 1;
 233         for (p = freeList; p < hl + HCHUNK - 1; p++)
 234             p->next = p + 1;
 235         p->next = NULL;
 236     }
 237     return hl;
 238 }
 239
 240 static void
 241 freeHashList(HashList *hl)
 242 {
 243     hl->next = freeList;
 244     freeList = hl;
 245 }
 246
 247 void
 248 insertHashTable(HashTable *table, StgWord key, void *data)
 249 {
 250     int bucket;
 251     int segment;
 252     int index;
 253     HashList *hl;
 254
 255     // Disable this assert; sometimes it's useful to be able to
 256     // overwrite entries in the hash table.
 257     // ASSERT(lookupHashTable(table, key) == NULL);
 258
 259     /* When the average load gets too high, we expand the table */
 260     if (++table->kcount >= HLOAD * table->bcount)
 261         expand(table);
 262
 263     bucket = table->hash(table, key);
 264     segment = bucket / HSEGSIZE;
 265     index = bucket % HSEGSIZE;
 266
 267     hl = allocHashList();
 268
 269     hl->key = key;
 270     hl->data = data;
 271     hl->next = table->dir[segment][index];
 272     table->dir[segment][index] = hl;
 273
 274 }
 275
 276 void *
 277 removeHashTable(HashTable *table, StgWord key, void *data)
 278 {
 279     int bucket;
 280     int segment;
 281     int index;
 282     HashList *hl;
 283     HashList *prev = NULL;
 284
 285     bucket = table->hash(table, key);
 286     segment = bucket / HSEGSIZE;
 287     index = bucket % HSEGSIZE;
 288
 289     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 290         if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
 291             if (prev == NULL)
 292                 table->dir[segment][index] = hl->next;
 293             else
 294                 prev->next = hl->next;
 295             freeHashList(hl);
 296             table->kcount--;
 297             return hl->data;
 298         }
 299         prev = hl;
 300     }
 301
 302     /* It's not there */
 303     ASSERT(data == NULL);
 304     return NULL;
 305 }
 306
 307 /* -----------------------------------------------------------------------------
 308  * When we free a hash table, we are also good enough to free the
 309  * data part of each (key, data) pair, as long as our caller can tell
 310  * us how to do it.
 311  * -------------------------------------------------------------------------- */
 312
 313 void
 314 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 315 {
 316     long segment;
 317     long index;
 318     HashList *hl;
 319     HashList *next;
 320
 321     /* The last bucket with something in it is table->max + table->split - 1 */
 322     segment = (table->max + table->split - 1) / HSEGSIZE;
 323     index = (table->max + table->split - 1) % HSEGSIZE;
 324
 325     while (segment >= 0) {
 326         while (index >= 0) {
 327             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 328                 next = hl->next;
 329                 if (freeDataFun != NULL)
 330                     (*freeDataFun)(hl->data);
 331                 freeHashList(hl);
 332             }
 333             index--;
 334         }
 335         stgFree(table->dir[segment]);
 336         segment--;
 337         index = HSEGSIZE - 1;
 338     }
 339     stgFree(table);
 340 }
 341
 342 /* -----------------------------------------------------------------------------
 343  * When we initialize a hash table, we set up the first segment as well,
 344  * initializing all of the first segment's hash buckets to NULL.
 345  * -------------------------------------------------------------------------- */
 346
 347 HashTable *
 348 allocHashTable_(HashFunction *hash, CompareFunction *compare)
 349 {
 350     HashTable *table;
 351     HashList **hb;
 352
 353     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 354
 355     allocSegment(table, 0);
 356
 357     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 358         *hb = NULL;
 359
 360     table->split = 0;
 361     table->max = HSEGSIZE;
 362     table->mask1 = HSEGSIZE - 1;
 363     table->mask2 = 2 * HSEGSIZE - 1;
 364     table->kcount = 0;
 365     table->bcount = HSEGSIZE;
 366     table->hash = hash;
 367     table->compare = compare;
 368
 369     return table;
 370 }
 371
 372 HashTable *
 373 allocHashTable(void)
 374 {
 375     return allocHashTable_(hashWord, compareWord);
 376 }
 377
 378 HashTable *
 379 allocStrHashTable(void)
 380 {
 381     return allocHashTable_((HashFunction *)hashStr,
 382                            (CompareFunction *)compareStr);
 383 }
 384
 385 void
 386 exitHashTable(void)
 387 {
 388   struct chunkList *cl;
 389
 390   while ((cl = chunks) != NULL) {
 391     chunks = cl->next;
 392     stgFree(cl->chunk);
 393     stgFree(cl);
 394   }
 395 }