rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  *
   3  * (c) The AQUA Project, Glasgow University, 1995-1998
   4  * (c) The GHC Team, 1999
   5  *
   6  * Dynamically expanding linear hash tables, as described in
   7  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   8  * pp. 446 -- 457.
   9  * -------------------------------------------------------------------------- */
  10
  11 #include "PosixSource.h"
  12 #include "Rts.h"
  13
  14 #include "Hash.h"
  15 #include "RtsUtils.h"
  16
  17 #include <string.h>
  18
  19 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  20                             /* Also the minimum size of a hash table */
  21 #define HDIRSIZE    1024    /* Size of the segment directory */
  22                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  23 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  24
  25 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  26                             /* Number of HashList cells to allocate in one go */
  27
  28
  29 /* Linked list of (key, data) pairs for separate chaining */
  30 typedef struct hashlist {
  31     StgWord key;
  32     void *data;
  33     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  34 } HashList;
  35
  36 typedef struct chunklist {
  37   HashList *chunk;
  38   struct chunklist *next;
  39 } HashListChunk;
  40
  41 struct hashtable {
  42     int split;              /* Next bucket to split when expanding */
  43     int max;                /* Max bucket of smaller table */
  44     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  45     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  46     int kcount;             /* Number of keys */
  47     int bcount;             /* Number of buckets */
  48     HashList **dir[HDIRSIZE];   /* Directory of segments */
  49     HashList *freeList;         /* free list of HashLists */
  50     HashListChunk *chunks;
  51     HashFunction *hash;         /* hash function */
  52     CompareFunction *compare;   /* key comparison function */
  53 };
  54
  55 /* -----------------------------------------------------------------------------
  56  * Hash first using the smaller table.  If the bucket is less than the
  57  * next bucket to be split, re-hash using the larger table.
  58  * -------------------------------------------------------------------------- */
  59
  60 int
  61 hashWord(HashTable *table, StgWord key)
  62 {
  63     int bucket;
  64
  65     /* Strip the boring zero bits */
  66     key /= sizeof(StgWord);
  67
  68     /* Mod the size of the hash table (a power of 2) */
  69     bucket = key & table->mask1;
  70
  71     if (bucket < table->split) {
  72         /* Mod the size of the expanded hash table (also a power of 2) */
  73         bucket = key & table->mask2;
  74     }
  75     return bucket;
  76 }
  77
  78 int
  79 hashStr(HashTable *table, char *key)
  80 {
  81     int h, bucket;
  82     char *s;
  83
  84     s = key;
  85     for (h=0; *s; s++) {
  86         h *= 128;
  87         h += *s;
  88         h = h % 1048583;        /* some random large prime */
  89     }
  90
  91     /* Mod the size of the hash table (a power of 2) */
  92     bucket = h & table->mask1;
  93
  94     if (bucket < table->split) {
  95         /* Mod the size of the expanded hash table (also a power of 2) */
  96         bucket = h & table->mask2;
  97     }
  98
  99     return bucket;
 100 }
 101
 102 static int
 103 compareWord(StgWord key1, StgWord key2)
 104 {
 105     return (key1 == key2);
 106 }
 107
 108 static int
 109 compareStr(StgWord key1, StgWord key2)
 110 {
 111     return (strcmp((char *)key1, (char *)key2) == 0);
 112 }
 113
 114
 115 /* -----------------------------------------------------------------------------
 116  * Allocate a new segment of the dynamically growing hash table.
 117  * -------------------------------------------------------------------------- */
 118
 119 static void
 120 allocSegment(HashTable *table, int segment)
 121 {
 122     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
 123                                          "allocSegment");
 124 }
 125
 126
 127 /* -----------------------------------------------------------------------------
 128  * Expand the larger hash table by one bucket, and split one bucket
 129  * from the smaller table into two parts.  Only the bucket referenced
 130  * by @table->split@ is affected by the expansion.
 131  * -------------------------------------------------------------------------- */
 132
 133 static void
 134 expand(HashTable *table)
 135 {
 136     int oldsegment;
 137     int oldindex;
 138     int newbucket;
 139     int newsegment;
 140     int newindex;
 141     HashList *hl;
 142     HashList *next;
 143     HashList *old, *new;
 144
 145     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 146         /* Wow!  That's big.  Too big, so don't expand. */
 147         return;
 148
 149     /* Calculate indices of bucket to split */
 150     oldsegment = table->split / HSEGSIZE;
 151     oldindex = table->split % HSEGSIZE;
 152
 153     newbucket = table->max + table->split;
 154
 155     /* And the indices of the new bucket */
 156     newsegment = newbucket / HSEGSIZE;
 157     newindex = newbucket % HSEGSIZE;
 158
 159     if (newindex == 0)
 160         allocSegment(table, newsegment);
 161
 162     if (++table->split == table->max) {
 163         table->split = 0;
 164         table->max *= 2;
 165         table->mask1 = table->mask2;
 166         table->mask2 = table->mask2 << 1 | 1;
 167     }
 168     table->bcount++;
 169
 170     /* Split the bucket, paying no attention to the original order */
 171
 172     old = new = NULL;
 173     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 174         next = hl->next;
 175         if (table->hash(table, hl->key) == newbucket) {
 176             hl->next = new;
 177             new = hl;
 178         } else {
 179             hl->next = old;
 180             old = hl;
 181         }
 182     }
 183     table->dir[oldsegment][oldindex] = old;
 184     table->dir[newsegment][newindex] = new;
 185
 186     return;
 187 }
 188
 189 void *
 190 lookupHashTable(HashTable *table, StgWord key)
 191 {
 192     int bucket;
 193     int segment;
 194     int index;
 195     HashList *hl;
 196
 197     bucket = table->hash(table, key);
 198     segment = bucket / HSEGSIZE;
 199     index = bucket % HSEGSIZE;
 200
 201     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 202         if (table->compare(hl->key, key))
 203             return hl->data;
 204
 205     /* It's not there */
 206     return NULL;
 207 }
 208
 209 /* -----------------------------------------------------------------------------
 210  * We allocate the hashlist cells in large chunks to cut down on malloc
 211  * overhead.  Although we keep a free list of hashlist cells, we make
 212  * no effort to actually return the space to the malloc arena.
 213  * -------------------------------------------------------------------------- */
 214
 215 static HashList *
 216 allocHashList (HashTable *table)
 217 {
 218     HashList *hl, *p;
 219     HashListChunk *cl;
 220
 221     if ((hl = table->freeList) != NULL) {
 222         table->freeList = hl->next;
 223     } else {
 224         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 225         cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
 226         cl->chunk = hl;
 227         cl->next = table->chunks;
 228         table->chunks = cl;
 229
 230         table->freeList = hl + 1;
 231         for (p = table->freeList; p < hl + HCHUNK - 1; p++)
 232             p->next = p + 1;
 233         p->next = NULL;
 234     }
 235     return hl;
 236 }
 237
 238 static void
 239 freeHashList (HashTable *table, HashList *hl)
 240 {
 241     hl->next = table->freeList;
 242     table->freeList = hl;
 243 }
 244
 245 void
 246 insertHashTable(HashTable *table, StgWord key, void *data)
 247 {
 248     int bucket;
 249     int segment;
 250     int index;
 251     HashList *hl;
 252
 253     // Disable this assert; sometimes it's useful to be able to
 254     // overwrite entries in the hash table.
 255     // ASSERT(lookupHashTable(table, key) == NULL);
 256
 257     /* When the average load gets too high, we expand the table */
 258     if (++table->kcount >= HLOAD * table->bcount)
 259         expand(table);
 260
 261     bucket = table->hash(table, key);
 262     segment = bucket / HSEGSIZE;
 263     index = bucket % HSEGSIZE;
 264
 265     hl = allocHashList(table);
 266
 267     hl->key = key;
 268     hl->data = data;
 269     hl->next = table->dir[segment][index];
 270     table->dir[segment][index] = hl;
 271
 272 }
 273
 274 void *
 275 removeHashTable(HashTable *table, StgWord key, void *data)
 276 {
 277     int bucket;
 278     int segment;
 279     int index;
 280     HashList *hl;
 281     HashList *prev = NULL;
 282
 283     bucket = table->hash(table, key);
 284     segment = bucket / HSEGSIZE;
 285     index = bucket % HSEGSIZE;
 286
 287     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 288         if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
 289             if (prev == NULL)
 290                 table->dir[segment][index] = hl->next;
 291             else
 292                 prev->next = hl->next;
 293             freeHashList(table,hl);
 294             table->kcount--;
 295             return hl->data;
 296         }
 297         prev = hl;
 298     }
 299
 300     /* It's not there */
 301     ASSERT(data == NULL);
 302     return NULL;
 303 }
 304
 305 /* -----------------------------------------------------------------------------
 306  * When we free a hash table, we are also good enough to free the
 307  * data part of each (key, data) pair, as long as our caller can tell
 308  * us how to do it.
 309  * -------------------------------------------------------------------------- */
 310
 311 void
 312 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 313 {
 314     long segment;
 315     long index;
 316     HashList *hl;
 317     HashList *next;
 318     HashListChunk *cl, *cl_next;
 319
 320     /* The last bucket with something in it is table->max + table->split - 1 */
 321     segment = (table->max + table->split - 1) / HSEGSIZE;
 322     index = (table->max + table->split - 1) % HSEGSIZE;
 323
 324     while (segment >= 0) {
 325         while (index >= 0) {
 326             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 327                 next = hl->next;
 328                 if (freeDataFun != NULL)
 329                     (*freeDataFun)(hl->data);
 330             }
 331             index--;
 332         }
 333         stgFree(table->dir[segment]);
 334         segment--;
 335         index = HSEGSIZE - 1;
 336     }
 337     for (cl = table->chunks; cl != NULL; cl = cl_next) {
 338         cl_next = cl->next;
 339         stgFree(cl->chunk);
 340         stgFree(cl);
 341     }
 342     stgFree(table);
 343 }
 344
 345 /* -----------------------------------------------------------------------------
 346  * When we initialize a hash table, we set up the first segment as well,
 347  * initializing all of the first segment's hash buckets to NULL.
 348  * -------------------------------------------------------------------------- */
 349
 350 HashTable *
 351 allocHashTable_(HashFunction *hash, CompareFunction *compare)
 352 {
 353     HashTable *table;
 354     HashList **hb;
 355
 356     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 357
 358     allocSegment(table, 0);
 359
 360     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 361         *hb = NULL;
 362
 363     table->split = 0;
 364     table->max = HSEGSIZE;
 365     table->mask1 = HSEGSIZE - 1;
 366     table->mask2 = 2 * HSEGSIZE - 1;
 367     table->kcount = 0;
 368     table->bcount = HSEGSIZE;
 369     table->freeList = NULL;
 370     table->chunks = NULL;
 371     table->hash = hash;
 372     table->compare = compare;
 373
 374     return table;
 375 }
 376
 377 HashTable *
 378 allocHashTable(void)
 379 {
 380     return allocHashTable_(hashWord, compareWord);
 381 }
 382
 383 HashTable *
 384 allocStrHashTable(void)
 385 {
 386     return allocHashTable_((HashFunction *)hashStr,
 387                            (CompareFunction *)compareStr);
 388 }
 389
 390 void
 391 exitHashTable(void)
 392 {
 393     /* nothing to do */
 394 }