ghc/rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  * $Id: Hash.c,v 1.10 2003/03/25 17:58:47 sof Exp $
   3  *
   4  * (c) The AQUA Project, Glasgow University, 1995-1998
   5  * (c) The GHC Team, 1999
   6  *
   7  * Dynamically expanding linear hash tables, as described in
   8  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   9  * pp. 446 -- 457.
  10  * -------------------------------------------------------------------------- */
  11
  12 #include "PosixSource.h"
  13 #include "Rts.h"
  14 #include "Hash.h"
  15 #include "RtsUtils.h"
  16
  17 #include <stdlib.h>
  18 #include <string.h>
  19
  20 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  21                             /* Also the minimum size of a hash table */
  22 #define HDIRSIZE    1024    /* Size of the segment directory */
  23                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  24 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  25
  26 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  27                             /* Number of HashList cells to allocate in one go */
  28
  29
  30 /* Linked list of (key, data) pairs for separate chaining */
  31 struct hashlist {
  32     StgWord key;
  33     void *data;
  34     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  35 };
  36
  37 typedef struct hashlist HashList;
  38
  39 typedef int HashFunction(HashTable *table, StgWord key);
  40 typedef int CompareFunction(StgWord key1, StgWord key2);
  41
  42 struct hashtable {
  43     int split;              /* Next bucket to split when expanding */
  44     int max;                /* Max bucket of smaller table */
  45     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  46     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  47     int kcount;             /* Number of keys */
  48     int bcount;             /* Number of buckets */
  49     HashList **dir[HDIRSIZE];   /* Directory of segments */
  50     HashFunction *hash;         /* hash function */
  51     CompareFunction *compare;   /* key comparison function */
  52 };
  53
  54 /* -----------------------------------------------------------------------------
  55  * Hash first using the smaller table.  If the bucket is less than the
  56  * next bucket to be split, re-hash using the larger table.
  57  * -------------------------------------------------------------------------- */
  58
  59 static int
  60 hashWord(HashTable *table, StgWord key)
  61 {
  62     int bucket;
  63
  64     /* Strip the boring zero bits */
  65     key /= sizeof(StgWord);
  66
  67     /* Mod the size of the hash table (a power of 2) */
  68     bucket = key & table->mask1;
  69
  70     if (bucket < table->split) {
  71         /* Mod the size of the expanded hash table (also a power of 2) */
  72         bucket = key & table->mask2;
  73     }
  74     return bucket;
  75 }
  76
  77 static int
  78 hashStr(HashTable *table, char *key)
  79 {
  80     int h, bucket;
  81     char *s;
  82
  83     s = key;
  84     for (h=0; *s; s++) {
  85         h *= 128;
  86         h += *s;
  87         h = h % 1048583;        /* some random large prime */
  88     }
  89
  90     /* Mod the size of the hash table (a power of 2) */
  91     bucket = h & table->mask1;
  92
  93     if (bucket < table->split) {
  94         /* Mod the size of the expanded hash table (also a power of 2) */
  95         bucket = h & table->mask2;
  96     }
  97
  98     return bucket;
  99 }
 100
 101 static int
 102 compareWord(StgWord key1, StgWord key2)
 103 {
 104     return (key1 == key2);
 105 }
 106
 107 static int
 108 compareStr(StgWord key1, StgWord key2)
 109 {
 110     return (strcmp((char *)key1, (char *)key2) == 0);
 111 }
 112
 113
 114 /* -----------------------------------------------------------------------------
 115  * Allocate a new segment of the dynamically growing hash table.
 116  * -------------------------------------------------------------------------- */
 117
 118 static void
 119 allocSegment(HashTable *table, int segment)
 120 {
 121     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
 122                                          "allocSegment");
 123 }
 124
 125
 126 /* -----------------------------------------------------------------------------
 127  * Expand the larger hash table by one bucket, and split one bucket
 128  * from the smaller table into two parts.  Only the bucket referenced
 129  * by @table->split@ is affected by the expansion.
 130  * -------------------------------------------------------------------------- */
 131
 132 static void
 133 expand(HashTable *table)
 134 {
 135     int oldsegment;
 136     int oldindex;
 137     int newbucket;
 138     int newsegment;
 139     int newindex;
 140     HashList *hl;
 141     HashList *next;
 142     HashList *old, *new;
 143
 144     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 145         /* Wow!  That's big.  Too big, so don't expand. */
 146         return;
 147
 148     /* Calculate indices of bucket to split */
 149     oldsegment = table->split / HSEGSIZE;
 150     oldindex = table->split % HSEGSIZE;
 151
 152     newbucket = table->max + table->split;
 153
 154     /* And the indices of the new bucket */
 155     newsegment = newbucket / HSEGSIZE;
 156     newindex = newbucket % HSEGSIZE;
 157
 158     if (newindex == 0)
 159         allocSegment(table, newsegment);
 160
 161     if (++table->split == table->max) {
 162         table->split = 0;
 163         table->max *= 2;
 164         table->mask1 = table->mask2;
 165         table->mask2 = table->mask2 << 1 | 1;
 166     }
 167     table->bcount++;
 168
 169     /* Split the bucket, paying no attention to the original order */
 170
 171     old = new = NULL;
 172     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 173         next = hl->next;
 174         if (table->hash(table, hl->key) == newbucket) {
 175             hl->next = new;
 176             new = hl;
 177         } else {
 178             hl->next = old;
 179             old = hl;
 180         }
 181     }
 182     table->dir[oldsegment][oldindex] = old;
 183     table->dir[newsegment][newindex] = new;
 184
 185     return;
 186 }
 187
 188 void *
 189 lookupHashTable(HashTable *table, StgWord key)
 190 {
 191     int bucket;
 192     int segment;
 193     int index;
 194     HashList *hl;
 195
 196     bucket = table->hash(table, key);
 197     segment = bucket / HSEGSIZE;
 198     index = bucket % HSEGSIZE;
 199
 200     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 201         if (table->compare(hl->key, key))
 202             return hl->data;
 203
 204     /* It's not there */
 205     return NULL;
 206 }
 207
 208 /* -----------------------------------------------------------------------------
 209  * We allocate the hashlist cells in large chunks to cut down on malloc
 210  * overhead.  Although we keep a free list of hashlist cells, we make
 211  * no effort to actually return the space to the malloc arena.
 212  * -------------------------------------------------------------------------- */
 213
 214 static HashList *freeList = NULL;
 215
 216 static HashList *
 217 allocHashList(void)
 218 {
 219     HashList *hl, *p;
 220
 221     if ((hl = freeList) != NULL) {
 222         freeList = hl->next;
 223     } else {
 224         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 225
 226         freeList = hl + 1;
 227         for (p = freeList; p < hl + HCHUNK - 1; p++)
 228             p->next = p + 1;
 229         p->next = NULL;
 230     }
 231     return hl;
 232 }
 233
 234 static void
 235 freeHashList(HashList *hl)
 236 {
 237     hl->next = freeList;
 238     freeList = hl;
 239 }
 240
 241 void
 242 insertHashTable(HashTable *table, StgWord key, void *data)
 243 {
 244     int bucket;
 245     int segment;
 246     int index;
 247     HashList *hl;
 248
 249     /* We want no duplicates */
 250     ASSERT(lookupHashTable(table, key) == NULL);
 251
 252     /* When the average load gets too high, we expand the table */
 253     if (++table->kcount >= HLOAD * table->bcount)
 254         expand(table);
 255
 256     bucket = table->hash(table, key);
 257     segment = bucket / HSEGSIZE;
 258     index = bucket % HSEGSIZE;
 259
 260     hl = allocHashList();
 261
 262     hl->key = key;
 263     hl->data = data;
 264     hl->next = table->dir[segment][index];
 265     table->dir[segment][index] = hl;
 266
 267 }
 268
 269 void *
 270 removeHashTable(HashTable *table, StgWord key, void *data)
 271 {
 272     int bucket;
 273     int segment;
 274     int index;
 275     HashList *hl;
 276     HashList *prev = NULL;
 277
 278     bucket = table->hash(table, key);
 279     segment = bucket / HSEGSIZE;
 280     index = bucket % HSEGSIZE;
 281
 282     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 283         if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
 284             if (prev == NULL)
 285                 table->dir[segment][index] = hl->next;
 286             else
 287                 prev->next = hl->next;
 288             freeHashList(hl);
 289             table->kcount--;
 290             return hl->data;
 291         }
 292         prev = hl;
 293     }
 294
 295     /* It's not there */
 296     ASSERT(data == NULL);
 297     return NULL;
 298 }
 299
 300 /* -----------------------------------------------------------------------------
 301  * When we free a hash table, we are also good enough to free the
 302  * data part of each (key, data) pair, as long as our caller can tell
 303  * us how to do it.
 304  * -------------------------------------------------------------------------- */
 305
 306 void
 307 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 308 {
 309     long segment;
 310     long index;
 311     HashList *hl;
 312     HashList *next;
 313
 314     /* The last bucket with something in it is table->max + table->split - 1 */
 315     segment = (table->max + table->split - 1) / HSEGSIZE;
 316     index = (table->max + table->split - 1) % HSEGSIZE;
 317
 318     while (segment >= 0) {
 319         while (index >= 0) {
 320             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 321                 next = hl->next;
 322                 if (freeDataFun != NULL)
 323                     (*freeDataFun)(hl->data);
 324                 freeHashList(hl);
 325             }
 326             index--;
 327         }
 328         stgFree(table->dir[segment]);
 329         segment--;
 330         index = HSEGSIZE - 1;
 331     }
 332     stgFree(table);
 333 }
 334
 335 /* -----------------------------------------------------------------------------
 336  * When we initialize a hash table, we set up the first segment as well,
 337  * initializing all of the first segment's hash buckets to NULL.
 338  * -------------------------------------------------------------------------- */
 339
 340 static HashTable *
 341 allocHashTable_(HashFunction *hash, CompareFunction *compare)
 342 {
 343     HashTable *table;
 344     HashList **hb;
 345
 346     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 347
 348     allocSegment(table, 0);
 349
 350     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 351         *hb = NULL;
 352
 353     table->split = 0;
 354     table->max = HSEGSIZE;
 355     table->mask1 = HSEGSIZE - 1;
 356     table->mask2 = 2 * HSEGSIZE - 1;
 357     table->kcount = 0;
 358     table->bcount = HSEGSIZE;
 359     table->hash = hash;
 360     table->compare = compare;
 361
 362     return table;
 363 }
 364
 365 HashTable *
 366 allocHashTable(void)
 367 {
 368     return allocHashTable_(hashWord, compareWord);
 369 }
 370
 371 HashTable *
 372 allocStrHashTable(void)
 373 {
 374     return allocHashTable_((HashFunction *)hashStr,
 375                            (CompareFunction *)compareStr);
 376 }