ghc/rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  * $Id: Hash.c,v 1.8 2002/04/09 12:55:11 simonmar Exp $
   3  *
   4  * (c) The AQUA Project, Glasgow University, 1995-1998
   5  * (c) The GHC Team, 1999
   6  *
   7  * Dynamically expanding linear hash tables, as described in
   8  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   9  * pp. 446 -- 457.
  10  * -------------------------------------------------------------------------- */
  11
  12 #include "PosixSource.h"
  13 #include "Rts.h"
  14 #include "Hash.h"
  15 #include "RtsUtils.h"
  16
  17 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  18                             /* Also the minimum size of a hash table */
  19 #define HDIRSIZE    1024    /* Size of the segment directory */
  20                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  21 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  22
  23 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  24                             /* Number of HashList cells to allocate in one go */
  25
  26
  27 /* Linked list of (key, data) pairs for separate chaining */
  28 struct hashlist {
  29     StgWord key;
  30     void *data;
  31     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  32 };
  33
  34 typedef struct hashlist HashList;
  35
  36 typedef int HashFunction(HashTable *table, StgWord key);
  37 typedef int CompareFunction(StgWord key1, StgWord key2);
  38
  39 struct hashtable {
  40     int split;              /* Next bucket to split when expanding */
  41     int max;                /* Max bucket of smaller table */
  42     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  43     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  44     int kcount;             /* Number of keys */
  45     int bcount;             /* Number of buckets */
  46     HashList **dir[HDIRSIZE];   /* Directory of segments */
  47     HashFunction *hash;         /* hash function */
  48     CompareFunction *compare;   /* key comparison function */
  49 };
  50
  51 /* -----------------------------------------------------------------------------
  52  * Hash first using the smaller table.  If the bucket is less than the
  53  * next bucket to be split, re-hash using the larger table.
  54  * -------------------------------------------------------------------------- */
  55
  56 static int
  57 hashWord(HashTable *table, StgWord key)
  58 {
  59     int bucket;
  60
  61     /* Strip the boring zero bits */
  62     key /= sizeof(StgWord);
  63
  64     /* Mod the size of the hash table (a power of 2) */
  65     bucket = key & table->mask1;
  66
  67     if (bucket < table->split) {
  68         /* Mod the size of the expanded hash table (also a power of 2) */
  69         bucket = key & table->mask2;
  70     }
  71     return bucket;
  72 }
  73
  74 static int
  75 hashStr(HashTable *table, char *key)
  76 {
  77     int h, bucket;
  78     char *s;
  79
  80     s = key;
  81     for (h=0; *s; s++) {
  82         h *= 128;
  83         h += *s;
  84         h = h % 1048583;        /* some random large prime */
  85     }
  86
  87     /* Mod the size of the hash table (a power of 2) */
  88     bucket = h & table->mask1;
  89
  90     if (bucket < table->split) {
  91         /* Mod the size of the expanded hash table (also a power of 2) */
  92         bucket = h & table->mask2;
  93     }
  94
  95     return bucket;
  96 }
  97
  98 static int
  99 compareWord(StgWord key1, StgWord key2)
 100 {
 101     return (key1 == key2);
 102 }
 103
 104 static int
 105 compareStr(StgWord key1, StgWord key2)
 106 {
 107     return (strcmp((char *)key1, (char *)key2) == 0);
 108 }
 109
 110
 111 /* -----------------------------------------------------------------------------
 112  * Allocate a new segment of the dynamically growing hash table.
 113  * -------------------------------------------------------------------------- */
 114
 115 static void
 116 allocSegment(HashTable *table, int segment)
 117 {
 118     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
 119                                          "allocSegment");
 120 }
 121
 122
 123 /* -----------------------------------------------------------------------------
 124  * Expand the larger hash table by one bucket, and split one bucket
 125  * from the smaller table into two parts.  Only the bucket referenced
 126  * by @table->split@ is affected by the expansion.
 127  * -------------------------------------------------------------------------- */
 128
 129 static void
 130 expand(HashTable *table)
 131 {
 132     int oldsegment;
 133     int oldindex;
 134     int newbucket;
 135     int newsegment;
 136     int newindex;
 137     HashList *hl;
 138     HashList *next;
 139     HashList *old, *new;
 140
 141     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 142         /* Wow!  That's big.  Too big, so don't expand. */
 143         return;
 144
 145     /* Calculate indices of bucket to split */
 146     oldsegment = table->split / HSEGSIZE;
 147     oldindex = table->split % HSEGSIZE;
 148
 149     newbucket = table->max + table->split;
 150
 151     /* And the indices of the new bucket */
 152     newsegment = newbucket / HSEGSIZE;
 153     newindex = newbucket % HSEGSIZE;
 154
 155     if (newindex == 0)
 156         allocSegment(table, newsegment);
 157
 158     if (++table->split == table->max) {
 159         table->split = 0;
 160         table->max *= 2;
 161         table->mask1 = table->mask2;
 162         table->mask2 = table->mask2 << 1 | 1;
 163     }
 164     table->bcount++;
 165
 166     /* Split the bucket, paying no attention to the original order */
 167
 168     old = new = NULL;
 169     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 170         next = hl->next;
 171         if (table->hash(table, hl->key) == newbucket) {
 172             hl->next = new;
 173             new = hl;
 174         } else {
 175             hl->next = old;
 176             old = hl;
 177         }
 178     }
 179     table->dir[oldsegment][oldindex] = old;
 180     table->dir[newsegment][newindex] = new;
 181
 182     return;
 183 }
 184
 185 void *
 186 lookupHashTable(HashTable *table, StgWord key)
 187 {
 188     int bucket;
 189     int segment;
 190     int index;
 191     HashList *hl;
 192
 193     bucket = table->hash(table, key);
 194     segment = bucket / HSEGSIZE;
 195     index = bucket % HSEGSIZE;
 196
 197     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 198         if (table->compare(hl->key, key))
 199             return hl->data;
 200
 201     /* It's not there */
 202     return NULL;
 203 }
 204
 205 /* -----------------------------------------------------------------------------
 206  * We allocate the hashlist cells in large chunks to cut down on malloc
 207  * overhead.  Although we keep a free list of hashlist cells, we make
 208  * no effort to actually return the space to the malloc arena.
 209  * -------------------------------------------------------------------------- */
 210
 211 static HashList *freeList = NULL;
 212
 213 static HashList *
 214 allocHashList(void)
 215 {
 216     HashList *hl, *p;
 217
 218     if ((hl = freeList) != NULL) {
 219         freeList = hl->next;
 220     } else {
 221         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 222
 223         freeList = hl + 1;
 224         for (p = freeList; p < hl + HCHUNK - 1; p++)
 225             p->next = p + 1;
 226         p->next = NULL;
 227     }
 228     return hl;
 229 }
 230
 231 static void
 232 freeHashList(HashList *hl)
 233 {
 234     hl->next = freeList;
 235     freeList = hl;
 236 }
 237
 238 void
 239 insertHashTable(HashTable *table, StgWord key, void *data)
 240 {
 241     int bucket;
 242     int segment;
 243     int index;
 244     HashList *hl;
 245
 246     /* We want no duplicates */
 247     ASSERT(lookupHashTable(table, key) == NULL);
 248
 249     /* When the average load gets too high, we expand the table */
 250     if (++table->kcount >= HLOAD * table->bcount)
 251         expand(table);
 252
 253     bucket = table->hash(table, key);
 254     segment = bucket / HSEGSIZE;
 255     index = bucket % HSEGSIZE;
 256
 257     hl = allocHashList();
 258
 259     hl->key = key;
 260     hl->data = data;
 261     hl->next = table->dir[segment][index];
 262     table->dir[segment][index] = hl;
 263
 264 }
 265
 266 void *
 267 removeHashTable(HashTable *table, StgWord key, void *data)
 268 {
 269     int bucket;
 270     int segment;
 271     int index;
 272     HashList *hl;
 273     HashList *prev = NULL;
 274
 275     bucket = table->hash(table, key);
 276     segment = bucket / HSEGSIZE;
 277     index = bucket % HSEGSIZE;
 278
 279     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 280         if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
 281             if (prev == NULL)
 282                 table->dir[segment][index] = hl->next;
 283             else
 284                 prev->next = hl->next;
 285             freeHashList(hl);
 286             table->kcount--;
 287             return hl->data;
 288         }
 289         prev = hl;
 290     }
 291
 292     /* It's not there */
 293     ASSERT(data == NULL);
 294     return NULL;
 295 }
 296
 297 /* -----------------------------------------------------------------------------
 298  * When we free a hash table, we are also good enough to free the
 299  * data part of each (key, data) pair, as long as our caller can tell
 300  * us how to do it.
 301  * -------------------------------------------------------------------------- */
 302
 303 void
 304 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 305 {
 306     long segment;
 307     long index;
 308     HashList *hl;
 309     HashList *next;
 310
 311     /* The last bucket with something in it is table->max + table->split - 1 */
 312     segment = (table->max + table->split - 1) / HSEGSIZE;
 313     index = (table->max + table->split - 1) % HSEGSIZE;
 314
 315     while (segment >= 0) {
 316         while (index >= 0) {
 317             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 318                 next = hl->next;
 319                 if (freeDataFun != NULL)
 320                     (*freeDataFun)(hl->data);
 321                 freeHashList(hl);
 322             }
 323             index--;
 324         }
 325         free(table->dir[segment]);
 326         segment--;
 327         index = HSEGSIZE - 1;
 328     }
 329     free(table);
 330 }
 331
 332 /* -----------------------------------------------------------------------------
 333  * When we initialize a hash table, we set up the first segment as well,
 334  * initializing all of the first segment's hash buckets to NULL.
 335  * -------------------------------------------------------------------------- */
 336
 337 static HashTable *
 338 allocHashTable_(HashFunction *hash, CompareFunction *compare)
 339 {
 340     HashTable *table;
 341     HashList **hb;
 342
 343     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 344
 345     allocSegment(table, 0);
 346
 347     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 348         *hb = NULL;
 349
 350     table->split = 0;
 351     table->max = HSEGSIZE;
 352     table->mask1 = HSEGSIZE - 1;
 353     table->mask2 = 2 * HSEGSIZE - 1;
 354     table->kcount = 0;
 355     table->bcount = HSEGSIZE;
 356     table->hash = hash;
 357     table->compare = compare;
 358
 359     return table;
 360 }
 361
 362 HashTable *
 363 allocHashTable(void)
 364 {
 365     return allocHashTable_(hashWord, compareWord);
 366 }
 367
 368 HashTable *
 369 allocStrHashTable(void)
 370 {
 371     return allocHashTable_((HashFunction *)hashStr,
 372                            (CompareFunction *)compareStr);
 373 }