ghc/rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  * $Id: Hash.c,v 1.7 2001/11/26 13:06:49 simonmar Exp $
   3  *
   4  * (c) The AQUA Project, Glasgow University, 1995-1998
   5  * (c) The GHC Team, 1999
   6  *
   7  * Dynamically expanding linear hash tables, as described in
   8  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   9  * pp. 446 -- 457.
  10  * -------------------------------------------------------------------------- */
  11
  12 #include "PosixSource.h"
  13 #include "Rts.h"
  14 #include "Hash.h"
  15 #include "RtsUtils.h"
  16 #include "Arena.h"
  17
  18 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  19                             /* Also the minimum size of a hash table */
  20 #define HDIRSIZE    1024    /* Size of the segment directory */
  21                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  22 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  23
  24 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  25                             /* Number of HashList cells to allocate in one go */
  26
  27
  28 /* Linked list of (key, data) pairs for separate chaining */
  29 struct hashlist {
  30     StgWord key;
  31     void *data;
  32     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  33 };
  34
  35 typedef struct hashlist HashList;
  36
  37 typedef int HashFunction(HashTable *table, StgWord key);
  38 typedef int CompareFunction(StgWord key1, StgWord key2);
  39
  40 struct hashtable {
  41     int split;              /* Next bucket to split when expanding */
  42     int max;                /* Max bucket of smaller table */
  43     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  44     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  45     int kcount;             /* Number of keys */
  46     int bcount;             /* Number of buckets */
  47     HashList **dir[HDIRSIZE];   /* Directory of segments */
  48     HashFunction *hash;         /* hash function */
  49     CompareFunction *compare;   /* key comparison function */
  50     Arena *arena;
  51 };
  52
  53 /* -----------------------------------------------------------------------------
  54  * Hash first using the smaller table.  If the bucket is less than the
  55  * next bucket to be split, re-hash using the larger table.
  56  * -------------------------------------------------------------------------- */
  57
  58 static int
  59 hashWord(HashTable *table, StgWord key)
  60 {
  61     int bucket;
  62
  63     /* Strip the boring zero bits */
  64     key /= sizeof(StgWord);
  65
  66     /* Mod the size of the hash table (a power of 2) */
  67     bucket = key & table->mask1;
  68
  69     if (bucket < table->split) {
  70         /* Mod the size of the expanded hash table (also a power of 2) */
  71         bucket = key & table->mask2;
  72     }
  73     return bucket;
  74 }
  75
  76 static int
  77 hashStr(HashTable *table, char *key)
  78 {
  79     int h, bucket;
  80     char *s;
  81
  82     s = key;
  83     for (h=0; *s; s++) {
  84         h *= 128;
  85         h += *s;
  86         h = h % 1048583;        /* some random large prime */
  87     }
  88
  89     /* Mod the size of the hash table (a power of 2) */
  90     bucket = h & table->mask1;
  91
  92     if (bucket < table->split) {
  93         /* Mod the size of the expanded hash table (also a power of 2) */
  94         bucket = h & table->mask2;
  95     }
  96
  97     return bucket;
  98 }
  99
 100 static int
 101 compareWord(StgWord key1, StgWord key2)
 102 {
 103     return (key1 == key2);
 104 }
 105
 106 static int
 107 compareStr(StgWord key1, StgWord key2)
 108 {
 109     return (strcmp((char *)key1, (char *)key2) == 0);
 110 }
 111
 112
 113 /* -----------------------------------------------------------------------------
 114  * Allocate a new segment of the dynamically growing hash table.
 115  * -------------------------------------------------------------------------- */
 116
 117 static void
 118 allocSegment(HashTable *table, int segment)
 119 {
 120     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
 121                                          "allocSegment");
 122 }
 123
 124 /* -----------------------------------------------------------------------------
 125  * Expand the larger hash table by one bucket, and split one bucket
 126  * from the smaller table into two parts.  Only the bucket referenced
 127  * by @table->split@ is affected by the expansion.
 128  * -------------------------------------------------------------------------- */
 129
 130 static void
 131 expand(HashTable *table)
 132 {
 133     int oldsegment;
 134     int oldindex;
 135     int newbucket;
 136     int newsegment;
 137     int newindex;
 138     HashList *hl;
 139     HashList *next;
 140     HashList *old, *new;
 141
 142     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 143         /* Wow!  That's big.  Too big, so don't expand. */
 144         return;
 145
 146     /* Calculate indices of bucket to split */
 147     oldsegment = table->split / HSEGSIZE;
 148     oldindex = table->split % HSEGSIZE;
 149
 150     newbucket = table->max + table->split;
 151
 152     /* And the indices of the new bucket */
 153     newsegment = newbucket / HSEGSIZE;
 154     newindex = newbucket % HSEGSIZE;
 155
 156     if (newindex == 0)
 157         allocSegment(table, newsegment);
 158
 159     if (++table->split == table->max) {
 160         table->split = 0;
 161         table->max *= 2;
 162         table->mask1 = table->mask2;
 163         table->mask2 = table->mask2 << 1 | 1;
 164     }
 165     table->bcount++;
 166
 167     /* Split the bucket, paying no attention to the original order */
 168
 169     old = new = NULL;
 170     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 171         next = hl->next;
 172         if (table->hash(table, hl->key) == newbucket) {
 173             hl->next = new;
 174             new = hl;
 175         } else {
 176             hl->next = old;
 177             old = hl;
 178         }
 179     }
 180     table->dir[oldsegment][oldindex] = old;
 181     table->dir[newsegment][newindex] = new;
 182
 183     return;
 184 }
 185
 186 void *
 187 lookupHashTable(HashTable *table, StgWord key)
 188 {
 189     int bucket;
 190     int segment;
 191     int index;
 192     HashList *hl;
 193
 194     bucket = table->hash(table, key);
 195     segment = bucket / HSEGSIZE;
 196     index = bucket % HSEGSIZE;
 197
 198     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 199         if (table->compare(hl->key, key))
 200             return hl->data;
 201
 202     /* It's not there */
 203     return NULL;
 204 }
 205
 206 void
 207 insertHashTable(HashTable *table, StgWord key, void *data)
 208 {
 209     int bucket;
 210     int segment;
 211     int index;
 212     HashList *hl;
 213
 214     /* We want no duplicates */
 215     ASSERT(lookupHashTable(table, key) == NULL);
 216
 217     /* When the average load gets too high, we expand the table */
 218     if (++table->kcount >= HLOAD * table->bcount)
 219         expand(table);
 220
 221     bucket = table->hash(table, key);
 222     segment = bucket / HSEGSIZE;
 223     index = bucket % HSEGSIZE;
 224
 225     hl = arenaAlloc(table->arena, sizeof(struct hashlist));
 226
 227     hl->key = key;
 228     hl->data = data;
 229     hl->next = table->dir[segment][index];
 230     table->dir[segment][index] = hl;
 231
 232 }
 233
 234 void *
 235 removeHashTable(HashTable *table, StgWord key, void *data)
 236 {
 237     int bucket;
 238     int segment;
 239     int index;
 240     HashList *hl;
 241     HashList *prev = NULL;
 242
 243     bucket = table->hash(table, key);
 244     segment = bucket / HSEGSIZE;
 245     index = bucket % HSEGSIZE;
 246
 247     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 248         if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
 249             if (prev == NULL)
 250                 table->dir[segment][index] = hl->next;
 251             else
 252                 prev->next = hl->next;
 253             table->kcount--;
 254             return hl->data;
 255         }
 256         prev = hl;
 257     }
 258
 259     /* It's not there */
 260     ASSERT(data == NULL);
 261     return NULL;
 262 }
 263
 264 /* -----------------------------------------------------------------------------
 265  * When we free a hash table, we are also good enough to free the
 266  * data part of each (key, data) pair, as long as our caller can tell
 267  * us how to do it.
 268  * -------------------------------------------------------------------------- */
 269
 270 void
 271 freeHashTable( HashTable *table, void (*freeDataFun)(void *) )
 272 {
 273     long segment;
 274     long index;
 275     HashList *hl;
 276     HashList *next;
 277
 278     /* The last bucket with something in it is table->max + table->split - 1 */
 279     segment = (table->max + table->split - 1) / HSEGSIZE;
 280     index = (table->max + table->split - 1) % HSEGSIZE;
 281
 282     while (segment >= 0) {
 283         if (freeDataFun != NULL) {
 284             while (index >= 0) {
 285                 for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 286                     next = hl->next;
 287                     (*freeDataFun)(hl->data);
 288                 }
 289                 index--;
 290             }
 291         }
 292         free(table->dir[segment]);
 293         segment--;
 294         index = HSEGSIZE - 1;
 295     }
 296
 297     arenaFree(table->arena);
 298     free(table);
 299 }
 300
 301 /* -----------------------------------------------------------------------------
 302  * When we initialize a hash table, we set up the first segment as well,
 303  * initializing all of the first segment's hash buckets to NULL.
 304  * -------------------------------------------------------------------------- */
 305
 306 static HashTable *
 307 allocHashTable_(HashFunction *hash, CompareFunction *compare)
 308 {
 309     HashTable *table;
 310     HashList **hb;
 311
 312     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 313
 314     table->arena = newArena();
 315
 316     allocSegment(table, 0);
 317
 318     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 319         *hb = NULL;
 320
 321     table->split = 0;
 322     table->max = HSEGSIZE;
 323     table->mask1 = HSEGSIZE - 1;
 324     table->mask2 = 2 * HSEGSIZE - 1;
 325     table->kcount = 0;
 326     table->bcount = HSEGSIZE;
 327     table->hash = hash;
 328     table->compare = compare;
 329
 330     return table;
 331 }
 332
 333 HashTable *
 334 allocHashTable(void)
 335 {
 336     return allocHashTable_(hashWord, compareWord);
 337 }
 338
 339 HashTable *
 340 allocStrHashTable(void)
 341 {
 342     return allocHashTable_((HashFunction *)hashStr,
 343                            (CompareFunction *)compareStr);
 344 }