ghc/runtime/gum/Hash.lc

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1995
   3 %
   4 %************************************************************************
   5 %*                                                                      *
   6 \section[Hash.lc]{Dynamic Hash Tables}
   7 %*                                                                      *
   8 %************************************************************************
   9
  10 Dynamically expanding linear hash tables, as described in
  11 Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
  12 pp. 446 -- 457.
  13
  14 \begin{code}
  15 #ifdef PAR /* whole file */
  16
  17 #include "rtsdefs.h"
  18
  19 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  20                             /* Also the minimum size of a hash table */
  21 #define HDIRSIZE    1024    /* Size of the segment directory */
  22                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  23 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  24
  25 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  26                             /* Number of HashList cells to allocate in one go */
  27
  28 \end{code}
  29
  30 Fill in the ADTs.
  31
  32 \begin{code}
  33
  34 /* Linked list of (key, data) pairs for separate chaining */
  35 struct hashlist {
  36     StgWord key;
  37     void *data;
  38     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  39 };
  40
  41 struct hashtable {
  42     int split;              /* Next bucket to split when expanding */
  43     int max;                /* Max bucket of smaller table */
  44     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  45     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  46     int kcount;             /* Number of keys */
  47     int bcount;             /* Number of buckets */
  48     HashList **dir[HDIRSIZE];   /* Directory of segments */
  49 };
  50
  51 \end{code}
  52
  53 Hash first using the smaller table.  If the bucket is less than the
  54 next bucket to be split, re-hash using the larger table.
  55
  56 \begin{code}
  57
  58 static int
  59 hash(table, key)
  60 HashTable *table;
  61 StgWord key;
  62 {
  63     int bucket;
  64
  65     /* Strip the boring zero bits */
  66     key /= sizeof(StgWord);
  67
  68     /* Mod the size of the hash table (a power of 2) */
  69     bucket = key & table->mask1;
  70
  71     if (bucket < table->split) {
  72         /* Mod the size of the expanded hash table (also a power of 2) */
  73         bucket = key & table->mask2;
  74     }
  75     return bucket;
  76 }
  77
  78 \end{code}
  79
  80 Allocate a new segment of the dynamically growing hash table.
  81
  82 \begin{code}
  83
  84 static void
  85 allocSegment(table, segment)
  86 HashTable *table;
  87 int segment;
  88 {
  89     if ((table->dir[segment] = (HashList **) malloc(HSEGSIZE * sizeof(HashList *))) == NULL) {
  90         fflush(stdout);
  91         fprintf(stderr, "VM exhausted\n");
  92         EXIT(EXIT_FAILURE);
  93     }
  94 }
  95
  96 \end{code}
  97
  98 Expand the larger hash table by one bucket, and split one bucket
  99 from the smaller table into two parts.  Only the bucket referenced
 100 by @table->split@ is affected by the expansion.
 101
 102 \begin{code}
 103
 104 static void
 105 expand(table)
 106 HashTable *table;
 107 {
 108     int oldsegment;
 109     int oldindex;
 110     int newbucket;
 111     int newsegment;
 112     int newindex;
 113     HashList *hl;
 114     HashList *next;
 115     HashList *old, *new;
 116
 117     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 118         /* Wow!  That's big.  Too big, so don't expand. */
 119         return;
 120
 121     /* Calculate indices of bucket to split */
 122     oldsegment = table->split / HSEGSIZE;
 123     oldindex = table->split % HSEGSIZE;
 124
 125     newbucket = table->max + table->split;
 126
 127     /* And the indices of the new bucket */
 128     newsegment = newbucket / HSEGSIZE;
 129     newindex = newbucket % HSEGSIZE;
 130
 131     if (newindex == 0)
 132         allocSegment(table, newsegment);
 133
 134     if (++table->split == table->max) {
 135         table->split = 0;
 136         table->max *= 2;
 137         table->mask1 = table->mask2;
 138         table->mask2 = table->mask2 << 1 | 1;
 139     }
 140     table->bcount++;
 141
 142     /* Split the bucket, paying no attention to the original order */
 143
 144     old = new = NULL;
 145     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 146         next = hl->next;
 147         if (hash(table, hl->key) == newbucket) {
 148             hl->next = new;
 149             new = hl;
 150         } else {
 151             hl->next = old;
 152             old = hl;
 153         }
 154     }
 155     table->dir[oldsegment][oldindex] = old;
 156     table->dir[newsegment][newindex] = new;
 157
 158     return;
 159 }
 160
 161 \end{code}
 162
 163 \begin{code}
 164
 165 void *
 166 lookupHashTable(table, key)
 167 HashTable *table;
 168 StgWord key;
 169 {
 170     int bucket;
 171     int segment;
 172     int index;
 173     HashList *hl;
 174
 175     bucket = hash(table, key);
 176     segment = bucket / HSEGSIZE;
 177     index = bucket % HSEGSIZE;
 178
 179     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 180         if (hl->key == key)
 181             return hl->data;
 182
 183     /* It's not there */
 184     return NULL;
 185 }
 186
 187 \end{code}
 188
 189 We allocate the hashlist cells in large chunks to cut down on malloc
 190 overhead.  Although we keep a free list of hashlist cells, we make
 191 no effort to actually return the space to the malloc arena.
 192
 193 \begin{code}
 194
 195 static HashList *freeList = NULL;
 196
 197 static HashList *
 198 allocHashList(STG_NO_ARGS)
 199 {
 200     HashList *hl, *p;
 201
 202     if ((hl = freeList) != NULL) {
 203         freeList = hl->next;
 204     } else if ((hl = (HashList *) malloc(HCHUNK * sizeof(HashList))) != NULL) {
 205         freeList = hl + 1;
 206         for (p = freeList; p < hl + HCHUNK - 1; p++)
 207             p->next = p + 1;
 208         p->next = NULL;
 209     } else {
 210         fflush(stdout);
 211         fprintf(stderr, "VM exhausted\n");
 212         EXIT(EXIT_FAILURE);
 213     }
 214     return hl;
 215 }
 216
 217 static void
 218 freeHashList(hl)
 219 HashList *hl;
 220 {
 221     hl->next = freeList;
 222     freeList = hl;
 223 }
 224
 225 \end{code}
 226
 227 \begin{code}
 228
 229 void
 230 insertHashTable(table, key, data)
 231 HashTable *table;
 232 StgWord key;
 233 void *data;
 234 {
 235     int bucket;
 236     int segment;
 237     int index;
 238     HashList *hl;
 239
 240 #if 0
 241     /* We want no duplicates */
 242     ASSERT(lookupHashTable(table, key) == NULL);
 243 #endif
 244
 245     /* When the average load gets too high, we expand the table */
 246     if (++table->kcount >= HLOAD * table->bcount)
 247         expand(table);
 248
 249     bucket = hash(table, key);
 250     segment = bucket / HSEGSIZE;
 251     index = bucket % HSEGSIZE;
 252
 253     hl = allocHashList();
 254
 255     hl->key = key;
 256     hl->data = data;
 257     hl->next = table->dir[segment][index];
 258     table->dir[segment][index] = hl;
 259
 260 }
 261
 262 \end{code}
 263
 264 \begin{code}
 265
 266 void *
 267 removeHashTable(table, key, data)
 268 HashTable *table;
 269 StgWord key;
 270 void *data;
 271 {
 272     int bucket;
 273     int segment;
 274     int index;
 275     HashList *hl;
 276     HashList *prev = NULL;
 277
 278     bucket = hash(table, key);
 279     segment = bucket / HSEGSIZE;
 280     index = bucket % HSEGSIZE;
 281
 282     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 283         if (hl->key == key && (data == NULL || hl->data == data)) {
 284             if (prev == NULL)
 285                 table->dir[segment][index] = hl->next;
 286             else
 287                 prev->next = hl->next;
 288             table->kcount--;
 289             return hl->data;
 290         }
 291         prev = hl;
 292     }
 293
 294     /* It's not there */
 295     ASSERT(data == NULL);
 296     return NULL;
 297 }
 298
 299 \end{code}
 300
 301 When we free a hash table, we are also good enough to free the
 302 data part of each (key, data) pair, as long as our caller can tell
 303 us how to do it.
 304
 305 \begin{code}
 306
 307 void
 308 freeHashTable(table, freeDataFun)
 309 HashTable *table;
 310 void (*freeDataFun) PROTO((void *));
 311 {
 312     long segment;
 313     long index;
 314     HashList *hl;
 315     HashList *next;
 316
 317     /* The last bucket with something in it is table->max + table->split - 1 */
 318     segment = (table->max + table->split - 1) / HSEGSIZE;
 319     index = (table->max + table->split - 1) % HSEGSIZE;
 320
 321     while (segment >= 0) {
 322         while (index >= 0) {
 323             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 324                 next = hl->next;
 325                 if (freeDataFun != NULL)
 326                     (*freeDataFun)(hl->data);
 327                 freeHashList(hl);
 328             }
 329             index--;
 330         }
 331         free(table->dir[segment]);
 332         segment--;
 333         index = HSEGSIZE - 1;
 334     }
 335     free(table);
 336 }
 337 \end{code}
 338
 339 When we initialize a hash table, we set up the first segment as well,
 340 initializing all of the first segment's hash buckets to NULL.
 341
 342 \begin{code}
 343
 344 HashTable *
 345 allocHashTable(STG_NO_ARGS)
 346 {
 347     HashTable *table;
 348     HashList **hb;
 349
 350     if ((table = (HashTable *) malloc(sizeof(HashTable))) == NULL) {
 351         fflush(stdout);
 352         fprintf(stderr, "VM exhausted\n");
 353         EXIT(EXIT_FAILURE);
 354     }
 355     allocSegment(table, 0);
 356     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 357         *hb = NULL;
 358     table->split = 0;
 359     table->max = HSEGSIZE;
 360     table->mask1 = HSEGSIZE - 1;
 361     table->mask2 = 2 * HSEGSIZE - 1;
 362     table->kcount = 0;
 363     table->bcount = HSEGSIZE;
 364
 365     return table;
 366 }
 367
 368 #endif /* PAR -- whole file */
 369 \end{code}