ghc/runtime/gum/Hash.lc

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1995
   3 %
   4 %************************************************************************
   5 %*                                                                      *
   6 \section[Hash.lc]{Dynamic Hash Tables}
   7 %*                                                                      *
   8 %************************************************************************
   9
  10 Dynamically expanding linear hash tables, as described in
  11 Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
  12 pp. 446 -- 457.
  13
  14 \begin{code}
  15 #ifdef PAR /* whole file */
  16
  17 #include "rtsdefs.h"
  18
  19 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  20                             /* Also the minimum size of a hash table */
  21 #define HDIRSIZE    1024    /* Size of the segment directory */
  22                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  23 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  24
  25 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  26                             /* Number of HashList cells to allocate in one go */
  27
  28 \end{code}
  29
  30 Fill in the ADTs.
  31
  32 \begin{code}
  33
  34 /* Linked list of (key, data) pairs for separate chaining */
  35 struct hashlist {
  36     StgWord key;
  37     void *data;
  38     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  39 };
  40
  41 struct hashtable {
  42     int split;              /* Next bucket to split when expanding */
  43     int max;                /* Max bucket of smaller table */
  44     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  45     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  46     int kcount;             /* Number of keys */
  47     int bcount;             /* Number of buckets */
  48     HashList **dir[HDIRSIZE];   /* Directory of segments */
  49 };
  50
  51 \end{code}
  52
  53 Hash first using the smaller table.  If the bucket is less than the
  54 next bucket to be split, re-hash using the larger table.
  55
  56 \begin{code}
  57
  58 static int
  59 hash(HashTable *table, W_ key)
  60 {
  61     int bucket;
  62
  63     /* Strip the boring zero bits */
  64     key /= sizeof(StgWord);
  65
  66     /* Mod the size of the hash table (a power of 2) */
  67     bucket = key & table->mask1;
  68
  69     if (bucket < table->split) {
  70         /* Mod the size of the expanded hash table (also a power of 2) */
  71         bucket = key & table->mask2;
  72     }
  73     return bucket;
  74 }
  75
  76 \end{code}
  77
  78 Allocate a new segment of the dynamically growing hash table.
  79
  80 \begin{code}
  81
  82 static void
  83 allocSegment(HashTable *table, int segment)
  84 {
  85     table->dir[segment] = (HashList **) stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment");
  86 }
  87
  88 \end{code}
  89
  90 Expand the larger hash table by one bucket, and split one bucket
  91 from the smaller table into two parts.  Only the bucket referenced
  92 by @table->split@ is affected by the expansion.
  93
  94 \begin{code}
  95
  96 static void
  97 expand(HashTable *table)
  98 {
  99     int oldsegment;
 100     int oldindex;
 101     int newbucket;
 102     int newsegment;
 103     int newindex;
 104     HashList *hl;
 105     HashList *next;
 106     HashList *old, *new;
 107
 108     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 109         /* Wow!  That's big.  Too big, so don't expand. */
 110         return;
 111
 112     /* Calculate indices of bucket to split */
 113     oldsegment = table->split / HSEGSIZE;
 114     oldindex = table->split % HSEGSIZE;
 115
 116     newbucket = table->max + table->split;
 117
 118     /* And the indices of the new bucket */
 119     newsegment = newbucket / HSEGSIZE;
 120     newindex = newbucket % HSEGSIZE;
 121
 122     if (newindex == 0)
 123         allocSegment(table, newsegment);
 124
 125     if (++table->split == table->max) {
 126         table->split = 0;
 127         table->max *= 2;
 128         table->mask1 = table->mask2;
 129         table->mask2 = table->mask2 << 1 | 1;
 130     }
 131     table->bcount++;
 132
 133     /* Split the bucket, paying no attention to the original order */
 134
 135     old = new = NULL;
 136     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 137         next = hl->next;
 138         if (hash(table, hl->key) == newbucket) {
 139             hl->next = new;
 140             new = hl;
 141         } else {
 142             hl->next = old;
 143             old = hl;
 144         }
 145     }
 146     table->dir[oldsegment][oldindex] = old;
 147     table->dir[newsegment][newindex] = new;
 148
 149     return;
 150 }
 151
 152 \end{code}
 153
 154 \begin{code}
 155
 156 void *
 157 lookupHashTable(table, key)
 158 HashTable *table;
 159 StgWord key;
 160 {
 161     int bucket;
 162     int segment;
 163     int index;
 164     HashList *hl;
 165
 166     bucket = hash(table, key);
 167     segment = bucket / HSEGSIZE;
 168     index = bucket % HSEGSIZE;
 169
 170     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 171         if (hl->key == key)
 172             return hl->data;
 173
 174     /* It's not there */
 175     return NULL;
 176 }
 177
 178 \end{code}
 179
 180 We allocate the hashlist cells in large chunks to cut down on malloc
 181 overhead.  Although we keep a free list of hashlist cells, we make
 182 no effort to actually return the space to the malloc arena.
 183
 184 \begin{code}
 185
 186 static HashList *freeList = NULL;
 187
 188 static HashList *
 189 allocHashList(STG_NO_ARGS)
 190 {
 191     HashList *hl, *p;
 192
 193     if ((hl = freeList) != NULL) {
 194         freeList = hl->next;
 195     } else {
 196         hl = (HashList *) stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 197
 198         freeList = hl + 1;
 199         for (p = freeList; p < hl + HCHUNK - 1; p++)
 200             p->next = p + 1;
 201         p->next = NULL;
 202     }
 203     return hl;
 204 }
 205
 206 static void
 207 freeHashList(HashList *hl)
 208 {
 209     hl->next = freeList;
 210     freeList = hl;
 211 }
 212
 213 \end{code}
 214
 215 \begin{code}
 216
 217 void
 218 insertHashTable(table, key, data)
 219 HashTable *table;
 220 StgWord key;
 221 void *data;
 222 {
 223     int bucket;
 224     int segment;
 225     int index;
 226     HashList *hl;
 227
 228 #if 0
 229     /* We want no duplicates */
 230     ASSERT(lookupHashTable(table, key) == NULL);
 231 #endif
 232
 233     /* When the average load gets too high, we expand the table */
 234     if (++table->kcount >= HLOAD * table->bcount)
 235         expand(table);
 236
 237     bucket = hash(table, key);
 238     segment = bucket / HSEGSIZE;
 239     index = bucket % HSEGSIZE;
 240
 241     hl = allocHashList();
 242
 243     hl->key = key;
 244     hl->data = data;
 245     hl->next = table->dir[segment][index];
 246     table->dir[segment][index] = hl;
 247
 248 }
 249
 250 \end{code}
 251
 252 \begin{code}
 253
 254 void *
 255 removeHashTable(table, key, data)
 256 HashTable *table;
 257 StgWord key;
 258 void *data;
 259 {
 260     int bucket;
 261     int segment;
 262     int index;
 263     HashList *hl;
 264     HashList *prev = NULL;
 265
 266     bucket = hash(table, key);
 267     segment = bucket / HSEGSIZE;
 268     index = bucket % HSEGSIZE;
 269
 270     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 271         if (hl->key == key && (data == NULL || hl->data == data)) {
 272             if (prev == NULL)
 273                 table->dir[segment][index] = hl->next;
 274             else
 275                 prev->next = hl->next;
 276             table->kcount--;
 277             return hl->data;
 278         }
 279         prev = hl;
 280     }
 281
 282     /* It's not there */
 283     ASSERT(data == NULL);
 284     return NULL;
 285 }
 286
 287 \end{code}
 288
 289 When we free a hash table, we are also good enough to free the
 290 data part of each (key, data) pair, as long as our caller can tell
 291 us how to do it.
 292
 293 \begin{code}
 294
 295 void
 296 freeHashTable(table, freeDataFun)
 297   HashTable *table;
 298   void (*freeDataFun) PROTO((void *));
 299 {
 300     long segment;
 301     long index;
 302     HashList *hl;
 303     HashList *next;
 304
 305     /* The last bucket with something in it is table->max + table->split - 1 */
 306     segment = (table->max + table->split - 1) / HSEGSIZE;
 307     index = (table->max + table->split - 1) % HSEGSIZE;
 308
 309     while (segment >= 0) {
 310         while (index >= 0) {
 311             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 312                 next = hl->next;
 313                 if (freeDataFun != NULL)
 314                     (*freeDataFun)(hl->data);
 315                 freeHashList(hl);
 316             }
 317             index--;
 318         }
 319         free(table->dir[segment]);
 320         segment--;
 321         index = HSEGSIZE - 1;
 322     }
 323     free(table);
 324 }
 325 \end{code}
 326
 327 When we initialize a hash table, we set up the first segment as well,
 328 initializing all of the first segment's hash buckets to NULL.
 329
 330 \begin{code}
 331
 332 HashTable *
 333 allocHashTable(STG_NO_ARGS)
 334 {
 335     HashTable *table;
 336     HashList **hb;
 337
 338     table = (HashTable *) stgMallocBytes(sizeof(HashTable),"allocHashTable");
 339
 340     allocSegment(table, 0);
 341
 342     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 343         *hb = NULL;
 344
 345     table->split = 0;
 346     table->max = HSEGSIZE;
 347     table->mask1 = HSEGSIZE - 1;
 348     table->mask2 = 2 * HSEGSIZE - 1;
 349     table->kcount = 0;
 350     table->bcount = HSEGSIZE;
 351
 352     return table;
 353 }
 354
 355 #endif /* PAR -- whole file */
 356 \end{code}