ghc/rts/parallel/0Hash.c

   1 /*-----------------------------------------------------------------------------
   2  * $Id: 0Hash.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $
   3  *
   4  * (c) The AQUA Project, Glasgow University, 1995-1998
   5  * (c) The GHC Team, 1999
   6  *
   7  * Dynamically expanding linear hash tables, as described in
   8  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   9  * pp. 446 -- 457.
  10  * -------------------------------------------------------------------------- */
  11
  12 /*
  13    Replaced with ghc/rts/Hash.c in the new RTS
  14 */
  15
  16 #if 0
  17
  18 #include "Rts.h"
  19 #include "Hash.h"
  20 #include "RtsUtils.h"
  21
  22 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  23                             /* Also the minimum size of a hash table */
  24 #define HDIRSIZE    1024    /* Size of the segment directory */
  25                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  26 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  27
  28 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  29                             /* Number of HashList cells to allocate in one go */
  30
  31
  32 /* Linked list of (key, data) pairs for separate chaining */
  33 struct hashlist {
  34     StgWord key;
  35     void *data;
  36     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  37 };
  38
  39 typedef struct hashlist HashList;
  40
  41 struct hashtable {
  42     int split;              /* Next bucket to split when expanding */
  43     int max;                /* Max bucket of smaller table */
  44     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  45     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  46     int kcount;             /* Number of keys */
  47     int bcount;             /* Number of buckets */
  48     HashList **dir[HDIRSIZE];   /* Directory of segments */
  49 };
  50
  51 /* -----------------------------------------------------------------------------
  52  * Hash first using the smaller table.  If the bucket is less than the
  53  * next bucket to be split, re-hash using the larger table.
  54  * -------------------------------------------------------------------------- */
  55
  56 static int
  57 hash(HashTable *table, W_ key)
  58 {
  59     int bucket;
  60
  61     /* Strip the boring zero bits */
  62     key /= sizeof(StgWord);
  63
  64     /* Mod the size of the hash table (a power of 2) */
  65     bucket = key & table->mask1;
  66
  67     if (bucket < table->split) {
  68         /* Mod the size of the expanded hash table (also a power of 2) */
  69         bucket = key & table->mask2;
  70     }
  71     return bucket;
  72 }
  73
  74 /* -----------------------------------------------------------------------------
  75  * Allocate a new segment of the dynamically growing hash table.
  76  * -------------------------------------------------------------------------- */
  77
  78 static void
  79 allocSegment(HashTable *table, int segment)
  80 {
  81     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
  82                                          "allocSegment");
  83 }
  84
  85
  86 /* -----------------------------------------------------------------------------
  87  * Expand the larger hash table by one bucket, and split one bucket
  88  * from the smaller table into two parts.  Only the bucket referenced
  89  * by @table->split@ is affected by the expansion.
  90  * -------------------------------------------------------------------------- */
  91
  92 static void
  93 expand(HashTable *table)
  94 {
  95     int oldsegment;
  96     int oldindex;
  97     int newbucket;
  98     int newsegment;
  99     int newindex;
 100     HashList *hl;
 101     HashList *next;
 102     HashList *old, *new;
 103
 104     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
 105         /* Wow!  That's big.  Too big, so don't expand. */
 106         return;
 107
 108     /* Calculate indices of bucket to split */
 109     oldsegment = table->split / HSEGSIZE;
 110     oldindex = table->split % HSEGSIZE;
 111
 112     newbucket = table->max + table->split;
 113
 114     /* And the indices of the new bucket */
 115     newsegment = newbucket / HSEGSIZE;
 116     newindex = newbucket % HSEGSIZE;
 117
 118     if (newindex == 0)
 119         allocSegment(table, newsegment);
 120
 121     if (++table->split == table->max) {
 122         table->split = 0;
 123         table->max *= 2;
 124         table->mask1 = table->mask2;
 125         table->mask2 = table->mask2 << 1 | 1;
 126     }
 127     table->bcount++;
 128
 129     /* Split the bucket, paying no attention to the original order */
 130
 131     old = new = NULL;
 132     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 133         next = hl->next;
 134         if (hash(table, hl->key) == newbucket) {
 135             hl->next = new;
 136             new = hl;
 137         } else {
 138             hl->next = old;
 139             old = hl;
 140         }
 141     }
 142     table->dir[oldsegment][oldindex] = old;
 143     table->dir[newsegment][newindex] = new;
 144
 145     return;
 146 }
 147
 148 void *
 149 lookupHashTable(HashTable *table, StgWord key)
 150 {
 151     int bucket;
 152     int segment;
 153     int index;
 154     HashList *hl;
 155
 156     bucket = hash(table, key);
 157     segment = bucket / HSEGSIZE;
 158     index = bucket % HSEGSIZE;
 159
 160     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 161         if (hl->key == key)
 162             return hl->data;
 163
 164     /* It's not there */
 165     return NULL;
 166 }
 167
 168 /* -----------------------------------------------------------------------------
 169  * We allocate the hashlist cells in large chunks to cut down on malloc
 170  * overhead.  Although we keep a free list of hashlist cells, we make
 171  * no effort to actually return the space to the malloc arena.
 172  * -------------------------------------------------------------------------- */
 173
 174 static HashList *freeList = NULL;
 175
 176 static HashList *
 177 allocHashList(void)
 178 {
 179     HashList *hl, *p;
 180
 181     if ((hl = freeList) != NULL) {
 182         freeList = hl->next;
 183     } else {
 184         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 185
 186         freeList = hl + 1;
 187         for (p = freeList; p < hl + HCHUNK - 1; p++)
 188             p->next = p + 1;
 189         p->next = NULL;
 190     }
 191     return hl;
 192 }
 193
 194 static void
 195 freeHashList(HashList *hl)
 196 {
 197     hl->next = freeList;
 198     freeList = hl;
 199 }
 200
 201 void
 202 insertHashTable(HashTable *table, StgWord key, void *data)
 203 {
 204     int bucket;
 205     int segment;
 206     int index;
 207     HashList *hl;
 208
 209     /* We want no duplicates */
 210     ASSERT(lookupHashTable(table, key) == NULL);
 211
 212     /* When the average load gets too high, we expand the table */
 213     if (++table->kcount >= HLOAD * table->bcount)
 214         expand(table);
 215
 216     bucket = hash(table, key);
 217     segment = bucket / HSEGSIZE;
 218     index = bucket % HSEGSIZE;
 219
 220     hl = allocHashList();
 221
 222     hl->key = key;
 223     hl->data = data;
 224     hl->next = table->dir[segment][index];
 225     table->dir[segment][index] = hl;
 226
 227 }
 228
 229 void *
 230 removeHashTable(HashTable *table, StgWord key, void *data)
 231 {
 232     int bucket;
 233     int segment;
 234     int index;
 235     HashList *hl;
 236     HashList *prev = NULL;
 237
 238     bucket = hash(table, key);
 239     segment = bucket / HSEGSIZE;
 240     index = bucket % HSEGSIZE;
 241
 242     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 243         if (hl->key == key && (data == NULL || hl->data == data)) {
 244             if (prev == NULL)
 245                 table->dir[segment][index] = hl->next;
 246             else
 247                 prev->next = hl->next;
 248             table->kcount--;
 249             return hl->data;
 250         }
 251         prev = hl;
 252     }
 253
 254     /* It's not there */
 255     ASSERT(data == NULL);
 256     return NULL;
 257 }
 258
 259 /* -----------------------------------------------------------------------------
 260  * When we free a hash table, we are also good enough to free the
 261  * data part of each (key, data) pair, as long as our caller can tell
 262  * us how to do it.
 263  * -------------------------------------------------------------------------- */
 264
 265 void
 266 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 267 {
 268     long segment;
 269     long index;
 270     HashList *hl;
 271     HashList *next;
 272
 273     /* The last bucket with something in it is table->max + table->split - 1 */
 274     segment = (table->max + table->split - 1) / HSEGSIZE;
 275     index = (table->max + table->split - 1) % HSEGSIZE;
 276
 277     while (segment >= 0) {
 278         while (index >= 0) {
 279             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 280                 next = hl->next;
 281                 if (freeDataFun != NULL)
 282                     (*freeDataFun)(hl->data);
 283                 freeHashList(hl);
 284             }
 285             index--;
 286         }
 287         free(table->dir[segment]);
 288         segment--;
 289         index = HSEGSIZE - 1;
 290     }
 291     free(table);
 292 }
 293
 294 /* -----------------------------------------------------------------------------
 295  * When we initialize a hash table, we set up the first segment as well,
 296  * initializing all of the first segment's hash buckets to NULL.
 297  * -------------------------------------------------------------------------- */
 298
 299 HashTable *
 300 allocHashTable(void)
 301 {
 302     HashTable *table;
 303     HashList **hb;
 304
 305     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 306
 307     allocSegment(table, 0);
 308
 309     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 310         *hb = NULL;
 311
 312     table->split = 0;
 313     table->max = HSEGSIZE;
 314     table->mask1 = HSEGSIZE - 1;
 315     table->mask2 = 2 * HSEGSIZE - 1;
 316     table->kcount = 0;
 317     table->bcount = HSEGSIZE;
 318
 319     return table;
 320 }
 321 #endif