ghc/rts/Hash.c

   1 /*-----------------------------------------------------------------------------
   2  * $Id: Hash.c,v 1.1 1999/01/27 12:11:25 simonm Exp $
   3  *
   4  * (c) The AQUA Project, Glasgow University, 1995-1998
   5  * (c) The GHC Team, 1999
   6  *
   7  * Dynamically expanding linear hash tables, as described in
   8  * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
   9  * pp. 446 -- 457.
  10  * -------------------------------------------------------------------------- */
  11
  12 #include "Rts.h"
  13 #include "Hash.h"
  14 #include "RtsUtils.h"
  15
  16 #define HSEGSIZE    1024    /* Size of a single hash table segment */
  17                             /* Also the minimum size of a hash table */
  18 #define HDIRSIZE    1024    /* Size of the segment directory */
  19                             /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
  20 #define HLOAD       5       /* Maximum average load of a single hash bucket */
  21
  22 #define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
  23                             /* Number of HashList cells to allocate in one go */
  24
  25
  26 /* Linked list of (key, data) pairs for separate chaining */
  27 struct hashlist {
  28     StgWord key;
  29     void *data;
  30     struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
  31 };
  32
  33 typedef struct hashlist HashList;
  34
  35 struct hashtable {
  36     int split;              /* Next bucket to split when expanding */
  37     int max;                /* Max bucket of smaller table */
  38     int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
  39     int mask2;              /* Mask for doing the mod of h_2 (larger table) */
  40     int kcount;             /* Number of keys */
  41     int bcount;             /* Number of buckets */
  42     HashList **dir[HDIRSIZE];   /* Directory of segments */
  43 };
  44
  45 /* -----------------------------------------------------------------------------
  46  * Hash first using the smaller table.  If the bucket is less than the
  47  * next bucket to be split, re-hash using the larger table.
  48  * -------------------------------------------------------------------------- */
  49
  50 static int
  51 hash(HashTable *table, W_ key)
  52 {
  53     int bucket;
  54
  55     /* Strip the boring zero bits */
  56     key /= sizeof(StgWord);
  57
  58     /* Mod the size of the hash table (a power of 2) */
  59     bucket = key & table->mask1;
  60
  61     if (bucket < table->split) {
  62         /* Mod the size of the expanded hash table (also a power of 2) */
  63         bucket = key & table->mask2;
  64     }
  65     return bucket;
  66 }
  67
  68 /* -----------------------------------------------------------------------------
  69  * Allocate a new segment of the dynamically growing hash table.
  70  * -------------------------------------------------------------------------- */
  71
  72 static void
  73 allocSegment(HashTable *table, int segment)
  74 {
  75     table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
  76                                          "allocSegment");
  77 }
  78
  79
  80 /* -----------------------------------------------------------------------------
  81  * Expand the larger hash table by one bucket, and split one bucket
  82  * from the smaller table into two parts.  Only the bucket referenced
  83  * by @table->split@ is affected by the expansion.
  84  * -------------------------------------------------------------------------- */
  85
  86 static void
  87 expand(HashTable *table)
  88 {
  89     int oldsegment;
  90     int oldindex;
  91     int newbucket;
  92     int newsegment;
  93     int newindex;
  94     HashList *hl;
  95     HashList *next;
  96     HashList *old, *new;
  97
  98     if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
  99         /* Wow!  That's big.  Too big, so don't expand. */
 100         return;
 101
 102     /* Calculate indices of bucket to split */
 103     oldsegment = table->split / HSEGSIZE;
 104     oldindex = table->split % HSEGSIZE;
 105
 106     newbucket = table->max + table->split;
 107
 108     /* And the indices of the new bucket */
 109     newsegment = newbucket / HSEGSIZE;
 110     newindex = newbucket % HSEGSIZE;
 111
 112     if (newindex == 0)
 113         allocSegment(table, newsegment);
 114
 115     if (++table->split == table->max) {
 116         table->split = 0;
 117         table->max *= 2;
 118         table->mask1 = table->mask2;
 119         table->mask2 = table->mask2 << 1 | 1;
 120     }
 121     table->bcount++;
 122
 123     /* Split the bucket, paying no attention to the original order */
 124
 125     old = new = NULL;
 126     for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
 127         next = hl->next;
 128         if (hash(table, hl->key) == newbucket) {
 129             hl->next = new;
 130             new = hl;
 131         } else {
 132             hl->next = old;
 133             old = hl;
 134         }
 135     }
 136     table->dir[oldsegment][oldindex] = old;
 137     table->dir[newsegment][newindex] = new;
 138
 139     return;
 140 }
 141
 142 void *
 143 lookupHashTable(HashTable *table, StgWord key)
 144 {
 145     int bucket;
 146     int segment;
 147     int index;
 148     HashList *hl;
 149
 150     bucket = hash(table, key);
 151     segment = bucket / HSEGSIZE;
 152     index = bucket % HSEGSIZE;
 153
 154     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
 155         if (hl->key == key)
 156             return hl->data;
 157
 158     /* It's not there */
 159     return NULL;
 160 }
 161
 162 /* -----------------------------------------------------------------------------
 163  * We allocate the hashlist cells in large chunks to cut down on malloc
 164  * overhead.  Although we keep a free list of hashlist cells, we make
 165  * no effort to actually return the space to the malloc arena.
 166  * -------------------------------------------------------------------------- */
 167
 168 static HashList *freeList = NULL;
 169
 170 static HashList *
 171 allocHashList(void)
 172 {
 173     HashList *hl, *p;
 174
 175     if ((hl = freeList) != NULL) {
 176         freeList = hl->next;
 177     } else {
 178         hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
 179
 180         freeList = hl + 1;
 181         for (p = freeList; p < hl + HCHUNK - 1; p++)
 182             p->next = p + 1;
 183         p->next = NULL;
 184     }
 185     return hl;
 186 }
 187
 188 static void
 189 freeHashList(HashList *hl)
 190 {
 191     hl->next = freeList;
 192     freeList = hl;
 193 }
 194
 195 void
 196 insertHashTable(HashTable *table, StgWord key, void *data)
 197 {
 198     int bucket;
 199     int segment;
 200     int index;
 201     HashList *hl;
 202
 203     /* We want no duplicates */
 204     ASSERT(lookupHashTable(table, key) == NULL);
 205
 206     /* When the average load gets too high, we expand the table */
 207     if (++table->kcount >= HLOAD * table->bcount)
 208         expand(table);
 209
 210     bucket = hash(table, key);
 211     segment = bucket / HSEGSIZE;
 212     index = bucket % HSEGSIZE;
 213
 214     hl = allocHashList();
 215
 216     hl->key = key;
 217     hl->data = data;
 218     hl->next = table->dir[segment][index];
 219     table->dir[segment][index] = hl;
 220
 221 }
 222
 223 void *
 224 removeHashTable(HashTable *table, StgWord key, void *data)
 225 {
 226     int bucket;
 227     int segment;
 228     int index;
 229     HashList *hl;
 230     HashList *prev = NULL;
 231
 232     bucket = hash(table, key);
 233     segment = bucket / HSEGSIZE;
 234     index = bucket % HSEGSIZE;
 235
 236     for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
 237         if (hl->key == key && (data == NULL || hl->data == data)) {
 238             if (prev == NULL)
 239                 table->dir[segment][index] = hl->next;
 240             else
 241                 prev->next = hl->next;
 242             table->kcount--;
 243             return hl->data;
 244         }
 245         prev = hl;
 246     }
 247
 248     /* It's not there */
 249     ASSERT(data == NULL);
 250     return NULL;
 251 }
 252
 253 /* -----------------------------------------------------------------------------
 254  * When we free a hash table, we are also good enough to free the
 255  * data part of each (key, data) pair, as long as our caller can tell
 256  * us how to do it.
 257  * -------------------------------------------------------------------------- */
 258
 259 void
 260 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
 261 {
 262     long segment;
 263     long index;
 264     HashList *hl;
 265     HashList *next;
 266
 267     /* The last bucket with something in it is table->max + table->split - 1 */
 268     segment = (table->max + table->split - 1) / HSEGSIZE;
 269     index = (table->max + table->split - 1) % HSEGSIZE;
 270
 271     while (segment >= 0) {
 272         while (index >= 0) {
 273             for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
 274                 next = hl->next;
 275                 if (freeDataFun != NULL)
 276                     (*freeDataFun)(hl->data);
 277                 freeHashList(hl);
 278             }
 279             index--;
 280         }
 281         free(table->dir[segment]);
 282         segment--;
 283         index = HSEGSIZE - 1;
 284     }
 285     free(table);
 286 }
 287
 288 /* -----------------------------------------------------------------------------
 289  * When we initialize a hash table, we set up the first segment as well,
 290  * initializing all of the first segment's hash buckets to NULL.
 291  * -------------------------------------------------------------------------- */
 292
 293 HashTable *
 294 allocHashTable(void)
 295 {
 296     HashTable *table;
 297     HashList **hb;
 298
 299     table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
 300
 301     allocSegment(table, 0);
 302
 303     for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
 304         *hb = NULL;
 305
 306     table->split = 0;
 307     table->max = HSEGSIZE;
 308     table->mask1 = HSEGSIZE - 1;
 309     table->mask2 = 2 * HSEGSIZE - 1;
 310     table->kcount = 0;
 311     table->bcount = HSEGSIZE;
 312
 313     return table;
 314 }