%
% (c) The AQUA Project, Glasgow University, 1995
%
%************************************************************************
%*                                                                      *
\section[Hash.lc]{Dynamic Hash Tables}
%*									*
%************************************************************************

Dynamically expanding linear hash tables, as described in
Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
pp. 446 -- 457.

\begin{code}
#ifdef PAR /* whole file */

#include "rtsdefs.h"

#define HSEGSIZE    1024    /* Size of a single hash table segment */
			    /* Also the minimum size of a hash table */
#define HDIRSIZE    1024    /* Size of the segment directory */
			    /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
#define HLOAD	    5	    /* Maximum average load of a single hash bucket */

#define HCHUNK	    (1024 * sizeof(W_) / sizeof(HashList))
			    /* Number of HashList cells to allocate in one go */

\end{code}

Fill in the ADTs.

\begin{code}

/* Linked list of (key, data) pairs for separate chaining */
struct hashlist {
    StgWord key;
    void *data;
    struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
};

struct hashtable {
    int split;		    /* Next bucket to split when expanding */
    int max;		    /* Max bucket of smaller table */
    int mask1;		    /* Mask for doing the mod of h_1 (smaller table) */
    int mask2;		    /* Mask for doing the mod of h_2 (larger table) */
    int kcount;		    /* Number of keys */
    int bcount;		    /* Number of buckets */
    HashList **dir[HDIRSIZE];	/* Directory of segments */
};

\end{code}

Hash first using the smaller table.  If the bucket is less than the
next bucket to be split, re-hash using the larger table.

\begin{code}

static int
hash(HashTable *table, W_ key)
{
    int bucket;

    /* Strip the boring zero bits */
    key /= sizeof(StgWord);

    /* Mod the size of the hash table (a power of 2) */
    bucket = key & table->mask1;

    if (bucket < table->split) {
	/* Mod the size of the expanded hash table (also a power of 2) */
	bucket = key & table->mask2;
    }
    return bucket;
}

\end{code}

Allocate a new segment of the dynamically growing hash table.

\begin{code}

static void
allocSegment(HashTable *table, int segment)
{
    table->dir[segment] = (HashList **) stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment");
}

\end{code}

Expand the larger hash table by one bucket, and split one bucket
from the smaller table into two parts.  Only the bucket referenced
by @table->split@ is affected by the expansion.

\begin{code}

static void
expand(HashTable *table)
{
    int oldsegment;
    int oldindex;
    int newbucket;
    int newsegment;
    int newindex;
    HashList *hl;
    HashList *next;
    HashList *old, *new;

    if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
	/* Wow!  That's big.  Too big, so don't expand. */
	return;

    /* Calculate indices of bucket to split */
    oldsegment = table->split / HSEGSIZE;
    oldindex = table->split % HSEGSIZE;

    newbucket = table->max + table->split;

    /* And the indices of the new bucket */
    newsegment = newbucket / HSEGSIZE;
    newindex = newbucket % HSEGSIZE;

    if (newindex == 0)
	allocSegment(table, newsegment);

    if (++table->split == table->max) {
	table->split = 0;
	table->max *= 2;
	table->mask1 = table->mask2;
	table->mask2 = table->mask2 << 1 | 1;
    }
    table->bcount++;

    /* Split the bucket, paying no attention to the original order */

    old = new = NULL;
    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
	next = hl->next;
	if (hash(table, hl->key) == newbucket) {
	    hl->next = new;
	    new = hl;
	} else {
	    hl->next = old;
	    old = hl;
	}
    }
    table->dir[oldsegment][oldindex] = old;
    table->dir[newsegment][newindex] = new;

    return;
}

\end{code}

\begin{code}

void *
lookupHashTable(table, key)
HashTable *table;
StgWord key;
{
    int bucket;
    int segment;
    int index;
    HashList *hl;

    bucket = hash(table, key);
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
	if (hl->key == key)
	    return hl->data;

    /* It's not there */
    return NULL;
}

\end{code}

We allocate the hashlist cells in large chunks to cut down on malloc
overhead.  Although we keep a free list of hashlist cells, we make
no effort to actually return the space to the malloc arena.

\begin{code}

static HashList *freeList = NULL;

static HashList *
allocHashList(STG_NO_ARGS)
{
    HashList *hl, *p;

    if ((hl = freeList) != NULL) {
	freeList = hl->next;
    } else {
	hl = (HashList *) stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");

	freeList = hl + 1;
	for (p = freeList; p < hl + HCHUNK - 1; p++)
	    p->next = p + 1;
	p->next = NULL;
    }
    return hl;
}

static void
freeHashList(HashList *hl)
{
    hl->next = freeList;
    freeList = hl;
}

\end{code}

\begin{code}

void
insertHashTable(table, key, data)
HashTable *table;
StgWord key;
void *data;
{
    int bucket;
    int segment;
    int index;
    HashList *hl;

#if 0
    /* We want no duplicates */
    ASSERT(lookupHashTable(table, key) == NULL);
#endif
    
    /* When the average load gets too high, we expand the table */
    if (++table->kcount >= HLOAD * table->bcount)
	expand(table);

    bucket = hash(table, key);
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    hl = allocHashList();

    hl->key = key;
    hl->data = data;
    hl->next = table->dir[segment][index];
    table->dir[segment][index] = hl;

}

\end{code}

\begin{code}

void *
removeHashTable(table, key, data)
HashTable *table;
StgWord key;
void *data;
{
    int bucket;
    int segment;
    int index;
    HashList *hl;
    HashList *prev = NULL;

    bucket = hash(table, key);
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
	if (hl->key == key && (data == NULL || hl->data == data)) {
	    if (prev == NULL)
		table->dir[segment][index] = hl->next;
	    else
		prev->next = hl->next;
	    table->kcount--;
	    return hl->data;
	}
	prev = hl;
    }

    /* It's not there */
    ASSERT(data == NULL);
    return NULL;
}

\end{code}

When we free a hash table, we are also good enough to free the
data part of each (key, data) pair, as long as our caller can tell
us how to do it.

\begin{code}
 
void
freeHashTable(table, freeDataFun)
  HashTable *table;
  void (*freeDataFun) PROTO((void *));
{
    long segment;
    long index;
    HashList *hl;
    HashList *next;

    /* The last bucket with something in it is table->max + table->split - 1 */
    segment = (table->max + table->split - 1) / HSEGSIZE;
    index = (table->max + table->split - 1) % HSEGSIZE;

    while (segment >= 0) {
	while (index >= 0) {
	    for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
		next = hl->next;
		if (freeDataFun != NULL)
		    (*freeDataFun)(hl->data);
		freeHashList(hl);
	    }
	    index--;
	}
	free(table->dir[segment]);
	segment--;
	index = HSEGSIZE - 1;
    }
    free(table);
}
\end{code}

When we initialize a hash table, we set up the first segment as well,
initializing all of the first segment's hash buckets to NULL.

\begin{code}

HashTable *
allocHashTable(STG_NO_ARGS)
{
    HashTable *table;
    HashList **hb;

    table = (HashTable *) stgMallocBytes(sizeof(HashTable),"allocHashTable");

    allocSegment(table, 0);

    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
	*hb = NULL;

    table->split = 0;
    table->max = HSEGSIZE;
    table->mask1 = HSEGSIZE - 1;
    table->mask2 = 2 * HSEGSIZE - 1;
    table->kcount = 0;
    table->bcount = HSEGSIZE;

    return table;
}

#endif /* PAR -- whole file */
\end{code}