-{-# OPTIONS -fno-implicit-prelude #-}
+{-# OPTIONS_GHC -fno-implicit-prelude #-}
-----------------------------------------------------------------------------
-- |
module Data.HashTable (
-- * Basic hash table operations
- HashTable, new, insert, delete, lookup,
+ HashTable, new, insert, delete, lookup, update,
-- * Converting to and from lists
fromList, toList,
-- * Hash functions
-- This module is imported by Data.Dynamic, which is pretty low down in the
-- module hierarchy, so don't import "high-level" modules
+#ifdef __GLASGOW_HASKELL__
import GHC.Base
+#else
+import Prelude hiding ( lookup )
+#endif
import Data.Tuple ( fst )
import Data.Bits
import Data.Maybe
-import Data.List ( maximumBy, filter, length, concat )
+import Data.List ( maximumBy, filter, length, concat, foldl )
+import Data.Int ( Int32 )
+#if defined(__GLASGOW_HASKELL__)
import GHC.Num
-import GHC.Int ( Int32 )
import GHC.Real ( Integral(..), fromIntegral )
-import GHC.IOBase ( IO, IOArray, newIOArray, readIOArray, writeIOArray, unsafeReadIOArray, unsafeWriteIOArray,
+import GHC.IOBase ( IO, IOArray, newIOArray, readIOArray, writeIOArray,
+ unsafeReadIOArray, unsafeWriteIOArray,
IORef, newIORef, readIORef, writeIORef )
import GHC.Err ( undefined )
+#else
+import Data.Char ( ord )
+import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+# if defined(__HUGS__)
+import Hugs.IOArray ( IOArray, newIOArray, readIOArray, writeIOArray,
+ unsafeReadIOArray, unsafeWriteIOArray )
+# elif defined(__NHC__)
+import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray)
+# endif
+#endif
import Control.Monad ( when, mapM, sequence_ )
+
-----------------------------------------------------------------------
myReadArray :: IOArray Int32 a -> Int32 -> IO a
myWriteArray :: IOArray Int32 a -> Int32 -> a -> IO ()
-#ifdef DEBUG
+#if defined(DEBUG) || defined(__NHC__)
myReadArray = readIOArray
myWriteArray = writeIOArray
#else
-- which seems to give reasonable results.
--
hashString :: String -> Int32
-hashString = fromIntegral . foldr f 0
- where f c m = ord c + (m * 128) `rem` fromIntegral prime
+hashString = fromIntegral . foldl f 0
+ where f m c = ord c + (m * 128) `rem` fromIntegral prime
-- | A prime larger than the maximum hash table size
-prime = 1500007 :: Int32
+prime :: Int32
+prime = 1500007
-- -----------------------------------------------------------------------------
-- Parameters
-- -----------------------------------------------------------------------------
-- Creating a new hash table
--- | Creates a new hash table
+-- | Creates a new hash table. The following property should hold for the @eq@
+-- and @hash@ functions passed to 'new':
+--
+-- > eq A B => hash A == hash B
+--
new
- :: (key -> key -> Bool) -- ^ An equality comparison on keys
- -> (key -> Int32) -- ^ A hash function on keys
+ :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys
+ -> (key -> Int32) -- ^ @hash@: A hash function on keys
-> IO (HashTable key val) -- ^ Returns: an empty hash table
new cmp hash_fn = do
-- make a new hash table with a single, empty, segment
- dir <- newIOArray (0,dIR_SIZE) undefined
+ dir <- newIOArray (0,dIR_SIZE-1) undefined
segment <- newIOArray (0,sEGMENT_SIZE-1) []
myWriteArray dir 0 segment
-- -----------------------------------------------------------------------------
-- Inserting a key\/value pair into the hash table
--- | Inserts an key\/value mapping into the hash table.
+-- | Inserts an key\/value mapping into the hash table.
+--
+-- Note that 'insert' doesn't remove the old entry from the table -
+-- the behaviour is like an association list, where 'lookup' returns
+-- the most-recently-inserted mapping for a key in the table. The
+-- reason for this is to keep 'insert' as efficient as possible. If
+-- you need to update a mapping, then we provide 'update'.
+--
insert :: HashTable key val -> key -> val -> IO ()
insert (HashTable ref) key val = do
then expandHashTable table1
else return table1
writeIORef ref table2
- (segment_index,segment_offset) <- tableLocation table key
+ (segment_index,segment_offset) <- tableLocation table2 key
segment <- myReadArray dir segment_index
bucket <- myReadArray segment segment_offset
myWriteArray segment segment_offset ((key,val):bucket)
table@HT{ dir=dir,
split=split,
max_bucket=max,
+ bcount=bcount,
mask2=mask2 } = do
let
oldsegment = split `shiftR` sEGMENT_SHIFT
newsegment = newbucket `shiftR` sEGMENT_SHIFT
newindex = newbucket .&. sEGMENT_MASK
--
- when (newindex == 0) $
+ if newsegment >= dIR_SIZE -- make sure we don't overflow the table.
+ then return table
+ else do
+ --
+ when (newindex == 0) $
do segment <- newIOArray (0,sEGMENT_SIZE-1) []
- myWriteArray dir newsegment segment
+ writeIOArray dir newsegment segment
+ -- doesn't happen very often, so we might as well use a safe
+ -- array index here.
--
- let table' =
+ let table' =
if (split+1) < max
- then table{ split = split+1 }
+ then table{ split = split+1,
+ bcount = bcount+1 }
-- we've expanded all the buckets in this table, so start from
-- the beginning again.
else table{ split = 0,
+ bcount = bcount+1,
max_bucket = max * 2,
mask1 = mask2,
mask2 = mask2 `shiftL` 1 .|. 1 }
- let
+ let
split_bucket old new [] = do
segment <- myReadArray dir oldsegment
myWriteArray segment oldindex old
then split_bucket old ((k,v):new) xs
else split_bucket ((k,v):old) new xs
--
- segment <- myReadArray dir oldsegment
- bucket <- myReadArray segment oldindex
- split_bucket [] [] bucket
- return table'
+ segment <- myReadArray dir oldsegment
+ bucket <- myReadArray segment oldindex
+ split_bucket [] [] bucket
+ return table'
-- -----------------------------------------------------------------------------
-- Deleting a mapping from the hash table
return ()
-- -----------------------------------------------------------------------------
+-- Deleting a mapping from the hash table
+
+-- | Updates an entry in the hash table, returning 'True' if there was
+-- already an entry for this key, or 'False' otherwise. After 'update'
+-- there will always be exactly one entry for the given key in the table.
+--
+-- 'insert' is more efficient than 'update' if you don't care about
+-- multiple entries, or you know for sure that multiple entries can't
+-- occur. However, 'update' is more efficient than 'delete' followed
+-- by 'insert'.
+update :: HashTable key val -> key -> val -> IO Bool
+
+update (HashTable ref) key val = do
+ table@HT{ kcount=k, bcount=b, dir=dir, cmp=cmp } <- readIORef ref
+ let table1 = table{ kcount = k+1 }
+ -- optimistically expand the table
+ table2 <-
+ if (k > hLOAD * b)
+ then expandHashTable table1
+ else return table1
+ writeIORef ref table2
+ (segment_index,segment_offset) <- tableLocation table2 key
+ segment <- myReadArray dir segment_index
+ bucket <- myReadArray segment segment_offset
+ let
+ (deleted,bucket') = foldr filt (0::Int32,[]) bucket
+ filt pair@(k,v) (deleted,bucket)
+ | key `cmp` k = (deleted+1, bucket)
+ | otherwise = (deleted, pair:bucket)
+ -- in
+ myWriteArray segment segment_offset ((key,val):bucket')
+ -- update the table load, taking into account the number of
+ -- items we just deleted.
+ writeIORef ref table2{ kcount = kcount table2 - deleted }
+ return (deleted /= 0)
+
+-- -----------------------------------------------------------------------------
-- Looking up an entry in the hash table
-- | Looks up the value of a key in the hash table.