-{-# OPTIONS_GHC -fno-implicit-prelude #-}
+{-# OPTIONS_GHC -XNoImplicitPrelude -funbox-strict-fields #-}
-----------------------------------------------------------------------------
-- |
-----------------------------------------------------------------------------
module Data.HashTable (
- -- * Basic hash table operations
- HashTable, new, insert, delete, lookup, update,
- -- * Converting to and from lists
- fromList, toList,
- -- * Hash functions
- -- $hash_functions
- hashInt, hashString,
- prime,
- -- * Diagnostics
- longestChain
+ -- * Basic hash table operations
+ HashTable, new, insert, delete, lookup, update,
+ -- * Converting to and from lists
+ fromList, toList,
+ -- * Hash functions
+ -- $hash_functions
+ hashInt, hashString,
+ prime,
+ -- * Diagnostics
+ longestChain
) where
-- This module is imported by Data.Dynamic, which is pretty low down in the
#ifdef __GLASGOW_HASKELL__
import GHC.Base
#else
-import Prelude hiding ( lookup )
+import Prelude hiding ( lookup )
#endif
-import Data.Tuple ( fst )
+import Data.Tuple ( fst )
import Data.Bits
import Data.Maybe
-import Data.List ( maximumBy, length, concat, foldl', partition )
-import Data.Int ( Int32 )
+import Data.List ( maximumBy, length, concat, foldl', partition )
+import Data.Int ( Int32 )
#if defined(__GLASGOW_HASKELL__)
import GHC.Num
-import GHC.Real ( fromIntegral )
-import GHC.Show ( Show(..) )
-import GHC.Int ( Int64 )
+import GHC.Real ( fromIntegral )
+import GHC.Show ( Show(..) )
+import GHC.Int ( Int64 )
-import GHC.IOBase ( IO, IOArray, newIOArray,
- unsafeReadIOArray, unsafeWriteIOArray, unsafePerformIO,
- IORef, newIORef, readIORef, writeIORef )
+import GHC.IOBase ( IO, IOArray, newIOArray,
+ unsafeReadIOArray, unsafeWriteIOArray, unsafePerformIO,
+ IORef, newIORef, readIORef, writeIORef )
#else
-import Data.Char ( ord )
-import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
-import System.IO.Unsafe ( unsafePerformIO )
-import Data.Int ( Int64 )
+import Data.Char ( ord )
+import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import System.IO.Unsafe ( unsafePerformIO )
+import Data.Int ( Int64 )
# if defined(__HUGS__)
-import Hugs.IOArray ( IOArray, newIOArray,
- unsafeReadIOArray, unsafeWriteIOArray )
+import Hugs.IOArray ( IOArray, newIOArray,
+ unsafeReadIOArray, unsafeWriteIOArray )
# elif defined(__NHC__)
-import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray )
+import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray )
# endif
#endif
-import Control.Monad ( mapM, mapM_, sequence_ )
+import Control.Monad ( mapM, mapM_, sequence_ )
-----------------------------------------------------------------------
#endif
data HashTable key val = HashTable {
- cmp :: !(key -> key -> Bool),
- hash_fn :: !(key -> Int32),
+ cmp :: !(key -> key -> Bool),
+ hash_fn :: !(key -> Int32),
tab :: !(IORef (HT key val))
}
-- TODO: the IORef should really be an MVar.
data HT key val
= HT {
- kcount :: !Int32, -- Total number of keys.
+ kcount :: !Int32, -- Total number of keys.
bmask :: !Int32,
- buckets :: !(HTArray [(key,val)])
+ buckets :: !(HTArray [(key,val)])
}
-- ------------------------------------------------------------
-- implemented by extracting the uppermost 32 bits of the 64-bit
-- result of multiplying by a 33-bit constant. The constant is from
-- Knuth, derived from the golden ratio:
+--
-- > golden = round ((sqrt 5 - 1) * 2^32)
+--
-- We get good key uniqueness on small inputs
-- (a problem with previous versions):
-- (length $ group $ sort $ map hashInt [-32767..65536]) == 65536 + 32768
-- golden ratio and adding. The implementation is:
--
-- > hashString = foldl' f golden
--- > where f m c = fromIntegral (fromEnum c) * magic + hashInt32 m
+-- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
-- > magic = 0xdeadbeef
--
-- Where hashInt32 works just as hashInt shown above.
-- for combining together multiple keys to form one.
--
-- Here we know that individual characters c are often small, and this
--- produces frequent collisions if we use fromEnum c alone. A
+-- produces frequent collisions if we use ord c alone. A
-- particular problem are the shorter low ASCII and ISO-8859-1
-- character strings. We pre-multiply by a magic twiddle factor to
-- obtain a good distribution. In fact, given the following test:
-- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
-- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
-- > hs = foldl' f golden
--- > f m c = fromIntegral (fromEnum c) * k + hashInt32 m
+-- > f m c = fromIntegral (ord c) * k + hashInt32 m
-- > n = 100000
--
-- We discover that testp magic = 0.
hashString :: String -> Int32
hashString = foldl' f golden
- where f m c = fromIntegral (fromEnum c) * magic + hashInt32 m
+ where f m c = fromIntegral (ord c) * magic + hashInt32 m
magic = 0xdeadbeef
-- | A prime larger than the maximum hash table size
--
new
:: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys
- -> (key -> Int32) -- ^ @hash@: A hash function on keys
+ -> (key -> Int32) -- ^ @hash@: A hash function on keys
-> IO (HashTable key val) -- ^ Returns: an empty hash table
new cmpr hash = do