-{-# OPTIONS_GHC -fno-implicit-prelude #-}
+{-# LANGUAGE CPP, NoImplicitPrelude #-}
+{-# OPTIONS_GHC -funbox-strict-fields -fno-warn-name-shadowing #-}
-----------------------------------------------------------------------------
-- |
-----------------------------------------------------------------------------
module Data.HashTable (
- -- * Basic hash table operations
- HashTable, new, insert, delete, lookup, update,
- -- * Converting to and from lists
- fromList, toList,
- -- * Hash functions
- -- $hash_functions
- hashInt, hashString,
- prime,
- -- * Diagnostics
- longestChain
+ -- * Basic hash table operations
+ HashTable, new, newHint, insert, delete, lookup, update,
+ -- * Converting to and from lists
+ fromList, toList,
+ -- * Hash functions
+ -- $hash_functions
+ hashInt, hashString,
+ prime,
+ -- * Diagnostics
+ longestChain
) where
-- This module is imported by Data.Dynamic, which is pretty low down in the
#ifdef __GLASGOW_HASKELL__
import GHC.Base
#else
-import Prelude hiding ( lookup )
+import Prelude hiding ( lookup )
#endif
-import Data.Tuple ( fst )
+import Data.Tuple ( fst )
import Data.Bits
import Data.Maybe
-import Data.List ( maximumBy, length, concat, foldl', partition )
-import Data.Int ( Int32 )
+import Data.List ( maximumBy, length, concat, foldl', partition )
+import Data.Int ( Int32 )
#if defined(__GLASGOW_HASKELL__)
import GHC.Num
-import GHC.Real ( fromIntegral )
-import GHC.Show ( Show(..) )
-import GHC.Int ( Int64 )
+import GHC.Real ( fromIntegral )
+import GHC.Show ( Show(..) )
+import GHC.Int ( Int64 )
-import GHC.IOBase ( IO, IOArray, newIOArray,
- unsafeReadIOArray, unsafeWriteIOArray, unsafePerformIO,
- IORef, newIORef, readIORef, writeIORef )
+import GHC.IO
+import GHC.IOArray
+import GHC.IORef
#else
-import Data.Char ( ord )
-import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
-import System.IO.Unsafe ( unsafePerformIO )
-import Data.Int ( Int64 )
+import Data.Char ( ord )
+import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import System.IO.Unsafe ( unsafePerformIO )
+import Data.Int ( Int64 )
# if defined(__HUGS__)
-import Hugs.IOArray ( IOArray, newIOArray,
- unsafeReadIOArray, unsafeWriteIOArray )
+import Hugs.IOArray ( IOArray, newIOArray,
+ unsafeReadIOArray, unsafeWriteIOArray )
# elif defined(__NHC__)
-import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray )
+import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray )
# endif
#endif
-import Control.Monad ( mapM, mapM_, sequence_ )
+import Control.Monad ( mapM, mapM_, sequence_ )
-----------------------------------------------------------------------
readHTArray :: HTArray a -> Int32 -> IO a
writeMutArray :: MutArray a -> Int32 -> a -> IO ()
-freezeArray :: MutArray a -> IO (HTArray a)
-thawArray :: HTArray a -> IO (MutArray a)
newMutArray :: (Int32, Int32) -> a -> IO (MutArray a)
-#if defined(DEBUG) || defined(__NHC__)
+newMutArray = newIOArray
type MutArray a = IOArray Int32 a
type HTArray a = MutArray a
-newMutArray = newIOArray
+#if defined(DEBUG) || defined(__NHC__)
readHTArray = readIOArray
writeMutArray = writeIOArray
-freezeArray = return
-thawArray = return
#else
-type MutArray a = IOArray Int32 a
-type HTArray a = MutArray a -- Array Int32 a
-newMutArray = newIOArray
-readHTArray arr i = readMutArray arr i -- return $! (unsafeAt arr (fromIntegral i))
-readMutArray :: MutArray a -> Int32 -> IO a
-readMutArray arr i = unsafeReadIOArray arr (fromIntegral i)
+readHTArray arr i = unsafeReadIOArray arr (fromIntegral i)
writeMutArray arr i x = unsafeWriteIOArray arr (fromIntegral i) x
-freezeArray = return -- unsafeFreeze
-thawArray = return -- unsafeThaw
#endif
data HashTable key val = HashTable {
- cmp :: !(key -> key -> Bool),
- hash_fn :: !(key -> Int32),
+ cmp :: !(key -> key -> Bool),
+ hash_fn :: !(key -> Int32),
tab :: !(IORef (HT key val))
}
-- TODO: the IORef should really be an MVar.
data HT key val
= HT {
- kcount :: !Int32, -- Total number of keys.
+ kcount :: !Int32, -- Total number of keys.
bmask :: !Int32,
- buckets :: !(HTArray [(key,val)])
+ buckets :: !(HTArray [(key,val)])
}
-- ------------------------------------------------------------
-- implemented by extracting the uppermost 32 bits of the 64-bit
-- result of multiplying by a 33-bit constant. The constant is from
-- Knuth, derived from the golden ratio:
+--
-- > golden = round ((sqrt 5 - 1) * 2^32)
+--
-- We get good key uniqueness on small inputs
-- (a problem with previous versions):
-- (length $ group $ sort $ map hashInt [-32767..65536]) == 65536 + 32768
-- golden ratio and adding. The implementation is:
--
-- > hashString = foldl' f golden
--- > where f m c = fromIntegral (fromEnum c) * magic + hashInt32 m
+-- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
-- > magic = 0xdeadbeef
--
-- Where hashInt32 works just as hashInt shown above.
-- for combining together multiple keys to form one.
--
-- Here we know that individual characters c are often small, and this
--- produces frequent collisions if we use fromEnum c alone. A
+-- produces frequent collisions if we use ord c alone. A
-- particular problem are the shorter low ASCII and ISO-8859-1
-- character strings. We pre-multiply by a magic twiddle factor to
-- obtain a good distribution. In fact, given the following test:
-- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
-- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
-- > hs = foldl' f golden
--- > f m c = fromIntegral (fromEnum c) * k + hashInt32 m
+-- > f m c = fromIntegral (ord c) * k + hashInt32 m
-- > n = 100000
--
-- We discover that testp magic = 0.
hashString :: String -> Int32
hashString = foldl' f golden
- where f m c = fromIntegral (fromEnum c) * magic + hashInt32 m
+ where f m c = fromIntegral (ord c) * magic + hashInt32 m
magic = 0xdeadbeef
-- | A prime larger than the maximum hash table size
--
new
:: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys
- -> (key -> Int32) -- ^ @hash@: A hash function on keys
+ -> (key -> Int32) -- ^ @hash@: A hash function on keys
-> IO (HashTable key val) -- ^ Returns: an empty hash table
new cmpr hash = do
recordNew
-- make a new hash table with a single, empty, segment
let mask = tABLE_MIN-1
- bkts' <- newMutArray (0,mask) []
- bkts <- freezeArray bkts'
+ bkts <- newMutArray (0,mask) []
+
+ let
+ kcnt = 0
+ ht = HT { buckets=bkts, kcount=kcnt, bmask=mask }
+
+ table <- newIORef ht
+ return (HashTable { tab=table, hash_fn=hash, cmp=cmpr })
+
+{-
+ bitTwiddleSameAs takes as arguments positive Int32s less than maxBound/2 and
+ returns the smallest power of 2 that is greater than or equal to the
+ argument.
+ http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+-}
+bitTwiddleSameAs :: Int32 -> Int32
+bitTwiddleSameAs v0 =
+ let v1 = v0-1
+ v2 = v1 .|. (v1`shiftR`1)
+ v3 = v2 .|. (v2`shiftR`2)
+ v4 = v3 .|. (v3`shiftR`4)
+ v5 = v4 .|. (v4`shiftR`8)
+ v6 = v5 .|. (v5`shiftR`16)
+ in v6+1
+
+{-
+ powerOver takes as arguments Int32s and returns the smallest power of 2
+ that is greater than or equal to the argument if that power of 2 is
+ within [tABLE_MIN,tABLE_MAX]
+-}
+powerOver :: Int32 -> Int32
+powerOver n =
+ if n <= tABLE_MIN
+ then tABLE_MIN
+ else if n >= tABLE_MAX
+ then tABLE_MAX
+ else bitTwiddleSameAs n
+
+-- | Creates a new hash table with the given minimum size.
+newHint
+ :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys
+ -> (key -> Int32) -- ^ @hash@: A hash function on keys
+ -> Int -- ^ @minSize@: initial table size
+ -> IO (HashTable key val) -- ^ Returns: an empty hash table
+
+newHint cmpr hash minSize = do
+ recordNew
+ -- make a new hash table with a single, empty, segment
+ let mask = powerOver $ fromIntegral minSize
+ bkts <- newMutArray (0,mask) []
let
kcnt = 0
(bckt', inserts, result) <- return $ bucketFn bckt
let k' = k + inserts
table1 = table { kcount=k' }
- bkts' <- thawArray bkts
- writeMutArray bkts' indx bckt'
- freezeArray bkts'
+ writeMutArray bkts indx bckt'
table2 <- if canEnlarge == CanInsert && inserts > 0 then do
recordIns inserts k' bckt'
if tooBig k' b
then return table
else do
--
- newbkts' <- newMutArray (0,newmask) []
+ newbkts <- newMutArray (0,newmask) []
let
splitBucket oldindex = do
bucket <- readHTArray bkts oldindex
let (oldb,newb) =
partition ((oldindex==). bucketIndex newmask . hash . fst) bucket
- writeMutArray newbkts' oldindex oldb
- writeMutArray newbkts' (oldindex + oldsize) newb
+ writeMutArray newbkts oldindex oldb
+ writeMutArray newbkts (oldindex + oldsize) newb
mapM_ splitBucket [0..mask]
- newbkts <- freezeArray newbkts'
-
return ( table{ buckets=newbkts, bmask=newmask } )
-- -----------------------------------------------------------------------------