X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FHashTable.hs;h=e96160a59c07bd18b39c80bc271d7106d7d91b1b;hb=HEAD;hp=cb11e15418f15501c8574717e4b09bc22196913b;hpb=78a1058a24f1a18b06c50638d850f35bb3d54ea4;p=ghc-base.git diff --git a/Data/HashTable.hs b/Data/HashTable.hs index cb11e15..e96160a 100644 --- a/Data/HashTable.hs +++ b/Data/HashTable.hs @@ -1,4 +1,5 @@ -{-# OPTIONS_GHC -fno-implicit-prelude #-} +{-# LANGUAGE CPP, NoImplicitPrelude #-} +{-# OPTIONS_GHC -funbox-strict-fields -fno-warn-name-shadowing #-} ----------------------------------------------------------------------------- -- | @@ -18,16 +19,16 @@ ----------------------------------------------------------------------------- module Data.HashTable ( - -- * Basic hash table operations - HashTable, new, insert, delete, lookup, update, - -- * Converting to and from lists - fromList, toList, - -- * Hash functions - -- $hash_functions - hashInt, hashString, - prime, - -- * Diagnostics - longestChain + -- * Basic hash table operations + HashTable, new, newHint, insert, delete, lookup, update, + -- * Converting to and from lists + fromList, toList, + -- * Hash functions + -- $hash_functions + hashInt, hashString, + prime, + -- * Diagnostics + longestChain ) where -- This module is imported by Data.Dynamic, which is pretty low down in the @@ -36,36 +37,36 @@ module Data.HashTable ( #ifdef __GLASGOW_HASKELL__ import GHC.Base #else -import Prelude hiding ( lookup ) +import Prelude hiding ( lookup ) #endif -import Data.Tuple ( fst ) +import Data.Tuple ( fst ) import Data.Bits import Data.Maybe -import Data.List ( maximumBy, length, concat, foldl', partition ) -import Data.Int ( Int32 ) +import Data.List ( maximumBy, length, concat, foldl', partition ) +import Data.Int ( Int32 ) #if defined(__GLASGOW_HASKELL__) import GHC.Num -import GHC.Real ( fromIntegral ) -import GHC.Show ( Show(..) ) -import GHC.Int ( Int64 ) +import GHC.Real ( fromIntegral ) +import GHC.Show ( Show(..) ) +import GHC.Int ( Int64 ) -import GHC.IOBase ( IO, IOArray, newIOArray, - unsafeReadIOArray, unsafeWriteIOArray, unsafePerformIO, - IORef, newIORef, readIORef, writeIORef ) +import GHC.IO +import GHC.IOArray +import GHC.IORef #else -import Data.Char ( ord ) -import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) -import System.IO.Unsafe ( unsafePerformIO ) -import Data.Int ( Int64 ) +import Data.Char ( ord ) +import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) +import System.IO.Unsafe ( unsafePerformIO ) +import Data.Int ( Int64 ) # if defined(__HUGS__) -import Hugs.IOArray ( IOArray, newIOArray, - unsafeReadIOArray, unsafeWriteIOArray ) +import Hugs.IOArray ( IOArray, newIOArray, + unsafeReadIOArray, unsafeWriteIOArray ) # elif defined(__NHC__) -import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray ) +import NHC.IOExtras ( IOArray, newIOArray, readIOArray, writeIOArray ) # endif #endif -import Control.Monad ( mapM, mapM_, sequence_ ) +import Control.Monad ( mapM, mapM_, sequence_ ) ----------------------------------------------------------------------- @@ -77,41 +78,30 @@ iNSTRUMENTED = False readHTArray :: HTArray a -> Int32 -> IO a writeMutArray :: MutArray a -> Int32 -> a -> IO () -freezeArray :: MutArray a -> IO (HTArray a) -thawArray :: HTArray a -> IO (MutArray a) newMutArray :: (Int32, Int32) -> a -> IO (MutArray a) -#if defined(DEBUG) || defined(__NHC__) +newMutArray = newIOArray type MutArray a = IOArray Int32 a type HTArray a = MutArray a -newMutArray = newIOArray +#if defined(DEBUG) || defined(__NHC__) readHTArray = readIOArray writeMutArray = writeIOArray -freezeArray = return -thawArray = return #else -type MutArray a = IOArray Int32 a -type HTArray a = MutArray a -- Array Int32 a -newMutArray = newIOArray -readHTArray arr i = readMutArray arr i -- return $! (unsafeAt arr (fromIntegral i)) -readMutArray :: MutArray a -> Int32 -> IO a -readMutArray arr i = unsafeReadIOArray arr (fromIntegral i) +readHTArray arr i = unsafeReadIOArray arr (fromIntegral i) writeMutArray arr i x = unsafeWriteIOArray arr (fromIntegral i) x -freezeArray = return -- unsafeFreeze -thawArray = return -- unsafeThaw #endif data HashTable key val = HashTable { - cmp :: !(key -> key -> Bool), - hash_fn :: !(key -> Int32), + cmp :: !(key -> key -> Bool), + hash_fn :: !(key -> Int32), tab :: !(IORef (HT key val)) } -- TODO: the IORef should really be an MVar. data HT key val = HT { - kcount :: !Int32, -- Total number of keys. + kcount :: !Int32, -- Total number of keys. bmask :: !Int32, - buckets :: !(HTArray [(key,val)]) + buckets :: !(HTArray [(key,val)]) } -- ------------------------------------------------------------ @@ -278,15 +268,63 @@ hYSTERESIS = 64 -- entries to ignore in load computation -- new :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys - -> (key -> Int32) -- ^ @hash@: A hash function on keys + -> (key -> Int32) -- ^ @hash@: A hash function on keys -> IO (HashTable key val) -- ^ Returns: an empty hash table new cmpr hash = do recordNew -- make a new hash table with a single, empty, segment let mask = tABLE_MIN-1 - bkts' <- newMutArray (0,mask) [] - bkts <- freezeArray bkts' + bkts <- newMutArray (0,mask) [] + + let + kcnt = 0 + ht = HT { buckets=bkts, kcount=kcnt, bmask=mask } + + table <- newIORef ht + return (HashTable { tab=table, hash_fn=hash, cmp=cmpr }) + +{- + bitTwiddleSameAs takes as arguments positive Int32s less than maxBound/2 and + returns the smallest power of 2 that is greater than or equal to the + argument. + http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 +-} +bitTwiddleSameAs :: Int32 -> Int32 +bitTwiddleSameAs v0 = + let v1 = v0-1 + v2 = v1 .|. (v1`shiftR`1) + v3 = v2 .|. (v2`shiftR`2) + v4 = v3 .|. (v3`shiftR`4) + v5 = v4 .|. (v4`shiftR`8) + v6 = v5 .|. (v5`shiftR`16) + in v6+1 + +{- + powerOver takes as arguments Int32s and returns the smallest power of 2 + that is greater than or equal to the argument if that power of 2 is + within [tABLE_MIN,tABLE_MAX] +-} +powerOver :: Int32 -> Int32 +powerOver n = + if n <= tABLE_MIN + then tABLE_MIN + else if n >= tABLE_MAX + then tABLE_MAX + else bitTwiddleSameAs n + +-- | Creates a new hash table with the given minimum size. +newHint + :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys + -> (key -> Int32) -- ^ @hash@: A hash function on keys + -> Int -- ^ @minSize@: initial table size + -> IO (HashTable key val) -- ^ Returns: an empty hash table + +newHint cmpr hash minSize = do + recordNew + -- make a new hash table with a single, empty, segment + let mask = powerOver $ fromIntegral minSize + bkts <- newMutArray (0,mask) [] let kcnt = 0 @@ -369,9 +407,7 @@ updatingBucket canEnlarge bucketFn (bckt', inserts, result) <- return $ bucketFn bckt let k' = k + inserts table1 = table { kcount=k' } - bkts' <- thawArray bkts - writeMutArray bkts' indx bckt' - freezeArray bkts' + writeMutArray bkts indx bckt' table2 <- if canEnlarge == CanInsert && inserts > 0 then do recordIns inserts k' bckt' if tooBig k' b @@ -392,19 +428,17 @@ expandHashTable hash table@HT{ buckets=bkts, bmask=mask } = do then return table else do -- - newbkts' <- newMutArray (0,newmask) [] + newbkts <- newMutArray (0,newmask) [] let splitBucket oldindex = do bucket <- readHTArray bkts oldindex let (oldb,newb) = partition ((oldindex==). bucketIndex newmask . hash . fst) bucket - writeMutArray newbkts' oldindex oldb - writeMutArray newbkts' (oldindex + oldsize) newb + writeMutArray newbkts oldindex oldb + writeMutArray newbkts (oldindex + oldsize) newb mapM_ splitBucket [0..mask] - newbkts <- freezeArray newbkts' - return ( table{ buckets=newbkts, bmask=newmask } ) -- -----------------------------------------------------------------------------