X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FHashTable.hs;h=0f31af52509bde78501971800c3e8b0561d0bc9c;hb=7d469fba03085e1538f7b33227a3b69fc68d037f;hp=9fbe7353c19ca0362e6bcee0b7f93fe8b1a72f19;hpb=b8ac498face4c8b16c06d30fbc86666b7dc28173;p=ghc-base.git

diff --git a/Data/HashTable.hs b/Data/HashTable.hs
index 9fbe735..0f31af5 100644
--- a/Data/HashTable.hs
+++ b/Data/HashTable.hs
@@ -1,4 +1,4 @@
-{-# OPTIONS -fno-implicit-prelude #-}
+{-# OPTIONS_GHC -fno-implicit-prelude #-}
 -----------------------------------------------------------------------------
 -- |
@@ -19,7 +19,7 @@ module Data.HashTable (
        -- * Basic hash table operations
-       HashTable, new, insert, delete, lookup,
+       HashTable, new, insert, delete, lookup, update,
        -- * Converting to and from lists
        fromList, toList,
        -- * Hash functions
@@ -41,7 +41,7 @@ import Prelude hiding ( lookup )
 import Data.Tuple    ( fst )
 import Data.Bits
 import Data.Maybe
-import Data.List     ( maximumBy, filter, length, concat )
+import Data.List     ( maximumBy, filter, length, concat, foldl )
 import Data.Int      ( Int32 )
 
 #if defined(__GLASGOW_HASKELL__)
@@ -153,8 +153,8 @@ hashInt = (`rem` prime) . fromIntegral
 -- which seems to give reasonable results.
 --
 hashString :: String -> Int32
-hashString = fromIntegral . foldr f 0
-  where f c m = ord c + (m * 128) `rem` fromIntegral prime
+hashString = fromIntegral . foldl f 0
+  where f m c = ord c + (m * 128) `rem` fromIntegral prime
 
 -- | A prime larger than the maximum hash table size
 prime :: Int32
@@ -187,7 +187,7 @@ new
 new cmp hash_fn = do
   -- make a new hash table with a single, empty, segment
-  dir     <- newIOArray (0,dIR_SIZE) undefined
+  dir     <- newIOArray (0,dIR_SIZE-1) undefined
   segment <- newIOArray (0,sEGMENT_SIZE-1) []
   myWriteArray dir 0 segment
@@ -209,7 +209,14 @@ new cmp hash_fn = do
 -- -----------------------------------------------------------------------------
 -- Inserting a key\/value pair into the hash table
 
--- | Inserts an key\/value mapping into the hash table.
+-- | Inserts a key\/value mapping into the hash table.
+--
+-- Note that 'insert' doesn't remove the old entry from the table -
+-- the behaviour is like an association list, where 'lookup' returns
+-- the most-recently-inserted mapping for a key in the table.  The
+-- reason for this is to keep 'insert' as efficient as possible.  If
+-- you need to update a mapping, then we provide 'update'.
+--
 insert :: HashTable key val -> key -> val -> IO ()
 
 insert (HashTable ref) key val = do
@@ -220,7 +227,7 @@ insert (HashTable ref) key val = do
        then expandHashTable table1
        else return table1
   writeIORef ref table2
-  (segment_index,segment_offset) <- tableLocation table key
+  (segment_index,segment_offset) <- tableLocation table2 key
   segment <- myReadArray dir segment_index
   bucket <- myReadArray segment segment_offset
   myWriteArray segment segment_offset ((key,val):bucket)
@@ -254,6 +261,7 @@ expandHashTable
   table@HT{ dir=dir,
            split=split,
            max_bucket=max,
+           bcount=bcount,
            mask2=mask2 } = do
   let oldsegment = split `shiftR` sEGMENT_SHIFT
@@ -263,20 +271,28 @@ expandHashTable
       newsegment = newbucket `shiftR` sEGMENT_SHIFT
       newindex   = newbucket .&. sEGMENT_MASK
   --
-  when (newindex == 0) $
+  if newsegment >= dIR_SIZE    -- make sure we don't overflow the table.
+       then return table
+       else do
+   --
+   when (newindex == 0) $ do
        segment <- newIOArray (0,sEGMENT_SIZE-1) []
-       myWriteArray dir newsegment segment
+       writeIOArray dir newsegment segment
+         -- doesn't happen very often, so we might as well use a safe
+         -- array index here.
   --
-  let table' =
+   let table' =
        if (split+1) < max
-           then table{ split = split+1 }
+           then table{ split = split+1,
+                       bcount = bcount+1 }
            -- we've expanded all the buckets in this table, so start from
            -- the beginning again.
            else table{ split = 0,
+                       bcount = bcount+1,
                        max_bucket = max * 2,
                        mask1 = mask2,
                        mask2 = mask2 `shiftL` 1 .|. 1 }
-  let
+   let
     split_bucket old new [] = do
        segment <- myReadArray dir oldsegment
        myWriteArray segment oldindex old
@@ -288,10 +304,10 @@ expandHashTable
                then split_bucket old ((k,v):new) xs
                else split_bucket ((k,v):old) new xs
   --
-  segment <- myReadArray dir oldsegment
-  bucket <- myReadArray segment oldindex
-  split_bucket [] [] bucket
-  return table'
+   segment <- myReadArray dir oldsegment
+   bucket <- myReadArray segment oldindex
+   split_bucket [] [] bucket
+   return table'
 
 -- -----------------------------------------------------------------------------
 -- Deleting a mapping from the hash table
@@ -308,6 +324,43 @@ delete (HashTable ref) key = do
   return ()
 
 -- -----------------------------------------------------------------------------
+-- Updating a mapping in the hash table
+
+-- | Updates an entry in the hash table, returning 'True' if there was
+-- already an entry for this key, or 'False' otherwise.  After 'update'
+-- there will always be exactly one entry for the given key in the table.
+--
+-- 'insert' is more efficient than 'update' if you don't care about
+-- multiple entries, or you know for sure that multiple entries can't
+-- occur.  However, 'update' is more efficient than 'delete' followed
+-- by 'insert'.
+update :: HashTable key val -> key -> val -> IO Bool
+
+update (HashTable ref) key val = do
+  table@HT{ kcount=k, bcount=b, dir=dir, cmp=cmp } <- readIORef ref
+  let table1 = table{ kcount = k+1 }
+  -- optimistically expand the table
+  table2 <-
+       if (k > hLOAD * b)
+          then expandHashTable table1
+          else return table1
+  writeIORef ref table2
+  (segment_index,segment_offset) <- tableLocation table2 key
+  segment <- myReadArray dir segment_index
+  bucket <- myReadArray segment segment_offset
+  let
+    (deleted,bucket') = foldr filt (0::Int32,[]) bucket
+    filt pair@(k,v) (deleted,bucket)
+       | key `cmp` k = (deleted+1, bucket)
+       | otherwise   = (deleted, pair:bucket)
+  -- in
+  myWriteArray segment segment_offset ((key,val):bucket')
+  -- update the table load, taking into account the number of
+  -- items we just deleted.
+  writeIORef ref table2{ kcount = kcount table2 - deleted }
+  return (deleted /= 0)
+
+-- -----------------------------------------------------------------------------
 -- Looking up an entry in the hash table
 
 -- | Looks up the value of a key in the hash table.
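
A quick usage sketch of the 'insert' versus 'update' semantics documented in
the patch above. This is illustrative code, not part of the diff; it assumes
only the module's exported API as shown (String keys hashed with hashString),
and the key name and Int values are invented for the example:

    import Prelude hiding (lookup)
    import Data.HashTable (new, insert, update, lookup, hashString)

    main :: IO ()
    main = do
      t <- new (==) hashString       -- equality test plus hash function
      insert t "key" (1 :: Int)
      insert t "key" 2               -- the old entry stays in the bucket
      mv <- lookup t "key"           -- Just 2: most-recently-inserted wins
      existed <- update t "key" 3    -- deletes every old entry, then inserts
      mv' <- lookup t "key"          -- Just 3, and exactly one entry remains
      print (mv, existed, mv')       -- prints (Just 2,True,Just 3)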
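
The hashString hunk swaps foldr for foldl, so characters are now mixed into
the accumulator left to right rather than starting from the end of the
string. Below is a self-contained copy for experimentation; the concrete
prime is a stand-in, since the module's own value is not shown in this diff:

    import Data.Char (ord)
    import Data.Int (Int32)

    -- Stand-in value: the module only says "a prime larger than the
    -- maximum hash table size".
    prime :: Int32
    prime = 1500007

    -- The patched hash: a left fold, so the head of the string is mixed
    -- in first and the accumulator drives the multiply-by-128.
    hashString :: String -> Int32
    hashString = fromIntegral . foldl f 0
      where f m c = ord c + (m * 128) `rem` fromIntegral prime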
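
The new dIR_SIZE guard in expandHashTable stops the directory from growing
past its fixed bound; the addressing it protects is ordinary linear hashing
over the mask1, mask2 and split fields named in the diff. A hypothetical
sketch of that bucket-addressing rule (the module's real lookup code is not
shown in this diff and will differ in detail):

    import Data.Bits ((.&.))
    import Data.Int (Int32)

    -- Buckets below the split point have already been divided this round,
    -- so keys that land there are re-addressed with the wider mask.
    bucketFor :: Int32 -> Int32 -> Int32 -> Int32 -> Int32
    bucketFor mask1 mask2 split h
      | narrow < split = h .&. mask2   -- already split: use the wider mask
      | otherwise      = narrow        -- not yet split: narrow mask suffices
      where narrow = h .&. mask1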