X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FHashTable.hs;h=cc4c32b16796bced392fcbed5c9f78f6727f0fb3;hb=253bd8d0ee679e72731308456cea91eb9600ff70;hp=66ee2b1249594f738f1cfe7bc2fe77191fcea3bf;hpb=f00fe3e6cb628f3bfac049275660f0365daa3733;p=haskell-directory.git diff --git a/Data/HashTable.hs b/Data/HashTable.hs index 66ee2b1..cc4c32b 100644 --- a/Data/HashTable.hs +++ b/Data/HashTable.hs @@ -19,7 +19,7 @@ module Data.HashTable ( -- * Basic hash table operations - HashTable, new, insert, delete, lookup, + HashTable, new, insert, delete, lookup, update, -- * Converting to and from lists fromList, toList, -- * Hash functions @@ -41,7 +41,7 @@ import Prelude hiding ( lookup ) import Data.Tuple ( fst ) import Data.Bits import Data.Maybe -import Data.List ( maximumBy, filter, length, concat ) +import Data.List ( maximumBy, filter, length, concat, foldl ) import Data.Int ( Int32 ) #if defined(__GLASGOW_HASKELL__) @@ -153,8 +153,8 @@ hashInt = (`rem` prime) . fromIntegral -- which seems to give reasonable results. -- hashString :: String -> Int32 -hashString = fromIntegral . foldr f 0 - where f c m = ord c + (m * 128) `rem` fromIntegral prime +hashString = fromIntegral . foldl f 0 + where f m c = ord c + (m * 128) `rem` fromIntegral prime -- | A prime larger than the maximum hash table size prime :: Int32 @@ -175,10 +175,14 @@ hLOAD = 4 :: Int32 -- Maximum average load of a single hash bucket -- ----------------------------------------------------------------------------- -- Creating a new hash table --- | Creates a new hash table +-- | Creates a new hash table. 
The following property should hold for the @eq@
+-- and @hash@ functions passed to 'new':
+--
+-- > eq A B => hash A == hash B
+--
 new
-  :: (key -> key -> Bool)    -- ^ An equality comparison on keys
-  -> (key -> Int32)	     -- ^ A hash function on keys
+  :: (key -> key -> Bool)    -- ^ @eq@: An equality comparison on keys
+  -> (key -> Int32)	     -- ^ @hash@: A hash function on keys
   -> IO (HashTable key val)  -- ^ Returns: an empty hash table
 
 new cmp hash_fn = do
@@ -205,7 +209,14 @@ new cmp hash_fn = do
 -- -----------------------------------------------------------------------------
 -- Inserting a key\/value pair into the hash table
 
--- | Inserts an key\/value mapping into the hash table.
+-- | Inserts a key\/value mapping into the hash table.
+--
+-- Note that 'insert' doesn't remove the old entry from the table -
+-- the behaviour is like an association list, where 'lookup' returns
+-- the most-recently-inserted mapping for a key in the table.  The
+-- reason for this is to keep 'insert' as efficient as possible.  If
+-- you need to update a mapping, then we provide 'update'.
+--
 insert :: HashTable key val -> key -> val -> IO ()
 
 insert (HashTable ref) key val = do
@@ -216,7 +227,7 @@ insert (HashTable ref) key val = do
 	then expandHashTable table1
 	else return table1
   writeIORef ref table2
-  (segment_index,segment_offset) <- tableLocation table key
+  (segment_index,segment_offset) <- tableLocation table2 key
   segment <- myReadArray dir segment_index
   bucket <- myReadArray segment segment_offset
   myWriteArray segment segment_offset ((key,val):bucket)
@@ -250,6 +261,7 @@ expandHashTable
 	  dir=dir,
 	  split=split,
 	  max_bucket=max,
+	  bcount=bcount,
 	  mask2=mask2 } = do
   let oldsegment = split `shiftR` sEGMENT_SHIFT
@@ -265,10 +277,12 @@ expandHashTable
   --
   let table' =
 	if (split+1) < max
-	then table{ split = split+1 }
+	then table{ split = split+1,
+		    bcount = bcount+1 }
 	-- we've expanded all the buckets in this table, so start from
 	-- the beginning again.
else table{ split = 0,
+		    bcount = bcount+1,
 		    max_bucket = max * 2,
 		    mask1 = mask2,
 		    mask2 = mask2 `shiftL` 1 .|. 1 }
@@ -304,6 +318,43 @@ delete (HashTable ref) key = do
   return ()
 
 -- -----------------------------------------------------------------------------
+-- Updating a mapping in the hash table
+
+-- | Updates an entry in the hash table, returning 'True' if there was
+-- already an entry for this key, or 'False' otherwise.  After 'update'
+-- there will always be exactly one entry for the given key in the table.
+--
+-- 'insert' is more efficient than 'update' if you don't care about
+-- multiple entries, or you know for sure that multiple entries can't
+-- occur.  However, 'update' is more efficient than 'delete' followed
+-- by 'insert'.
+update :: HashTable key val -> key -> val -> IO Bool
+
+update (HashTable ref) key val = do
+  table@HT{ kcount=k, bcount=b, dir=dir, cmp=cmp } <- readIORef ref
+  let table1 = table{ kcount = k+1 }
+  -- optimistically expand the table
+  table2 <-
+    if (k > hLOAD * b)
+    then expandHashTable table1
+    else return table1
+  writeIORef ref table2
+  (segment_index,segment_offset) <- tableLocation table2 key
+  segment <- myReadArray dir segment_index
+  bucket <- myReadArray segment segment_offset
+  let
+    (deleted,bucket') = foldr filt (0,[]) bucket
+    filt pair@(k,v) (deleted,bucket)
+      | key `cmp` k = (deleted+1, bucket)
+      | otherwise  = (deleted, pair:bucket)
+  -- in
+  myWriteArray segment segment_offset ((key,val):bucket')
+  -- update the table load, taking into account the number of
+  -- items we just deleted.
+  writeIORef ref table2{ kcount = kcount table2 - deleted }
+  return (deleted /= 0)
+
+-- -----------------------------------------------------------------------------
 -- Looking up an entry in the hash table
 
 -- | Looks up the value of a key in the hash table.