X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FHashTable.hs;h=cc4c32b16796bced392fcbed5c9f78f6727f0fb3;hb=253bd8d0ee679e72731308456cea91eb9600ff70;hp=66ee2b1249594f738f1cfe7bc2fe77191fcea3bf;hpb=f00fe3e6cb628f3bfac049275660f0365daa3733;p=haskell-directory.git diff --git a/Data/HashTable.hs b/Data/HashTable.hs index 66ee2b1..cc4c32b 100644 --- a/Data/HashTable.hs +++ b/Data/HashTable.hs @@ -19,7 +19,7 @@ module Data.HashTable ( -- * Basic hash table operations - HashTable, new, insert, delete, lookup, + HashTable, new, insert, delete, lookup, update, -- * Converting to and from lists fromList, toList, -- * Hash functions @@ -41,7 +41,7 @@ import Prelude hiding ( lookup ) import Data.Tuple ( fst ) import Data.Bits import Data.Maybe -import Data.List ( maximumBy, filter, length, concat ) +import Data.List ( maximumBy, filter, length, concat, foldl ) import Data.Int ( Int32 ) #if defined(__GLASGOW_HASKELL__) @@ -153,8 +153,8 @@ hashInt = (`rem` prime) . fromIntegral -- which seems to give reasonable results. -- hashString :: String -> Int32 -hashString = fromIntegral . foldr f 0 - where f c m = ord c + (m * 128) `rem` fromIntegral prime +hashString = fromIntegral . foldl f 0 + where f m c = ord c + (m * 128) `rem` fromIntegral prime -- | A prime larger than the maximum hash table size prime :: Int32 @@ -175,10 +175,14 @@ hLOAD = 4 :: Int32 -- Maximum average load of a single hash bucket -- ----------------------------------------------------------------------------- -- Creating a new hash table --- | Creates a new hash table +-- | Creates a new hash table. 
The following property should hold for the @eq@
+-- and @hash@ functions passed to 'new':
+--
+-- > eq A B => hash A == hash B
+--
 new
-  :: (key -> key -> Bool)    -- ^ An equality comparison on keys
-  -> (key -> Int32)	     -- ^ A hash function on keys
+  :: (key -> key -> Bool)    -- ^ @eq@: An equality comparison on keys
+  -> (key -> Int32)	     -- ^ @hash@: A hash function on keys
   -> IO (HashTable key val)  -- ^ Returns: an empty hash table
 
 new cmp hash_fn = do
@@ -205,7 +209,14 @@ new cmp hash_fn = do
 -- -----------------------------------------------------------------------------
 -- Inserting a key\/value pair into the hash table
 
--- | Inserts an key\/value mapping into the hash table.
+-- | Inserts a key\/value mapping into the hash table.
+--
+-- Note that 'insert' doesn't remove the old entry from the table -
+-- the behaviour is like an association list, where 'lookup' returns
+-- the most-recently-inserted mapping for a key in the table.  The
+-- reason for this is to keep 'insert' as efficient as possible.  If
+-- you need to update a mapping, then we provide 'update'.
+--
 insert :: HashTable key val -> key -> val -> IO ()
 
 insert (HashTable ref) key val = do
@@ -216,7 +227,7 @@ insert (HashTable ref) key val = do
 	then expandHashTable table1
 	else return table1
   writeIORef ref table2
-  (segment_index,segment_offset) <- tableLocation table key
+  (segment_index,segment_offset) <- tableLocation table2 key
   segment <- myReadArray dir segment_index
   bucket <- myReadArray segment segment_offset
   myWriteArray segment segment_offset ((key,val):bucket)
@@ -250,6 +261,7 @@ expandHashTable
 	  dir=dir,
 	  split=split,
 	  max_bucket=max,
+	  bcount=bcount,
 	  mask2=mask2 } = do
   let oldsegment = split `shiftR` sEGMENT_SHIFT
@@ -265,10 +277,12 @@ expandHashTable
   --
   let table' =
 	if (split+1) < max
-	then table{ split = split+1 }
+	then table{ split = split+1,
+		    bcount = bcount+1 }
 	-- we've expanded all the buckets in this table, so start from
 	-- the beginning again.
else table{ split = 0,
+		    bcount = bcount+1,
 		    max_bucket = max * 2,
 		    mask1 = mask2,
 		    mask2 = mask2 `shiftL` 1 .|. 1 }
@@ -304,6 +318,43 @@ delete (HashTable ref) key = do
   return ()
 
 -- -----------------------------------------------------------------------------
+-- Updating a mapping in the hash table
+
+-- | Updates an entry in the hash table, returning 'True' if there was
+-- already an entry for this key, or 'False' otherwise.  After 'update'
+-- there will always be exactly one entry for the given key in the table.
+--
+-- 'insert' is more efficient than 'update' if you don't care about
+-- multiple entries, or you know for sure that multiple entries can't
+-- occur.  However, 'update' is more efficient than 'delete' followed
+-- by 'insert'.
+update :: HashTable key val -> key -> val -> IO Bool
+
+update (HashTable ref) key val = do
+  table@HT{ kcount=k, bcount=b, dir=dir, cmp=cmp } <- readIORef ref
+  let table1 = table{ kcount = k+1 }
+  -- optimistically expand the table
+  table2 <-
+    if (k > hLOAD * b)
+    then expandHashTable table1
+    else return table1
+  writeIORef ref table2
+  (segment_index,segment_offset) <- tableLocation table2 key
+  segment <- myReadArray dir segment_index
+  bucket <- myReadArray segment segment_offset
+  let
+    (deleted,bucket') = foldr filt (0,[]) bucket
+    filt pair@(k,v) (deleted,bucket)
+      | key `cmp` k = (deleted+1, bucket)
+      | otherwise  = (deleted, pair:bucket)
+  -- in
+  myWriteArray segment segment_offset ((key,val):bucket')
+  -- update the table load, taking into account the number of
+  -- items we just deleted.
+  writeIORef ref table2{ kcount = kcount table2 - deleted }
+  return (deleted /= 0)
+
+-- -----------------------------------------------------------------------------
 -- Looking up an entry in the hash table
 
 -- | Looks up the value of a key in the hash table.