X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Futils%2FFastString.lhs;h=60a519162b767acfe779fbec31a520dcff84e730;hp=4417c0850fa8d2dd756f7f4d076670800a9c023c;hb=2662dbc5b2c30fc11ccb99e7f9b2dba794d680ba;hpb=30c122df62ec75f9ed7f392f24c2925675bf1d06 diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs index 4417c08..60a5191 100644 --- a/compiler/utils/FastString.lhs +++ b/compiler/utils/FastString.lhs @@ -2,26 +2,38 @@ % (c) The University of Glasgow, 1997-2006 % \begin{code} -{- -FastString: A compact, hash-consed, representation of character strings. - Comparison is O(1), and you can get a Unique from them. - Generated by the FSLIT macro - Turn into SDoc with Outputable.ftext - -LitString: Just a wrapper for the Addr# of a C string (Ptr CChar). - Practically no operations - Outputing them is fast - Generated by the SLIT macro - Turn into SDoc with Outputable.ptext - -Use LitString unless you want the facilities of FastString --} +{-# OPTIONS -fno-warn-unused-imports #-} +-- XXX GHC 6.9 seems to be confused by unpackCString# being used only in +-- a RULE + +{-# OPTIONS_GHC -O -funbox-strict-fields #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected + +-- | +-- There are two principal string types used internally by GHC: +-- +-- 'FastString': +-- * A compact, hash-consed, representation of character strings. +-- * Comparison is O(1), and you can get a 'Unique.Unique' from them. +-- * Generated by 'fsLit'. +-- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'. +-- +-- 'LitString': +-- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@). +-- * Practically no operations. +-- * Outputing them is fast. +-- * Generated by 'sLit'. +-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext' +-- +-- Use 'LitString' unless you want the facilities of 'FastString'. module FastString ( -- * FastStrings FastString(..), -- not abstract, for now. -- ** Construction + fsLit, mkFastString, mkFastStringBytes, mkFastStringByteList, @@ -60,15 +72,19 @@ module FastString -- * LitStrings LitString, + + -- ** Construction + sLit, #if defined(__GLASGOW_HASKELL__) mkLitString#, -#else - mkLitString, #endif + mkLitString, + + -- ** Deconstruction unpackLitString, - strLength, - - ptrStrLength + + -- ** Operations + lengthLS ) where #include "HsVersions.h" @@ -83,14 +99,20 @@ import Foreign.C import GHC.Exts import System.IO import System.IO.Unsafe ( unsafePerformIO ) -import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) +import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef ) import Data.Maybe ( isJust ) -#if !defined(__GLASGOW_HASKELL__) import Data.Char ( ord ) + +#if __GLASGOW_HASKELL__ >= 611 +import GHC.IO ( IO(..) ) +#else +import GHC.IOBase ( IO(..) ) #endif -import GHC.IOBase ( IO(..) ) import GHC.Ptr ( Ptr(..) ) +#if defined(__GLASGOW_HASKELL__) +import GHC.Base ( unpackCString# ) +#endif #define hASH_TBL_SIZE 4091 #define hASH_TBL_SIZE_UNBOXED 4091# @@ -185,100 +207,61 @@ lookupTbl :: FastStringTable -> Int -> IO [FastString] lookupTbl (FastStringTable _ arr#) (I# i#) = IO $ \ s# -> readArray# arr# i# s# -updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO () -updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do +updTbl :: FastStringTable -> Int -> [FastString] -> IO FastStringTable +updTbl (FastStringTable uid arr#) (I# i#) ls = do (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) - writeIORef fs_table_var (FastStringTable (uid+1) arr#) + return (FastStringTable (uid+1) arr#) + +-- | Helper function for various forms of fast string constructors. +mkFSInternal :: Ptr Word8 -> Int + -> (Int -> IO FastString) + -> IO FastString +-- The interesting part is the use of unsafePerformIO to make the +-- argument to atomicModifyIORef pure. This is safe because any +-- effect dependencies are enforced by data dependencies. +-- Furthermore, every result is used and hence there should be no +-- space leaks. +mkFSInternal ptr len mk_it = do + r <- atomicModifyIORef string_table $ + \fs_tbl@(FastStringTable uid _) -> + let h = hashStr ptr len + add_it ls = do + fs <- mk_it uid + fst' <- updTbl fs_tbl h (fs:ls) + fs `seq` fst' `seq` return (fst', fs) + in unsafePerformIO $ do + lookup_result <- lookupTbl fs_tbl h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> return (fs_tbl, v) + r `seq` return r mkFastString# :: Addr# -> FastString mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr) where ptr = Ptr a# mkFastStringBytes :: Ptr Word8 -> Int -> FastString -mkFastStringBytes ptr len = unsafePerformIO $ do - ft@(FastStringTable uid _) <- readIORef string_table - let - h = hashStr ptr len - add_it ls = do - fs <- copyNewFastString uid ptr len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v +mkFastStringBytes ptr len = inlinePerformIO $ do + mkFSInternal ptr len (\uid -> copyNewFastString uid ptr len) mkZFastStringBytes :: Ptr Word8 -> Int -> FastString -mkZFastStringBytes ptr len = unsafePerformIO $ do - ft@(FastStringTable uid _) <- readIORef string_table - let - h = hashStr ptr len - add_it ls = do - fs <- copyNewZFastString uid ptr len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v +mkZFastStringBytes ptr len = inlinePerformIO $ do + mkFSInternal ptr len (\uid -> copyNewZFastString uid ptr len) -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference -- between this and 'mkFastStringBytes' is that we don't have to copy -- the bytes if the string is new to the table. mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString mkFastStringForeignPtr ptr fp len = do - ft@(FastStringTable uid _) <- readIORef string_table --- _trace ("hashed: "++show (I# h)) $ - let - h = hashStr ptr len - add_it ls = do - fs <- mkNewFastString uid ptr fp len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v + mkFSInternal ptr len (\uid -> mkNewFastString uid ptr fp len) mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString mkZFastStringForeignPtr ptr fp len = do - ft@(FastStringTable uid _) <- readIORef string_table --- _trace ("hashed: "++show (I# h)) $ - let - h = hashStr ptr len - add_it ls = do - fs <- mkNewZFastString uid ptr fp len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v - + mkFSInternal ptr len (\uid -> mkNewZFastString uid ptr fp len) -- | Creates a UTF-8 encoded 'FastString' from a 'String' mkFastString :: String -> FastString @@ -363,9 +346,9 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0# where loop h n | n GHC.Exts.==# len# = I# h | otherwise = loop h2 (n GHC.Exts.+# 1#) - where c = ord# (indexCharOffAddr# a# n) - h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#` - hASH_TBL_SIZE# + where !c = ord# (indexCharOffAddr# a# n) + !h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#` + hASH_TBL_SIZE# -- ----------------------------------------------------------------------------- -- Operations @@ -374,12 +357,12 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0# lengthFS :: FastString -> Int lengthFS f = n_chars f --- | Returns 'True' if the 'FastString' is Z-encoded +-- | Returns @True@ if the 'FastString' is Z-encoded isZEncoded :: FastString -> Bool isZEncoded fs | ZEncoded <- enc fs = True | otherwise = False --- | Returns 'True' if this 'FastString' is not Z-encoded but already has +-- | Returns @True@ if this 'FastString' is not Z-encoded but already has -- a Z-encoding cached (used in producing stats). hasZEncoding :: FastString -> Bool hasZEncoding (FastString _ _ _ _ enc) = @@ -390,11 +373,11 @@ hasZEncoding (FastString _ _ _ _ enc) = m <- readIORef ref return (isJust m) --- | Returns 'True' if the 'FastString' is empty +-- | Returns @True@ if the 'FastString' is empty nullFS :: FastString -> Bool nullFS f = n_bytes f == 0 --- | unpacks and decodes the FastString +-- | Unpacks and decodes the FastString unpackFS :: FastString -> String unpackFS (FastString _ n_bytes _ buf enc) = inlinePerformIO $ withForeignPtr buf $ \ptr -> @@ -407,7 +390,7 @@ bytesFS (FastString _ n_bytes _ buf _) = inlinePerformIO $ withForeignPtr buf $ \ptr -> peekArray n_bytes ptr --- | returns a Z-encoded version of a 'FastString'. This might be the +-- | Returns a Z-encoded version of a 'FastString'. This might be the -- original, if it was already Z-encoded. The first time this -- function is applied to a particular 'FastString', the results are -- memoized. @@ -418,13 +401,13 @@ zEncodeFS fs@(FastString _ _ _ _ enc) = ZEncoded -> fs UTF8Encoded ref -> inlinePerformIO $ do - m <- readIORef ref - case m of - Just fs -> return fs - Nothing -> do - let efs = mkZFastString (zEncodeString (unpackFS fs)) - writeIORef ref (Just efs) - return efs + r <- atomicModifyIORef ref $ \m -> + case m of + Just fs -> (m, fs) + Nothing -> + let efs = mkZFastString (zEncodeString (unpackFS fs)) in + efs `seq` (Just efs, efs) + r `seq` return r appendFS :: FastString -> FastString -> FastString appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) @@ -500,7 +483,7 @@ type LitString = Ptr Word8 #if defined(__GLASGOW_HASKELL__) mkLitString# :: Addr# -> LitString mkLitString# a# = Ptr a# -#else +#endif --can/should we use FastTypes here? --Is this likely to be memory-preserving if only used on constant strings? --should we inline it? If lucky, that would make a CAF that wouldn't @@ -519,10 +502,12 @@ mkLitString s = loop n (c:cs) = do pokeByteOff p n (fromIntegral (ord c) :: Word8) loop (1+n) cs + -- XXX GHC isn't smart enough to know that we have already covered + -- this case. + loop _ [] = panic "mkLitString" loop 0 s return p ) -#endif unpackLitString :: LitString -> String unpackLitString p_ = case pUnbox p_ of @@ -532,8 +517,8 @@ unpackLitString p_ = case pUnbox p_ of ch -> if ch `eqFastChar` _CLIT('\0') then [] else cBox ch : unpack (n +# _ILIT(1)) -strLength :: LitString -> Int -strLength = ptrStrLength +lengthLS :: LitString -> Int +lengthLS = ptrStrLength -- for now, use a simple String representation --no, let's not do that right now - it's work in other places @@ -546,8 +531,8 @@ mkLitString = id unpackLitString :: LitString -> String unpackLitString = id -strLength :: LitString -> Int -strLength = length +lengthLS :: LitString -> Int +lengthLS = length #endif @@ -568,7 +553,16 @@ pokeCAString ptr str = in go str 0 -#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 602 -peekCAStringLen = peekCStringLen -#endif +{-# NOINLINE sLit #-} +sLit :: String -> LitString +sLit x = mkLitString x + +{-# NOINLINE fsLit #-} +fsLit :: String -> FastString +fsLit x = mkFastString x + +{-# RULES "slit" + forall x . sLit (unpackCString# x) = mkLitString# x #-} +{-# RULES "fslit" + forall x . fsLit (unpackCString# x) = mkFastString# x #-} \end{code}