% (c) The University of Glasgow, 1997-2006
%
\begin{code}
-{-
-FastString: A compact, hash-consed, representation of character strings.
- Comparison is O(1), and you can get a Unique from them.
- Generated by fsLit
- Turn into SDoc with Outputable.ftext
-
-LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
- Practically no operations
- Outputing them is fast
- Generated by sLit
- Turn into SDoc with Outputable.ptext
-
-Use LitString unless you want the facilities of FastString
--}
+{-# OPTIONS -fno-warn-unused-imports #-}
+-- XXX GHC 6.9 seems to be confused by unpackCString# being used only in
+-- a RULE
+
+{-# OPTIONS_GHC -O -funbox-strict-fields #-}
+-- We always optimise this, otherwise performance of a non-optimised
+-- compiler is severely affected
+
+-- |
+-- There are two principal string types used internally by GHC:
+--
+-- 'FastString':
+-- * A compact, hash-consed, representation of character strings.
+-- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
+-- * Generated by 'fsLit'.
+-- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
+--
+-- 'LitString':
+-- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@).
+-- * Practically no operations.
+-- * Outputing them is fast.
+-- * Generated by 'sLit'.
+-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
+--
+-- Use 'LitString' unless you want the facilities of 'FastString'.
module FastString
(
-- * FastStrings
FastString(..), -- not abstract, for now.
-- ** Construction
+ fsLit,
mkFastString,
mkFastStringBytes,
mkFastStringByteList,
-- * LitStrings
LitString,
+
+ -- ** Construction
+ sLit,
#if defined(__GLASGOW_HASKELL__)
mkLitString#,
#endif
mkLitString,
+
+ -- ** Deconstruction
unpackLitString,
- strLength,
-
- ptrStrLength,
-
- sLit,
- fsLit,
+
+ -- ** Operations
+ lengthLS
) where
#include "HsVersions.h"
import GHC.Exts
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
-import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef )
import Data.Maybe ( isJust )
import Data.Char ( ord )
-import GHC.IOBase ( IO(..) )
+#if __GLASGOW_HASKELL__ >= 611
+import GHC.IO ( IO(..) )
+#else
+import GHC.IOBase ( IO(..) )
+#endif
+
import GHC.Ptr ( Ptr(..) )
#if defined(__GLASGOW_HASKELL__)
import GHC.Base ( unpackCString# )
lookupTbl (FastStringTable _ arr#) (I# i#) =
IO $ \ s# -> readArray# arr# i# s#
-updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
-updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
+updTbl :: FastStringTable -> Int -> [FastString] -> IO FastStringTable
+updTbl (FastStringTable uid arr#) (I# i#) ls = do
(IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
- writeIORef fs_table_var (FastStringTable (uid+1) arr#)
+ return (FastStringTable (uid+1) arr#)
+
+-- | Helper function for various forms of fast string constructors.
+mkFSInternal :: Ptr Word8 -> Int
+ -> (Int -> IO FastString)
+ -> IO FastString
+-- The interesting part is the use of unsafePerformIO to make the
+-- argument to atomicModifyIORef pure. This is safe because any
+-- effect dependencies are enforced by data dependencies.
+-- Furthermore, every result is used and hence there should be no
+-- space leaks.
+mkFSInternal ptr len mk_it = do
+ r <- atomicModifyIORef string_table $
+ \fs_tbl@(FastStringTable uid _) ->
+ let h = hashStr ptr len
+ add_it ls = do
+ fs <- mk_it uid
+ fst' <- updTbl fs_tbl h (fs:ls)
+ fs `seq` fst' `seq` return (fst', fs)
+ in unsafePerformIO $ do
+ lookup_result <- lookupTbl fs_tbl h
+ case lookup_result of
+ [] -> add_it []
+ ls -> do
+ b <- bucket_match ls len ptr
+ case b of
+ Nothing -> add_it ls
+ Just v -> return (fs_tbl, v)
+ r `seq` return r
mkFastString# :: Addr# -> FastString
mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
where ptr = Ptr a#
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
-mkFastStringBytes ptr len = unsafePerformIO $ do
- ft@(FastStringTable uid _) <- readIORef string_table
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- copyNewFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+mkFastStringBytes ptr len = inlinePerformIO $ do
+ mkFSInternal ptr len (\uid -> copyNewFastString uid ptr len)
mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
-mkZFastStringBytes ptr len = unsafePerformIO $ do
- ft@(FastStringTable uid _) <- readIORef string_table
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- copyNewZFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+mkZFastStringBytes ptr len = inlinePerformIO $ do
+ mkFSInternal ptr len (\uid -> copyNewZFastString uid ptr len)
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkFastStringForeignPtr ptr fp len = do
- ft@(FastStringTable uid _) <- readIORef string_table
--- _trace ("hashed: "++show (I# h)) $
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- mkNewFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ mkFSInternal ptr len (\uid -> mkNewFastString uid ptr fp len)
mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkZFastStringForeignPtr ptr fp len = do
- ft@(FastStringTable uid _) <- readIORef string_table
--- _trace ("hashed: "++show (I# h)) $
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- mkNewZFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
-
+ mkFSInternal ptr len (\uid -> mkNewZFastString uid ptr fp len)
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
where
loop h n | n GHC.Exts.==# len# = I# h
| otherwise = loop h2 (n GHC.Exts.+# 1#)
- where c = ord# (indexCharOffAddr# a# n)
- h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
- hASH_TBL_SIZE#
+ where !c = ord# (indexCharOffAddr# a# n)
+ !h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
+ hASH_TBL_SIZE#
-- -----------------------------------------------------------------------------
-- Operations
lengthFS :: FastString -> Int
lengthFS f = n_chars f
--- | Returns 'True' if the 'FastString' is Z-encoded
+-- | Returns @True@ if the 'FastString' is Z-encoded
isZEncoded :: FastString -> Bool
isZEncoded fs | ZEncoded <- enc fs = True
| otherwise = False
--- | Returns 'True' if this 'FastString' is not Z-encoded but already has
+-- | Returns @True@ if this 'FastString' is not Z-encoded but already has
-- a Z-encoding cached (used in producing stats).
hasZEncoding :: FastString -> Bool
hasZEncoding (FastString _ _ _ _ enc) =
m <- readIORef ref
return (isJust m)
--- | Returns 'True' if the 'FastString' is empty
+-- | Returns @True@ if the 'FastString' is empty
nullFS :: FastString -> Bool
nullFS f = n_bytes f == 0
--- | unpacks and decodes the FastString
+-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
unpackFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekArray n_bytes ptr
--- | returns a Z-encoded version of a 'FastString'. This might be the
+-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
-- function is applied to a particular 'FastString', the results are
-- memoized.
ZEncoded -> fs
UTF8Encoded ref ->
inlinePerformIO $ do
- m <- readIORef ref
- case m of
- Just fs -> return fs
- Nothing -> do
- let efs = mkZFastString (zEncodeString (unpackFS fs))
- writeIORef ref (Just efs)
- return efs
+ r <- atomicModifyIORef ref $ \m ->
+ case m of
+ Just fs -> (m, fs)
+ Nothing ->
+ let efs = mkZFastString (zEncodeString (unpackFS fs)) in
+ efs `seq` (Just efs, efs)
+ r `seq` return r
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
ch -> if ch `eqFastChar` _CLIT('\0')
then [] else cBox ch : unpack (n +# _ILIT(1))
-strLength :: LitString -> Int
-strLength = ptrStrLength
+lengthLS :: LitString -> Int
+lengthLS = ptrStrLength
-- for now, use a simple String representation
--no, let's not do that right now - it's work in other places
unpackLitString :: LitString -> String
unpackLitString = id
-strLength :: LitString -> Int
-strLength = length
+lengthLS :: LitString -> Int
+lengthLS = length
#endif
in
go str 0
-#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 602
-peekCAStringLen = peekCStringLen
-#endif
-
{-# NOINLINE sLit #-}
sLit :: String -> LitString
sLit x = mkLitString x