% (c) The University of Glasgow, 1997-2006
%
\begin{code}
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
+{-# OPTIONS -fno-warn-unused-imports #-}
+-- XXX GHC 6.9 seems to be confused by unpackCString# being used only in
+-- a RULE
-{-
-FastString: A compact, hash-consed, representation of character strings.
- Comparison is O(1), and you can get a Unique from them.
- Generated by the FSLIT macro
- Turn into SDoc with Outputable.ftext
-
-LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
- Practically no operations
- Outputing them is fast
- Generated by the SLIT macro
- Turn into SDoc with Outputable.ptext
-
-Use LitString unless you want the facilities of FastString
--}
+{-# OPTIONS_GHC -O -funbox-strict-fields #-}
+-- We always optimise this, otherwise performance of a non-optimised
+-- compiler is severely affected
+
+-- |
+-- There are two principal string types used internally by GHC:
+--
+-- 'FastString':
+-- * A compact, hash-consed, representation of character strings.
+-- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
+-- * Generated by 'fsLit'.
+-- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
+--
+-- 'LitString':
+-- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@).
+-- * Practically no operations.
+-- * Outputing them is fast.
+-- * Generated by 'sLit'.
+-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
+--
+-- Use 'LitString' unless you want the facilities of 'FastString'.
module FastString
(
-- * FastStrings
FastString(..), -- not abstract, for now.
-- ** Construction
+ fsLit,
mkFastString,
mkFastStringBytes,
mkFastStringByteList,
-- * LitStrings
LitString,
+
+ -- ** Construction
+ sLit,
#if defined(__GLASGOW_HASKELL__)
mkLitString#,
-#else
- mkLitString,
#endif
+ mkLitString,
+
+ -- ** Deconstruction
unpackLitString,
- strLength,
-
- ptrStrLength
+
+ -- ** Operations
+ lengthLS
) where
--- This #define suppresses the "import FastString" that
--- HsVersions otherwise produces
-#define COMPILING_FAST_STRING
#include "HsVersions.h"
import Encoding
import FastTypes
import FastFunctions
+import Panic
import Foreign
import Foreign.C
import GHC.Exts
+import System.IO
import System.IO.Unsafe ( unsafePerformIO )
-import Control.Monad.ST ( stToIO )
-import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
-import System.IO ( hPutBuf )
+import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef )
import Data.Maybe ( isJust )
import Data.Char ( ord )
-import GHC.ST
-import GHC.IOBase ( IO(..) )
+#if __GLASGOW_HASKELL__ >= 611
+import GHC.IO ( IO(..) )
+#else
+import GHC.IOBase ( IO(..) )
+#endif
+
import GHC.Ptr ( Ptr(..) )
+#if defined(__GLASGOW_HASKELL__)
+import GHC.Base ( unpackCString# )
+#endif
#define hASH_TBL_SIZE 4091
#define hASH_TBL_SIZE_UNBOXED 4091#
lookupTbl (FastStringTable _ arr#) (I# i#) =
IO $ \ s# -> readArray# arr# i# s#
-updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
-updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
+updTbl :: FastStringTable -> Int -> [FastString] -> IO FastStringTable
+updTbl (FastStringTable uid arr#) (I# i#) ls = do
(IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
- writeIORef fs_table_var (FastStringTable (uid+1) arr#)
+ return (FastStringTable (uid+1) arr#)
+
+-- | Helper function for various forms of fast string constructors.
+mkFSInternal :: Ptr Word8 -> Int
+ -> (Int -> IO FastString)
+ -> IO FastString
+-- The interesting part is the use of unsafePerformIO to make the
+-- argument to atomicModifyIORef pure. This is safe because any
+-- effect dependencies are enforced by data dependencies.
+-- Furthermore, every result is used and hence there should be no
+-- space leaks.
+mkFSInternal ptr len mk_it = do
+ r <- atomicModifyIORef string_table $
+ \fs_tbl@(FastStringTable uid _) ->
+ let h = hashStr ptr len
+ add_it ls = do
+ fs <- mk_it uid
+ fst' <- updTbl fs_tbl h (fs:ls)
+ fs `seq` fst' `seq` return (fst', fs)
+ in unsafePerformIO $ do
+ lookup_result <- lookupTbl fs_tbl h
+ case lookup_result of
+ [] -> add_it []
+ ls -> do
+ b <- bucket_match ls len ptr
+ case b of
+ Nothing -> add_it ls
+ Just v -> return (fs_tbl, v)
+ r `seq` return r
mkFastString# :: Addr# -> FastString
mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
where ptr = Ptr a#
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
-mkFastStringBytes ptr len = unsafePerformIO $ do
- ft@(FastStringTable uid tbl#) <- readIORef string_table
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- copyNewFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+mkFastStringBytes ptr len = inlinePerformIO $ do
+ mkFSInternal ptr len (\uid -> copyNewFastString uid ptr len)
mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
-mkZFastStringBytes ptr len = unsafePerformIO $ do
- ft@(FastStringTable uid tbl#) <- readIORef string_table
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- copyNewZFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+mkZFastStringBytes ptr len = inlinePerformIO $ do
+ mkFSInternal ptr len (\uid -> copyNewZFastString uid ptr len)
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkFastStringForeignPtr ptr fp len = do
- ft@(FastStringTable uid tbl#) <- readIORef string_table
--- _trace ("hashed: "++show (I# h)) $
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- mkNewFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ mkFSInternal ptr len (\uid -> mkNewFastString uid ptr fp len)
mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkZFastStringForeignPtr ptr fp len = do
- ft@(FastStringTable uid tbl#) <- readIORef string_table
--- _trace ("hashed: "++show (I# h)) $
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- mkNewZFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
-
+ mkFSInternal ptr len (\uid -> mkNewZFastString uid ptr fp len)
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
pokeCAString (castPtr ptr) str
mkZFastStringForeignPtr ptr buf l
+bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ l _ buf _):ls) len ptr
| len == l = do
| otherwise =
bucket_match ls len ptr
+mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
+ -> IO FastString
mkNewFastString uid ptr fp len = do
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid len n_chars fp (UTF8Encoded ref))
-mkNewZFastString uid ptr fp len = do
+mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
+ -> IO FastString
+mkNewZFastString uid _ fp len = do
return (FastString uid len len fp ZEncoded)
-
+copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
copyNewFastString uid ptr len = do
fp <- copyBytesToForeignPtr ptr len
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid len n_chars fp (UTF8Encoded ref))
+copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
copyNewZFastString uid ptr len = do
fp <- copyBytesToForeignPtr ptr len
return (FastString uid len len fp ZEncoded)
-
+copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
copyBytesToForeignPtr ptr len = do
fp <- mallocForeignPtrBytes len
withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
where
loop h n | n GHC.Exts.==# len# = I# h
| otherwise = loop h2 (n GHC.Exts.+# 1#)
- where c = ord# (indexCharOffAddr# a# n)
- h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
- hASH_TBL_SIZE#
+ where !c = ord# (indexCharOffAddr# a# n)
+ !h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
+ hASH_TBL_SIZE#
-- -----------------------------------------------------------------------------
-- Operations
lengthFS :: FastString -> Int
lengthFS f = n_chars f
--- | Returns 'True' if the 'FastString' is Z-encoded
+-- | Returns @True@ if the 'FastString' is Z-encoded
isZEncoded :: FastString -> Bool
isZEncoded fs | ZEncoded <- enc fs = True
| otherwise = False
--- | Returns 'True' if this 'FastString' is not Z-encoded but already has
+-- | Returns @True@ if this 'FastString' is not Z-encoded but already has
-- a Z-encoding cached (used in producing stats).
hasZEncoding :: FastString -> Bool
-hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
+hasZEncoding (FastString _ _ _ _ enc) =
case enc of
ZEncoded -> False
UTF8Encoded ref ->
m <- readIORef ref
return (isJust m)
--- | Returns 'True' if the 'FastString' is empty
+-- | Returns @True@ if the 'FastString' is empty
nullFS :: FastString -> Bool
nullFS f = n_bytes f == 0
--- | unpacks and decodes the FastString
+-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
unpackFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
UTF8Encoded _ -> utf8DecodeString ptr n_bytes
bytesFS :: FastString -> [Word8]
-bytesFS (FastString _ n_bytes _ buf enc) =
+bytesFS (FastString _ n_bytes _ buf _) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekArray n_bytes ptr
--- | returns a Z-encoded version of a 'FastString'. This might be the
+-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
-- function is applied to a particular 'FastString', the results are
-- memoized.
--
zEncodeFS :: FastString -> FastString
-zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
+zEncodeFS fs@(FastString _ _ _ _ enc) =
case enc of
ZEncoded -> fs
UTF8Encoded ref ->
inlinePerformIO $ do
- m <- readIORef ref
- case m of
- Just fs -> return fs
- Nothing -> do
- let efs = mkZFastString (zEncodeString (unpackFS fs))
- writeIORef ref (Just efs)
- return efs
+ r <- atomicModifyIORef ref $ \m ->
+ case m of
+ Just fs -> (m, fs)
+ Nothing ->
+ let efs = mkZFastString (zEncodeString (unpackFS fs)) in
+ efs `seq` (Just efs, efs)
+ r `seq` return r
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
headFS :: FastString -> Char
-headFS (FastString _ n_bytes _ buf enc) =
+headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
+headFS (FastString _ _ _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
ZEncoded -> do
return (fst (utf8DecodeChar ptr))
tailFS :: FastString -> FastString
+tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
tailFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
uniqueOfFS :: FastString -> FastInt
uniqueOfFS (FastString u _ _ _ _) = iUnbox u
+nilFS :: FastString
nilFS = mkFastString ""
-- -----------------------------------------------------------------------------
-- |Outputs a 'FastString' with /no decoding at all/, that is, you
-- get the actual bytes in the 'FastString' written to the 'Handle'.
+hPutFS :: Handle -> FastString -> IO ()
hPutFS handle (FastString _ len _ fp _)
| len == 0 = return ()
| otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
mkLitString# :: Addr# -> LitString
mkLitString# a# = Ptr a#
#endif
-
--can/should we use FastTypes here?
--Is this likely to be memory-preserving if only used on constant strings?
--should we inline it? If lucky, that would make a CAF that wouldn't
loop n (c:cs) = do
pokeByteOff p n (fromIntegral (ord c) :: Word8)
loop (1+n) cs
+ -- XXX GHC isn't smart enough to know that we have already covered
+ -- this case.
+ loop _ [] = panic "mkLitString"
loop 0 s
return p
)
ch -> if ch `eqFastChar` _CLIT('\0')
then [] else cBox ch : unpack (n +# _ILIT(1))
-strLength :: LitString -> Int
-strLength = ptrStrLength
+lengthLS :: LitString -> Int
+lengthLS = ptrStrLength
-- for now, use a simple String representation
--no, let's not do that right now - it's work in other places
unpackLitString :: LitString -> String
unpackLitString = id
-strLength :: LitString -> Int
-strLength = length
+lengthLS :: LitString -> Int
+lengthLS = length
#endif
pokeCAString :: Ptr CChar -> String -> IO ()
pokeCAString ptr str =
let
- go [] n = return ()
+ go [] _ = return ()
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in
go str 0
-#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 602
-peekCAStringLen = peekCStringLen
-#endif
+{-# NOINLINE sLit #-}
+sLit :: String -> LitString
+sLit x = mkLitString x
+
+{-# NOINLINE fsLit #-}
+fsLit :: String -> FastString
+fsLit x = mkFastString x
+
+{-# RULES "slit"
+ forall x . sLit (unpackCString# x) = mkLitString# x #-}
+{-# RULES "fslit"
+ forall x . fsLit (unpackCString# x) = mkFastString# x #-}
\end{code}