-- for details
{-
-FastString: A compact, hash-consed, representation of character strings.
- Comparison is O(1), and you can get a Unique from them.
- Generated by the FSLIT macro
- Turn into SDoc with Outputable.ftext
+FastString: A compact, hash-consed, representation of character strings.
+ Comparison is O(1), and you can get a Unique from them.
+ Generated by the FSLIT macro
+ Turn into SDoc with Outputable.ftext
-LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
- Practically no operations
- Outputing them is fast
- Generated by the SLIT macro
- Turn into SDoc with Outputable.ptext
+LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
+ Practically no operations
+ Outputing them is fast
+ Generated by the SLIT macro
+ Turn into SDoc with Outputable.ptext
Use LitString unless you want the facilities of FastString
-}
module FastString
(
- -- * FastStrings
- FastString(..), -- not abstract, for now.
+ -- * FastStrings
+ FastString(..), -- not abstract, for now.
- -- ** Construction
+ -- ** Construction
mkFastString,
- mkFastStringBytes,
+ mkFastStringBytes,
mkFastStringByteList,
- mkFastStringForeignPtr,
+ mkFastStringForeignPtr,
#if defined(__GLASGOW_HASKELL__)
- mkFastString#,
+ mkFastString#,
#endif
- mkZFastString,
- mkZFastStringBytes,
+ mkZFastString,
+ mkZFastStringBytes,
- -- ** Deconstruction
- unpackFS, -- :: FastString -> String
- bytesFS, -- :: FastString -> [Word8]
+ -- ** Deconstruction
+ unpackFS, -- :: FastString -> String
+ bytesFS, -- :: FastString -> [Word8]
- -- ** Encoding
- isZEncoded,
- zEncodeFS,
+ -- ** Encoding
+ isZEncoded,
+ zEncodeFS,
- -- ** Operations
+ -- ** Operations
uniqueOfFS,
- lengthFS,
- nullFS,
- appendFS,
+ lengthFS,
+ nullFS,
+ appendFS,
headFS,
tailFS,
- concatFS,
+ concatFS,
consFS,
- nilFS,
+ nilFS,
- -- ** Outputing
+ -- ** Outputing
hPutFS,
- -- ** Internal
- getFastStringTable,
- hasZEncoding,
+ -- ** Internal
+ getFastStringTable,
+ hasZEncoding,
- -- * LitStrings
- LitString,
+ -- * LitStrings
+ LitString,
#if defined(__GLASGOW_HASKELL__)
- mkLitString#,
+ mkLitString#,
#else
- mkLitString,
+ mkLitString,
#endif
- unpackLitString,
- strLength,
+ unpackLitString,
+ strLength,
- ptrStrLength
+ ptrStrLength
) where
-- This #define suppresses the "import FastString" that
import Foreign.C
import GHC.Exts
import System.IO.Unsafe ( unsafePerformIO )
-import Control.Monad.ST ( stToIO )
-import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
-import System.IO ( hPutBuf )
-import Data.Maybe ( isJust )
-import Data.Char ( ord )
+import Control.Monad.ST ( stToIO )
+import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import System.IO ( hPutBuf )
+import Data.Maybe ( isJust )
+import Data.Char ( ord )
import GHC.ST
-import GHC.IOBase ( IO(..) )
-import GHC.Ptr ( Ptr(..) )
+import GHC.IOBase ( IO(..) )
+import GHC.Ptr ( Ptr(..) )
#define hASH_TBL_SIZE 4091
#define hASH_TBL_SIZE_UNBOXED 4091#
-}
data FastString = FastString {
- uniq :: {-# UNPACK #-} !Int, -- unique id
- n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
- n_chars :: {-# UNPACK #-} !Int, -- number of chars
+ uniq :: {-# UNPACK #-} !Int, -- unique id
+ n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
+ n_chars :: {-# UNPACK #-} !Int, -- number of chars
buf :: {-# UNPACK #-} !(ForeignPtr Word8),
enc :: FSEncoding
}
data FSEncoding
+ -- including strings that don't need any encoding
= ZEncoded
- -- including strings that don't need any encoding
+ -- A UTF-8 string with a memoized Z-encoding
| UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
- -- A UTF-8 string with a memoized Z-encoding
instance Eq FastString where
f1 == f2 = uniq f1 == uniq f2
instance Ord FastString where
- -- Compares lexicographically, not by unique
+ -- Compares lexicographically, not by unique
a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
- a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
+ a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
- a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
- max x y | x >= y = x
- | otherwise = y
- min x y | x <= y = x
- | otherwise = y
+ a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
+ max x y | x >= y = x
+ | otherwise = y
+ min x y | x <= y = x
+ | otherwise = y
compare a b = cmpFS a b
instance Show FastString where
memcmp p1 p2 l
#ifndef __HADDOCK__
-foreign import ccall unsafe "ghc_memcmp"
+foreign import ccall unsafe "ghc_memcmp"
memcmp :: Ptr a -> Ptr b -> Int -> IO Int
#endif
@FastString@ if there was a hit.
-}
-data FastStringTable =
+data FastStringTable =
FastStringTable
{-# UNPACK #-} !Int
(MutableArray# RealWorld [FastString])
{-# NOINLINE string_table #-}
string_table :: IORef FastStringTable
-string_table =
+string_table =
unsafePerformIO $ do
tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
(# s2#, arr# #) ->
let
h = hashStr ptr len
add_it ls = do
- fs <- copyNewFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
+ fs <- copyNewFastString uid ptr len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
--
lookup_result <- lookupTbl ft h
case lookup_result of
ls -> do
b <- bucket_match ls len ptr
case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
mkZFastStringBytes ptr len = unsafePerformIO $ do
let
h = hashStr ptr len
add_it ls = do
- fs <- copyNewZFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
+ fs <- copyNewZFastString uid ptr len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
--
lookup_result <- lookupTbl ft h
case lookup_result of
ls -> do
b <- bucket_match ls len ptr
case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
let
h = hashStr ptr len
add_it ls = do
- fs <- mkNewFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
+ fs <- mkNewFastString uid ptr fp len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
--
lookup_result <- lookupTbl ft h
case lookup_result of
ls -> do
b <- bucket_match ls len ptr
case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkZFastStringForeignPtr ptr fp len = do
let
h = hashStr ptr len
add_it ls = do
- fs <- mkNewZFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
+ fs <- mkNewZFastString uid ptr fp len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
--
lookup_result <- lookupTbl ft h
case lookup_result of
ls -> do
b <- bucket_match ls len ptr
case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
-mkFastString str =
+mkFastString str =
inlinePerformIO $ do
let l = utf8EncodedLength str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
utf8EncodeString ptr str
- mkFastStringForeignPtr ptr buf l
+ mkFastStringForeignPtr ptr buf l
-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
mkFastStringByteList :: [Word8] -> FastString
-mkFastStringByteList str =
+mkFastStringByteList str =
inlinePerformIO $ do
let l = Prelude.length str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
pokeArray (castPtr ptr) str
- mkFastStringForeignPtr ptr buf l
+ mkFastStringForeignPtr ptr buf l
-- | Creates a Z-encoded 'FastString' from a 'String'
mkZFastString :: String -> FastString
-mkZFastString str =
+mkZFastString str =
inlinePerformIO $ do
let l = Prelude.length str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
pokeCAString (castPtr ptr) str
- mkZFastStringForeignPtr ptr buf l
+ mkZFastStringForeignPtr ptr buf l
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ l _ buf _):ls) len ptr
| len == l = do
- b <- cmpStringPrefix ptr buf len
- if b then return (Just v)
- else bucket_match ls len ptr
- | otherwise =
- bucket_match ls len ptr
+ b <- cmpStringPrefix ptr buf len
+ if b then return (Just v)
+ else bucket_match ls len ptr
+ | otherwise =
+ bucket_match ls len ptr
mkNewFastString uid ptr fp len = do
ref <- newIORef Nothing
hashStr :: Ptr Word8 -> Int -> Int
-- use the Addr to produce a hash value between 0 & m (inclusive)
hashStr (Ptr a#) (I# len#) = loop 0# 0#
- where
+ where
loop h n | n GHC.Exts.==# len# = I# h
- | otherwise = loop h2 (n GHC.Exts.+# 1#)
- where c = ord# (indexCharOffAddr# a# n)
- h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#` hASH_TBL_SIZE#
+ | otherwise = loop h2 (n GHC.Exts.+# 1#)
+ where c = ord# (indexCharOffAddr# a# n)
+ h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
+ hASH_TBL_SIZE#
-- -----------------------------------------------------------------------------
-- Operations
-- | Returns 'True' if the 'FastString' is Z-encoded
isZEncoded :: FastString -> Bool
isZEncoded fs | ZEncoded <- enc fs = True
- | otherwise = False
+ | otherwise = False
-- | Returns 'True' if this 'FastString' is not Z-encoded but already has
-- a Z-encoding cached (used in producing stats).
UTF8Encoded ref ->
inlinePerformIO $ do
m <- readIORef ref
- return (isJust m)
+ return (isJust m)
-- | Returns 'True' if the 'FastString' is empty
nullFS :: FastString -> Bool
-- | unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ n_bytes _ buf enc) =
+unpackFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
case enc of
- ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
- UTF8Encoded _ -> utf8DecodeString ptr n_bytes
+ ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
+ UTF8Encoded _ -> utf8DecodeString ptr n_bytes
bytesFS :: FastString -> [Word8]
-bytesFS (FastString _ n_bytes _ buf enc) =
+bytesFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekArray n_bytes ptr
inlinePerformIO $ do
m <- readIORef ref
case m of
- Just fs -> return fs
- Nothing -> do
+ Just fs -> return fs
+ Nothing -> do
let efs = mkZFastString (zEncodeString (unpackFS fs))
- writeIORef ref (Just efs)
- return efs
+ writeIORef ref (Just efs)
+ return efs
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
headFS :: FastString -> Char
-headFS (FastString _ n_bytes _ buf enc) =
+headFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
- ZEncoded -> do
- w <- peek (castPtr ptr)
- return (castCCharToChar w)
- UTF8Encoded _ ->
- return (fst (utf8DecodeChar ptr))
+ ZEncoded -> do
+ w <- peek (castPtr ptr)
+ return (castCCharToChar w)
+ UTF8Encoded _ ->
+ return (fst (utf8DecodeChar ptr))
tailFS :: FastString -> FastString
-tailFS (FastString _ n_bytes _ buf enc) =
+tailFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
ZEncoded -> do
- return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
+ return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
UTF8Encoded _ -> do
- let (_,ptr') = utf8DecodeChar ptr
- let off = ptr' `minusPtr` ptr
- return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
+ let (_,ptr') = utf8DecodeChar ptr
+ let off = ptr' `minusPtr` ptr
+ return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)
-- -----------------------------------------------------------------------------
-- under the carpet
-foreign import ccall unsafe "ghc_strlen"
+foreign import ccall unsafe "ghc_strlen"
ptrStrLength :: Ptr Word8 -> Int
-- NB. does *not* add a '\0'-terminator.
pokeCAString :: Ptr CChar -> String -> IO ()
pokeCAString ptr str =
let
- go [] n = return ()
- go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
+ go [] n = return ()
+ go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in
go str 0