%
\section{Fast strings}
-Compact representations of character strings with
-unique identifiers (hash-cons'ish).
+FastString: A compact, hash-consed, representation of character strings.
+ Comparison is O(1), and you can get a Unique from them.
+ Generated by the FSLIT macro
+ Turn into SDoc with Outputable.ftext
+
+LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
+ Practically no operations
+ Outputing them is fast
+ Generated by the SLIT macro
+ Turn into SDoc with Outputable.ptext
+
+Use LitString unless you want the facilities of FastString
\begin{code}
module FastString
(
FastString(..), -- not abstract, for now.
- --names?
mkFastString, -- :: String -> FastString
mkFastStringNarrow, -- :: String -> FastString
mkFastSubString, -- :: Addr -> Int -> Int -> FastString
- -- These ones hold on to the Addr after they return, and aren't hashed;
- -- they are used for literals
- mkFastCharString, -- :: Addr -> FastString
- mkFastCharString#, -- :: Addr# -> FastString
- mkFastCharString2, -- :: Addr -> Int -> FastString
-
- mkFastString#, -- :: Addr# -> Int# -> FastString
+ mkFastString#, -- :: Addr# -> FastString
mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
- mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
mkFastStringInt, -- :: [Int] -> FastString
concatFS, -- :: [FastString] -> FastString
consFS, -- :: Char -> FastString -> FastString
indexFS, -- :: FastString -> Int -> Char
+ nilFS, -- :: FastString
- hPutFS -- :: Handle -> FastString -> IO ()
+ hPutFS, -- :: Handle -> FastString -> IO ()
+
+ LitString,
+ mkLitString# -- :: Addr# -> LitString
) where
-- This #define suppresses the "import FastString" that
#include "HsVersions.h"
#if __GLASGOW_HASKELL__ < 503
-import PrelPack
import PrelIOBase ( IO(..) )
#else
-import CString
import GHC.IOBase ( IO(..) )
#endif
import PrimPacked
-import GlaExts
-#if __GLASGOW_HASKELL__ < 411
-import PrelAddr ( Addr(..) )
-#else
-import Addr ( Addr(..) )
-import Ptr ( Ptr(..) )
-#endif
+import GLAEXTS
+import UNSAFE_IO ( unsafePerformIO )
+import MONAD_ST ( stToIO )
+import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
+
#if __GLASGOW_HASKELL__ < 503
import PrelArr ( STArray(..), newSTArray )
-import IOExts ( hPutBufFull, hPutBufBAFull )
#else
import GHC.Arr ( STArray(..), newSTArray )
-import System.IO ( hPutBuf )
-import IOExts ( hPutBufBA )
-import CString ( unpackNBytesBA# )
#endif
-import IOExts ( IORef, newIORef, readIORef, writeIORef )
+#if __GLASGOW_HASKELL__ >= 504
+import GHC.IOBase
+import GHC.Handle
+import Foreign.C
+#else
+import IOExts ( hPutBufBAFull )
+#endif
+
import IO
import Char ( chr, ord )
#define hASH_TBL_SIZE 993
-
-#if __GLASGOW_HASKELL__ < 503
-hPutBuf = hPutBufFull
-hPutBufBA = hPutBufBAFull
-#endif
\end{code}
@FastString@s are packed representations of strings
Int# -- length
ByteArray# -- stuff
- | CharStr -- external C string
- Addr# -- pointer to the (null-terminated) bytes in C land.
- Int# -- length (cached)
-
| UnicodeStr -- if contains characters outside '\1'..'\xFF'
Int# -- unique id
[Int] -- character numbers
instance Eq FastString where
+ -- shortcut for real FastStrings
+ (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
+
+ (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
instance Ord FastString where
+ -- Compares lexicographically, not by unique
a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
lengthFS :: FastString -> Int
lengthFS (FastString _ l# _) = I# l#
-lengthFS (CharStr a# l#) = I# l#
lengthFS (UnicodeStr _ s) = length s
nullFastString :: FastString -> Bool
nullFastString (FastString _ l# _) = l# ==# 0#
-nullFastString (CharStr _ l#) = l# ==# 0#
nullFastString (UnicodeStr _ []) = True
nullFastString (UnicodeStr _ (_:_)) = False
unpackFS :: FastString -> String
-unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
-unpackFS (CharStr addr len#) =
- unpack 0#
- where
- unpack nh
- | nh ==# len# = []
- | otherwise = C# ch : unpack (nh +# 1#)
- where
- ch = indexCharOffAddr# addr nh
+unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
unpackFS (UnicodeStr _ s) = map chr s
unpackIntFS :: FastString -> [Int]
headFS :: FastString -> Char
headFS (FastString _ l# ba#) =
if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
-headFS (CharStr a# l#) =
- if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
headFS (UnicodeStr _ (c:_)) = chr c
headFS (UnicodeStr _ []) = error ("headFS: empty FS")
FastString _ l# ba#
| l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
| otherwise -> error (msg (I# l#))
- CharStr a# l#
- | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
- | otherwise -> error (msg (I# l#))
UnicodeStr _ s -> chr (s!!i)
where
msg l = "indexFS: out of range: " ++ show (l,i)
uniqueOfFS :: FastString -> Int#
uniqueOfFS (FastString u# _ _) = u#
-uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
- {-
- [A somewhat moby hack]: to avoid entering all sorts
- of junk into the hash table, all C char strings
- are by default left out. The benefit of being in
- the table is that string comparisons are lightning fast,
- just an Int# comparison.
-
- But, if you want to get the Unique of a CharStr, we
- enter it into the table and return that unique. This
- works, but causes the CharStr to be looked up in the hash
- table each time it is accessed..
- -}
uniqueOfFS (UnicodeStr u# _) = u#
+
+nilFS = mkFastString ""
\end{code}
Internally, the compiler will maintain a fast string symbol
(# s2#, () #) }) >>
writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
-mkFastString# :: Addr# -> Int# -> FastString
-mkFastString# a# len# =
+mkFastString# :: Addr# -> FastString
+mkFastString# a# =
+ case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
+
+mkFastStringLen# :: Addr# -> Int# -> FastString
+mkFastStringLen# a# len# =
unsafePerformIO (
readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
let
-- no match, add it to table by copying out the
-- the string into a ByteArray
-- _trace "empty bucket" $
- case copyPrefixStr (A# a#) (I# len#) of
- (ByteArray _ _ barr#) ->
+ case copyPrefixStr a# (I# len#) of
+ BA barr# ->
let f_str = FastString uid# len# barr# in
updTbl string_table ft h [f_str] >>
({- _trace ("new: " ++ show f_str) $ -} return f_str)
-- _trace ("non-empty bucket"++show ls) $
case bucket_match ls len# a# of
Nothing ->
- case copyPrefixStr (A# a#) (I# len#) of
- (ByteArray _ _ barr#) ->
+ case copyPrefixStr a# (I# len#) of
+ BA barr# ->
let f_str = FastString uid# len# barr# in
updTbl string_table ft h (f_str:ls) >>
( {- _trace ("new: " ++ show f_str) $ -} return f_str)
bucket_match (UnicodeStr _ _ : ls) len# a# =
bucket_match ls len# a#
-mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
-mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
-
mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
mkFastSubStringBA# barr# start# len# =
unsafePerformIO (
-- no match, add it to table by copying out the
-- the string into a ByteArray
-- _trace "empty bucket(b)" $
- case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
- (ByteArray _ _ ba#) ->
+ case copySubStrBA (BA barr#) (I# start#) (I# len#) of
+ BA ba# ->
let f_str = FastString uid# len# ba# in
updTbl string_table ft h [f_str] >>
-- _trace ("new(b): " ++ show f_str) $
-- _trace ("non-empty bucket(b)"++show ls) $
case bucket_match ls start# len# barr# of
Nothing ->
- case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
- (ByteArray _ _ ba#) ->
+ case copySubStrBA (BA barr#) (I# start#) (I# len#) of
+ BA ba# ->
let f_str = FastString uid# len# ba# in
updTbl string_table ft h (f_str:ls) >>
-- _trace ("new(b): " ++ show f_str) $
return v
)
where
- btm = error ""
-
bucket_match [] _ _ _ = Nothing
bucket_match (v:ls) start# len# ba# =
case v of
if s' == s then Just v else bucket_match ls
bucket_match (FastString _ _ _ : ls) = bucket_match ls
-mkFastCharString :: Addr -> FastString
-mkFastCharString a@(A# a#) =
- case strLength a of{ (I# len#) -> CharStr a# len# }
-
-mkFastCharString# :: Addr# -> FastString
-mkFastCharString# a# =
- case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
-
-mkFastCharString2 :: Addr -> Int -> FastString
-mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
-
mkFastStringNarrow :: String -> FastString
mkFastStringNarrow str =
- case packString str of
- (ByteArray _ (I# len#) frozen#) ->
+ case packString str of { (I# len#, BA frozen#) ->
mkFastSubStringBA# frozen# 0# len#
- {- 0-indexed array, len# == index to one beyond end of string,
- i.e., (0,1) => empty string. -}
+ }
+ {- 0-indexed array, len# == index to one beyond end of string,
+ i.e., (0,1) => empty string. -}
mkFastString :: String -> FastString
mkFastString str = if all good str
where
good c = c >= 1 && c <= 0xFF
-mkFastSubString :: Addr -> Int -> Int -> FastString
-mkFastSubString (A# a#) (I# start#) (I# len#) =
- mkFastString# (addrOffset# a# start#) len#
+mkFastSubString :: Addr# -> Int -> Int -> FastString
+mkFastSubString a# (I# start#) (I# len#) =
+ mkFastStringLen# (a# `plusAddr#` start#) len#
\end{code}
\begin{code}
2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
_ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
where
- c0 = indexCharArray# ba# 0#
- c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
- c2 = indexCharArray# ba# (len# -# 1#)
+ c0 = indexCharArray# ba# (start# +# 0#)
+ c1 = indexCharArray# ba# (start# +# (len# `quotInt#` 2# -# 1#))
+ c2 = indexCharArray# ba# (start# +# (len# -# 1#))
-- c1 = indexCharArray# ba# 1#
-- c2 = indexCharArray# ba# 2#
else compare s1 s2
cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
-cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
- if u1# ==# u2# then
- EQ
- else
- unsafePerformIO (
- _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
+cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
+ if u1# ==# u2# then EQ else
+ let l# = if l1# <=# l2# then l1# else l2# in
+ unsafePerformIO (
+ memcmp b1# b2# l# >>= \ (I# res) ->
return (
if res <# 0# then LT
- else if res ==# 0# then EQ
+ else if res ==# 0# then
+ if l1# ==# l2# then EQ
+ else if l1# <# l2# then LT else GT
else GT
))
- where
- bot :: Int
- bot = error "tagCmp"
-cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
- = unsafePerformIO (
- _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
- return (
- if res <# 0# then LT
- else if res ==# 0# then EQ
- else GT
- ))
- where
- ba1 = A# bs1
- ba2 = A# bs2
-cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
- = unsafePerformIO (
- _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
- return (
- if res <# 0# then LT
- else if res ==# 0# then EQ
- else GT
- ))
- where
- ba1 = ByteArray (error "") ((error "")::Int) bs1
- ba2 = A# bs2
-cmpFS a@(CharStr _ _) b@(FastString _ _ _)
- = -- try them the other way 'round
- case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
+foreign import ccall "ghc_memcmp" unsafe
+ memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
-\end{code}
+-- -----------------------------------------------------------------------------
+-- Outputting 'FastString's
-Outputting @FastString@s is quick, just block copying the chunk (using
-@fwrite@).
+#if __GLASGOW_HASKELL__ >= 504
+
+-- this is our own version of hPutBuf for FastStrings, because in
+-- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
+-- The closest is hPutArray in Data.Array.IO, but that does some extra
+-- range checks that we want to avoid here.
+
+foreign import ccall unsafe "__hscore_memcpy_dst_off"
+ memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
+
+hPutFS handle (FastString _ l# ba#)
+ | l# ==# 0# = return ()
+ | otherwise
+ = do wantWritableHandle "hPutFS" handle $
+ \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
+
+ old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
+ <- readIORef ref
+
+ let count = I# l#
+ raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
+
+ -- enough room in handle buffer?
+ if (size - w > count)
+ -- There's enough room in the buffer:
+ -- just copy the data in and update bufWPtr.
+ then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
+ writeIORef ref old_buf{ bufWPtr = w + count }
+ return ()
+
+ -- else, we have to flush
+ else do flushed_buf <- flushWriteBuffer fd stream old_buf
+ writeIORef ref flushed_buf
+ let this_buf =
+ Buffer{ bufBuf=raw, bufState=WriteBuffer,
+ bufRPtr=0, bufWPtr=count, bufSize=count }
+ flushWriteBuffer fd stream this_buf
+ return ()
+
+#else
-\begin{code}
hPutFS :: Handle -> FastString -> IO ()
hPutFS handle (FastString _ l# ba#)
| l# ==# 0# = return ()
| otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
- hPutBufBA handle mba (I# l#)
+ hPutBufBAFull handle mba (I# l#)
where
bot = error "hPutFS.ba"
---ToDo: avoid silly code duplic.
-
-hPutFS handle (CharStr a# l#)
- | l# ==# 0# = return ()
-#if __GLASGOW_HASKELL__ < 411
- | otherwise = hPutBuf handle (A# a#) (I# l#)
-#else
- | otherwise = hPutBuf handle (Ptr a#) (I# l#)
#endif
-- ONLY here for debugging the NCG (so -ddump-stix works for string
-- literals); no idea if this is really necessary. JRS, 010131
hPutFS handle (UnicodeStr _ is)
= hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
+
+-- -----------------------------------------------------------------------------
+-- LitStrings, here for convenience only.
+
+type LitString = Ptr ()
+
+mkLitString# :: Addr# -> LitString
+mkLitString# a# = Ptr a#
\end{code}