X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FFastString.lhs;h=ea369571b5144cbab799131f065e0311e7f52a98;hb=a1c9a60f9d3b45184f723b454017edcbf258f70e;hp=86b2a8a444b542b9be8f1a2e0d2cc18d8798b426;hpb=2cc5b907318f97e19b28b2ad8ed9ff8c1f401dcc;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs index 86b2a8a..ea36957 100644 --- a/ghc/compiler/utils/FastString.lhs +++ b/ghc/compiler/utils/FastString.lhs @@ -11,20 +11,12 @@ module FastString ( FastString(..), -- not abstract, for now. - --names? mkFastString, -- :: String -> FastString mkFastStringNarrow, -- :: String -> FastString mkFastSubString, -- :: Addr -> Int -> Int -> FastString - -- These ones hold on to the Addr after they return, and aren't hashed; - -- they are used for literals - mkFastCharString, -- :: Addr -> FastString - mkFastCharString#, -- :: Addr# -> FastString - mkFastCharString2, -- :: Addr -> Int -> FastString - - mkFastString#, -- :: Addr# -> Int# -> FastString + mkFastString#, -- :: Addr# -> FastString mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString - mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString mkFastStringInt, -- :: [Int] -> FastString @@ -41,8 +33,12 @@ module FastString concatFS, -- :: [FastString] -> FastString consFS, -- :: Char -> FastString -> FastString indexFS, -- :: FastString -> Int -> Char + nilFS, -- :: FastString + + hPutFS, -- :: Handle -> FastString -> IO () - hPutFS -- :: Handle -> FastString -> IO () + LitString, + mkLitString# -- :: Addr# -> LitString ) where -- This #define suppresses the "import FastString" that @@ -51,41 +47,35 @@ module FastString #include "HsVersions.h" #if __GLASGOW_HASKELL__ < 503 -import PrelPack import PrelIOBase ( IO(..) ) #else -import CString import GHC.IOBase ( IO(..) ) #endif import PrimPacked -import GlaExts -#if __GLASGOW_HASKELL__ < 411 -import PrelAddr ( Addr(..) ) -#else -import Addr ( Addr(..) ) -import Ptr ( Ptr(..) ) -#endif +import GLAEXTS +import UNSAFE_IO ( unsafePerformIO ) +import MONAD_ST ( stToIO ) +import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef ) + #if __GLASGOW_HASKELL__ < 503 import PrelArr ( STArray(..), newSTArray ) -import IOExts ( hPutBufFull, hPutBufBAFull ) #else import GHC.Arr ( STArray(..), newSTArray ) -import System.IO ( hPutBuf ) -import IOExts ( hPutBufBA ) -import CString ( unpackNBytesBA# ) #endif -import IOExts ( IORef, newIORef, readIORef, writeIORef ) +#if __GLASGOW_HASKELL__ >= 504 +import GHC.IOBase +import GHC.Handle +import Foreign.C +#else +import IOExts ( hPutBufBAFull ) +#endif + import IO import Char ( chr, ord ) #define hASH_TBL_SIZE 993 - -#if __GLASGOW_HASKELL__ < 503 -hPutBuf = hPutBufFull -hPutBufBA = hPutBufBAFull -#endif \end{code} @FastString@s are packed representations of strings @@ -103,16 +93,16 @@ data FastString Int# -- length ByteArray# -- stuff - | CharStr -- external C string - Addr# -- pointer to the (null-terminated) bytes in C land. - Int# -- length (cached) - | UnicodeStr -- if contains characters outside '\1'..'\xFF' Int# -- unique id [Int] -- character numbers instance Eq FastString where + -- shortcut for real FastStrings + (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False } + + (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True } instance Ord FastString where @@ -128,25 +118,15 @@ instance Ord FastString where lengthFS :: FastString -> Int lengthFS (FastString _ l# _) = I# l# -lengthFS (CharStr a# l#) = I# l# lengthFS (UnicodeStr _ s) = length s nullFastString :: FastString -> Bool nullFastString (FastString _ l# _) = l# ==# 0# -nullFastString (CharStr _ l#) = l# ==# 0# nullFastString (UnicodeStr _ []) = True nullFastString (UnicodeStr _ (_:_)) = False unpackFS :: FastString -> String -unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l# -unpackFS (CharStr addr len#) = - unpack 0# - where - unpack nh - | nh ==# len# = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh +unpackFS (FastString _ l# ba#) = unpackCStringBA (BA ba#) (I# l#) unpackFS (UnicodeStr _ s) = map chr s unpackIntFS :: FastString -> [Int] @@ -162,8 +142,6 @@ concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better headFS :: FastString -> Char headFS (FastString _ l# ba#) = if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS") -headFS (CharStr a# l#) = - if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS") headFS (UnicodeStr _ (c:_)) = chr c headFS (UnicodeStr _ []) = error ("headFS: empty FS") @@ -177,9 +155,6 @@ indexFS f i@(I# i#) = FastString _ l# ba# | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#) | otherwise -> error (msg (I# l#)) - CharStr a# l# - | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#) - | otherwise -> error (msg (I# l#)) UnicodeStr _ s -> chr (s!!i) where msg l = "indexFS: out of range: " ++ show (l,i) @@ -193,20 +168,9 @@ consFS c fs = mkFastStringInt (ord c : unpackIntFS fs) uniqueOfFS :: FastString -> Int# uniqueOfFS (FastString u# _ _) = u# -uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh! - {- - [A somewhat moby hack]: to avoid entering all sorts - of junk into the hash table, all C char strings - are by default left out. The benefit of being in - the table is that string comparisons are lightning fast, - just an Int# comparison. - - But, if you want to get the Unique of a CharStr, we - enter it into the table and return that unique. This - works, but causes the CharStr to be looked up in the hash - table each time it is accessed.. - -} uniqueOfFS (UnicodeStr u# _) = u# + +nilFS = mkFastString "" \end{code} Internally, the compiler will maintain a fast string symbol @@ -244,8 +208,12 @@ updTbl fs_table_var (FastStringTable uid# arr#) i# ls = (# s2#, () #) }) >> writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#) -mkFastString# :: Addr# -> Int# -> FastString -mkFastString# a# len# = +mkFastString# :: Addr# -> FastString +mkFastString# a# = + case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# } + +mkFastStringLen# :: Addr# -> Int# -> FastString +mkFastStringLen# a# len# = unsafePerformIO ( readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> let @@ -258,8 +226,8 @@ mkFastString# a# len# = -- no match, add it to table by copying out the -- the string into a ByteArray -- _trace "empty bucket" $ - case copyPrefixStr (A# a#) (I# len#) of - (ByteArray _ _ barr#) -> + case copyPrefixStr a# (I# len#) of + BA barr# -> let f_str = FastString uid# len# barr# in updTbl string_table ft h [f_str] >> ({- _trace ("new: " ++ show f_str) $ -} return f_str) @@ -269,8 +237,8 @@ mkFastString# a# len# = -- _trace ("non-empty bucket"++show ls) $ case bucket_match ls len# a# of Nothing -> - case copyPrefixStr (A# a#) (I# len#) of - (ByteArray _ _ barr#) -> + case copyPrefixStr a# (I# len#) of + BA barr# -> let f_str = FastString uid# len# barr# in updTbl string_table ft h (f_str:ls) >> ( {- _trace ("new: " ++ show f_str) $ -} return f_str) @@ -285,9 +253,6 @@ mkFastString# a# len# = bucket_match (UnicodeStr _ _ : ls) len# a# = bucket_match ls len# a# -mkFastSubString# :: Addr# -> Int# -> Int# -> FastString -mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#) - mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString mkFastSubStringBA# barr# start# len# = unsafePerformIO ( @@ -302,8 +267,8 @@ mkFastSubStringBA# barr# start# len# = -- no match, add it to table by copying out the -- the string into a ByteArray -- _trace "empty bucket(b)" $ - case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of - (ByteArray _ _ ba#) -> + case copySubStrBA (BA barr#) (I# start#) (I# len#) of + BA ba# -> let f_str = FastString uid# len# ba# in updTbl string_table ft h [f_str] >> -- _trace ("new(b): " ++ show f_str) $ @@ -314,8 +279,8 @@ mkFastSubStringBA# barr# start# len# = -- _trace ("non-empty bucket(b)"++show ls) $ case bucket_match ls start# len# barr# of Nothing -> - case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of - (ByteArray _ _ ba#) -> + case copySubStrBA (BA barr#) (I# start#) (I# len#) of + BA ba# -> let f_str = FastString uid# len# ba# in updTbl string_table ft h (f_str:ls) >> -- _trace ("new(b): " ++ show f_str) $ @@ -325,8 +290,6 @@ mkFastSubStringBA# barr# start# len# = return v ) where - btm = error "" - bucket_match [] _ _ _ = Nothing bucket_match (v:ls) start# len# ba# = case v of @@ -374,24 +337,13 @@ mkFastStringUnicode s = if s' == s then Just v else bucket_match ls bucket_match (FastString _ _ _ : ls) = bucket_match ls -mkFastCharString :: Addr -> FastString -mkFastCharString a@(A# a#) = - case strLength a of{ (I# len#) -> CharStr a# len# } - -mkFastCharString# :: Addr# -> FastString -mkFastCharString# a# = - case strLength (A# a#) of { (I# len#) -> CharStr a# len# } - -mkFastCharString2 :: Addr -> Int -> FastString -mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len# - mkFastStringNarrow :: String -> FastString mkFastStringNarrow str = - case packString str of - (ByteArray _ (I# len#) frozen#) -> + case packString str of { (I# len#, BA frozen#) -> mkFastSubStringBA# frozen# 0# len# - {- 0-indexed array, len# == index to one beyond end of string, - i.e., (0,1) => empty string. -} + } + {- 0-indexed array, len# == index to one beyond end of string, + i.e., (0,1) => empty string. -} mkFastString :: String -> FastString mkFastString str = if all good str @@ -407,9 +359,9 @@ mkFastStringInt str = if all good str where good c = c >= 1 && c <= 0xFF -mkFastSubString :: Addr -> Int -> Int -> FastString -mkFastSubString (A# a#) (I# start#) (I# len#) = - mkFastString# (addrOffset# a# start#) len# +mkFastSubString :: Addr# -> Int -> Int -> FastString +mkFastSubString a# (I# start#) (I# len#) = + mkFastStringLen# (a# `plusAddr#` start#) len# \end{code} \begin{code} @@ -471,68 +423,81 @@ cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars EQ else unsafePerformIO ( - _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) -> + strcmp b1# b2# >>= \ (I# res) -> return ( if res <# 0# then LT else if res ==# 0# then EQ else GT )) - where - bot :: Int - bot = error "tagCmp" -cmpFS (CharStr bs1 len1) (CharStr bs2 len2) - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = A# bs1 - ba2 = A# bs2 -cmpFS (FastString _ len1 bs1) (CharStr bs2 len2) - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray (error "") ((error "")::Int) bs1 - ba2 = A# bs2 -cmpFS a@(CharStr _ _) b@(FastString _ _ _) - = -- try them the other way 'round - case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT } +foreign import ccall "strcmp" unsafe + strcmp :: ByteArray# -> ByteArray# -> IO Int -\end{code} +-- ----------------------------------------------------------------------------- +-- Outputting 'FastString's -Outputting @FastString@s is quick, just block copying the chunk (using -@fwrite@). +#if __GLASGOW_HASKELL__ >= 504 + +-- this is our own version of hPutBuf for FastStrings, because in +-- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA. +-- The closest is hPutArray in Data.Array.IO, but that does some extra +-- range checks that we want to avoid here. + +foreign import ccall unsafe "__hscore_memcpy_dst_off" + memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ()) + +hPutFS handle (FastString _ l# ba#) + | l# ==# 0# = return () + | otherwise + = do wantWritableHandle "hPutFS" handle $ + \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do + + old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size } + <- readIORef ref + + let count = I# l# + raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld + + -- enough room in handle buffer? + if (size - w > count) + -- There's enough room in the buffer: + -- just copy the data in and update bufWPtr. + then do memcpy_baoff_ba old_raw w raw (fromIntegral count) + writeIORef ref old_buf{ bufWPtr = w + count } + return () + + -- else, we have to flush + else do flushed_buf <- flushWriteBuffer fd stream old_buf + writeIORef ref flushed_buf + let this_buf = + Buffer{ bufBuf=raw, bufState=WriteBuffer, + bufRPtr=0, bufWPtr=count, bufSize=count } + flushWriteBuffer fd stream this_buf + return () + +#else -\begin{code} hPutFS :: Handle -> FastString -> IO () hPutFS handle (FastString _ l# ba#) | l# ==# 0# = return () | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#) - hPutBufBA handle mba (I# l#) + hPutBufBAFull handle mba (I# l#) where bot = error "hPutFS.ba" ---ToDo: avoid silly code duplic. - -hPutFS handle (CharStr a# l#) - | l# ==# 0# = return () -#if __GLASGOW_HASKELL__ < 411 - | otherwise = hPutBuf handle (A# a#) (I# l#) -#else - | otherwise = hPutBuf handle (Ptr a#) (I# l#) #endif -- ONLY here for debugging the NCG (so -ddump-stix works for string -- literals); no idea if this is really necessary. JRS, 010131 hPutFS handle (UnicodeStr _ is) = hPutStr handle ("(UnicodeStr " ++ show is ++ ")") + +-- ----------------------------------------------------------------------------- +-- LitStrings, here for convenience only. + +type LitString = Ptr () +-- ToDo: make it a Ptr when we don't have to support 4.08 any more + +mkLitString# :: Addr# -> LitString +mkLitString# a# = Ptr a# \end{code}