X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FFastString.lhs;h=2558c5630a0d0ebfa27167db48777470c5e761aa;hb=9d7da331989abcd1844e9d03b8d1e4163796fa85;hp=75606423d7ab23b17e83fc98003ba06bb00837a5;hpb=e1d3748d9a5aa116c6ebdc17f2c7af82f164e3a8;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs index 7560642..2558c56 100644 --- a/ghc/compiler/utils/FastString.lhs +++ b/ghc/compiler/utils/FastString.lhs @@ -1,47 +1,62 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 +% (c) The University of Glasgow, 1997-2006 % -\section{Fast strings} - -Compact representations of character strings with -unique identifiers (hash-cons'ish). - \begin{code} +{-# OPTIONS -fglasgow-exts -O #-} + +{- +FastString: A compact, hash-consed, representation of character strings. + Comparison is O(1), and you can get a Unique from them. + Generated by the FSLIT macro + Turn into SDoc with Outputable.ftext + +LitString: Just a wrapper for the Addr# of a C string (Ptr CChar). + Practically no operations + Outputing them is fast + Generated by the SLIT macro + Turn into SDoc with Outputable.ptext + +Use LitString unless you want the facilities of FastString +-} module FastString ( + -- * FastStrings FastString(..), -- not abstract, for now. - --names? - mkFastString, -- :: String -> FastString - mkFastSubString, -- :: Addr -> Int -> Int -> FastString - mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString - - -- These ones hold on to the Addr after they return, and aren't hashed; - -- they are used for literals - mkFastCharString, -- :: Addr -> FastString - mkFastCharString#, -- :: Addr# -> FastString - mkFastCharString2, -- :: Addr -> Int -> FastString - - mkFastString#, -- :: Addr# -> Int# -> FastString - mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString - mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString - mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString - - uniqueOfFS, -- :: FastString -> Int# - lengthFS, -- :: FastString -> Int - nullFastString, -- :: FastString -> Bool - - getByteArray#, -- :: FastString -> ByteArray# - getByteArray, -- :: FastString -> _ByteArray Int + -- ** Construction + mkFastString, + mkFastStringBytes, + mkFastStringForeignPtr, + mkFastString#, + mkZFastString, + mkZFastStringBytes, + + -- ** Deconstruction unpackFS, -- :: FastString -> String - appendFS, -- :: FastString -> FastString -> FastString - headFS, -- :: FastString -> Char - tailFS, -- :: FastString -> FastString - concatFS, -- :: [FastString] -> FastString - consFS, -- :: Char -> FastString -> FastString - indexFS, -- :: FastString -> Int -> Char - - hPutFS -- :: Handle -> FastString -> IO () + bytesFS, -- :: FastString -> [Word8] + + -- ** Encoding + isZEncoded, + zEncodeFS, + + -- ** Operations + uniqueOfFS, + lengthFS, + nullFS, + appendFS, + headFS, + tailFS, + concatFS, + consFS, + nilFS, + + -- ** Outputing + hPutFS, + + -- * LitStrings + LitString, + mkLitString#, + strLength ) where -- This #define suppresses the "import FastString" that @@ -49,92 +64,52 @@ module FastString #define COMPILING_FAST_STRING #include "HsVersions.h" -#if __GLASGOW_HASKELL__ < 301 -import PackBase -import STBase ( StateAndPtr#(..) ) -import IOHandle ( filePtr, readHandle, writeHandle ) -import IOBase ( Handle__(..), IOError(..), IOErrorType(..), - IOResult(..), IO(..), - constructError - ) -#else -import PrelPack -#if __GLASGOW_HASKELL__ < 400 -import PrelST ( StateAndPtr#(..) ) -#endif +import Encoding -#if __GLASGOW_HASKELL__ <= 303 -import PrelHandle ( readHandle, -# if __GLASGOW_HASKELL__ < 303 - filePtr, -# endif - writeHandle - ) -#endif +import Foreign +import Foreign.C +import GLAEXTS +import UNSAFE_IO ( unsafePerformIO ) +import MONAD_ST ( stToIO ) +import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef ) +import System.IO ( hPutBuf ) -import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..), -#if __GLASGOW_HASKELL__ < 400 - IOResult(..), -#endif - IO(..), -#if __GLASGOW_HASKELL__ >= 303 - Handle__Type(..), -#endif - constructError - ) -#endif +import GHC.Arr ( STArray(..), newSTArray ) +import GHC.IOBase ( IO(..) ) -import PrimPacked -import GlaExts -import Addr ( Addr(..) ) -#if __GLASGOW_HASKELL__ < 407 -import MutableArray ( MutableArray(..) ) -#else -import PrelArr ( STArray(..), newSTArray ) -import IOExts ( hPutBuf, hPutBufBA ) -#endif +import IO --- ForeignObj is now exported abstractly. -#if __GLASGOW_HASKELL__ >= 303 -import qualified PrelForeign as Foreign ( ForeignObj(..) ) -#else -import Foreign ( ForeignObj(..) ) -#endif +#define hASH_TBL_SIZE 4091 -import IOExts ( IORef, newIORef, readIORef, writeIORef ) -import IO -#define hASH_TBL_SIZE 993 +{-| +A 'FastString' is an array of bytes, hashed to support fast O(1) +comparison. It is also associated with a character encoding, so that +we know how to convert a 'FastString' to the local encoding, or to the +Z-encoding used by the compiler internally. -#if __GLASGOW_HASKELL__ >= 400 -#define IOok STret -#endif -\end{code} +'FastString's support a memoized conversion to the Z-encoding via zEncodeFS. +-} -@FastString@s are packed representations of strings -with a unique id for fast comparisons. The unique id -is assigned when creating the @FastString@, using -a hash table to map from the character string representation -to the unique ID. +data FastString = FastString { + uniq :: {-# UNPACK #-} !Int, -- unique id + n_bytes :: {-# UNPACK #-} !Int, -- number of bytes + n_chars :: {-# UNPACK #-} !Int, -- number of chars + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + enc :: FSEncoding + } -\begin{code} -data FastString - = FastString -- packed repr. on the heap. - Int# -- unique id - -- 0 => string literal, comparison - -- will - Int# -- length - ByteArray# -- stuff - - | CharStr -- external C string - Addr# -- pointer to the (null-terminated) bytes in C land. - Int# -- length (cached) +data FSEncoding + = ZEncoded + -- including strings that don't need any encoding + | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString)) + -- A UTF-8 string with a memoized Z-encoding instance Eq FastString where - a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False } - a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True } + f1 == f2 = uniq f1 == uniq f2 instance Ord FastString where + -- Compares lexicographically, not by unique a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } @@ -145,511 +120,340 @@ instance Ord FastString where | otherwise = y compare a b = cmpFS a b -getByteArray# :: FastString -> ByteArray# -getByteArray# (FastString _ _ ba#) = ba# +instance Show FastString where + show fs = show (unpackFS fs) -getByteArray :: FastString -> ByteArray Int -#if __GLASGOW_HASKELL__ < 405 -getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba# -#else -getByteArray (FastString _ l# ba#) = ByteArray 0 (I# l#) ba# +cmpFS :: FastString -> FastString -> Ordering +cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) = + if u1 == u2 then EQ else + let l = if l1 <= l2 then l1 else l2 in + inlinePerformIO $ + withForeignPtr buf1 $ \p1 -> + withForeignPtr buf2 $ \p2 -> do + res <- memcmp p1 p2 l + case () of + _ | res < 0 -> return LT + | res == 0 -> if l1 == l2 then return EQ + else if l1 < l2 then return LT + else return GT + | otherwise -> return GT + +#ifndef __HADDOCK__ +foreign import ccall unsafe "ghc_memcmp" + memcmp :: Ptr a -> Ptr b -> Int -> IO Int #endif -lengthFS :: FastString -> Int -lengthFS (FastString _ l# _) = I# l# -lengthFS (CharStr a# l#) = I# l# - -nullFastString :: FastString -> Bool -nullFastString (FastString _ l# _) = l# ==# 0# -nullFastString (CharStr _ l#) = l# ==# 0# - -unpackFS :: FastString -> String -unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l# -unpackFS (CharStr addr len#) = - unpack 0# - where - unpack nh - | nh ==# len# = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh - -appendFS :: FastString -> FastString -> FastString -appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) - -concatFS :: [FastString] -> FastString -concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better - -headFS :: FastString -> Char -headFS f@(FastString _ l# ba#) = - if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f) -headFS f@(CharStr a# l#) = - if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f) - -indexFS :: FastString -> Int -> Char -indexFS f i@(I# i#) = - case f of - FastString _ l# ba# - | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#) - | otherwise -> error (msg (I# l#)) - CharStr a# l# - | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#) - | otherwise -> error (msg (I# l#)) - where - msg l = "indexFS: out of range: " ++ show (l,i) - -tailFS :: FastString -> FastString -tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#) - -consFS :: Char -> FastString -> FastString -consFS c fs = mkFastString (c:unpackFS fs) - -uniqueOfFS :: FastString -> Int# -uniqueOfFS (FastString u# _ _) = u# -uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh! - {- - [A somewhat moby hack]: to avoid entering all sorts - of junk into the hash table, all C char strings - are by default left out. The benefit of being in - the table is that string comparisons are lightning fast, - just an Int# comparison. - - But, if you want to get the Unique of a CharStr, we - enter it into the table and return that unique. This - works, but causes the CharStr to be looked up in the hash - table each time it is accessed.. - -} -\end{code} +-- ----------------------------------------------------------------------------- +-- Construction +{- Internally, the compiler will maintain a fast string symbol table, providing sharing and fast comparison. Creation of new @FastString@s then covertly does a lookup, re-using the @FastString@ if there was a hit. +-} -\begin{code} data FastStringTable = FastStringTable - Int# + {-# UNPACK #-} !Int (MutableArray# RealWorld [FastString]) -type FastStringTableVar = IORef FastStringTable - -string_table :: FastStringTableVar +string_table :: IORef FastStringTable string_table = - unsafePerformIO ( -#if __GLASGOW_HASKELL__ < 405 - stToIO (newArray (0::Int,hASH_TBL_SIZE) []) - >>= \ (MutableArray _ arr#) -> -#elif __GLASGOW_HASKELL__ < 407 - stToIO (newArray (0::Int,hASH_TBL_SIZE) []) - >>= \ (MutableArray _ _ arr#) -> -#else - stToIO (newSTArray (0::Int,hASH_TBL_SIZE) []) - >>= \ (STArray _ _ arr#) -> -#endif - newIORef (FastStringTable 0# arr#)) - -lookupTbl :: FastStringTable -> Int# -> IO [FastString] -lookupTbl (FastStringTable _ arr#) i# = - IO ( \ s# -> -#if __GLASGOW_HASKELL__ < 400 - case readArray# arr# i# s# of { StateAndPtr# s2# r -> - IOok s2# r }) -#else - readArray# arr# i# s#) -#endif - -updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO () -updTbl fs_table_var (FastStringTable uid# arr#) i# ls = - IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> -#if __GLASGOW_HASKELL__ < 400 - IOok s2# () }) >> -#else - (# s2#, () #) }) >> -#endif - writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#) - -mkFastString# :: Addr# -> Int# -> FastString -mkFastString# a# len# = - unsafePerformIO ( - readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> + unsafePerformIO $ do + (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) []) + newIORef (FastStringTable 0 arr#) + +lookupTbl :: FastStringTable -> Int -> IO [FastString] +lookupTbl (FastStringTable _ arr#) (I# i#) = + IO $ \ s# -> readArray# arr# i# s# + +updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO () +updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do + (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) + writeIORef fs_table_var (FastStringTable (uid+1) arr#) + +mkFastString# :: Addr# -> FastString +mkFastString# a# = mkFastStringBytes ptr (strLength ptr) + where ptr = Ptr a# + +mkFastStringBytes :: Ptr Word8 -> Int -> FastString +mkFastStringBytes ptr len = unsafePerformIO $ do + ft@(FastStringTable uid tbl#) <- readIORef string_table let - h = hashStr a# len# - in --- _trace ("hashed: "++show (I# h)) $ - lookupTbl ft h >>= \ lookup_result -> + h = hashStr ptr len + add_it ls = do + fs <- copyNewFastString uid ptr len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h case lookup_result of - [] -> - -- no match, add it to table by copying out the - -- the string into a ByteArray - -- _trace "empty bucket" $ - case copyPrefixStr (A# a#) (I# len#) of -#if __GLASGOW_HASKELL__ < 405 - (ByteArray _ barr#) -> -#else - (ByteArray _ _ barr#) -> -#endif - let f_str = FastString uid# len# barr# in - updTbl string_table ft h [f_str] >> - ({- _trace ("new: " ++ show f_str) $ -} return f_str) - ls -> - -- non-empty `bucket', scan the list looking - -- entry with same length and compare byte by byte. - -- _trace ("non-empty bucket"++show ls) $ - case bucket_match ls len# a# of - Nothing -> - case copyPrefixStr (A# a#) (I# len#) of -#if __GLASGOW_HASKELL__ < 405 - (ByteArray _ barr#) -> -#else - (ByteArray _ _ barr#) -> -#endif - let f_str = FastString uid# len# barr# in - updTbl string_table ft h (f_str:ls) >> - ( {- _trace ("new: " ++ show f_str) $ -} return f_str) - Just v -> {- _trace ("re-use: "++show v) $ -} return v) - where - bucket_match [] _ _ = Nothing - bucket_match (v@(FastString _ l# ba#):ls) len# a# = - if len# ==# l# && eqStrPrefix a# ba# l# then - Just v - else - bucket_match ls len# a# - -mkFastSubString# :: Addr# -> Int# -> Int# -> FastString -mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#) - -mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString -mkFastSubStringFO# fo# start# len# = - unsafePerformIO ( - readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +mkZFastStringBytes :: Ptr Word8 -> Int -> FastString +mkZFastStringBytes ptr len = unsafePerformIO $ do + ft@(FastStringTable uid tbl#) <- readIORef string_table let - h = hashSubStrFO fo# start# len# - in - lookupTbl ft h >>= \ lookup_result -> + h = hashStr ptr len + add_it ls = do + fs <- copyNewZFastString uid ptr len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h case lookup_result of - [] -> - -- no match, add it to table by copying out the - -- the string into a ByteArray - case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of -#if __GLASGOW_HASKELL__ < 405 - (ByteArray _ barr#) -> -#else - (ByteArray _ _ barr#) -> -#endif - let f_str = FastString uid# len# barr# in - updTbl string_table ft h [f_str] >> - return f_str - ls -> - -- non-empty `bucket', scan the list looking - -- entry with same length and compare byte by byte. - case bucket_match ls start# len# fo# of - Nothing -> - case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of -#if __GLASGOW_HASKELL__ < 405 - (ByteArray _ barr#) -> -#else - (ByteArray _ _ barr#) -> -#endif - let f_str = FastString uid# len# barr# in - updTbl string_table ft h (f_str:ls) >> - ( {- _trace ("new: " ++ show f_str) $ -} return f_str) - Just v -> {- _trace ("re-use: "++show v) $ -} return v) - where - bucket_match [] _ _ _ = Nothing - bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# = - if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then - Just v - else - bucket_match ls start# len# fo# - - -mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString -mkFastSubStringBA# barr# start# len# = - unsafePerformIO ( - readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkFastStringForeignPtr ptr fp len = do + ft@(FastStringTable uid tbl#) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ let - h = hashSubStrBA barr# start# len# - in --- _trace ("hashed(b): "++show (I# h)) $ - lookupTbl ft h >>= \ lookup_result -> + h = hashStr ptr len + add_it ls = do + fs <- mkNewFastString uid ptr fp len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h case lookup_result of - [] -> - -- no match, add it to table by copying out the - -- the string into a ByteArray - -- _trace "empty bucket(b)" $ -#if __GLASGOW_HASKELL__ < 405 - case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of - (ByteArray _ ba#) -> -#else - case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of - (ByteArray _ _ ba#) -> -#endif - let f_str = FastString uid# len# ba# in - updTbl string_table ft h [f_str] >> - -- _trace ("new(b): " ++ show f_str) $ - return f_str - ls -> - -- non-empty `bucket', scan the list looking - -- entry with same length and compare byte by byte. - -- _trace ("non-empty bucket(b)"++show ls) $ - case bucket_match ls start# len# barr# of - Nothing -> -#if __GLASGOW_HASKELL__ < 405 - case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of - (ByteArray _ ba#) -> -#else - case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of - (ByteArray _ _ ba#) -> -#endif - let f_str = FastString uid# len# ba# in - updTbl string_table ft h (f_str:ls) >> - -- _trace ("new(b): " ++ show f_str) $ - return f_str - Just v -> - -- _trace ("re-use(b): "++show v) $ - return v - ) - where - btm = error "" - - bucket_match [] _ _ _ = Nothing - bucket_match (v:ls) start# len# ba# = - case v of - FastString _ l# barr# -> - if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then - Just v - else - bucket_match ls start# len# ba# - -mkFastCharString :: Addr -> FastString -mkFastCharString a@(A# a#) = - case strLength a of{ (I# len#) -> CharStr a# len# } - -mkFastCharString# :: Addr# -> FastString -mkFastCharString# a# = - case strLength (A# a#) of { (I# len#) -> CharStr a# len# } - -mkFastCharString2 :: Addr -> Int -> FastString -mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len# + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkZFastStringForeignPtr ptr fp len = do + ft@(FastStringTable uid tbl#) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ + let + h = hashStr ptr len + add_it ls = do + fs <- mkNewZFastString uid ptr fp len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +-- | Creates a UTF-8 encoded 'FastString' from a 'String' mkFastString :: String -> FastString mkFastString str = - case packString str of -#if __GLASGOW_HASKELL__ < 405 - (ByteArray (_,I# len#) frozen#) -> -#else - (ByteArray _ (I# len#) frozen#) -> -#endif - mkFastSubStringBA# frozen# 0# len# - {- 0-indexed array, len# == index to one beyond end of string, - i.e., (0,1) => empty string. -} + inlinePerformIO $ do + let l = utf8EncodedLength str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + mkFastStringForeignPtr ptr buf l + + +-- | Creates a Z-encoded 'FastString' from a 'String' +mkZFastString :: String -> FastString +mkZFastString str = + inlinePerformIO $ do + let l = Prelude.length str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + pokeCAString (castPtr ptr) str + mkZFastStringForeignPtr ptr buf l + +bucket_match [] _ _ = return Nothing +bucket_match (v@(FastString _ l _ buf _):ls) len ptr + | len == l = do + b <- cmpStringPrefix ptr buf len + if b then return (Just v) + else bucket_match ls len ptr + | otherwise = + bucket_match ls len ptr + +mkNewFastString uid ptr fp len = do + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid len n_chars fp (UTF8Encoded ref)) + +mkNewZFastString uid ptr fp len = do + return (FastString uid len len fp ZEncoded) + + +copyNewFastString uid ptr len = do + fp <- copyBytesToForeignPtr ptr len + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid len n_chars fp (UTF8Encoded ref)) + +copyNewZFastString uid ptr len = do + fp <- copyBytesToForeignPtr ptr len + return (FastString uid len len fp ZEncoded) + + +copyBytesToForeignPtr ptr len = do + fp <- mallocForeignPtrBytes len + withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len + return fp + +cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool +cmpStringPrefix ptr fp len = + withForeignPtr fp $ \ptr' -> do + r <- memcmp ptr ptr' len + return (r == 0) + + +hashStr :: Ptr Word8 -> Int -> Int + -- use the Addr to produce a hash value between 0 & m (inclusive) +hashStr (Ptr a#) (I# len#) = loop 0# 0# + where + loop h n | n ==# len# = I# h + | otherwise = loop h2 (n +# 1#) + where c = ord# (indexCharOffAddr# a# n) + h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE# -mkFastSubString :: Addr -> Int -> Int -> FastString -mkFastSubString (A# a#) (I# start#) (I# len#) = - mkFastString# (addrOffset# a# start#) len# +-- ----------------------------------------------------------------------------- +-- Operations -mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString -mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) = - mkFastSubStringFO# fo# start# len# -\end{code} +-- | Returns the length of the 'FastString' in characters +lengthFS :: FastString -> Int +lengthFS f = n_chars f -\begin{code} -hashStr :: Addr# -> Int# -> Int# - -- use the Addr to produce a hash value between 0 & m (inclusive) -hashStr a# len# = - case len# of - 0# -> 0# - 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE# - 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE# - _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE# - where - c0 = indexCharOffAddr# a# 0# - c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#) - c2 = indexCharOffAddr# a# (len# -# 1#) -{- - c1 = indexCharOffAddr# a# 1# - c2 = indexCharOffAddr# a# 2# --} +-- | Returns 'True' if the 'FastString' is Z-encoded +isZEncoded :: FastString -> Bool +isZEncoded fs | ZEncoded <- enc fs = True + | otherwise = False -hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int# - -- use the FO to produce a hash value between 0 & m (inclusive) -hashSubStrFO fo# start# len# = - case len# of - 0# -> 0# - 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE# - 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE# - _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE# - where - c0 = indexCharOffForeignObj# fo# 0# - c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#) - c2 = indexCharOffForeignObj# fo# (len# -# 1#) - --- c1 = indexCharOffFO# fo# 1# --- c2 = indexCharOffFO# fo# 2# - - -hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int# - -- use the byte array to produce a hash value between 0 & m (inclusive) -hashSubStrBA ba# start# len# = - case len# of - 0# -> 0# - 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE# - 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE# - _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE# - where - c0 = indexCharArray# ba# 0# - c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#) - c2 = indexCharArray# ba# (len# -# 1#) - --- c1 = indexCharArray# ba# 1# --- c2 = indexCharArray# ba# 2# +-- | Returns 'True' if the 'FastString' is empty +nullFS :: FastString -> Bool +nullFS f = n_bytes f == 0 -\end{code} +-- | unpacks and decodes the FastString +unpackFS :: FastString -> String +unpackFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + case enc of + ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes) + UTF8Encoded _ -> utf8DecodeString ptr n_bytes + +bytesFS :: FastString -> [Word8] +bytesFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + peekArray n_bytes ptr + +-- | returns a Z-encoded version of a 'FastString'. This might be the +-- original, if it was already Z-encoded. The first time this +-- function is applied to a particular 'FastString', the results are +-- memoized. +-- +zEncodeFS :: FastString -> FastString +zEncodeFS fs@(FastString uid n_bytes _ fp enc) = + case enc of + ZEncoded -> fs + UTF8Encoded ref -> + inlinePerformIO $ do + m <- readIORef ref + case m of + Just fs -> return fs + Nothing -> do + let efs = mkZFastString (zEncodeString (unpackFS fs)) + writeIORef ref (Just efs) + return efs -\begin{code} -cmpFS :: FastString -> FastString -> Ordering -cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars - if u1# ==# u2# then - EQ - else - unsafePerformIO ( -#if __GLASGOW_HASKELL__ < 405 - _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) -> -#else - _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) -> -#endif - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where -#if __GLASGOW_HASKELL__ < 405 - bot :: (Int,Int) -#else - bot :: Int -#endif - bot = error "tagCmp" -cmpFS (CharStr bs1 len1) (CharStr bs2 len2) - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = A# bs1 - ba2 = A# bs2 -cmpFS (FastString _ len1 bs1) (CharStr bs2 len2) - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where -#if __GLASGOW_HASKELL__ < 405 - ba1 = ByteArray ((error "")::(Int,Int)) bs1 -#else - ba1 = ByteArray (error "") ((error "")::Int) bs1 -#endif - ba2 = A# bs2 +appendFS :: FastString -> FastString -> FastString +appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) -cmpFS a@(CharStr _ _) b@(FastString _ _ _) - = -- try them the other way 'round - case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT } +concatFS :: [FastString] -> FastString +concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better -\end{code} +headFS :: FastString -> Char +headFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + case enc of + ZEncoded -> do + w <- peek (castPtr ptr) + return (castCCharToChar w) + UTF8Encoded _ -> + return (fst (utf8DecodeChar ptr)) -Outputting @FastString@s is quick, just block copying the chunk (using -@fwrite@). +tailFS :: FastString -> FastString +tailFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + case enc of + ZEncoded -> do + return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1) + UTF8Encoded _ -> do + let (_,ptr') = utf8DecodeChar ptr + let off = ptr' `minusPtr` ptr + return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off) -\begin{code} -hPutFS :: Handle -> FastString -> IO () -#if __GLASGOW_HASKELL__ <= 302 -hPutFS handle (FastString _ l# ba#) = - if l# ==# 0# then - return () - else - readHandle handle >>= \ htype -> - case htype of - ErrorHandle ioError -> - writeHandle handle htype >> - fail ioError - ClosedHandle -> - writeHandle handle htype >> - fail MkIOError(handle,IllegalOperation,"handle is closed") - SemiClosedHandle _ _ -> - writeHandle handle htype >> - fail MkIOError(handle,IllegalOperation,"handle is closed") - ReadHandle _ _ _ -> - writeHandle handle htype >> - fail MkIOError(handle,IllegalOperation,"handle is not open for writing") - other -> - let fp = filePtr htype in - -- here we go.. -#if __GLASGOW_HASKELL__ < 405 - _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc -> -#else - _ccall_ writeFile (ByteArray ((error "")::Int) ((error "")::Int) ba#) fp (I# l#) >>= \rc -> -#endif - if rc==0 then - return () - else - constructError "hPutFS" >>= \ err -> - fail err -hPutFS handle (CharStr a# l#) = - if l# ==# 0# then - return () - else - readHandle handle >>= \ htype -> - case htype of - ErrorHandle ioError -> - writeHandle handle htype >> - fail ioError - ClosedHandle -> - writeHandle handle htype >> - fail MkIOError(handle,IllegalOperation,"handle is closed") - SemiClosedHandle _ _ -> - writeHandle handle htype >> - fail MkIOError(handle,IllegalOperation,"handle is closed") - ReadHandle _ _ _ -> - writeHandle handle htype >> - fail MkIOError(handle,IllegalOperation,"handle is not open for writing") - other -> - let fp = filePtr htype in - -- here we go.. - _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc -> - if rc==0 then - return () - else - constructError "hPutFS" >>= \ err -> - fail err - - -#else -hPutFS handle (FastString _ l# ba#) - | l# ==# 0# = return () -#if __GLASGOW_HASKELL__ < 405 - | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#) -#elif __GLASGOW_HASKELL__ < 407 - | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#) -#else - | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#) - hPutBufBA handle mba (I# l#) -#endif - where - bot = error "hPutFS.ba" +consFS :: Char -> FastString -> FastString +consFS c fs = mkFastString (c : unpackFS fs) + +uniqueOfFS :: FastString -> Int# +uniqueOfFS (FastString (I# u#) _ _ _ _) = u# ---ToDo: avoid silly code duplic. +nilFS = mkFastString "" -hPutFS handle (CharStr a# l#) - | l# ==# 0# = return () - | otherwise = hPutBuf handle (A# a#) (I# l#) +-- ----------------------------------------------------------------------------- +-- Outputting 'FastString's +-- |Outputs a 'FastString' with /no decoding at all/, that is, you +-- get the actual bytes in the 'FastString' written to the 'Handle'. +hPutFS handle (FastString _ len _ fp _) + | len == 0 = return () + | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len + +-- ToDo: we'll probably want an hPutFSLocal, or something, to output +-- in the current locale's encoding (for error messages and suchlike). + +-- ----------------------------------------------------------------------------- +-- LitStrings, here for convenience only. + +type LitString = Ptr () + +mkLitString# :: Addr# -> LitString +mkLitString# a# = Ptr a# + +foreign import ccall unsafe "ghc_strlen" + strLength :: Ptr () -> Int + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +pokeCAString :: Ptr CChar -> String -> IO () +pokeCAString ptr str = + let + go [] n = pokeElemOff ptr n 0 + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + in + go str 0 -#endif \end{code}