X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FFastString.lhs;fp=ghc%2Fcompiler%2Futils%2FFastString.lhs;h=0000000000000000000000000000000000000000;hb=0065d5ab628975892cea1ec7303f968c3338cbe1;hp=ea307799c424f6f164fbf1074f49a6c819e14750;hpb=28a464a75e14cece5db40f2765a29348273ff2d2;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs deleted file mode 100644 index ea30779..0000000 --- a/ghc/compiler/utils/FastString.lhs +++ /dev/null @@ -1,499 +0,0 @@ -% -% (c) The University of Glasgow, 1997-2006 -% -\begin{code} -{- -FastString: A compact, hash-consed, representation of character strings. - Comparison is O(1), and you can get a Unique from them. - Generated by the FSLIT macro - Turn into SDoc with Outputable.ftext - -LitString: Just a wrapper for the Addr# of a C string (Ptr CChar). - Practically no operations - Outputing them is fast - Generated by the SLIT macro - Turn into SDoc with Outputable.ptext - -Use LitString unless you want the facilities of FastString --} -module FastString - ( - -- * FastStrings - FastString(..), -- not abstract, for now. - - -- ** Construction - mkFastString, - mkFastStringBytes, - mkFastStringForeignPtr, - mkFastString#, - mkZFastString, - mkZFastStringBytes, - - -- ** Deconstruction - unpackFS, -- :: FastString -> String - bytesFS, -- :: FastString -> [Word8] - - -- ** Encoding - isZEncoded, - zEncodeFS, - - -- ** Operations - uniqueOfFS, - lengthFS, - nullFS, - appendFS, - headFS, - tailFS, - concatFS, - consFS, - nilFS, - - -- ** Outputing - hPutFS, - - -- ** Internal - getFastStringTable, - hasZEncoding, - - -- * LitStrings - LitString, - mkLitString#, - strLength - ) where - --- This #define suppresses the "import FastString" that --- HsVersions otherwise produces -#define COMPILING_FAST_STRING -#include "HsVersions.h" - -import Encoding - -import Foreign -import Foreign.C -import GHC.Exts -import System.IO.Unsafe ( unsafePerformIO ) -import Control.Monad.ST ( stToIO ) -import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) -import System.IO ( hPutBuf ) -import Data.Maybe ( isJust ) - -import GHC.Arr ( STArray(..), newSTArray ) -import GHC.IOBase ( IO(..) ) -import GHC.Ptr ( Ptr(..) ) - -#define hASH_TBL_SIZE 4091 - - -{-| -A 'FastString' is an array of bytes, hashed to support fast O(1) -comparison. It is also associated with a character encoding, so that -we know how to convert a 'FastString' to the local encoding, or to the -Z-encoding used by the compiler internally. - -'FastString's support a memoized conversion to the Z-encoding via zEncodeFS. --} - -data FastString = FastString { - uniq :: {-# UNPACK #-} !Int, -- unique id - n_bytes :: {-# UNPACK #-} !Int, -- number of bytes - n_chars :: {-# UNPACK #-} !Int, -- number of chars - buf :: {-# UNPACK #-} !(ForeignPtr Word8), - enc :: FSEncoding - } - -data FSEncoding - = ZEncoded - -- including strings that don't need any encoding - | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString)) - -- A UTF-8 string with a memoized Z-encoding - -instance Eq FastString where - f1 == f2 = uniq f1 == uniq f2 - -instance Ord FastString where - -- Compares lexicographically, not by unique - a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } - a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } - a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } - a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True } - max x y | x >= y = x - | otherwise = y - min x y | x <= y = x - | otherwise = y - compare a b = cmpFS a b - -instance Show FastString where - show fs = show (unpackFS fs) - -cmpFS :: FastString -> FastString -> Ordering -cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) = - if u1 == u2 then EQ else - let l = if l1 <= l2 then l1 else l2 in - inlinePerformIO $ - withForeignPtr buf1 $ \p1 -> - withForeignPtr buf2 $ \p2 -> do - res <- memcmp p1 p2 l - case () of - _ | res < 0 -> return LT - | res == 0 -> if l1 == l2 then return EQ - else if l1 < l2 then return LT - else return GT - | otherwise -> return GT - -#ifndef __HADDOCK__ -foreign import ccall unsafe "ghc_memcmp" - memcmp :: Ptr a -> Ptr b -> Int -> IO Int -#endif - --- ----------------------------------------------------------------------------- --- Construction - -{- -Internally, the compiler will maintain a fast string symbol -table, providing sharing and fast comparison. Creation of -new @FastString@s then covertly does a lookup, re-using the -@FastString@ if there was a hit. --} - -data FastStringTable = - FastStringTable - {-# UNPACK #-} !Int - (MutableArray# RealWorld [FastString]) - -string_table :: IORef FastStringTable -string_table = - unsafePerformIO $ do - (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) []) - newIORef (FastStringTable 0 arr#) - -lookupTbl :: FastStringTable -> Int -> IO [FastString] -lookupTbl (FastStringTable _ arr#) (I# i#) = - IO $ \ s# -> readArray# arr# i# s# - -updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO () -updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do - (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) - writeIORef fs_table_var (FastStringTable (uid+1) arr#) - -mkFastString# :: Addr# -> FastString -mkFastString# a# = mkFastStringBytes ptr (strLength ptr) - where ptr = Ptr a# - -mkFastStringBytes :: Ptr Word8 -> Int -> FastString -mkFastStringBytes ptr len = unsafePerformIO $ do - ft@(FastStringTable uid tbl#) <- readIORef string_table - let - h = hashStr ptr len - add_it ls = do - fs <- copyNewFastString uid ptr len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v - -mkZFastStringBytes :: Ptr Word8 -> Int -> FastString -mkZFastStringBytes ptr len = unsafePerformIO $ do - ft@(FastStringTable uid tbl#) <- readIORef string_table - let - h = hashStr ptr len - add_it ls = do - fs <- copyNewZFastString uid ptr len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v - --- | Create a 'FastString' from an existing 'ForeignPtr'; the difference --- between this and 'mkFastStringBytes' is that we don't have to copy --- the bytes if the string is new to the table. -mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString -mkFastStringForeignPtr ptr fp len = do - ft@(FastStringTable uid tbl#) <- readIORef string_table --- _trace ("hashed: "++show (I# h)) $ - let - h = hashStr ptr len - add_it ls = do - fs <- mkNewFastString uid ptr fp len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v - -mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString -mkZFastStringForeignPtr ptr fp len = do - ft@(FastStringTable uid tbl#) <- readIORef string_table --- _trace ("hashed: "++show (I# h)) $ - let - h = hashStr ptr len - add_it ls = do - fs <- mkNewZFastString uid ptr fp len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v - - --- | Creates a UTF-8 encoded 'FastString' from a 'String' -mkFastString :: String -> FastString -mkFastString str = - inlinePerformIO $ do - let l = utf8EncodedLength str - buf <- mallocForeignPtrBytes l - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str - mkFastStringForeignPtr ptr buf l - - --- | Creates a Z-encoded 'FastString' from a 'String' -mkZFastString :: String -> FastString -mkZFastString str = - inlinePerformIO $ do - let l = Prelude.length str - buf <- mallocForeignPtrBytes l - withForeignPtr buf $ \ptr -> do - pokeCAString (castPtr ptr) str - mkZFastStringForeignPtr ptr buf l - -bucket_match [] _ _ = return Nothing -bucket_match (v@(FastString _ l _ buf _):ls) len ptr - | len == l = do - b <- cmpStringPrefix ptr buf len - if b then return (Just v) - else bucket_match ls len ptr - | otherwise = - bucket_match ls len ptr - -mkNewFastString uid ptr fp len = do - ref <- newIORef Nothing - n_chars <- countUTF8Chars ptr len - return (FastString uid len n_chars fp (UTF8Encoded ref)) - -mkNewZFastString uid ptr fp len = do - return (FastString uid len len fp ZEncoded) - - -copyNewFastString uid ptr len = do - fp <- copyBytesToForeignPtr ptr len - ref <- newIORef Nothing - n_chars <- countUTF8Chars ptr len - return (FastString uid len n_chars fp (UTF8Encoded ref)) - -copyNewZFastString uid ptr len = do - fp <- copyBytesToForeignPtr ptr len - return (FastString uid len len fp ZEncoded) - - -copyBytesToForeignPtr ptr len = do - fp <- mallocForeignPtrBytes len - withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len - return fp - -cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool -cmpStringPrefix ptr fp len = - withForeignPtr fp $ \ptr' -> do - r <- memcmp ptr ptr' len - return (r == 0) - - -hashStr :: Ptr Word8 -> Int -> Int - -- use the Addr to produce a hash value between 0 & m (inclusive) -hashStr (Ptr a#) (I# len#) = loop 0# 0# - where - loop h n | n ==# len# = I# h - | otherwise = loop h2 (n +# 1#) - where c = ord# (indexCharOffAddr# a# n) - h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE# - --- ----------------------------------------------------------------------------- --- Operations - --- | Returns the length of the 'FastString' in characters -lengthFS :: FastString -> Int -lengthFS f = n_chars f - --- | Returns 'True' if the 'FastString' is Z-encoded -isZEncoded :: FastString -> Bool -isZEncoded fs | ZEncoded <- enc fs = True - | otherwise = False - --- | Returns 'True' if this 'FastString' is not Z-encoded but already has --- a Z-encoding cached (used in producing stats). -hasZEncoding :: FastString -> Bool -hasZEncoding fs@(FastString uid n_bytes _ fp enc) = - case enc of - ZEncoded -> False - UTF8Encoded ref -> - inlinePerformIO $ do - m <- readIORef ref - return (isJust m) - --- | Returns 'True' if the 'FastString' is empty -nullFS :: FastString -> Bool -nullFS f = n_bytes f == 0 - --- | unpacks and decodes the FastString -unpackFS :: FastString -> String -unpackFS (FastString _ n_bytes _ buf enc) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> - case enc of - ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes) - UTF8Encoded _ -> utf8DecodeString ptr n_bytes - -bytesFS :: FastString -> [Word8] -bytesFS (FastString _ n_bytes _ buf enc) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> - peekArray n_bytes ptr - --- | returns a Z-encoded version of a 'FastString'. This might be the --- original, if it was already Z-encoded. The first time this --- function is applied to a particular 'FastString', the results are --- memoized. --- -zEncodeFS :: FastString -> FastString -zEncodeFS fs@(FastString uid n_bytes _ fp enc) = - case enc of - ZEncoded -> fs - UTF8Encoded ref -> - inlinePerformIO $ do - m <- readIORef ref - case m of - Just fs -> return fs - Nothing -> do - let efs = mkZFastString (zEncodeString (unpackFS fs)) - writeIORef ref (Just efs) - return efs - -appendFS :: FastString -> FastString -> FastString -appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) - -concatFS :: [FastString] -> FastString -concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better - -headFS :: FastString -> Char -headFS (FastString _ n_bytes _ buf enc) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> do - case enc of - ZEncoded -> do - w <- peek (castPtr ptr) - return (castCCharToChar w) - UTF8Encoded _ -> - return (fst (utf8DecodeChar ptr)) - -tailFS :: FastString -> FastString -tailFS (FastString _ n_bytes _ buf enc) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> do - case enc of - ZEncoded -> do - return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1) - UTF8Encoded _ -> do - let (_,ptr') = utf8DecodeChar ptr - let off = ptr' `minusPtr` ptr - return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off) - -consFS :: Char -> FastString -> FastString -consFS c fs = mkFastString (c : unpackFS fs) - -uniqueOfFS :: FastString -> Int# -uniqueOfFS (FastString (I# u#) _ _ _ _) = u# - -nilFS = mkFastString "" - --- ----------------------------------------------------------------------------- --- Stats - -getFastStringTable :: IO [[FastString]] -getFastStringTable = do - tbl <- readIORef string_table - buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE] - return buckets - --- ----------------------------------------------------------------------------- --- Outputting 'FastString's - --- |Outputs a 'FastString' with /no decoding at all/, that is, you --- get the actual bytes in the 'FastString' written to the 'Handle'. -hPutFS handle (FastString _ len _ fp _) - | len == 0 = return () - | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len - --- ToDo: we'll probably want an hPutFSLocal, or something, to output --- in the current locale's encoding (for error messages and suchlike). - --- ----------------------------------------------------------------------------- --- LitStrings, here for convenience only. - -type LitString = Ptr () - -mkLitString# :: Addr# -> LitString -mkLitString# a# = Ptr a# - -foreign import ccall unsafe "ghc_strlen" - strLength :: Ptr () -> Int - --- ----------------------------------------------------------------------------- --- under the carpet - --- Just like unsafePerformIO, but we inline it. -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r - --- NB. does *not* add a '\0'-terminator. -pokeCAString :: Ptr CChar -> String -> IO () -pokeCAString ptr str = - let - go [] n = return () - go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) - in - go str 0 - -#if __GLASGOW_HASKELL__ < 600 - -mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) -mallocForeignPtrBytes n = do - r <- mallocBytes n - newForeignPtr r (finalizerFree r) - -foreign import ccall unsafe "stdlib.h free" - finalizerFree :: Ptr a -> IO () - -peekCAStringLen = peekCStringLen - -#elif __GLASGOW_HASKELL__ <= 602 - -peekCAStringLen = peekCStringLen - -#endif -\end{code}