%
-% (c) The GRASP/AQUA Project, Glasgow University, 1997
+% (c) The University of Glasgow, 1997-2006
%
-\section{Fast strings}
-
-Compact representations of character strings with
-unique identifiers (hash-cons'ish).
-
\begin{code}
-#include "HsVersions.h"
-
+{-
+FastString: A compact, hash-consed, representation of character strings.
+ Comparison is O(1), and you can get a Unique from them.
+ Generated by the FSLIT macro
+ Turn into SDoc with Outputable.ftext
+
+LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
+ Practically no operations
+ Outputting them is fast
+ Generated by the SLIT macro
+ Turn into SDoc with Outputable.ptext
+
+Use LitString unless you want the facilities of FastString
+-}
module FastString
(
+ -- * FastStrings
FastString(..), -- not abstract, for now.
- --names?
- mkFastString, -- :: String -> FastString
- mkFastCharString, -- :: _Addr -> FastString
- mkFastCharString2, -- :: _Addr -> Int -> FastString
- mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
- mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
-
- mkFastString#, -- :: Addr# -> Int# -> FastString
- mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
- mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
- mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
-
- lengthFS, -- :: FastString -> Int
- nullFastString, -- :: FastString -> Bool
-
- getByteArray#, -- :: FastString -> ByteArray#
- getByteArray, -- :: FastString -> _ByteArray Int
+ -- ** Construction
+ mkFastString,
+ mkFastStringBytes,
+ mkFastStringForeignPtr,
+ mkFastString#,
+ mkZFastString,
+ mkZFastStringBytes,
+
+ -- ** Deconstruction
unpackFS, -- :: FastString -> String
- appendFS, -- :: FastString -> FastString -> FastString
- headFS, -- :: FastString -> Char
- tailFS, -- :: FastString -> FastString
- concatFS, -- :: [FastString] -> FastString
- consFS, -- :: Char -> FastString -> FastString
-
- hPutFS, -- :: Handle -> FastString -> IO ()
- tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
+ bytesFS, -- :: FastString -> [Word8]
+
+ -- ** Encoding
+ isZEncoded,
+ zEncodeFS,
+
+ -- ** Operations
+ uniqueOfFS,
+ lengthFS,
+ nullFS,
+ appendFS,
+ headFS,
+ tailFS,
+ concatFS,
+ consFS,
+ nilFS,
+
+ -- ** Outputting
+ hPutFS,
+
+ -- * LitStrings
+ LitString,
+ mkLitString#,
+ strLength
) where
-#if __GLASGOW_HASKELL__ <= 201
-import PreludeGlaST
-import PreludeGlaMisc
-import HandleHack
-import Ubiq
-#else
-import GlaExts
+-- This #define suppresses the "import FastString" that
+-- HsVersions otherwise produces
+#define COMPILING_FAST_STRING
+#include "HsVersions.h"
+
+import Encoding
+
import Foreign
-import IOBase
-import IOHandle
-import ST
-import STBase
-import {-# SOURCE #-} Unique ( mkUniqueGrimily, Unique, Uniquable(..) )
-#if __GLASGOW_HASKELL__ == 202
-import PrelBase ( Char (..) )
-#endif
-#if __GLASGOW_HASKELL__ >= 206
-import PackBase
-#endif
-#endif
+import Foreign.C
+import GHC.Exts
+import System.IO.Unsafe ( unsafePerformIO )
+import Control.Monad.ST ( stToIO )
+import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import System.IO ( hPutBuf )
-import PrimPacked
+import GHC.Arr ( STArray(..), newSTArray )
+import GHC.IOBase ( IO(..) )
+import GHC.Ptr ( Ptr(..) )
-#define hASH_TBL_SIZE 993
+#define hASH_TBL_SIZE 4091
-\end{code}
-@FastString@s are packed representations of strings
-with a unique id for fast comparisons. The unique id
-is assigned when creating the @FastString@, using
-a hash table to map from the character string representation
-to the unique ID.
+{-|
+A 'FastString' is an array of bytes, hashed to support fast O(1)
+comparison. It is also associated with a character encoding, so that
+we know how to convert a 'FastString' to the local encoding, or to the
+Z-encoding used by the compiler internally.
-\begin{code}
-data FastString
- = FastString -- packed repr. on the heap.
- Int# -- unique id
- -- 0 => string literal, comparison
- -- will
- Int# -- length
- ByteArray# -- stuff
-
- | CharStr -- external C string
- Addr# -- pointer to the (null-terminated) bytes in C land.
- Int# -- length (cached)
+'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
+-}
+
+-- Field order matters: the rest of the module pattern-matches on the
+-- constructor positionally.
+data FastString = FastString
+  { uniq    :: {-# UNPACK #-} !Int                  -- unique id; equality compares only this
+  , n_bytes :: {-# UNPACK #-} !Int                  -- number of bytes held in 'buf'
+  , n_chars :: {-# UNPACK #-} !Int                  -- number of chars those bytes represent
+  , buf     :: {-# UNPACK #-} !(ForeignPtr Word8)   -- the bytes themselves
+  , enc     :: FSEncoding                           -- how the bytes are encoded
+  }
+
+-- Encoding tag carried by every 'FastString'.
+data FSEncoding
+  = ZEncoded
+    -- The bytes are Z-encoded; this includes strings that don't
+    -- need any encoding
+  | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
+    -- A UTF-8 string, together with a memo cell for its Z-encoding
instance Eq FastString where
- a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
- a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
+ -- Equality is decided solely by comparing the two uniques.
+ f1 == f2 = uniq f1 == uniq f2
+
+instance Ord FastString where
+  -- Ordering is lexicographic on the bytes (via cmpFS), not by unique
+  compare = cmpFS
+  a <  b  = cmpFS a b == LT
+  a <= b  = cmpFS a b /= GT
+  a >  b  = cmpFS a b == GT
+  a >= b  = cmpFS a b /= LT
+  max x y = if x >= y then x else y
+  min x y = if x <= y then x else y
+
+
+-- Shown exactly like the unpacked 'String' would be.
+instance Show FastString where
+  show = show . unpackFS
+
+-- Three-way comparison of two FastStrings.  Identical uniques short-circuit
+-- to EQ; otherwise the common prefix (the shorter of the two byte lengths)
+-- is compared with memcmp and, if it is equal, the shorter string sorts
+-- first.
+cmpFS :: FastString -> FastString -> Ordering
+cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _)
+  | u1 == u2  = EQ
+  | otherwise =
+      inlinePerformIO $
+        withForeignPtr buf1 $ \p1 ->
+        withForeignPtr buf2 $ \p2 -> do
+          res <- memcmp p1 p2 (min l1 l2)
+          return $ case compare res 0 of
+                     EQ       -> compare l1 l2
+                     unequal  -> unequal
+
+-- C helper imported under the name "ghc_memcmp".  Callers in this module
+-- treat a negative / zero / positive result as LT / EQ / GT for the first
+-- n bytes.  NOTE(review): presumably a thin wrapper over C memcmp; confirm
+-- that its C return and length types really match Haskell 'Int'.
+#ifndef __HADDOCK__
+foreign import ccall unsafe "ghc_memcmp"
+ memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+#endif
+
+-- -----------------------------------------------------------------------------
+-- Construction
{-
- (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
+Internally, the compiler will maintain a fast string symbol
+table, providing sharing and fast comparison. Creation of
+new @FastString@s then covertly does a lookup, re-using the
+@FastString@ if there was a hit.
-}
-instance Uniquable FastString where
- uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
- uniqueOf (CharStr a# l#) =
- {-
- [A somewhat moby hack]: to avoid entering all sorts
- of junk into the hash table, all C char strings
- are by default left out. The benefit of being in
- the table is that string comparisons are lightning fast,
- just an Int# comparison.
-
- But, if you want to get the Unique of a CharStr, we
- enter it into the table and return that unique. This
- works, but causes the CharStr to be looked up in the hash
- table each time it is accessed..
- -}
- mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
-
-instance Uniquable Int where
- uniqueOf (I# i#) = mkUniqueGrimily i#
-
-instance Text FastString where
- showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
- showsPrec p ps r = showsPrec p (unpackFS ps) r
-
-getByteArray# :: FastString -> ByteArray#
-getByteArray# (FastString _ _ ba#) = ba#
-
-getByteArray :: FastString -> _ByteArray Int
-getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
+-- The symbol table behind all FastStrings.
+data FastStringTable
+  = FastStringTable
+      {-# UNPACK #-} !Int                       -- unique to give to the next new string
+      (MutableArray# RealWorld [FastString])    -- hash buckets, indexed by hashStr
+
+-- The single, process-wide FastString table.  It is created on first use
+-- with unsafePerformIO, so it MUST NOT be inlined: if the optimiser were
+-- to duplicate this definition at use sites, each copy would allocate its
+-- own table and uniques would no longer identify strings.
+--
+-- The array bounds are (0, hASH_TBL_SIZE), i.e. one slot more than
+-- hashStr (which reduces modulo hASH_TBL_SIZE) can ever index.
+{-# NOINLINE string_table #-}
+string_table :: IORef FastStringTable
+string_table =
+  unsafePerformIO $ do
+    (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
+    newIORef (FastStringTable 0 arr#)
+
+-- Fetch the bucket stored at the given hash index.
+lookupTbl :: FastStringTable -> Int -> IO [FastString]
+lookupTbl (FastStringTable _ buckets#) (I# ix#) = IO readBucket
+  where
+    readBucket st# = readArray# buckets# ix# st#
+
+-- Overwrite the bucket at the given hash index and bump the table's
+-- unique counter by one (the array itself is mutated in place; only the
+-- counter changes in the IORef).
+updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
+updTbl tableRef (FastStringTable next buckets#) (I# ix#) bucket = do
+  IO storeBucket
+  writeIORef tableRef (FastStringTable (next + 1) buckets#)
+  where
+    storeBucket st# =
+      case writeArray# buckets# ix# bucket st# of
+        st2# -> (# st2#, () #)
+
+-- Build a FastString from a C string literal address; the length is
+-- obtained with strLength.
+mkFastString# :: Addr# -> FastString
+mkFastString# a# = mkFastStringBytes (Ptr a#) (strLength (Ptr a#))
+
+-- Intern @len@ bytes starting at @ptr@ as a UTF-8 tagged FastString.
+-- An existing table entry with the same bytes is re-used; otherwise the
+-- bytes are copied into a fresh FastString which is pushed onto the front
+-- of its hash bucket.
+mkFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkFastStringBytes ptr len = unsafePerformIO $ do
+  ft@(FastStringTable uid _) <- readIORef string_table
+  let h = hashStr ptr len
+      insertNew bucket = do
+        fs <- copyNewFastString uid ptr len
+        updTbl string_table ft h (fs : bucket)
+        return fs
+  bucket <- lookupTbl ft h
+  -- bucket_match on an empty bucket simply yields Nothing
+  found  <- bucket_match bucket len ptr
+  maybe (insertNew bucket) return found
+
+
+-- Intern @len@ bytes starting at @ptr@ as a Z-encoded FastString.
+-- An existing table entry with the same bytes is re-used; otherwise the
+-- bytes are copied into a fresh FastString which is pushed onto the front
+-- of its hash bucket.
+mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkZFastStringBytes ptr len = unsafePerformIO $ do
+  ft@(FastStringTable uid _) <- readIORef string_table
+  let h = hashStr ptr len
+      insertNew bucket = do
+        fs <- copyNewZFastString uid ptr len
+        updTbl string_table ft h (fs : bucket)
+        return fs
+  bucket <- lookupTbl ft h
+  -- bucket_match on an empty bucket simply yields Nothing
+  found  <- bucket_match bucket len ptr
+  maybe (insertNew bucket) return found
+
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'.  Unlike
+-- 'mkFastStringBytes', a string that is new to the table keeps the
+-- supplied 'ForeignPtr' as its buffer instead of copying the bytes.
+mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkFastStringForeignPtr ptr fp len = do
+  ft@(FastStringTable uid _) <- readIORef string_table
+  let h = hashStr ptr len
+      insertNew bucket = do
+        fs <- mkNewFastString uid ptr fp len
+        updTbl string_table ft h (fs : bucket)
+        return fs
+  bucket <- lookupTbl ft h
+  -- bucket_match on an empty bucket simply yields Nothing
+  found  <- bucket_match bucket len ptr
+  maybe (insertNew bucket) return found
+
+
+-- Z-encoded counterpart of mkFastStringForeignPtr: a string that is new
+-- to the table keeps the supplied ForeignPtr as its buffer (no copy) and
+-- is tagged ZEncoded.
+mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkZFastStringForeignPtr ptr fp len = do
+  ft@(FastStringTable uid _) <- readIORef string_table
+  let h = hashStr ptr len
+      insertNew bucket = do
+        fs <- mkNewZFastString uid ptr fp len
+        updTbl string_table ft h (fs : bucket)
+        return fs
+  bucket <- lookupTbl ft h
+  -- bucket_match on an empty bucket simply yields Nothing
+  found  <- bucket_match bucket len ptr
+  maybe (insertNew bucket) return found
+
+
+
+-- | Creates a UTF-8 encoded 'FastString' from a 'String': the string is
+-- encoded into a freshly allocated buffer which is then interned.
+mkFastString :: String -> FastString
+mkFastString str =
+  inlinePerformIO $ do
+    let nBytes = utf8EncodedLength str
+    fp <- mallocForeignPtrBytes nBytes
+    withForeignPtr fp $ \p ->
+      utf8EncodeString p str >> mkFastStringForeignPtr p fp nBytes
+
+
+-- | Creates a Z-encoded 'FastString' from a 'String'.  One byte is
+-- written per character, so the input is expected to be Z-encoded
+-- already.
+mkZFastString :: String -> FastString
+mkZFastString str =
+  inlinePerformIO $ do
+    let nBytes = Prelude.length str
+    fp <- mallocForeignPtrBytes nBytes
+    withForeignPtr fp $ \p ->
+      pokeCAString (castPtr p) str >> mkZFastStringForeignPtr p fp nBytes
+
+-- Search a hash bucket for an entry whose bytes equal the @len@ bytes at
+-- @ptr@.  Entries of a different byte length are skipped without
+-- touching memory.
+bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
+bucket_match [] _ _ = return Nothing
+bucket_match (v@(FastString _ l _ buf _) : rest) len ptr
+  | len /= l  = bucket_match rest len ptr
+  | otherwise = do
+      same <- cmpStringPrefix ptr buf len
+      if same then return (Just v) else bucket_match rest len ptr
+
+-- Wrap an existing buffer as a UTF-8 FastString with an empty Z-encoding
+-- memo; the character count is computed from the bytes at @ptr@.
+mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkNewFastString uid ptr fp len = do
+  zref  <- newIORef Nothing
+  chars <- countUTF8Chars ptr len
+  return FastString
+    { uniq    = uid
+    , n_bytes = len
+    , n_chars = chars
+    , buf     = fp
+    , enc     = UTF8Encoded zref
+    }
+
+-- Wrap an existing buffer as a Z-encoded FastString; the character count
+-- is taken to be the byte count.  The raw pointer argument is unused.
+mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkNewZFastString uid _ fp len =
+  return FastString
+    { uniq = uid, n_bytes = len, n_chars = len, buf = fp, enc = ZEncoded }
+
+
+-- Copy @len@ bytes from @ptr@ into a fresh buffer and wrap it as a UTF-8
+-- FastString with an empty Z-encoding memo.
+copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
+copyNewFastString uid ptr len = do
+  copied <- copyBytesToForeignPtr ptr len
+  zref   <- newIORef Nothing
+  chars  <- countUTF8Chars ptr len
+  return FastString
+    { uniq    = uid
+    , n_bytes = len
+    , n_chars = chars
+    , buf     = copied
+    , enc     = UTF8Encoded zref
+    }
+
+-- Copy @len@ bytes from @ptr@ into a fresh buffer and wrap it as a
+-- Z-encoded FastString (character count = byte count).
+copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
+copyNewZFastString uid ptr len = do
+  copied <- copyBytesToForeignPtr ptr len
+  return FastString
+    { uniq = uid, n_bytes = len, n_chars = len, buf = copied, enc = ZEncoded }
+
+
+-- Allocate a new foreign buffer of @len@ bytes and fill it with a copy of
+-- the bytes at @ptr@.
+copyBytesToForeignPtr :: Ptr a -> Int -> IO (ForeignPtr a)
+copyBytesToForeignPtr ptr len = do
+  fresh <- mallocForeignPtrBytes len
+  withForeignPtr fresh (\dest -> copyBytes dest ptr len)
+  return fresh
+
+-- True when the first @len@ bytes at @ptr@ and in the foreign buffer
+-- compare equal under memcmp.
+cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
+cmpStringPrefix ptr fp len =
+  withForeignPtr fp $ \stored -> fmap (== 0) (memcmp ptr stored len)
+
+
+-- Hash @len@ bytes starting at the given address.  The accumulator starts
+-- at 0 and, for every byte, is multiplied by 128, has the byte added and
+-- is reduced modulo hASH_TBL_SIZE, so a non-negative length yields an
+-- index in the range 0 .. hASH_TBL_SIZE-1.
+hashStr :: Ptr Word8 -> Int -> Int
+hashStr (Ptr a#) (I# len#) = go 0# 0#
+  where
+    go acc i
+      | i ==# len# = I# acc
+      | otherwise  = go next (i +# 1#)
+      where
+        byte = ord# (indexCharOffAddr# a# i)
+        next = (byte +# (acc *# 128#)) `remInt#` hASH_TBL_SIZE#
+
+-- -----------------------------------------------------------------------------
+-- Operations
+-- | Returns the length of the 'FastString' in characters
lengthFS :: FastString -> Int
-lengthFS (FastString _ l# _) = I# l#
-lengthFS (CharStr a# l#) = I# l#
+lengthFS = n_chars
-nullFastString :: FastString -> Bool
-nullFastString (FastString _ l# _) = l# ==# 0#
-nullFastString (CharStr _ l#) = l# ==# 0#
+-- | Returns 'True' exactly when the 'FastString' carries the 'ZEncoded' tag
+isZEncoded :: FastString -> Bool
+isZEncoded fs =
+  case enc fs of
+    ZEncoded      -> True
+    UTF8Encoded _ -> False
+
+-- | Returns 'True' if the 'FastString' holds zero bytes
+nullFS :: FastString -> Bool
+nullFS = (== 0) . n_bytes
+
+-- | unpacks and decodes the FastString
unpackFS :: FastString -> String
-#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
-unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
-#else
-unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
-#endif
-unpackFS (CharStr addr len#) =
- unpack 0#
- where
- unpack nh
- | nh ==# len# = []
- | otherwise = C# ch : unpack (nh +# 1#)
- where
- ch = indexCharOffAddr# addr nh
+-- The byte buffer is decoded according to the string's encoding tag.
+unpackFS (FastString _ n_bytes _ buf enc) =
+  inlinePerformIO $ withForeignPtr buf decodeFrom
+  where
+    -- Z-encoded strings are read back with peekCAStringLen; UTF-8 ones go
+    -- through utf8DecodeString.
+    decodeFrom ptr =
+      case enc of
+        ZEncoded      -> peekCAStringLen (castPtr ptr, n_bytes)
+        UTF8Encoded _ -> utf8DecodeString ptr n_bytes
+
+-- The raw bytes of the string, with no decoding applied.
+bytesFS :: FastString -> [Word8]
+bytesFS fs =
+  inlinePerformIO $ withForeignPtr (buf fs) (peekArray (n_bytes fs))
+
+-- | Returns a Z-encoded version of a 'FastString'.  A string that is
+-- already Z-encoded is returned unchanged.  For a UTF-8 string the
+-- result is computed on first request and stored in the string's memo
+-- cell, from which later calls read it back.
+--
+zEncodeFS :: FastString -> FastString
+zEncodeFS fs =
+  case enc fs of
+    ZEncoded        -> fs
+    UTF8Encoded ref -> inlinePerformIO $ do
+      let encodeAndMemo = do
+            let zfs = mkZFastString (zEncodeString (unpackFS fs))
+            writeIORef ref (Just zfs)
+            return zfs
+      readIORef ref >>= maybe encodeAndMemo return
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
concatFS :: [FastString] -> FastString
-concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
+concatFS ls = mkFastString (Prelude.concatMap unpackFS ls) -- ToDo: do better
headFS :: FastString -> Char
-headFS f@(FastString _ l# ba#) =
- if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
-headFS f@(CharStr a# l#) =
- if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
+-- First character of the string, decoded according to its encoding tag.
+-- An empty FastString has no first character: rather than reading past
+-- the end of a zero-length buffer, fail with an explicit error.
+headFS (FastString _ n_bytes _ buf enc)
+  | n_bytes == 0 = error "headFS: empty FastString"
+  | otherwise =
+      inlinePerformIO $ withForeignPtr buf $ \ptr ->
+        case enc of
+          ZEncoded -> do
+            -- Z-encoded strings hold one character per byte
+            w <- peek (castPtr ptr)
+            return (castCCharToChar w)
+          UTF8Encoded _ ->
+            return (fst (utf8DecodeChar ptr))
tailFS :: FastString -> FastString
-tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
+-- Everything after the first character, interned as a new FastString
+-- with the same encoding tag.  An empty FastString has no tail: fail with
+-- an explicit error instead of passing a negative length on to the
+-- construction functions.
+tailFS (FastString _ n_bytes _ buf enc)
+  | n_bytes == 0 = error "tailFS: empty FastString"
+  | otherwise =
+      inlinePerformIO $ withForeignPtr buf $ \ptr ->
+        case enc of
+          ZEncoded ->
+            -- one byte per character: just skip the first byte
+            return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
+          UTF8Encoded _ -> do
+            -- the first character may occupy several bytes; skip all of them
+            let (_, ptr') = utf8DecodeChar ptr
+                off       = ptr' `minusPtr` ptr
+            return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
consFS :: Char -> FastString -> FastString
-consFS c fs = mkFastString (c:unpackFS fs)
+consFS c = mkFastString . (c :) . unpackFS
-\end{code}
+-- The string's unique, as an unboxed Int#.
+uniqueOfFS :: FastString -> Int#
+uniqueOfFS fs = case uniq fs of I# u# -> u#
-Internally, the compiler will maintain a fast string symbol
-table, providing sharing and fast comparison. Creation of
-new @FastString@s then covertly does a lookup, re-using the
-@FastString@ if there was a hit.
+-- The empty FastString.
+nilFS :: FastString
+nilFS = mkFastString ""
-\begin{code}
-data FastStringTable =
- FastStringTable
- Int#
- (MutableArray# _RealWorld [FastString])
+-- -----------------------------------------------------------------------------
+-- Outputting 'FastString's
-type FastStringTableVar = MutableVar _RealWorld FastStringTable
+-- | Writes the raw bytes of a 'FastString' to the 'Handle' with
+-- /no decoding at all/.  Nothing is written for a zero-length string.
+hPutFS handle (FastString _ len _ fp _) =
+  if len == 0
+    then return ()
+    else withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
-string_table :: FastStringTableVar
-string_table =
- unsafePerformPrimIO (
- newArray (0::Int,hASH_TBL_SIZE) [] `thenPrimIO` \ (_MutableArray _ arr#) ->
- newVar (FastStringTable 0# arr#))
-
-lookupTbl :: FastStringTable -> Int# -> PrimIO [FastString]
-lookupTbl (FastStringTable _ arr#) i# =
- MkST ( \ (S# s#) ->
- case readArray# arr# i# s# of { StateAndPtr# s2# r ->
- (r, S# s2#) })
-
-updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
-updTbl (_MutableArray _ var#) (FastStringTable uid# arr#) i# ls =
- MkST ( \ (S# s#) ->
- case writeArray# arr# i# ls s# of { s2# ->
- case writeArray# var# 0# (FastStringTable (uid# +# 1#) arr#) s2# of { s3# ->
- ((), S# s3#) }})
-
-mkFastString# :: Addr# -> Int# -> FastString
-mkFastString# a# len# =
- unsafePerformPrimIO (
- readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
- let
- h = hashStr a# len#
- in
--- _trace ("hashed: "++show (I# h)) $
- lookupTbl ft h `thenPrimIO` \ lookup_result ->
- case lookup_result of
- [] ->
- -- no match, add it to table by copying out the
- -- the string into a ByteArray
- -- _trace "empty bucket" $
- case copyPrefixStr (A# a#) (I# len#) of
- (_ByteArray _ barr#) ->
- let f_str = FastString uid# len# barr# in
- updTbl string_table ft h [f_str] `seqPrimIO`
- ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
- ls ->
- -- non-empty `bucket', scan the list looking
- -- entry with same length and compare byte by byte.
- -- _trace ("non-empty bucket"++show ls) $
- case bucket_match ls len# a# of
- Nothing ->
- case copyPrefixStr (A# a#) (I# len#) of
- (_ByteArray _ barr#) ->
- let f_str = FastString uid# len# barr# in
- updTbl string_table ft h (f_str:ls) `seqPrimIO`
- ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
- Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
- where
- bucket_match [] _ _ = Nothing
- bucket_match (v@(FastString _ l# ba#):ls) len# a# =
- if len# ==# l# && eqStrPrefix a# ba# l# then
- Just v
- else
- bucket_match ls len# a#
-
-mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
-mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
-
-mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
-mkFastSubStringFO# fo# start# len# =
- unsafePerformPrimIO (
- readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
- let
- h = hashSubStrFO fo# start# len#
- in
- lookupTbl ft h `thenPrimIO` \ lookup_result ->
- case lookup_result of
- [] ->
- -- no match, add it to table by copying out the
- -- the string into a ByteArray
- case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
- (_ByteArray _ barr#) ->
- let f_str = FastString uid# len# barr# in
- updTbl string_table ft h [f_str] `seqPrimIO`
- returnPrimIO f_str
- ls ->
- -- non-empty `bucket', scan the list looking
- -- entry with same length and compare byte by byte.
- case bucket_match ls start# len# fo# of
- Nothing ->
- case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
- (_ByteArray _ barr#) ->
- let f_str = FastString uid# len# barr# in
- updTbl string_table ft h (f_str:ls) `seqPrimIO`
- ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
- Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
- where
- bucket_match [] _ _ _ = Nothing
- bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
- if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
- Just v
- else
- bucket_match ls start# len# fo#
-
-
-mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
-mkFastSubStringBA# barr# start# len# =
- unsafePerformPrimIO (
- readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
- let
- h = hashSubStrBA barr# start# len#
- in
--- _trace ("hashed(b): "++show (I# h)) $
- lookupTbl ft h `thenPrimIO` \ lookup_result ->
- case lookup_result of
- [] ->
- -- no match, add it to table by copying out the
- -- the string into a ByteArray
- -- _trace "empty bucket(b)" $
- case copySubStrBA (_ByteArray btm barr#) (I# start#) (I# len#) of
- (_ByteArray _ ba#) ->
- let f_str = FastString uid# len# ba# in
- updTbl string_table ft h [f_str] `seqPrimIO`
- -- _trace ("new(b): " ++ show f_str) $
- returnPrimIO f_str
- ls ->
- -- non-empty `bucket', scan the list looking
- -- entry with same length and compare byte by byte.
- -- _trace ("non-empty bucket(b)"++show ls) $
- case bucket_match ls start# len# barr# of
- Nothing ->
- case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
- (_ByteArray _ ba#) ->
- let f_str = FastString uid# len# ba# in
- updTbl string_table ft h (f_str:ls) `seqPrimIO`
- -- _trace ("new(b): " ++ show f_str) $
- returnPrimIO f_str
- Just v ->
- -- _trace ("re-use(b): "++show v) $
- returnPrimIO v
- )
- where
- btm = error ""
-
- bucket_match [] _ _ _ = Nothing
- bucket_match (v:ls) start# len# ba# =
- case v of
- FastString _ l# barr# ->
- if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
- Just v
- else
- bucket_match ls start# len# ba#
-
-mkFastCharString :: _Addr -> FastString
-mkFastCharString a@(A# a#) =
- case strLength a of{ (I# len#) -> CharStr a# len# }
-
-mkFastCharString2 :: _Addr -> Int -> FastString
-mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
+-- ToDo: we'll probably want an hPutFSLocal, or something, to output
+-- in the current locale's encoding (for error messages and suchlike).
-mkFastString :: String -> FastString
-mkFastString str =
-#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
- case stringToByteArray str of
-#else
- case packString str of
-#endif
- (_ByteArray (_,I# len#) frozen#) ->
- mkFastSubStringBA# frozen# 0# len#
- {- 0-indexed array, len# == index to one beyond end of string,
- i.e., (0,1) => empty string. -}
+-- -----------------------------------------------------------------------------
+-- LitStrings, here for convenience only.
-mkFastSubString :: _Addr -> Int -> Int -> FastString
-mkFastSubString (A# a#) (I# start#) (I# len#) =
- mkFastString# (addrOffset# a# start#) len#
+-- A LitString is just an untyped pointer; mkLitString# wraps a literal's
+-- Addr# in one.
+type LitString = Ptr ()
-mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
-mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
- mkFastSubStringFO# fo# start# len#
+-- Wrap a raw address as a LitString; nothing is copied or inspected.
+mkLitString# :: Addr# -> LitString
+mkLitString# a# = Ptr a#
-\end{code}
+-- C helper imported under the name "ghc_strlen" and exposed as a pure
+-- function.  NOTE(review): presumably a wrapper over C strlen, so the
+-- pointer must refer to a NUL-terminated string -- confirm.
+foreign import ccall unsafe "ghc_strlen"
+ strLength :: Ptr () -> Int
-\begin{code}
-hashStr :: Addr# -> Int# -> Int#
- -- use the Addr to produce a hash value between 0 & m (inclusive)
-hashStr a# len# =
- case len# of
- 0# -> 0#
- 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
- 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
- _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
- where
- c0 = indexCharOffAddr# a# 0#
- c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
- c2 = indexCharOffAddr# a# (len# -# 1#)
-{-
- c1 = indexCharOffAddr# a# 1#
- c2 = indexCharOffAddr# a# 2#
--}
+-- -----------------------------------------------------------------------------
+-- under the carpet
-hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
- -- use the FO to produce a hash value between 0 & m (inclusive)
-hashSubStrFO fo# start# len# =
- case len# of
- 0# -> 0#
- 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
- 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
- _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
- where
- c0 = indexCharOffFO# fo# 0#
- c1 = indexCharOffFO# fo# (len# `quotInt#` 2# -# 1#)
- c2 = indexCharOffFO# fo# (len# -# 1#)
-
--- c1 = indexCharOffFO# fo# 1#
--- c2 = indexCharOffFO# fo# 2#
-
-
-hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
- -- use the byte array to produce a hash value between 0 & m (inclusive)
-hashSubStrBA ba# start# len# =
- case len# of
- 0# -> 0#
- 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
- 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
- _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
- where
- c0 = indexCharArray# ba# 0#
- c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
- c2 = indexCharArray# ba# (len# -# 1#)
-
--- c1 = indexCharArray# ba# 1#
--- c2 = indexCharArray# ba# 2#
+-- Runs an IO action as a pure value by applying it directly to
+-- realWorld#; like unsafePerformIO, except that it gets inlined.
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO action) =
+  case action realWorld# of
+    (# _, result #) -> result
+{-# INLINE inlinePerformIO #-}
-\end{code}
+-- Writes each character of the string, converted with castCharToCChar,
+-- to consecutive elements starting at @ptr@.
+-- NB. does *not* add a '\0'-terminator.
+pokeCAString :: Ptr CChar -> String -> IO ()
+pokeCAString ptr str =
+  mapM_ (\(i, c) -> pokeElemOff ptr i (castCharToCChar c)) (zip [0 ..] str)
-\begin{code}
-tagCmpFS :: FastString -> FastString -> _CMP_TAG
-tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
- if u1# ==# u2# then
- _EQ
- else
- unsafePerformPrimIO (
- _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
- returnPrimIO (
- if res <# 0# then _LT
- else if res ==# 0# then _EQ
- else _GT
- ))
- where
- bottom :: (Int,Int)
- bottom = error "tagCmp"
-tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
- = unsafePerformPrimIO (
- _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
- returnPrimIO (
- if res <# 0# then _LT
- else if res ==# 0# then _EQ
- else _GT
- ))
- where
- ba1 = A# bs1
- ba2 = A# bs2
-tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
- = unsafePerformPrimIO (
- _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
- returnPrimIO (
- if res <# 0# then _LT
- else if res ==# 0# then _EQ
- else _GT
- ))
- where
- ba1 = _ByteArray ((error "")::(Int,Int)) bs1
- ba2 = A# bs2
-
-tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
- = -- try them the other way 'round
- case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
+#if __GLASGOW_HASKELL__ < 600
-instance Ord FastString where
- a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
- a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
- a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
- a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
- max x y | x >= y = x
- | otherwise = y
- min x y | x <= y = x
- | otherwise = y
- _tagCmp a b = tagCmpFS a b
+-- Compatibility definition for this CPP branch: malloc the bytes and
+-- attach a finalizer that frees them.
+mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
+mallocForeignPtrBytes n =
+  mallocBytes n >>= \r -> newForeignPtr r (finalizerFree r)
-\end{code}
+-- C's free() from stdlib.h, used as the finalizer action by the
+-- compatibility mallocForeignPtrBytes in this CPP branch.
+foreign import ccall unsafe "stdlib.h free"
+ finalizerFree :: Ptr a -> IO ()
-Outputting @FastString@s is quick, just block copying the chunk (using
-@fwrite@).
+-- In this CPP branch (__GLASGOW_HASKELL__ < 600) peekCAStringLen is
+-- defined locally as an alias for peekCStringLen.
+peekCAStringLen = peekCStringLen
-\begin{code}
-#if __GLASGOW_HASKELL__ >= 201
-#define _ErrorHandle IOBase.ErrorHandle
-#define _ReadHandle IOBase.ReadHandle
-#define _ClosedHandle IOBase.ClosedHandle
-#define _SemiClosedHandle IOBase.SemiClosedHandle
-#define _constructError IOBase.constructError
-#define _filePtr IOHandle.filePtr
-#define failWith fail
-#endif
+#elif __GLASGOW_HASKELL__ <= 602
-hPutFS :: Handle -> FastString -> IO ()
-hPutFS handle (FastString _ l# ba#) =
- if l# ==# 0# then
- return ()
- else
- _readHandle handle >>= \ htype ->
- case htype of
- _ErrorHandle ioError ->
- _writeHandle handle htype >>
- failWith ioError
- _ClosedHandle ->
- _writeHandle handle htype >>
- failWith MkIOError(handle,IllegalOperation,"handle is closed")
- _SemiClosedHandle _ _ ->
- _writeHandle handle htype >>
- failWith MkIOError(handle,IllegalOperation,"handle is closed")
- _ReadHandle _ _ _ ->
- _writeHandle handle htype >>
- failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
- other ->
- let fp = _filePtr htype in
- -- here we go..
- _ccall_ writeFile (_ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) `CCALL_THEN` \rc ->
- if rc==0 then
- return ()
- else
- _constructError "hPutFS" `CCALL_THEN` \ err ->
- failWith err
-hPutFS handle (CharStr a# l#) =
- if l# ==# 0# then
- return ()
- else
- _readHandle handle >>= \ htype ->
- case htype of
- _ErrorHandle ioError ->
- _writeHandle handle htype >>
- failWith ioError
- _ClosedHandle ->
- _writeHandle handle htype >>
- failWith MkIOError(handle,IllegalOperation,"handle is closed")
- _SemiClosedHandle _ _ ->
- _writeHandle handle htype >>
- failWith MkIOError(handle,IllegalOperation,"handle is closed")
- _ReadHandle _ _ _ ->
- _writeHandle handle htype >>
- failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
- other ->
- let fp = _filePtr htype in
- -- here we go..
- _ccall_ writeFile (A# a#) fp (I# l#) `CCALL_THEN` \rc ->
- if rc==0 then
- return ()
- else
- _constructError "hPutFS" `CCALL_THEN` \ err ->
- failWith err
-
---ToDo: avoid silly code duplic.
+-- In this CPP branch (__GLASGOW_HASKELL__ <= 602) peekCAStringLen is
+-- defined locally as an alias for peekCStringLen.
+peekCAStringLen = peekCStringLen
+
+#endif
\end{code}