2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
27 mkFastStringForeignPtr,
33 unpackFS, -- :: FastString -> String
34 bytesFS, -- :: FastString -> [Word8]
60 -- This #define suppresses the "import FastString" that
61 -- HsVersions otherwise produces
62 #define COMPILING_FAST_STRING
63 #include "HsVersions.h"
70 import System.IO.Unsafe ( unsafePerformIO )
71 import Control.Monad.ST ( stToIO )
72 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
73 import System.IO ( hPutBuf )
75 import GHC.Arr ( STArray(..), newSTArray )
76 import GHC.IOBase ( IO(..) )
77 import GHC.Ptr ( Ptr(..) )
79 #define hASH_TBL_SIZE 4091
83 A 'FastString' is an array of bytes, hashed to support fast O(1)
84 comparison. It is also associated with a character encoding, so that
85 we know how to convert a 'FastString' to the local encoding, or to the
86 Z-encoding used by the compiler internally.
88 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
91 data FastString = FastString {
92 uniq :: {-# UNPACK #-} !Int, -- unique id
93 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
94 n_chars :: {-# UNPACK #-} !Int, -- number of chars
95 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
101 -- including strings that don't need any encoding
102 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
103 -- A UTF-8 string with a memoized Z-encoding
105 instance Eq FastString where
106 f1 == f2 = uniq f1 == uniq f2
108 instance Ord FastString where
109 -- Compares lexicographically, not by unique
110 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
111 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
112 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
113 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
118 compare a b = cmpFS a b
120 instance Show FastString where
121 show fs = show (unpackFS fs)
123 cmpFS :: FastString -> FastString -> Ordering
124 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
125 if u1 == u2 then EQ else
126 let l = if l1 <= l2 then l1 else l2 in
128 withForeignPtr buf1 $ \p1 ->
129 withForeignPtr buf2 $ \p2 -> do
130 res <- memcmp p1 p2 l
132 _ | res < 0 -> return LT
133 | res == 0 -> if l1 == l2 then return EQ
134 else if l1 < l2 then return LT
136 | otherwise -> return GT
139 foreign import ccall unsafe "ghc_memcmp"
140 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
143 -- -----------------------------------------------------------------------------
147 Internally, the compiler will maintain a fast string symbol
148 table, providing sharing and fast comparison. Creation of
149 new @FastString@s then covertly does a lookup, re-using the
150 @FastString@ if there was a hit.
153 data FastStringTable =
156 (MutableArray# RealWorld [FastString])
158 string_table :: IORef FastStringTable
161 (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
162 newIORef (FastStringTable 0 arr#)
164 lookupTbl :: FastStringTable -> Int -> IO [FastString]
165 lookupTbl (FastStringTable _ arr#) (I# i#) =
166 IO $ \ s# -> readArray# arr# i# s#
168 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
169 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
170 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
171 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
173 mkFastString# :: Addr# -> FastString
174 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
177 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
178 mkFastStringBytes ptr len = unsafePerformIO $ do
179 ft@(FastStringTable uid tbl#) <- readIORef string_table
183 fs <- copyNewFastString uid ptr len
184 updTbl string_table ft h (fs:ls)
185 {- _trace ("new: " ++ show f_str) $ -}
188 lookup_result <- lookupTbl ft h
189 case lookup_result of
192 b <- bucket_match ls len ptr
195 Just v -> {- _trace ("re-use: "++show v) $ -} return v
197 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
198 mkZFastStringBytes ptr len = unsafePerformIO $ do
199 ft@(FastStringTable uid tbl#) <- readIORef string_table
203 fs <- copyNewZFastString uid ptr len
204 updTbl string_table ft h (fs:ls)
205 {- _trace ("new: " ++ show f_str) $ -}
208 lookup_result <- lookupTbl ft h
209 case lookup_result of
212 b <- bucket_match ls len ptr
215 Just v -> {- _trace ("re-use: "++show v) $ -} return v
217 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
218 -- between this and 'mkFastStringBytes' is that we don't have to copy
219 -- the bytes if the string is new to the table.
220 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
221 mkFastStringForeignPtr ptr fp len = do
222 ft@(FastStringTable uid tbl#) <- readIORef string_table
223 -- _trace ("hashed: "++show (I# h)) $
227 fs <- mkNewFastString uid ptr fp len
228 updTbl string_table ft h (fs:ls)
229 {- _trace ("new: " ++ show f_str) $ -}
232 lookup_result <- lookupTbl ft h
233 case lookup_result of
236 b <- bucket_match ls len ptr
239 Just v -> {- _trace ("re-use: "++show v) $ -} return v
241 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
242 mkZFastStringForeignPtr ptr fp len = do
243 ft@(FastStringTable uid tbl#) <- readIORef string_table
244 -- _trace ("hashed: "++show (I# h)) $
248 fs <- mkNewZFastString uid ptr fp len
249 updTbl string_table ft h (fs:ls)
250 {- _trace ("new: " ++ show f_str) $ -}
253 lookup_result <- lookupTbl ft h
254 case lookup_result of
257 b <- bucket_match ls len ptr
260 Just v -> {- _trace ("re-use: "++show v) $ -} return v
263 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
264 mkFastString :: String -> FastString
267 let l = utf8EncodedLength str
268 buf <- mallocForeignPtrBytes l
269 withForeignPtr buf $ \ptr -> do
270 utf8EncodeString ptr str
271 mkFastStringForeignPtr ptr buf l
274 -- | Creates a Z-encoded 'FastString' from a 'String'
275 mkZFastString :: String -> FastString
278 let l = Prelude.length str
279 buf <- mallocForeignPtrBytes l
280 withForeignPtr buf $ \ptr -> do
281 pokeCAString (castPtr ptr) str
282 mkZFastStringForeignPtr ptr buf l
284 bucket_match [] _ _ = return Nothing
285 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
287 b <- cmpStringPrefix ptr buf len
288 if b then return (Just v)
289 else bucket_match ls len ptr
291 bucket_match ls len ptr
293 mkNewFastString uid ptr fp len = do
294 ref <- newIORef Nothing
295 n_chars <- countUTF8Chars ptr len
296 return (FastString uid len n_chars fp (UTF8Encoded ref))
298 mkNewZFastString uid ptr fp len = do
299 return (FastString uid len len fp ZEncoded)
302 copyNewFastString uid ptr len = do
303 fp <- copyBytesToForeignPtr ptr len
304 ref <- newIORef Nothing
305 n_chars <- countUTF8Chars ptr len
306 return (FastString uid len n_chars fp (UTF8Encoded ref))
308 copyNewZFastString uid ptr len = do
309 fp <- copyBytesToForeignPtr ptr len
310 return (FastString uid len len fp ZEncoded)
313 copyBytesToForeignPtr ptr len = do
314 fp <- mallocForeignPtrBytes len
315 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
318 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
319 cmpStringPrefix ptr fp len =
320 withForeignPtr fp $ \ptr' -> do
321 r <- memcmp ptr ptr' len
325 hashStr :: Ptr Word8 -> Int -> Int
326 -- use the Addr to produce a hash value between 0 & m (inclusive)
327 hashStr (Ptr a#) (I# len#) = loop 0# 0#
329 loop h n | n ==# len# = I# h
330 | otherwise = loop h2 (n +# 1#)
331 where c = ord# (indexCharOffAddr# a# n)
332 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
334 -- -----------------------------------------------------------------------------
337 -- | Returns the length of the 'FastString' in characters
338 lengthFS :: FastString -> Int
339 lengthFS f = n_chars f
341 -- | Returns 'True' if the 'FastString' is Z-encoded
342 isZEncoded :: FastString -> Bool
343 isZEncoded fs | ZEncoded <- enc fs = True
346 -- | Returns 'True' if the 'FastString' is empty
347 nullFS :: FastString -> Bool
348 nullFS f = n_bytes f == 0
350 -- | unpacks and decodes the FastString
351 unpackFS :: FastString -> String
352 unpackFS (FastString _ n_bytes _ buf enc) =
353 inlinePerformIO $ withForeignPtr buf $ \ptr ->
355 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
356 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
358 bytesFS :: FastString -> [Word8]
359 bytesFS (FastString _ n_bytes _ buf enc) =
360 inlinePerformIO $ withForeignPtr buf $ \ptr ->
361 peekArray n_bytes ptr
363 -- | returns a Z-encoded version of a 'FastString'. This might be the
364 -- original, if it was already Z-encoded. The first time this
365 -- function is applied to a particular 'FastString', the results are
368 zEncodeFS :: FastString -> FastString
369 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
378 let efs = mkZFastString (zEncodeString (unpackFS fs))
379 writeIORef ref (Just efs)
382 appendFS :: FastString -> FastString -> FastString
383 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
385 concatFS :: [FastString] -> FastString
386 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
388 headFS :: FastString -> Char
389 headFS (FastString _ n_bytes _ buf enc) =
390 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
393 w <- peek (castPtr ptr)
394 return (castCCharToChar w)
396 return (fst (utf8DecodeChar ptr))
398 tailFS :: FastString -> FastString
399 tailFS (FastString _ n_bytes _ buf enc) =
400 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
403 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
405 let (_,ptr') = utf8DecodeChar ptr
406 let off = ptr' `minusPtr` ptr
407 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
409 consFS :: Char -> FastString -> FastString
410 consFS c fs = mkFastString (c : unpackFS fs)
412 uniqueOfFS :: FastString -> Int#
413 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
415 nilFS = mkFastString ""
417 -- -----------------------------------------------------------------------------
418 -- Outputting 'FastString's
420 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
421 -- get the actual bytes in the 'FastString' written to the 'Handle'.
422 hPutFS handle (FastString _ len _ fp _)
423 | len == 0 = return ()
424 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
426 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
427 -- in the current locale's encoding (for error messages and suchlike).
429 -- -----------------------------------------------------------------------------
430 -- LitStrings, here for convenience only.
432 type LitString = Ptr ()
434 mkLitString# :: Addr# -> LitString
435 mkLitString# a# = Ptr a#
437 foreign import ccall unsafe "ghc_strlen"
438 strLength :: Ptr () -> Int
440 -- -----------------------------------------------------------------------------
443 -- Just like unsafePerformIO, but we inline it.
444 {-# INLINE inlinePerformIO #-}
445 inlinePerformIO :: IO a -> a
446 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
448 -- NB. does *not* add a '\0'-terminator.
449 pokeCAString :: Ptr CChar -> String -> IO ()
450 pokeCAString ptr str =
453 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
457 #if __GLASGOW_HASKELL__ < 600
459 mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
460 mallocForeignPtrBytes n = do
462 newForeignPtr r (finalizerFree r)
464 foreign import ccall unsafe "stdlib.h free"
465 finalizerFree :: Ptr a -> IO ()
467 peekCAStringLen = peekCStringLen
469 #elif __GLASGOW_HASKELL__ <= 602
471 peekCAStringLen = peekCStringLen