2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
28 mkFastStringForeignPtr,
34 unpackFS, -- :: FastString -> String
35 bytesFS, -- :: FastString -> [Word8]
65 -- This #define suppresses the "import FastString" that
66 -- HsVersions otherwise produces
67 #define COMPILING_FAST_STRING
68 #include "HsVersions.h"
75 import System.IO.Unsafe ( unsafePerformIO )
76 import Control.Monad.ST ( stToIO )
77 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
78 import System.IO ( hPutBuf )
79 import Data.Maybe ( isJust )
81 import GHC.Arr ( STArray(..), newSTArray )
82 import GHC.IOBase ( IO(..) )
83 import GHC.Ptr ( Ptr(..) )
85 #define hASH_TBL_SIZE 4091
89 A 'FastString' is an array of bytes, hashed to support fast O(1)
90 comparison. It is also associated with a character encoding, so that
91 we know how to convert a 'FastString' to the local encoding, or to the
92 Z-encoding used by the compiler internally.
94 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
97 data FastString = FastString {
98 uniq :: {-# UNPACK #-} !Int, -- unique id
99 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
100 n_chars :: {-# UNPACK #-} !Int, -- number of chars
101 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
107 -- including strings that don't need any encoding
108 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
109 -- A UTF-8 string with a memoized Z-encoding
111 instance Eq FastString where
112 f1 == f2 = uniq f1 == uniq f2
114 instance Ord FastString where
115 -- Compares lexicographically, not by unique
116 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
117 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
118 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
119 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
124 compare a b = cmpFS a b
126 instance Show FastString where
127 show fs = show (unpackFS fs)
129 cmpFS :: FastString -> FastString -> Ordering
130 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
131 if u1 == u2 then EQ else
132 let l = if l1 <= l2 then l1 else l2 in
134 withForeignPtr buf1 $ \p1 ->
135 withForeignPtr buf2 $ \p2 -> do
136 res <- memcmp p1 p2 l
138 _ | res < 0 -> return LT
139 | res == 0 -> if l1 == l2 then return EQ
140 else if l1 < l2 then return LT
142 | otherwise -> return GT
145 foreign import ccall unsafe "ghc_memcmp"
146 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
149 -- -----------------------------------------------------------------------------
153 Internally, the compiler will maintain a fast string symbol
154 table, providing sharing and fast comparison. Creation of
155 new @FastString@s then covertly does a lookup, re-using the
156 @FastString@ if there was a hit.
159 data FastStringTable =
162 (MutableArray# RealWorld [FastString])
164 {-# NOINLINE string_table #-}
165 string_table :: IORef FastStringTable
168 (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
169 newIORef (FastStringTable 0 arr#)
171 lookupTbl :: FastStringTable -> Int -> IO [FastString]
172 lookupTbl (FastStringTable _ arr#) (I# i#) =
173 IO $ \ s# -> readArray# arr# i# s#
175 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
176 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
177 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
178 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
180 mkFastString# :: Addr# -> FastString
181 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
184 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
185 mkFastStringBytes ptr len = unsafePerformIO $ do
186 ft@(FastStringTable uid tbl#) <- readIORef string_table
190 fs <- copyNewFastString uid ptr len
191 updTbl string_table ft h (fs:ls)
192 {- _trace ("new: " ++ show f_str) $ -}
195 lookup_result <- lookupTbl ft h
196 case lookup_result of
199 b <- bucket_match ls len ptr
202 Just v -> {- _trace ("re-use: "++show v) $ -} return v
204 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
205 mkZFastStringBytes ptr len = unsafePerformIO $ do
206 ft@(FastStringTable uid tbl#) <- readIORef string_table
210 fs <- copyNewZFastString uid ptr len
211 updTbl string_table ft h (fs:ls)
212 {- _trace ("new: " ++ show f_str) $ -}
215 lookup_result <- lookupTbl ft h
216 case lookup_result of
219 b <- bucket_match ls len ptr
222 Just v -> {- _trace ("re-use: "++show v) $ -} return v
224 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
225 -- between this and 'mkFastStringBytes' is that we don't have to copy
226 -- the bytes if the string is new to the table.
227 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
228 mkFastStringForeignPtr ptr fp len = do
229 ft@(FastStringTable uid tbl#) <- readIORef string_table
230 -- _trace ("hashed: "++show (I# h)) $
234 fs <- mkNewFastString uid ptr fp len
235 updTbl string_table ft h (fs:ls)
236 {- _trace ("new: " ++ show f_str) $ -}
239 lookup_result <- lookupTbl ft h
240 case lookup_result of
243 b <- bucket_match ls len ptr
246 Just v -> {- _trace ("re-use: "++show v) $ -} return v
248 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
249 mkZFastStringForeignPtr ptr fp len = do
250 ft@(FastStringTable uid tbl#) <- readIORef string_table
251 -- _trace ("hashed: "++show (I# h)) $
255 fs <- mkNewZFastString uid ptr fp len
256 updTbl string_table ft h (fs:ls)
257 {- _trace ("new: " ++ show f_str) $ -}
260 lookup_result <- lookupTbl ft h
261 case lookup_result of
264 b <- bucket_match ls len ptr
267 Just v -> {- _trace ("re-use: "++show v) $ -} return v
270 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
271 mkFastString :: String -> FastString
274 let l = utf8EncodedLength str
275 buf <- mallocForeignPtrBytes l
276 withForeignPtr buf $ \ptr -> do
277 utf8EncodeString ptr str
278 mkFastStringForeignPtr ptr buf l
280 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
281 mkFastStringByteList :: [Word8] -> FastString
282 mkFastStringByteList str =
284 let l = Prelude.length str
285 buf <- mallocForeignPtrBytes l
286 withForeignPtr buf $ \ptr -> do
287 pokeArray (castPtr ptr) str
288 mkFastStringForeignPtr ptr buf l
290 -- | Creates a Z-encoded 'FastString' from a 'String'
291 mkZFastString :: String -> FastString
294 let l = Prelude.length str
295 buf <- mallocForeignPtrBytes l
296 withForeignPtr buf $ \ptr -> do
297 pokeCAString (castPtr ptr) str
298 mkZFastStringForeignPtr ptr buf l
300 bucket_match [] _ _ = return Nothing
301 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
303 b <- cmpStringPrefix ptr buf len
304 if b then return (Just v)
305 else bucket_match ls len ptr
307 bucket_match ls len ptr
309 mkNewFastString uid ptr fp len = do
310 ref <- newIORef Nothing
311 n_chars <- countUTF8Chars ptr len
312 return (FastString uid len n_chars fp (UTF8Encoded ref))
314 mkNewZFastString uid ptr fp len = do
315 return (FastString uid len len fp ZEncoded)
318 copyNewFastString uid ptr len = do
319 fp <- copyBytesToForeignPtr ptr len
320 ref <- newIORef Nothing
321 n_chars <- countUTF8Chars ptr len
322 return (FastString uid len n_chars fp (UTF8Encoded ref))
324 copyNewZFastString uid ptr len = do
325 fp <- copyBytesToForeignPtr ptr len
326 return (FastString uid len len fp ZEncoded)
329 copyBytesToForeignPtr ptr len = do
330 fp <- mallocForeignPtrBytes len
331 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
334 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
335 cmpStringPrefix ptr fp len =
336 withForeignPtr fp $ \ptr' -> do
337 r <- memcmp ptr ptr' len
341 hashStr :: Ptr Word8 -> Int -> Int
342 -- use the Addr to produce a hash value between 0 & m (inclusive)
343 hashStr (Ptr a#) (I# len#) = loop 0# 0#
345 loop h n | n ==# len# = I# h
346 | otherwise = loop h2 (n +# 1#)
347 where c = ord# (indexCharOffAddr# a# n)
348 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
350 -- -----------------------------------------------------------------------------
353 -- | Returns the length of the 'FastString' in characters
354 lengthFS :: FastString -> Int
355 lengthFS f = n_chars f
357 -- | Returns 'True' if the 'FastString' is Z-encoded
358 isZEncoded :: FastString -> Bool
359 isZEncoded fs | ZEncoded <- enc fs = True
362 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
363 -- a Z-encoding cached (used in producing stats).
364 hasZEncoding :: FastString -> Bool
365 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
373 -- | Returns 'True' if the 'FastString' is empty
374 nullFS :: FastString -> Bool
375 nullFS f = n_bytes f == 0
377 -- | unpacks and decodes the FastString
378 unpackFS :: FastString -> String
379 unpackFS (FastString _ n_bytes _ buf enc) =
380 inlinePerformIO $ withForeignPtr buf $ \ptr ->
382 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
383 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
385 bytesFS :: FastString -> [Word8]
386 bytesFS (FastString _ n_bytes _ buf enc) =
387 inlinePerformIO $ withForeignPtr buf $ \ptr ->
388 peekArray n_bytes ptr
390 -- | returns a Z-encoded version of a 'FastString'. This might be the
391 -- original, if it was already Z-encoded. The first time this
392 -- function is applied to a particular 'FastString', the results are
395 zEncodeFS :: FastString -> FastString
396 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
405 let efs = mkZFastString (zEncodeString (unpackFS fs))
406 writeIORef ref (Just efs)
409 appendFS :: FastString -> FastString -> FastString
410 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
412 concatFS :: [FastString] -> FastString
413 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
415 headFS :: FastString -> Char
416 headFS (FastString _ n_bytes _ buf enc) =
417 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
420 w <- peek (castPtr ptr)
421 return (castCCharToChar w)
423 return (fst (utf8DecodeChar ptr))
425 tailFS :: FastString -> FastString
426 tailFS (FastString _ n_bytes _ buf enc) =
427 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
430 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
432 let (_,ptr') = utf8DecodeChar ptr
433 let off = ptr' `minusPtr` ptr
434 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
436 consFS :: Char -> FastString -> FastString
437 consFS c fs = mkFastString (c : unpackFS fs)
439 uniqueOfFS :: FastString -> Int#
440 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
442 nilFS = mkFastString ""
444 -- -----------------------------------------------------------------------------
447 getFastStringTable :: IO [[FastString]]
448 getFastStringTable = do
449 tbl <- readIORef string_table
450 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
453 -- -----------------------------------------------------------------------------
454 -- Outputting 'FastString's
456 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
457 -- get the actual bytes in the 'FastString' written to the 'Handle'.
458 hPutFS handle (FastString _ len _ fp _)
459 | len == 0 = return ()
460 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
462 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
463 -- in the current locale's encoding (for error messages and suchlike).
465 -- -----------------------------------------------------------------------------
466 -- LitStrings, here for convenience only.
468 type LitString = Ptr ()
470 mkLitString# :: Addr# -> LitString
471 mkLitString# a# = Ptr a#
473 foreign import ccall unsafe "ghc_strlen"
474 strLength :: Ptr () -> Int
476 -- -----------------------------------------------------------------------------
479 -- Just like unsafePerformIO, but we inline it.
480 {-# INLINE inlinePerformIO #-}
481 inlinePerformIO :: IO a -> a
482 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
484 -- NB. does *not* add a '\0'-terminator.
485 pokeCAString :: Ptr CChar -> String -> IO ()
486 pokeCAString ptr str =
489 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
493 #if __GLASGOW_HASKELL__ <= 602
494 peekCAStringLen = peekCStringLen