2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
28 mkFastStringForeignPtr,
34 unpackFS, -- :: FastString -> String
35 bytesFS, -- :: FastString -> [Word8]
65 -- This #define suppresses the "import FastString" that
66 -- HsVersions otherwise produces
67 #define COMPILING_FAST_STRING
68 #include "HsVersions.h"
75 import System.IO.Unsafe ( unsafePerformIO )
76 import Control.Monad.ST ( stToIO )
77 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
78 import System.IO ( hPutBuf )
79 import Data.Maybe ( isJust )
82 import GHC.IOBase ( IO(..) )
83 import GHC.Ptr ( Ptr(..) )
85 #define hASH_TBL_SIZE 4091
86 #define hASH_TBL_SIZE_UNBOXED 4091#
90 A 'FastString' is an array of bytes, hashed to support fast O(1)
91 comparison. It is also associated with a character encoding, so that
92 we know how to convert a 'FastString' to the local encoding, or to the
93 Z-encoding used by the compiler internally.
95 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
98 data FastString = FastString {
99 uniq :: {-# UNPACK #-} !Int, -- unique id
100 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
101 n_chars :: {-# UNPACK #-} !Int, -- number of chars
102 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
108 -- including strings that don't need any encoding
109 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
110 -- A UTF-8 string with a memoized Z-encoding
112 instance Eq FastString where
113 f1 == f2 = uniq f1 == uniq f2
115 instance Ord FastString where
116 -- Compares lexicographically, not by unique
117 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
118 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
119 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
120 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
125 compare a b = cmpFS a b
127 instance Show FastString where
128 show fs = show (unpackFS fs)
130 cmpFS :: FastString -> FastString -> Ordering
131 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
132 if u1 == u2 then EQ else
133 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
138 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
139 unsafeMemcmp buf1 buf2 l =
141 withForeignPtr buf1 $ \p1 ->
142 withForeignPtr buf2 $ \p2 ->
146 foreign import ccall unsafe "ghc_memcmp"
147 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
150 -- -----------------------------------------------------------------------------
154 Internally, the compiler will maintain a fast string symbol
155 table, providing sharing and fast comparison. Creation of
156 new @FastString@s then covertly does a lookup, re-using the
157 @FastString@ if there was a hit.
160 data FastStringTable =
163 (MutableArray# RealWorld [FastString])
165 {-# NOINLINE string_table #-}
166 string_table :: IORef FastStringTable
169 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
171 (# s2#, FastStringTable 0 arr# #)
174 lookupTbl :: FastStringTable -> Int -> IO [FastString]
175 lookupTbl (FastStringTable _ arr#) (I# i#) =
176 IO $ \ s# -> readArray# arr# i# s#
178 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
179 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
180 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
181 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
183 mkFastString# :: Addr# -> FastString
184 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
187 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
188 mkFastStringBytes ptr len = unsafePerformIO $ do
189 ft@(FastStringTable uid tbl#) <- readIORef string_table
193 fs <- copyNewFastString uid ptr len
194 updTbl string_table ft h (fs:ls)
195 {- _trace ("new: " ++ show f_str) $ -}
198 lookup_result <- lookupTbl ft h
199 case lookup_result of
202 b <- bucket_match ls len ptr
205 Just v -> {- _trace ("re-use: "++show v) $ -} return v
207 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
208 mkZFastStringBytes ptr len = unsafePerformIO $ do
209 ft@(FastStringTable uid tbl#) <- readIORef string_table
213 fs <- copyNewZFastString uid ptr len
214 updTbl string_table ft h (fs:ls)
215 {- _trace ("new: " ++ show f_str) $ -}
218 lookup_result <- lookupTbl ft h
219 case lookup_result of
222 b <- bucket_match ls len ptr
225 Just v -> {- _trace ("re-use: "++show v) $ -} return v
227 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
228 -- between this and 'mkFastStringBytes' is that we don't have to copy
229 -- the bytes if the string is new to the table.
230 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
231 mkFastStringForeignPtr ptr fp len = do
232 ft@(FastStringTable uid tbl#) <- readIORef string_table
233 -- _trace ("hashed: "++show (I# h)) $
237 fs <- mkNewFastString uid ptr fp len
238 updTbl string_table ft h (fs:ls)
239 {- _trace ("new: " ++ show f_str) $ -}
242 lookup_result <- lookupTbl ft h
243 case lookup_result of
246 b <- bucket_match ls len ptr
249 Just v -> {- _trace ("re-use: "++show v) $ -} return v
251 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
252 mkZFastStringForeignPtr ptr fp len = do
253 ft@(FastStringTable uid tbl#) <- readIORef string_table
254 -- _trace ("hashed: "++show (I# h)) $
258 fs <- mkNewZFastString uid ptr fp len
259 updTbl string_table ft h (fs:ls)
260 {- _trace ("new: " ++ show f_str) $ -}
263 lookup_result <- lookupTbl ft h
264 case lookup_result of
267 b <- bucket_match ls len ptr
270 Just v -> {- _trace ("re-use: "++show v) $ -} return v
273 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
274 mkFastString :: String -> FastString
277 let l = utf8EncodedLength str
278 buf <- mallocForeignPtrBytes l
279 withForeignPtr buf $ \ptr -> do
280 utf8EncodeString ptr str
281 mkFastStringForeignPtr ptr buf l
283 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
284 mkFastStringByteList :: [Word8] -> FastString
285 mkFastStringByteList str =
287 let l = Prelude.length str
288 buf <- mallocForeignPtrBytes l
289 withForeignPtr buf $ \ptr -> do
290 pokeArray (castPtr ptr) str
291 mkFastStringForeignPtr ptr buf l
293 -- | Creates a Z-encoded 'FastString' from a 'String'
294 mkZFastString :: String -> FastString
297 let l = Prelude.length str
298 buf <- mallocForeignPtrBytes l
299 withForeignPtr buf $ \ptr -> do
300 pokeCAString (castPtr ptr) str
301 mkZFastStringForeignPtr ptr buf l
303 bucket_match [] _ _ = return Nothing
304 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
306 b <- cmpStringPrefix ptr buf len
307 if b then return (Just v)
308 else bucket_match ls len ptr
310 bucket_match ls len ptr
312 mkNewFastString uid ptr fp len = do
313 ref <- newIORef Nothing
314 n_chars <- countUTF8Chars ptr len
315 return (FastString uid len n_chars fp (UTF8Encoded ref))
317 mkNewZFastString uid ptr fp len = do
318 return (FastString uid len len fp ZEncoded)
321 copyNewFastString uid ptr len = do
322 fp <- copyBytesToForeignPtr ptr len
323 ref <- newIORef Nothing
324 n_chars <- countUTF8Chars ptr len
325 return (FastString uid len n_chars fp (UTF8Encoded ref))
327 copyNewZFastString uid ptr len = do
328 fp <- copyBytesToForeignPtr ptr len
329 return (FastString uid len len fp ZEncoded)
332 copyBytesToForeignPtr ptr len = do
333 fp <- mallocForeignPtrBytes len
334 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
337 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
338 cmpStringPrefix ptr fp len =
339 withForeignPtr fp $ \ptr' -> do
340 r <- memcmp ptr ptr' len
344 hashStr :: Ptr Word8 -> Int -> Int
345 -- use the Addr to produce a hash value between 0 & m (inclusive)
346 hashStr (Ptr a#) (I# len#) = loop 0# 0#
348 loop h n | n ==# len# = I# h
349 | otherwise = loop h2 (n +# 1#)
350 where c = ord# (indexCharOffAddr# a# n)
351 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
353 -- -----------------------------------------------------------------------------
356 -- | Returns the length of the 'FastString' in characters
357 lengthFS :: FastString -> Int
358 lengthFS f = n_chars f
360 -- | Returns 'True' if the 'FastString' is Z-encoded
361 isZEncoded :: FastString -> Bool
362 isZEncoded fs | ZEncoded <- enc fs = True
365 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
366 -- a Z-encoding cached (used in producing stats).
367 hasZEncoding :: FastString -> Bool
368 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
376 -- | Returns 'True' if the 'FastString' is empty
377 nullFS :: FastString -> Bool
378 nullFS f = n_bytes f == 0
380 -- | unpacks and decodes the FastString
381 unpackFS :: FastString -> String
382 unpackFS (FastString _ n_bytes _ buf enc) =
383 inlinePerformIO $ withForeignPtr buf $ \ptr ->
385 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
386 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
388 bytesFS :: FastString -> [Word8]
389 bytesFS (FastString _ n_bytes _ buf enc) =
390 inlinePerformIO $ withForeignPtr buf $ \ptr ->
391 peekArray n_bytes ptr
393 -- | returns a Z-encoded version of a 'FastString'. This might be the
394 -- original, if it was already Z-encoded. The first time this
395 -- function is applied to a particular 'FastString', the results are
398 zEncodeFS :: FastString -> FastString
399 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
408 let efs = mkZFastString (zEncodeString (unpackFS fs))
409 writeIORef ref (Just efs)
412 appendFS :: FastString -> FastString -> FastString
413 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
415 concatFS :: [FastString] -> FastString
416 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
418 headFS :: FastString -> Char
419 headFS (FastString _ n_bytes _ buf enc) =
420 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
423 w <- peek (castPtr ptr)
424 return (castCCharToChar w)
426 return (fst (utf8DecodeChar ptr))
428 tailFS :: FastString -> FastString
429 tailFS (FastString _ n_bytes _ buf enc) =
430 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
433 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
435 let (_,ptr') = utf8DecodeChar ptr
436 let off = ptr' `minusPtr` ptr
437 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
439 consFS :: Char -> FastString -> FastString
440 consFS c fs = mkFastString (c : unpackFS fs)
442 uniqueOfFS :: FastString -> Int#
443 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
445 nilFS = mkFastString ""
447 -- -----------------------------------------------------------------------------
450 getFastStringTable :: IO [[FastString]]
451 getFastStringTable = do
452 tbl <- readIORef string_table
453 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
456 -- -----------------------------------------------------------------------------
457 -- Outputting 'FastString's
459 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
460 -- get the actual bytes in the 'FastString' written to the 'Handle'.
461 hPutFS handle (FastString _ len _ fp _)
462 | len == 0 = return ()
463 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
465 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
466 -- in the current locale's encoding (for error messages and suchlike).
468 -- -----------------------------------------------------------------------------
469 -- LitStrings, here for convenience only.
471 type LitString = Ptr ()
473 mkLitString# :: Addr# -> LitString
474 mkLitString# a# = Ptr a#
476 foreign import ccall unsafe "ghc_strlen"
477 strLength :: Ptr () -> Int
479 -- -----------------------------------------------------------------------------
482 -- Just like unsafePerformIO, but we inline it.
483 {-# INLINE inlinePerformIO #-}
484 inlinePerformIO :: IO a -> a
485 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
487 -- NB. does *not* add a '\0'-terminator.
488 pokeCAString :: Ptr CChar -> String -> IO ()
489 pokeCAString ptr str =
492 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
496 #if __GLASGOW_HASKELL__ <= 602
497 peekCAStringLen = peekCStringLen