2 % (c) The University of Glasgow, 1997-2006
6 -- The above warning supression flag is a temporary kludge.
7 -- While working on this module you are encouraged to remove it and fix
8 -- any warnings in the module. See
9 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
13 FastString: A compact, hash-consed, representation of character strings.
14 Comparison is O(1), and you can get a Unique from them.
15 Generated by the FSLIT macro
16 Turn into SDoc with Outputable.ftext
18 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
19 Practically no operations
20 Outputing them is fast
21 Generated by the SLIT macro
22 Turn into SDoc with Outputable.ptext
24 Use LitString unless you want the facilities of FastString
29 FastString(..), -- not abstract, for now.
35 mkFastStringForeignPtr,
41 unpackFS, -- :: FastString -> String
42 bytesFS, -- :: FastString -> [Word8]
72 -- This #define suppresses the "import FastString" that
73 -- HsVersions otherwise produces
74 #define COMPILING_FAST_STRING
75 #include "HsVersions.h"
82 import System.IO.Unsafe ( unsafePerformIO )
83 import Control.Monad.ST ( stToIO )
84 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
85 import System.IO ( hPutBuf )
86 import Data.Maybe ( isJust )
89 import GHC.IOBase ( IO(..) )
90 import GHC.Ptr ( Ptr(..) )
92 #define hASH_TBL_SIZE 4091
93 #define hASH_TBL_SIZE_UNBOXED 4091#
97 A 'FastString' is an array of bytes, hashed to support fast O(1)
98 comparison. It is also associated with a character encoding, so that
99 we know how to convert a 'FastString' to the local encoding, or to the
100 Z-encoding used by the compiler internally.
102 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
105 data FastString = FastString {
106 uniq :: {-# UNPACK #-} !Int, -- unique id
107 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
108 n_chars :: {-# UNPACK #-} !Int, -- number of chars
109 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
115 -- including strings that don't need any encoding
116 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
117 -- A UTF-8 string with a memoized Z-encoding
119 instance Eq FastString where
120 f1 == f2 = uniq f1 == uniq f2
122 instance Ord FastString where
123 -- Compares lexicographically, not by unique
124 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
125 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
126 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
127 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
132 compare a b = cmpFS a b
134 instance Show FastString where
135 show fs = show (unpackFS fs)
137 cmpFS :: FastString -> FastString -> Ordering
138 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
139 if u1 == u2 then EQ else
140 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
145 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
146 unsafeMemcmp buf1 buf2 l =
148 withForeignPtr buf1 $ \p1 ->
149 withForeignPtr buf2 $ \p2 ->
153 foreign import ccall unsafe "ghc_memcmp"
154 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
157 -- -----------------------------------------------------------------------------
161 Internally, the compiler will maintain a fast string symbol
162 table, providing sharing and fast comparison. Creation of
163 new @FastString@s then covertly does a lookup, re-using the
164 @FastString@ if there was a hit.
167 data FastStringTable =
170 (MutableArray# RealWorld [FastString])
172 {-# NOINLINE string_table #-}
173 string_table :: IORef FastStringTable
176 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
178 (# s2#, FastStringTable 0 arr# #)
181 lookupTbl :: FastStringTable -> Int -> IO [FastString]
182 lookupTbl (FastStringTable _ arr#) (I# i#) =
183 IO $ \ s# -> readArray# arr# i# s#
185 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
186 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
187 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
188 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
190 mkFastString# :: Addr# -> FastString
191 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
194 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
195 mkFastStringBytes ptr len = unsafePerformIO $ do
196 ft@(FastStringTable uid tbl#) <- readIORef string_table
200 fs <- copyNewFastString uid ptr len
201 updTbl string_table ft h (fs:ls)
202 {- _trace ("new: " ++ show f_str) $ -}
205 lookup_result <- lookupTbl ft h
206 case lookup_result of
209 b <- bucket_match ls len ptr
212 Just v -> {- _trace ("re-use: "++show v) $ -} return v
214 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
215 mkZFastStringBytes ptr len = unsafePerformIO $ do
216 ft@(FastStringTable uid tbl#) <- readIORef string_table
220 fs <- copyNewZFastString uid ptr len
221 updTbl string_table ft h (fs:ls)
222 {- _trace ("new: " ++ show f_str) $ -}
225 lookup_result <- lookupTbl ft h
226 case lookup_result of
229 b <- bucket_match ls len ptr
232 Just v -> {- _trace ("re-use: "++show v) $ -} return v
234 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
235 -- between this and 'mkFastStringBytes' is that we don't have to copy
236 -- the bytes if the string is new to the table.
237 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
238 mkFastStringForeignPtr ptr fp len = do
239 ft@(FastStringTable uid tbl#) <- readIORef string_table
240 -- _trace ("hashed: "++show (I# h)) $
244 fs <- mkNewFastString uid ptr fp len
245 updTbl string_table ft h (fs:ls)
246 {- _trace ("new: " ++ show f_str) $ -}
249 lookup_result <- lookupTbl ft h
250 case lookup_result of
253 b <- bucket_match ls len ptr
256 Just v -> {- _trace ("re-use: "++show v) $ -} return v
258 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
259 mkZFastStringForeignPtr ptr fp len = do
260 ft@(FastStringTable uid tbl#) <- readIORef string_table
261 -- _trace ("hashed: "++show (I# h)) $
265 fs <- mkNewZFastString uid ptr fp len
266 updTbl string_table ft h (fs:ls)
267 {- _trace ("new: " ++ show f_str) $ -}
270 lookup_result <- lookupTbl ft h
271 case lookup_result of
274 b <- bucket_match ls len ptr
277 Just v -> {- _trace ("re-use: "++show v) $ -} return v
280 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
281 mkFastString :: String -> FastString
284 let l = utf8EncodedLength str
285 buf <- mallocForeignPtrBytes l
286 withForeignPtr buf $ \ptr -> do
287 utf8EncodeString ptr str
288 mkFastStringForeignPtr ptr buf l
290 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
291 mkFastStringByteList :: [Word8] -> FastString
292 mkFastStringByteList str =
294 let l = Prelude.length str
295 buf <- mallocForeignPtrBytes l
296 withForeignPtr buf $ \ptr -> do
297 pokeArray (castPtr ptr) str
298 mkFastStringForeignPtr ptr buf l
300 -- | Creates a Z-encoded 'FastString' from a 'String'
301 mkZFastString :: String -> FastString
304 let l = Prelude.length str
305 buf <- mallocForeignPtrBytes l
306 withForeignPtr buf $ \ptr -> do
307 pokeCAString (castPtr ptr) str
308 mkZFastStringForeignPtr ptr buf l
310 bucket_match [] _ _ = return Nothing
311 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
313 b <- cmpStringPrefix ptr buf len
314 if b then return (Just v)
315 else bucket_match ls len ptr
317 bucket_match ls len ptr
319 mkNewFastString uid ptr fp len = do
320 ref <- newIORef Nothing
321 n_chars <- countUTF8Chars ptr len
322 return (FastString uid len n_chars fp (UTF8Encoded ref))
324 mkNewZFastString uid ptr fp len = do
325 return (FastString uid len len fp ZEncoded)
328 copyNewFastString uid ptr len = do
329 fp <- copyBytesToForeignPtr ptr len
330 ref <- newIORef Nothing
331 n_chars <- countUTF8Chars ptr len
332 return (FastString uid len n_chars fp (UTF8Encoded ref))
334 copyNewZFastString uid ptr len = do
335 fp <- copyBytesToForeignPtr ptr len
336 return (FastString uid len len fp ZEncoded)
339 copyBytesToForeignPtr ptr len = do
340 fp <- mallocForeignPtrBytes len
341 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
344 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
345 cmpStringPrefix ptr fp len =
346 withForeignPtr fp $ \ptr' -> do
347 r <- memcmp ptr ptr' len
351 hashStr :: Ptr Word8 -> Int -> Int
352 -- use the Addr to produce a hash value between 0 & m (inclusive)
353 hashStr (Ptr a#) (I# len#) = loop 0# 0#
355 loop h n | n ==# len# = I# h
356 | otherwise = loop h2 (n +# 1#)
357 where c = ord# (indexCharOffAddr# a# n)
358 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
360 -- -----------------------------------------------------------------------------
363 -- | Returns the length of the 'FastString' in characters
364 lengthFS :: FastString -> Int
365 lengthFS f = n_chars f
367 -- | Returns 'True' if the 'FastString' is Z-encoded
368 isZEncoded :: FastString -> Bool
369 isZEncoded fs | ZEncoded <- enc fs = True
372 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
373 -- a Z-encoding cached (used in producing stats).
374 hasZEncoding :: FastString -> Bool
375 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
383 -- | Returns 'True' if the 'FastString' is empty
384 nullFS :: FastString -> Bool
385 nullFS f = n_bytes f == 0
387 -- | unpacks and decodes the FastString
388 unpackFS :: FastString -> String
389 unpackFS (FastString _ n_bytes _ buf enc) =
390 inlinePerformIO $ withForeignPtr buf $ \ptr ->
392 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
393 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
395 bytesFS :: FastString -> [Word8]
396 bytesFS (FastString _ n_bytes _ buf enc) =
397 inlinePerformIO $ withForeignPtr buf $ \ptr ->
398 peekArray n_bytes ptr
400 -- | returns a Z-encoded version of a 'FastString'. This might be the
401 -- original, if it was already Z-encoded. The first time this
402 -- function is applied to a particular 'FastString', the results are
405 zEncodeFS :: FastString -> FastString
406 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
415 let efs = mkZFastString (zEncodeString (unpackFS fs))
416 writeIORef ref (Just efs)
419 appendFS :: FastString -> FastString -> FastString
420 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
422 concatFS :: [FastString] -> FastString
423 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
425 headFS :: FastString -> Char
426 headFS (FastString _ n_bytes _ buf enc) =
427 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
430 w <- peek (castPtr ptr)
431 return (castCCharToChar w)
433 return (fst (utf8DecodeChar ptr))
435 tailFS :: FastString -> FastString
436 tailFS (FastString _ n_bytes _ buf enc) =
437 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
440 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
442 let (_,ptr') = utf8DecodeChar ptr
443 let off = ptr' `minusPtr` ptr
444 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
446 consFS :: Char -> FastString -> FastString
447 consFS c fs = mkFastString (c : unpackFS fs)
449 uniqueOfFS :: FastString -> Int#
450 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
452 nilFS = mkFastString ""
454 -- -----------------------------------------------------------------------------
457 getFastStringTable :: IO [[FastString]]
458 getFastStringTable = do
459 tbl <- readIORef string_table
460 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
463 -- -----------------------------------------------------------------------------
464 -- Outputting 'FastString's
466 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
467 -- get the actual bytes in the 'FastString' written to the 'Handle'.
468 hPutFS handle (FastString _ len _ fp _)
469 | len == 0 = return ()
470 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
472 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
473 -- in the current locale's encoding (for error messages and suchlike).
475 -- -----------------------------------------------------------------------------
476 -- LitStrings, here for convenience only.
478 type LitString = Ptr ()
480 mkLitString# :: Addr# -> LitString
481 mkLitString# a# = Ptr a#
483 foreign import ccall unsafe "ghc_strlen"
484 strLength :: Ptr () -> Int
486 -- -----------------------------------------------------------------------------
489 -- Just like unsafePerformIO, but we inline it.
490 {-# INLINE inlinePerformIO #-}
491 inlinePerformIO :: IO a -> a
492 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
494 -- NB. does *not* add a '\0'-terminator.
495 pokeCAString :: Ptr CChar -> String -> IO ()
496 pokeCAString ptr str =
499 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
503 #if __GLASGOW_HASKELL__ <= 602
504 peekCAStringLen = peekCStringLen