2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
28 mkFastStringForeignPtr,
34 unpackFS, -- :: FastString -> String
35 bytesFS, -- :: FastString -> [Word8]
65 -- This #define suppresses the "import FastString" that
66 -- HsVersions otherwise produces
67 #define COMPILING_FAST_STRING
68 #include "HsVersions.h"
75 import System.IO.Unsafe ( unsafePerformIO )
76 import Control.Monad.ST ( stToIO )
77 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
78 import System.IO ( hPutBuf )
79 import Data.Maybe ( isJust )
81 import GHC.Arr ( STArray(..), newSTArray )
82 import GHC.IOBase ( IO(..) )
83 import GHC.Ptr ( Ptr(..) )
85 #define hASH_TBL_SIZE 4091
89 A 'FastString' is an array of bytes, hashed to support fast O(1)
90 comparison. It is also associated with a character encoding, so that
91 we know how to convert a 'FastString' to the local encoding, or to the
92 Z-encoding used by the compiler internally.
94 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
97 data FastString = FastString {
98 uniq :: {-# UNPACK #-} !Int, -- unique id
99 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
100 n_chars :: {-# UNPACK #-} !Int, -- number of chars
101 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
107 -- including strings that don't need any encoding
108 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
109 -- A UTF-8 string with a memoized Z-encoding
111 instance Eq FastString where
112 f1 == f2 = uniq f1 == uniq f2
114 instance Ord FastString where
115 -- Compares lexicographically, not by unique
116 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
117 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
118 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
119 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
124 compare a b = cmpFS a b
126 instance Show FastString where
127 show fs = show (unpackFS fs)
129 cmpFS :: FastString -> FastString -> Ordering
130 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
131 if u1 == u2 then EQ else
132 let l = if l1 <= l2 then l1 else l2 in
134 withForeignPtr buf1 $ \p1 ->
135 withForeignPtr buf2 $ \p2 -> do
136 res <- memcmp p1 p2 l
137 return $ case compare res 0 of
143 foreign import ccall unsafe "ghc_memcmp"
144 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
147 -- -----------------------------------------------------------------------------
151 Internally, the compiler will maintain a fast string symbol
152 table, providing sharing and fast comparison. Creation of
153 new @FastString@s then covertly does a lookup, re-using the
154 @FastString@ if there was a hit.
157 data FastStringTable =
160 (MutableArray# RealWorld [FastString])
162 {-# NOINLINE string_table #-}
163 string_table :: IORef FastStringTable
166 (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
167 newIORef (FastStringTable 0 arr#)
169 lookupTbl :: FastStringTable -> Int -> IO [FastString]
170 lookupTbl (FastStringTable _ arr#) (I# i#) =
171 IO $ \ s# -> readArray# arr# i# s#
173 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
174 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
175 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
176 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
178 mkFastString# :: Addr# -> FastString
179 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
182 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
183 mkFastStringBytes ptr len = unsafePerformIO $ do
184 ft@(FastStringTable uid tbl#) <- readIORef string_table
188 fs <- copyNewFastString uid ptr len
189 updTbl string_table ft h (fs:ls)
190 {- _trace ("new: " ++ show f_str) $ -}
193 lookup_result <- lookupTbl ft h
194 case lookup_result of
197 b <- bucket_match ls len ptr
200 Just v -> {- _trace ("re-use: "++show v) $ -} return v
202 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
203 mkZFastStringBytes ptr len = unsafePerformIO $ do
204 ft@(FastStringTable uid tbl#) <- readIORef string_table
208 fs <- copyNewZFastString uid ptr len
209 updTbl string_table ft h (fs:ls)
210 {- _trace ("new: " ++ show f_str) $ -}
213 lookup_result <- lookupTbl ft h
214 case lookup_result of
217 b <- bucket_match ls len ptr
220 Just v -> {- _trace ("re-use: "++show v) $ -} return v
222 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
223 -- between this and 'mkFastStringBytes' is that we don't have to copy
224 -- the bytes if the string is new to the table.
225 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
226 mkFastStringForeignPtr ptr fp len = do
227 ft@(FastStringTable uid tbl#) <- readIORef string_table
228 -- _trace ("hashed: "++show (I# h)) $
232 fs <- mkNewFastString uid ptr fp len
233 updTbl string_table ft h (fs:ls)
234 {- _trace ("new: " ++ show f_str) $ -}
237 lookup_result <- lookupTbl ft h
238 case lookup_result of
241 b <- bucket_match ls len ptr
244 Just v -> {- _trace ("re-use: "++show v) $ -} return v
246 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
247 mkZFastStringForeignPtr ptr fp len = do
248 ft@(FastStringTable uid tbl#) <- readIORef string_table
249 -- _trace ("hashed: "++show (I# h)) $
253 fs <- mkNewZFastString uid ptr fp len
254 updTbl string_table ft h (fs:ls)
255 {- _trace ("new: " ++ show f_str) $ -}
258 lookup_result <- lookupTbl ft h
259 case lookup_result of
262 b <- bucket_match ls len ptr
265 Just v -> {- _trace ("re-use: "++show v) $ -} return v
268 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
269 mkFastString :: String -> FastString
272 let l = utf8EncodedLength str
273 buf <- mallocForeignPtrBytes l
274 withForeignPtr buf $ \ptr -> do
275 utf8EncodeString ptr str
276 mkFastStringForeignPtr ptr buf l
278 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
279 mkFastStringByteList :: [Word8] -> FastString
280 mkFastStringByteList str =
282 let l = Prelude.length str
283 buf <- mallocForeignPtrBytes l
284 withForeignPtr buf $ \ptr -> do
285 pokeArray (castPtr ptr) str
286 mkFastStringForeignPtr ptr buf l
288 -- | Creates a Z-encoded 'FastString' from a 'String'
289 mkZFastString :: String -> FastString
292 let l = Prelude.length str
293 buf <- mallocForeignPtrBytes l
294 withForeignPtr buf $ \ptr -> do
295 pokeCAString (castPtr ptr) str
296 mkZFastStringForeignPtr ptr buf l
298 bucket_match [] _ _ = return Nothing
299 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
301 b <- cmpStringPrefix ptr buf len
302 if b then return (Just v)
303 else bucket_match ls len ptr
305 bucket_match ls len ptr
307 mkNewFastString uid ptr fp len = do
308 ref <- newIORef Nothing
309 n_chars <- countUTF8Chars ptr len
310 return (FastString uid len n_chars fp (UTF8Encoded ref))
312 mkNewZFastString uid ptr fp len = do
313 return (FastString uid len len fp ZEncoded)
316 copyNewFastString uid ptr len = do
317 fp <- copyBytesToForeignPtr ptr len
318 ref <- newIORef Nothing
319 n_chars <- countUTF8Chars ptr len
320 return (FastString uid len n_chars fp (UTF8Encoded ref))
322 copyNewZFastString uid ptr len = do
323 fp <- copyBytesToForeignPtr ptr len
324 return (FastString uid len len fp ZEncoded)
327 copyBytesToForeignPtr ptr len = do
328 fp <- mallocForeignPtrBytes len
329 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
332 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
333 cmpStringPrefix ptr fp len =
334 withForeignPtr fp $ \ptr' -> do
335 r <- memcmp ptr ptr' len
339 hashStr :: Ptr Word8 -> Int -> Int
340 -- use the Addr to produce a hash value between 0 & m (inclusive)
341 hashStr (Ptr a#) (I# len#) = loop 0# 0#
343 loop h n | n ==# len# = I# h
344 | otherwise = loop h2 (n +# 1#)
345 where c = ord# (indexCharOffAddr# a# n)
346 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
348 -- -----------------------------------------------------------------------------
351 -- | Returns the length of the 'FastString' in characters
352 lengthFS :: FastString -> Int
353 lengthFS f = n_chars f
355 -- | Returns 'True' if the 'FastString' is Z-encoded
356 isZEncoded :: FastString -> Bool
357 isZEncoded fs | ZEncoded <- enc fs = True
360 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
361 -- a Z-encoding cached (used in producing stats).
362 hasZEncoding :: FastString -> Bool
363 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
371 -- | Returns 'True' if the 'FastString' is empty
372 nullFS :: FastString -> Bool
373 nullFS f = n_bytes f == 0
375 -- | unpacks and decodes the FastString
376 unpackFS :: FastString -> String
377 unpackFS (FastString _ n_bytes _ buf enc) =
378 inlinePerformIO $ withForeignPtr buf $ \ptr ->
380 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
381 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
383 bytesFS :: FastString -> [Word8]
384 bytesFS (FastString _ n_bytes _ buf enc) =
385 inlinePerformIO $ withForeignPtr buf $ \ptr ->
386 peekArray n_bytes ptr
388 -- | returns a Z-encoded version of a 'FastString'. This might be the
389 -- original, if it was already Z-encoded. The first time this
390 -- function is applied to a particular 'FastString', the results are
393 zEncodeFS :: FastString -> FastString
394 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
403 let efs = mkZFastString (zEncodeString (unpackFS fs))
404 writeIORef ref (Just efs)
407 appendFS :: FastString -> FastString -> FastString
408 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
410 concatFS :: [FastString] -> FastString
411 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
413 headFS :: FastString -> Char
414 headFS (FastString _ n_bytes _ buf enc) =
415 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
418 w <- peek (castPtr ptr)
419 return (castCCharToChar w)
421 return (fst (utf8DecodeChar ptr))
423 tailFS :: FastString -> FastString
424 tailFS (FastString _ n_bytes _ buf enc) =
425 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
428 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
430 let (_,ptr') = utf8DecodeChar ptr
431 let off = ptr' `minusPtr` ptr
432 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
434 consFS :: Char -> FastString -> FastString
435 consFS c fs = mkFastString (c : unpackFS fs)
437 uniqueOfFS :: FastString -> Int#
438 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
440 nilFS = mkFastString ""
442 -- -----------------------------------------------------------------------------
445 getFastStringTable :: IO [[FastString]]
446 getFastStringTable = do
447 tbl <- readIORef string_table
448 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
451 -- -----------------------------------------------------------------------------
452 -- Outputting 'FastString's
454 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
455 -- get the actual bytes in the 'FastString' written to the 'Handle'.
456 hPutFS handle (FastString _ len _ fp _)
457 | len == 0 = return ()
458 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
460 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
461 -- in the current locale's encoding (for error messages and suchlike).
463 -- -----------------------------------------------------------------------------
464 -- LitStrings, here for convenience only.
466 type LitString = Ptr ()
468 mkLitString# :: Addr# -> LitString
469 mkLitString# a# = Ptr a#
471 foreign import ccall unsafe "ghc_strlen"
472 strLength :: Ptr () -> Int
474 -- -----------------------------------------------------------------------------
477 -- Just like unsafePerformIO, but we inline it.
478 {-# INLINE inlinePerformIO #-}
479 inlinePerformIO :: IO a -> a
480 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
482 -- NB. does *not* add a '\0'-terminator.
483 pokeCAString :: Ptr CChar -> String -> IO ()
484 pokeCAString ptr str =
487 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
491 #if __GLASGOW_HASKELL__ <= 602
492 peekCAStringLen = peekCStringLen