2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
28 mkFastStringForeignPtr,
34 unpackFS, -- :: FastString -> String
35 bytesFS, -- :: FastString -> [Word8]
65 -- This #define suppresses the "import FastString" that
66 -- HsVersions otherwise produces
67 #define COMPILING_FAST_STRING
68 #include "HsVersions.h"
75 import System.IO.Unsafe ( unsafePerformIO )
76 import Control.Monad.ST ( stToIO )
77 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
78 import System.IO ( hPutBuf )
79 import Data.Maybe ( isJust )
81 import GHC.Arr ( STArray(..), newSTArray )
82 import GHC.IOBase ( IO(..) )
83 import GHC.Ptr ( Ptr(..) )
85 #define hASH_TBL_SIZE 4091
89 A 'FastString' is an array of bytes, hashed to support fast O(1)
90 comparison. It is also associated with a character encoding, so that
91 we know how to convert a 'FastString' to the local encoding, or to the
92 Z-encoding used by the compiler internally.
94 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
97 data FastString = FastString {
98 uniq :: {-# UNPACK #-} !Int, -- unique id
99 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
100 n_chars :: {-# UNPACK #-} !Int, -- number of chars
101 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
107 -- including strings that don't need any encoding
108 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
109 -- A UTF-8 string with a memoized Z-encoding
111 instance Eq FastString where
112 f1 == f2 = uniq f1 == uniq f2
114 instance Ord FastString where
115 -- Compares lexicographically, not by unique
116 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
117 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
118 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
119 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
124 compare a b = cmpFS a b
126 instance Show FastString where
127 show fs = show (unpackFS fs)
129 cmpFS :: FastString -> FastString -> Ordering
130 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
131 if u1 == u2 then EQ else
132 let l = if l1 <= l2 then l1 else l2 in
134 withForeignPtr buf1 $ \p1 ->
135 withForeignPtr buf2 $ \p2 -> do
136 res <- memcmp p1 p2 l
138 _ | res < 0 -> return LT
139 | res == 0 -> if l1 == l2 then return EQ
140 else if l1 < l2 then return LT
142 | otherwise -> return GT
145 foreign import ccall unsafe "ghc_memcmp"
146 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
149 -- -----------------------------------------------------------------------------
153 Internally, the compiler will maintain a fast string symbol
154 table, providing sharing and fast comparison. Creation of
155 new @FastString@s then covertly does a lookup, re-using the
156 @FastString@ if there was a hit.
159 data FastStringTable =
162 (MutableArray# RealWorld [FastString])
164 string_table :: IORef FastStringTable
167 (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
168 newIORef (FastStringTable 0 arr#)
170 lookupTbl :: FastStringTable -> Int -> IO [FastString]
171 lookupTbl (FastStringTable _ arr#) (I# i#) =
172 IO $ \ s# -> readArray# arr# i# s#
174 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
175 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
176 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
177 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
179 mkFastString# :: Addr# -> FastString
180 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
183 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
184 mkFastStringBytes ptr len = unsafePerformIO $ do
185 ft@(FastStringTable uid tbl#) <- readIORef string_table
189 fs <- copyNewFastString uid ptr len
190 updTbl string_table ft h (fs:ls)
191 {- _trace ("new: " ++ show f_str) $ -}
194 lookup_result <- lookupTbl ft h
195 case lookup_result of
198 b <- bucket_match ls len ptr
201 Just v -> {- _trace ("re-use: "++show v) $ -} return v
203 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
204 mkZFastStringBytes ptr len = unsafePerformIO $ do
205 ft@(FastStringTable uid tbl#) <- readIORef string_table
209 fs <- copyNewZFastString uid ptr len
210 updTbl string_table ft h (fs:ls)
211 {- _trace ("new: " ++ show f_str) $ -}
214 lookup_result <- lookupTbl ft h
215 case lookup_result of
218 b <- bucket_match ls len ptr
221 Just v -> {- _trace ("re-use: "++show v) $ -} return v
223 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
224 -- between this and 'mkFastStringBytes' is that we don't have to copy
225 -- the bytes if the string is new to the table.
226 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
227 mkFastStringForeignPtr ptr fp len = do
228 ft@(FastStringTable uid tbl#) <- readIORef string_table
229 -- _trace ("hashed: "++show (I# h)) $
233 fs <- mkNewFastString uid ptr fp len
234 updTbl string_table ft h (fs:ls)
235 {- _trace ("new: " ++ show f_str) $ -}
238 lookup_result <- lookupTbl ft h
239 case lookup_result of
242 b <- bucket_match ls len ptr
245 Just v -> {- _trace ("re-use: "++show v) $ -} return v
247 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
248 mkZFastStringForeignPtr ptr fp len = do
249 ft@(FastStringTable uid tbl#) <- readIORef string_table
250 -- _trace ("hashed: "++show (I# h)) $
254 fs <- mkNewZFastString uid ptr fp len
255 updTbl string_table ft h (fs:ls)
256 {- _trace ("new: " ++ show f_str) $ -}
259 lookup_result <- lookupTbl ft h
260 case lookup_result of
263 b <- bucket_match ls len ptr
266 Just v -> {- _trace ("re-use: "++show v) $ -} return v
269 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
270 mkFastString :: String -> FastString
273 let l = utf8EncodedLength str
274 buf <- mallocForeignPtrBytes l
275 withForeignPtr buf $ \ptr -> do
276 utf8EncodeString ptr str
277 mkFastStringForeignPtr ptr buf l
279 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
280 mkFastStringByteList :: [Word8] -> FastString
281 mkFastStringByteList str =
283 let l = Prelude.length str
284 buf <- mallocForeignPtrBytes l
285 withForeignPtr buf $ \ptr -> do
286 pokeArray (castPtr ptr) str
287 mkFastStringForeignPtr ptr buf l
289 -- | Creates a Z-encoded 'FastString' from a 'String'
290 mkZFastString :: String -> FastString
293 let l = Prelude.length str
294 buf <- mallocForeignPtrBytes l
295 withForeignPtr buf $ \ptr -> do
296 pokeCAString (castPtr ptr) str
297 mkZFastStringForeignPtr ptr buf l
299 bucket_match [] _ _ = return Nothing
300 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
302 b <- cmpStringPrefix ptr buf len
303 if b then return (Just v)
304 else bucket_match ls len ptr
306 bucket_match ls len ptr
308 mkNewFastString uid ptr fp len = do
309 ref <- newIORef Nothing
310 n_chars <- countUTF8Chars ptr len
311 return (FastString uid len n_chars fp (UTF8Encoded ref))
313 mkNewZFastString uid ptr fp len = do
314 return (FastString uid len len fp ZEncoded)
317 copyNewFastString uid ptr len = do
318 fp <- copyBytesToForeignPtr ptr len
319 ref <- newIORef Nothing
320 n_chars <- countUTF8Chars ptr len
321 return (FastString uid len n_chars fp (UTF8Encoded ref))
323 copyNewZFastString uid ptr len = do
324 fp <- copyBytesToForeignPtr ptr len
325 return (FastString uid len len fp ZEncoded)
328 copyBytesToForeignPtr ptr len = do
329 fp <- mallocForeignPtrBytes len
330 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
333 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
334 cmpStringPrefix ptr fp len =
335 withForeignPtr fp $ \ptr' -> do
336 r <- memcmp ptr ptr' len
340 hashStr :: Ptr Word8 -> Int -> Int
341 -- use the Addr to produce a hash value between 0 & m (inclusive)
342 hashStr (Ptr a#) (I# len#) = loop 0# 0#
344 loop h n | n ==# len# = I# h
345 | otherwise = loop h2 (n +# 1#)
346 where c = ord# (indexCharOffAddr# a# n)
347 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
349 -- -----------------------------------------------------------------------------
352 -- | Returns the length of the 'FastString' in characters
353 lengthFS :: FastString -> Int
354 lengthFS f = n_chars f
356 -- | Returns 'True' if the 'FastString' is Z-encoded
357 isZEncoded :: FastString -> Bool
358 isZEncoded fs | ZEncoded <- enc fs = True
361 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
362 -- a Z-encoding cached (used in producing stats).
363 hasZEncoding :: FastString -> Bool
364 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
372 -- | Returns 'True' if the 'FastString' is empty
373 nullFS :: FastString -> Bool
374 nullFS f = n_bytes f == 0
376 -- | unpacks and decodes the FastString
377 unpackFS :: FastString -> String
378 unpackFS (FastString _ n_bytes _ buf enc) =
379 inlinePerformIO $ withForeignPtr buf $ \ptr ->
381 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
382 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
384 bytesFS :: FastString -> [Word8]
385 bytesFS (FastString _ n_bytes _ buf enc) =
386 inlinePerformIO $ withForeignPtr buf $ \ptr ->
387 peekArray n_bytes ptr
389 -- | returns a Z-encoded version of a 'FastString'. This might be the
390 -- original, if it was already Z-encoded. The first time this
391 -- function is applied to a particular 'FastString', the results are
394 zEncodeFS :: FastString -> FastString
395 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
404 let efs = mkZFastString (zEncodeString (unpackFS fs))
405 writeIORef ref (Just efs)
408 appendFS :: FastString -> FastString -> FastString
409 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
411 concatFS :: [FastString] -> FastString
412 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
414 headFS :: FastString -> Char
415 headFS (FastString _ n_bytes _ buf enc) =
416 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
419 w <- peek (castPtr ptr)
420 return (castCCharToChar w)
422 return (fst (utf8DecodeChar ptr))
424 tailFS :: FastString -> FastString
425 tailFS (FastString _ n_bytes _ buf enc) =
426 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
429 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
431 let (_,ptr') = utf8DecodeChar ptr
432 let off = ptr' `minusPtr` ptr
433 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
435 consFS :: Char -> FastString -> FastString
436 consFS c fs = mkFastString (c : unpackFS fs)
438 uniqueOfFS :: FastString -> Int#
439 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
441 nilFS = mkFastString ""
443 -- -----------------------------------------------------------------------------
446 getFastStringTable :: IO [[FastString]]
447 getFastStringTable = do
448 tbl <- readIORef string_table
449 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
452 -- -----------------------------------------------------------------------------
453 -- Outputting 'FastString's
455 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
456 -- get the actual bytes in the 'FastString' written to the 'Handle'.
457 hPutFS handle (FastString _ len _ fp _)
458 | len == 0 = return ()
459 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
461 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
462 -- in the current locale's encoding (for error messages and suchlike).
464 -- -----------------------------------------------------------------------------
465 -- LitStrings, here for convenience only.
467 type LitString = Ptr ()
469 mkLitString# :: Addr# -> LitString
470 mkLitString# a# = Ptr a#
472 foreign import ccall unsafe "ghc_strlen"
473 strLength :: Ptr () -> Int
475 -- -----------------------------------------------------------------------------
478 -- Just like unsafePerformIO, but we inline it.
479 {-# INLINE inlinePerformIO #-}
480 inlinePerformIO :: IO a -> a
481 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
483 -- NB. does *not* add a '\0'-terminator.
484 pokeCAString :: Ptr CChar -> String -> IO ()
485 pokeCAString ptr str =
488 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
492 #if __GLASGOW_HASKELL__ < 600
494 mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
495 mallocForeignPtrBytes n = do
497 newForeignPtr r (finalizerFree r)
499 foreign import ccall unsafe "stdlib.h free"
500 finalizerFree :: Ptr a -> IO ()
502 peekCAStringLen = peekCStringLen
504 #elif __GLASGOW_HASKELL__ <= 602
506 peekCAStringLen = peekCStringLen