2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
27 mkFastStringForeignPtr,
33 unpackFS, -- :: FastString -> String
34 bytesFS, -- :: FastString -> [Word8]
64 -- This #define suppresses the "import FastString" that
65 -- HsVersions otherwise produces
66 #define COMPILING_FAST_STRING
67 #include "HsVersions.h"
74 import System.IO.Unsafe ( unsafePerformIO )
75 import Control.Monad.ST ( stToIO )
76 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
77 import System.IO ( hPutBuf )
78 import Data.Maybe ( isJust )
80 import GHC.Arr ( STArray(..), newSTArray )
81 import GHC.IOBase ( IO(..) )
82 import GHC.Ptr ( Ptr(..) )
84 #define hASH_TBL_SIZE 4091
88 A 'FastString' is an array of bytes, hashed to support fast O(1)
89 comparison. It is also associated with a character encoding, so that
90 we know how to convert a 'FastString' to the local encoding, or to the
91 Z-encoding used by the compiler internally.
93 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
96 data FastString = FastString {
97 uniq :: {-# UNPACK #-} !Int, -- unique id
98 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
99 n_chars :: {-# UNPACK #-} !Int, -- number of chars
100 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
106 -- including strings that don't need any encoding
107 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
108 -- A UTF-8 string with a memoized Z-encoding
110 instance Eq FastString where
111 f1 == f2 = uniq f1 == uniq f2
113 instance Ord FastString where
114 -- Compares lexicographically, not by unique
115 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
116 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
117 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
118 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
123 compare a b = cmpFS a b
125 instance Show FastString where
126 show fs = show (unpackFS fs)
128 cmpFS :: FastString -> FastString -> Ordering
129 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
130 if u1 == u2 then EQ else
131 let l = if l1 <= l2 then l1 else l2 in
133 withForeignPtr buf1 $ \p1 ->
134 withForeignPtr buf2 $ \p2 -> do
135 res <- memcmp p1 p2 l
137 _ | res < 0 -> return LT
138 | res == 0 -> if l1 == l2 then return EQ
139 else if l1 < l2 then return LT
141 | otherwise -> return GT
144 foreign import ccall unsafe "ghc_memcmp"
145 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
148 -- -----------------------------------------------------------------------------
152 Internally, the compiler will maintain a fast string symbol
153 table, providing sharing and fast comparison. Creation of
154 new @FastString@s then covertly does a lookup, re-using the
155 @FastString@ if there was a hit.
158 data FastStringTable =
161 (MutableArray# RealWorld [FastString])
163 string_table :: IORef FastStringTable
166 (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
167 newIORef (FastStringTable 0 arr#)
169 lookupTbl :: FastStringTable -> Int -> IO [FastString]
170 lookupTbl (FastStringTable _ arr#) (I# i#) =
171 IO $ \ s# -> readArray# arr# i# s#
173 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
174 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
175 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
176 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
178 mkFastString# :: Addr# -> FastString
179 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
182 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
183 mkFastStringBytes ptr len = unsafePerformIO $ do
184 ft@(FastStringTable uid tbl#) <- readIORef string_table
188 fs <- copyNewFastString uid ptr len
189 updTbl string_table ft h (fs:ls)
190 {- _trace ("new: " ++ show f_str) $ -}
193 lookup_result <- lookupTbl ft h
194 case lookup_result of
197 b <- bucket_match ls len ptr
200 Just v -> {- _trace ("re-use: "++show v) $ -} return v
202 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
203 mkZFastStringBytes ptr len = unsafePerformIO $ do
204 ft@(FastStringTable uid tbl#) <- readIORef string_table
208 fs <- copyNewZFastString uid ptr len
209 updTbl string_table ft h (fs:ls)
210 {- _trace ("new: " ++ show f_str) $ -}
213 lookup_result <- lookupTbl ft h
214 case lookup_result of
217 b <- bucket_match ls len ptr
220 Just v -> {- _trace ("re-use: "++show v) $ -} return v
222 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
223 -- between this and 'mkFastStringBytes' is that we don't have to copy
224 -- the bytes if the string is new to the table.
225 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
226 mkFastStringForeignPtr ptr fp len = do
227 ft@(FastStringTable uid tbl#) <- readIORef string_table
228 -- _trace ("hashed: "++show (I# h)) $
232 fs <- mkNewFastString uid ptr fp len
233 updTbl string_table ft h (fs:ls)
234 {- _trace ("new: " ++ show f_str) $ -}
237 lookup_result <- lookupTbl ft h
238 case lookup_result of
241 b <- bucket_match ls len ptr
244 Just v -> {- _trace ("re-use: "++show v) $ -} return v
246 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
247 mkZFastStringForeignPtr ptr fp len = do
248 ft@(FastStringTable uid tbl#) <- readIORef string_table
249 -- _trace ("hashed: "++show (I# h)) $
253 fs <- mkNewZFastString uid ptr fp len
254 updTbl string_table ft h (fs:ls)
255 {- _trace ("new: " ++ show f_str) $ -}
258 lookup_result <- lookupTbl ft h
259 case lookup_result of
262 b <- bucket_match ls len ptr
265 Just v -> {- _trace ("re-use: "++show v) $ -} return v
268 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
269 mkFastString :: String -> FastString
272 let l = utf8EncodedLength str
273 buf <- mallocForeignPtrBytes l
274 withForeignPtr buf $ \ptr -> do
275 utf8EncodeString ptr str
276 mkFastStringForeignPtr ptr buf l
279 -- | Creates a Z-encoded 'FastString' from a 'String'
280 mkZFastString :: String -> FastString
283 let l = Prelude.length str
284 buf <- mallocForeignPtrBytes l
285 withForeignPtr buf $ \ptr -> do
286 pokeCAString (castPtr ptr) str
287 mkZFastStringForeignPtr ptr buf l
289 bucket_match [] _ _ = return Nothing
290 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
292 b <- cmpStringPrefix ptr buf len
293 if b then return (Just v)
294 else bucket_match ls len ptr
296 bucket_match ls len ptr
298 mkNewFastString uid ptr fp len = do
299 ref <- newIORef Nothing
300 n_chars <- countUTF8Chars ptr len
301 return (FastString uid len n_chars fp (UTF8Encoded ref))
303 mkNewZFastString uid ptr fp len = do
304 return (FastString uid len len fp ZEncoded)
307 copyNewFastString uid ptr len = do
308 fp <- copyBytesToForeignPtr ptr len
309 ref <- newIORef Nothing
310 n_chars <- countUTF8Chars ptr len
311 return (FastString uid len n_chars fp (UTF8Encoded ref))
313 copyNewZFastString uid ptr len = do
314 fp <- copyBytesToForeignPtr ptr len
315 return (FastString uid len len fp ZEncoded)
318 copyBytesToForeignPtr ptr len = do
319 fp <- mallocForeignPtrBytes len
320 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
323 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
324 cmpStringPrefix ptr fp len =
325 withForeignPtr fp $ \ptr' -> do
326 r <- memcmp ptr ptr' len
330 hashStr :: Ptr Word8 -> Int -> Int
331 -- use the Addr to produce a hash value between 0 & m (inclusive)
332 hashStr (Ptr a#) (I# len#) = loop 0# 0#
334 loop h n | n ==# len# = I# h
335 | otherwise = loop h2 (n +# 1#)
336 where c = ord# (indexCharOffAddr# a# n)
337 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
339 -- -----------------------------------------------------------------------------
342 -- | Returns the length of the 'FastString' in characters
343 lengthFS :: FastString -> Int
344 lengthFS f = n_chars f
346 -- | Returns 'True' if the 'FastString' is Z-encoded
347 isZEncoded :: FastString -> Bool
348 isZEncoded fs | ZEncoded <- enc fs = True
351 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
352 -- a Z-encoding cached (used in producing stats).
353 hasZEncoding :: FastString -> Bool
354 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
362 -- | Returns 'True' if the 'FastString' is empty
363 nullFS :: FastString -> Bool
364 nullFS f = n_bytes f == 0
366 -- | unpacks and decodes the FastString
367 unpackFS :: FastString -> String
368 unpackFS (FastString _ n_bytes _ buf enc) =
369 inlinePerformIO $ withForeignPtr buf $ \ptr ->
371 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
372 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
374 bytesFS :: FastString -> [Word8]
375 bytesFS (FastString _ n_bytes _ buf enc) =
376 inlinePerformIO $ withForeignPtr buf $ \ptr ->
377 peekArray n_bytes ptr
379 -- | returns a Z-encoded version of a 'FastString'. This might be the
380 -- original, if it was already Z-encoded. The first time this
381 -- function is applied to a particular 'FastString', the results are
384 zEncodeFS :: FastString -> FastString
385 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
394 let efs = mkZFastString (zEncodeString (unpackFS fs))
395 writeIORef ref (Just efs)
398 appendFS :: FastString -> FastString -> FastString
399 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
401 concatFS :: [FastString] -> FastString
402 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
404 headFS :: FastString -> Char
405 headFS (FastString _ n_bytes _ buf enc) =
406 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
409 w <- peek (castPtr ptr)
410 return (castCCharToChar w)
412 return (fst (utf8DecodeChar ptr))
414 tailFS :: FastString -> FastString
415 tailFS (FastString _ n_bytes _ buf enc) =
416 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
419 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
421 let (_,ptr') = utf8DecodeChar ptr
422 let off = ptr' `minusPtr` ptr
423 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
425 consFS :: Char -> FastString -> FastString
426 consFS c fs = mkFastString (c : unpackFS fs)
428 uniqueOfFS :: FastString -> Int#
429 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
431 nilFS = mkFastString ""
433 -- -----------------------------------------------------------------------------
436 getFastStringTable :: IO [[FastString]]
437 getFastStringTable = do
438 tbl <- readIORef string_table
439 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
442 -- -----------------------------------------------------------------------------
443 -- Outputting 'FastString's
445 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
446 -- get the actual bytes in the 'FastString' written to the 'Handle'.
447 hPutFS handle (FastString _ len _ fp _)
448 | len == 0 = return ()
449 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
451 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
452 -- in the current locale's encoding (for error messages and suchlike).
454 -- -----------------------------------------------------------------------------
455 -- LitStrings, here for convenience only.
457 type LitString = Ptr ()
459 mkLitString# :: Addr# -> LitString
460 mkLitString# a# = Ptr a#
462 foreign import ccall unsafe "ghc_strlen"
463 strLength :: Ptr () -> Int
465 -- -----------------------------------------------------------------------------
468 -- Just like unsafePerformIO, but we inline it.
469 {-# INLINE inlinePerformIO #-}
470 inlinePerformIO :: IO a -> a
471 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
473 -- NB. does *not* add a '\0'-terminator.
474 pokeCAString :: Ptr CChar -> String -> IO ()
475 pokeCAString ptr str =
478 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
482 #if __GLASGOW_HASKELL__ < 600
484 mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
485 mallocForeignPtrBytes n = do
487 newForeignPtr r (finalizerFree r)
489 foreign import ccall unsafe "stdlib.h free"
490 finalizerFree :: Ptr a -> IO ()
492 peekCAStringLen = peekCStringLen
494 #elif __GLASGOW_HASKELL__ <= 602
496 peekCAStringLen = peekCStringLen