2 % (c) The University of Glasgow, 1997-2006
6 -- The above warning supression flag is a temporary kludge.
7 -- While working on this module you are encouraged to remove it and fix
8 -- any warnings in the module. See
9 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
13 FastString: A compact, hash-consed, representation of character strings.
14 Comparison is O(1), and you can get a Unique from them.
15 Generated by the FSLIT macro
16 Turn into SDoc with Outputable.ftext
18 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
19 Practically no operations
20 Outputing them is fast
21 Generated by the SLIT macro
22 Turn into SDoc with Outputable.ptext
24 Use LitString unless you want the facilities of FastString
29 FastString(..), -- not abstract, for now.
35 mkFastStringForeignPtr,
36 #if defined(__GLASGOW_HASKELL__)
43 unpackFS, -- :: FastString -> String
44 bytesFS, -- :: FastString -> [Word8]
70 #if defined(__GLASGOW_HASKELL__)
81 -- This #define suppresses the "import FastString" that
82 -- HsVersions otherwise produces
83 #define COMPILING_FAST_STRING
84 #include "HsVersions.h"
93 import System.IO.Unsafe ( unsafePerformIO )
94 import Control.Monad.ST ( stToIO )
95 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
96 import System.IO ( hPutBuf )
97 import Data.Maybe ( isJust )
98 import Data.Char ( ord )
101 import GHC.IOBase ( IO(..) )
102 import GHC.Ptr ( Ptr(..) )
104 #define hASH_TBL_SIZE 4091
105 #define hASH_TBL_SIZE_UNBOXED 4091#
109 A 'FastString' is an array of bytes, hashed to support fast O(1)
110 comparison. It is also associated with a character encoding, so that
111 we know how to convert a 'FastString' to the local encoding, or to the
112 Z-encoding used by the compiler internally.
114 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
117 data FastString = FastString {
118 uniq :: {-# UNPACK #-} !Int, -- unique id
119 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
120 n_chars :: {-# UNPACK #-} !Int, -- number of chars
121 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
127 -- including strings that don't need any encoding
128 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
129 -- A UTF-8 string with a memoized Z-encoding
131 instance Eq FastString where
132 f1 == f2 = uniq f1 == uniq f2
134 instance Ord FastString where
135 -- Compares lexicographically, not by unique
136 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
137 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
138 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
139 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
144 compare a b = cmpFS a b
146 instance Show FastString where
147 show fs = show (unpackFS fs)
149 cmpFS :: FastString -> FastString -> Ordering
150 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
151 if u1 == u2 then EQ else
152 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
157 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
158 unsafeMemcmp buf1 buf2 l =
160 withForeignPtr buf1 $ \p1 ->
161 withForeignPtr buf2 $ \p2 ->
165 foreign import ccall unsafe "ghc_memcmp"
166 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
169 -- -----------------------------------------------------------------------------
173 Internally, the compiler will maintain a fast string symbol
174 table, providing sharing and fast comparison. Creation of
175 new @FastString@s then covertly does a lookup, re-using the
176 @FastString@ if there was a hit.
179 data FastStringTable =
182 (MutableArray# RealWorld [FastString])
184 {-# NOINLINE string_table #-}
185 string_table :: IORef FastStringTable
188 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
190 (# s2#, FastStringTable 0 arr# #)
193 lookupTbl :: FastStringTable -> Int -> IO [FastString]
194 lookupTbl (FastStringTable _ arr#) (I# i#) =
195 IO $ \ s# -> readArray# arr# i# s#
197 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
198 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
199 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
200 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
202 mkFastString# :: Addr# -> FastString
203 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
206 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
207 mkFastStringBytes ptr len = unsafePerformIO $ do
208 ft@(FastStringTable uid tbl#) <- readIORef string_table
212 fs <- copyNewFastString uid ptr len
213 updTbl string_table ft h (fs:ls)
214 {- _trace ("new: " ++ show f_str) $ -}
217 lookup_result <- lookupTbl ft h
218 case lookup_result of
221 b <- bucket_match ls len ptr
224 Just v -> {- _trace ("re-use: "++show v) $ -} return v
226 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
227 mkZFastStringBytes ptr len = unsafePerformIO $ do
228 ft@(FastStringTable uid tbl#) <- readIORef string_table
232 fs <- copyNewZFastString uid ptr len
233 updTbl string_table ft h (fs:ls)
234 {- _trace ("new: " ++ show f_str) $ -}
237 lookup_result <- lookupTbl ft h
238 case lookup_result of
241 b <- bucket_match ls len ptr
244 Just v -> {- _trace ("re-use: "++show v) $ -} return v
246 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
247 -- between this and 'mkFastStringBytes' is that we don't have to copy
248 -- the bytes if the string is new to the table.
249 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
250 mkFastStringForeignPtr ptr fp len = do
251 ft@(FastStringTable uid tbl#) <- readIORef string_table
252 -- _trace ("hashed: "++show (I# h)) $
256 fs <- mkNewFastString uid ptr fp len
257 updTbl string_table ft h (fs:ls)
258 {- _trace ("new: " ++ show f_str) $ -}
261 lookup_result <- lookupTbl ft h
262 case lookup_result of
265 b <- bucket_match ls len ptr
268 Just v -> {- _trace ("re-use: "++show v) $ -} return v
270 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
271 mkZFastStringForeignPtr ptr fp len = do
272 ft@(FastStringTable uid tbl#) <- readIORef string_table
273 -- _trace ("hashed: "++show (I# h)) $
277 fs <- mkNewZFastString uid ptr fp len
278 updTbl string_table ft h (fs:ls)
279 {- _trace ("new: " ++ show f_str) $ -}
282 lookup_result <- lookupTbl ft h
283 case lookup_result of
286 b <- bucket_match ls len ptr
289 Just v -> {- _trace ("re-use: "++show v) $ -} return v
292 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
293 mkFastString :: String -> FastString
296 let l = utf8EncodedLength str
297 buf <- mallocForeignPtrBytes l
298 withForeignPtr buf $ \ptr -> do
299 utf8EncodeString ptr str
300 mkFastStringForeignPtr ptr buf l
302 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
303 mkFastStringByteList :: [Word8] -> FastString
304 mkFastStringByteList str =
306 let l = Prelude.length str
307 buf <- mallocForeignPtrBytes l
308 withForeignPtr buf $ \ptr -> do
309 pokeArray (castPtr ptr) str
310 mkFastStringForeignPtr ptr buf l
312 -- | Creates a Z-encoded 'FastString' from a 'String'
313 mkZFastString :: String -> FastString
316 let l = Prelude.length str
317 buf <- mallocForeignPtrBytes l
318 withForeignPtr buf $ \ptr -> do
319 pokeCAString (castPtr ptr) str
320 mkZFastStringForeignPtr ptr buf l
322 bucket_match [] _ _ = return Nothing
323 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
325 b <- cmpStringPrefix ptr buf len
326 if b then return (Just v)
327 else bucket_match ls len ptr
329 bucket_match ls len ptr
331 mkNewFastString uid ptr fp len = do
332 ref <- newIORef Nothing
333 n_chars <- countUTF8Chars ptr len
334 return (FastString uid len n_chars fp (UTF8Encoded ref))
336 mkNewZFastString uid ptr fp len = do
337 return (FastString uid len len fp ZEncoded)
340 copyNewFastString uid ptr len = do
341 fp <- copyBytesToForeignPtr ptr len
342 ref <- newIORef Nothing
343 n_chars <- countUTF8Chars ptr len
344 return (FastString uid len n_chars fp (UTF8Encoded ref))
346 copyNewZFastString uid ptr len = do
347 fp <- copyBytesToForeignPtr ptr len
348 return (FastString uid len len fp ZEncoded)
351 copyBytesToForeignPtr ptr len = do
352 fp <- mallocForeignPtrBytes len
353 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
356 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
357 cmpStringPrefix ptr fp len =
358 withForeignPtr fp $ \ptr' -> do
359 r <- memcmp ptr ptr' len
363 hashStr :: Ptr Word8 -> Int -> Int
364 -- use the Addr to produce a hash value between 0 & m (inclusive)
365 hashStr (Ptr a#) (I# len#) = loop 0# 0#
367 loop h n | n GHC.Exts.==# len# = I# h
368 | otherwise = loop h2 (n GHC.Exts.+# 1#)
369 where c = ord# (indexCharOffAddr# a# n)
370 h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#` hASH_TBL_SIZE#
372 -- -----------------------------------------------------------------------------
375 -- | Returns the length of the 'FastString' in characters
376 lengthFS :: FastString -> Int
377 lengthFS f = n_chars f
379 -- | Returns 'True' if the 'FastString' is Z-encoded
380 isZEncoded :: FastString -> Bool
381 isZEncoded fs | ZEncoded <- enc fs = True
384 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
385 -- a Z-encoding cached (used in producing stats).
386 hasZEncoding :: FastString -> Bool
387 hasZEncoding fs@(FastString uid n_bytes _ fp enc) =
395 -- | Returns 'True' if the 'FastString' is empty
396 nullFS :: FastString -> Bool
397 nullFS f = n_bytes f == 0
399 -- | unpacks and decodes the FastString
400 unpackFS :: FastString -> String
401 unpackFS (FastString _ n_bytes _ buf enc) =
402 inlinePerformIO $ withForeignPtr buf $ \ptr ->
404 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
405 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
407 bytesFS :: FastString -> [Word8]
408 bytesFS (FastString _ n_bytes _ buf enc) =
409 inlinePerformIO $ withForeignPtr buf $ \ptr ->
410 peekArray n_bytes ptr
412 -- | returns a Z-encoded version of a 'FastString'. This might be the
413 -- original, if it was already Z-encoded. The first time this
414 -- function is applied to a particular 'FastString', the results are
417 zEncodeFS :: FastString -> FastString
418 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
427 let efs = mkZFastString (zEncodeString (unpackFS fs))
428 writeIORef ref (Just efs)
431 appendFS :: FastString -> FastString -> FastString
432 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
434 concatFS :: [FastString] -> FastString
435 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
437 headFS :: FastString -> Char
438 headFS (FastString _ n_bytes _ buf enc) =
439 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
442 w <- peek (castPtr ptr)
443 return (castCCharToChar w)
445 return (fst (utf8DecodeChar ptr))
447 tailFS :: FastString -> FastString
448 tailFS (FastString _ n_bytes _ buf enc) =
449 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
452 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
454 let (_,ptr') = utf8DecodeChar ptr
455 let off = ptr' `minusPtr` ptr
456 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
458 consFS :: Char -> FastString -> FastString
459 consFS c fs = mkFastString (c : unpackFS fs)
461 uniqueOfFS :: FastString -> FastInt
462 uniqueOfFS (FastString u _ _ _ _) = iUnbox u
464 nilFS = mkFastString ""
466 -- -----------------------------------------------------------------------------
469 getFastStringTable :: IO [[FastString]]
470 getFastStringTable = do
471 tbl <- readIORef string_table
472 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
475 -- -----------------------------------------------------------------------------
476 -- Outputting 'FastString's
478 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
479 -- get the actual bytes in the 'FastString' written to the 'Handle'.
480 hPutFS handle (FastString _ len _ fp _)
481 | len == 0 = return ()
482 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
484 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
485 -- in the current locale's encoding (for error messages and suchlike).
487 -- -----------------------------------------------------------------------------
488 -- LitStrings, here for convenience only.
490 -- hmm, not unboxed (or rather FastPtr), interesting
491 --a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't
492 --really care about C types in naming, where we can help it.
493 type LitString = Ptr Word8
494 --Why do we recalculate length every time it's requested?
495 --If it's commonly needed, we should perhaps have
496 --data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt
498 #if defined(__GLASGOW_HASKELL__)
499 mkLitString# :: Addr# -> LitString
500 mkLitString# a# = Ptr a#
503 --can/should we use FastTypes here?
504 --Is this likely to be memory-preserving if only used on constant strings?
505 --should we inline it? If lucky, that would make a CAF that wouldn't
506 --be computationally repeated... although admittedly we're not
507 --really intending to use mkLitString when __GLASGOW_HASKELL__...
508 --(I wonder, is unicode / multi-byte characters allowed in LitStrings
510 {-# INLINE mkLitString #-}
511 mkLitString :: String -> LitString
514 p <- mallocBytes (length s + 1)
516 loop :: Int -> String -> IO ()
517 loop n cs | n `seq` null cs = pokeByteOff p n (0 :: Word8)
519 pokeByteOff p n (fromIntegral (ord c) :: Word8)
525 unpackLitString :: LitString -> String
526 unpackLitString p_ = case pUnbox p_ of
527 p -> unpack (_ILIT(0))
529 unpack n = case indexWord8OffFastPtrAsFastChar p n of
530 ch -> if ch `eqFastChar` _CLIT('\0')
531 then [] else cBox ch : unpack (n +# _ILIT(1))
533 strLength :: LitString -> Int
534 strLength = ptrStrLength
536 -- for now, use a simple String representation
537 --no, let's not do that right now - it's work in other places
539 type LitString = String
541 mkLitString :: String -> LitString
544 unpackLitString :: LitString -> String
547 strLength :: LitString -> Int
552 -- -----------------------------------------------------------------------------
555 foreign import ccall unsafe "ghc_strlen"
556 ptrStrLength :: Ptr Word8 -> Int
558 -- NB. does *not* add a '\0'-terminator.
559 -- We only use CChar here to be parallel to the imported
560 -- peekC(A)StringLen.
561 pokeCAString :: Ptr CChar -> String -> IO ()
562 pokeCAString ptr str =
565 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
569 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 602
570 peekCAStringLen = peekCStringLen