2 % (c) The University of Glasgow, 1997-2006
5 {-# OPTIONS_GHC -O -funbox-strict-fields #-}
6 -- We always optimise this, otherwise performance of a non-optimised
7 -- compiler is severely affected
10 FastString: A compact, hash-consed, representation of character strings.
11 Comparison is O(1), and you can get a Unique from them.
13 Turn into SDoc with Outputable.ftext
15 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
16 Practically no operations
17 Outputing them is fast
19 Turn into SDoc with Outputable.ptext
21 Use LitString unless you want the facilities of FastString
26 FastString(..), -- not abstract, for now.
32 mkFastStringForeignPtr,
33 #if defined(__GLASGOW_HASKELL__)
40 unpackFS, -- :: FastString -> String
41 bytesFS, -- :: FastString -> [Word8]
67 #if defined(__GLASGOW_HASKELL__)
80 #include "HsVersions.h"
91 import System.IO.Unsafe ( unsafePerformIO )
92 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
93 import Data.Maybe ( isJust )
94 import Data.Char ( ord )
96 import GHC.IOBase ( IO(..) )
97 import GHC.Ptr ( Ptr(..) )
98 #if defined(__GLASGOW_HASKELL__)
99 import GHC.Base ( unpackCString# )
102 #define hASH_TBL_SIZE 4091
103 #define hASH_TBL_SIZE_UNBOXED 4091#
107 A 'FastString' is an array of bytes, hashed to support fast O(1)
108 comparison. It is also associated with a character encoding, so that
109 we know how to convert a 'FastString' to the local encoding, or to the
110 Z-encoding used by the compiler internally.
112 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
115 data FastString = FastString {
116 uniq :: {-# UNPACK #-} !Int, -- unique id
117 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
118 n_chars :: {-# UNPACK #-} !Int, -- number of chars
119 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
124 -- including strings that don't need any encoding
126 -- A UTF-8 string with a memoized Z-encoding
127 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
129 instance Eq FastString where
130 f1 == f2 = uniq f1 == uniq f2
132 instance Ord FastString where
133 -- Compares lexicographically, not by unique
134 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
135 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
136 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
137 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
142 compare a b = cmpFS a b
144 instance Show FastString where
145 show fs = show (unpackFS fs)
147 cmpFS :: FastString -> FastString -> Ordering
148 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
149 if u1 == u2 then EQ else
150 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
155 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
156 unsafeMemcmp buf1 buf2 l =
158 withForeignPtr buf1 $ \p1 ->
159 withForeignPtr buf2 $ \p2 ->
163 foreign import ccall unsafe "ghc_memcmp"
164 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
167 -- -----------------------------------------------------------------------------
171 Internally, the compiler will maintain a fast string symbol
172 table, providing sharing and fast comparison. Creation of
173 new @FastString@s then covertly does a lookup, re-using the
174 @FastString@ if there was a hit.
177 data FastStringTable =
180 (MutableArray# RealWorld [FastString])
182 {-# NOINLINE string_table #-}
183 string_table :: IORef FastStringTable
186 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
188 (# s2#, FastStringTable 0 arr# #)
191 lookupTbl :: FastStringTable -> Int -> IO [FastString]
192 lookupTbl (FastStringTable _ arr#) (I# i#) =
193 IO $ \ s# -> readArray# arr# i# s#
195 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
196 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
197 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
198 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
200 mkFastString# :: Addr# -> FastString
201 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
204 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
205 mkFastStringBytes ptr len = unsafePerformIO $ do
206 ft@(FastStringTable uid _) <- readIORef string_table
210 fs <- copyNewFastString uid ptr len
211 updTbl string_table ft h (fs:ls)
212 {- _trace ("new: " ++ show f_str) $ -}
215 lookup_result <- lookupTbl ft h
216 case lookup_result of
219 b <- bucket_match ls len ptr
222 Just v -> {- _trace ("re-use: "++show v) $ -} return v
224 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
225 mkZFastStringBytes ptr len = unsafePerformIO $ do
226 ft@(FastStringTable uid _) <- readIORef string_table
230 fs <- copyNewZFastString uid ptr len
231 updTbl string_table ft h (fs:ls)
232 {- _trace ("new: " ++ show f_str) $ -}
235 lookup_result <- lookupTbl ft h
236 case lookup_result of
239 b <- bucket_match ls len ptr
242 Just v -> {- _trace ("re-use: "++show v) $ -} return v
244 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
245 -- between this and 'mkFastStringBytes' is that we don't have to copy
246 -- the bytes if the string is new to the table.
247 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
248 mkFastStringForeignPtr ptr fp len = do
249 ft@(FastStringTable uid _) <- readIORef string_table
250 -- _trace ("hashed: "++show (I# h)) $
254 fs <- mkNewFastString uid ptr fp len
255 updTbl string_table ft h (fs:ls)
256 {- _trace ("new: " ++ show f_str) $ -}
259 lookup_result <- lookupTbl ft h
260 case lookup_result of
263 b <- bucket_match ls len ptr
266 Just v -> {- _trace ("re-use: "++show v) $ -} return v
268 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
269 mkZFastStringForeignPtr ptr fp len = do
270 ft@(FastStringTable uid _) <- readIORef string_table
271 -- _trace ("hashed: "++show (I# h)) $
275 fs <- mkNewZFastString uid ptr fp len
276 updTbl string_table ft h (fs:ls)
277 {- _trace ("new: " ++ show f_str) $ -}
280 lookup_result <- lookupTbl ft h
281 case lookup_result of
284 b <- bucket_match ls len ptr
287 Just v -> {- _trace ("re-use: "++show v) $ -} return v
290 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
291 mkFastString :: String -> FastString
294 let l = utf8EncodedLength str
295 buf <- mallocForeignPtrBytes l
296 withForeignPtr buf $ \ptr -> do
297 utf8EncodeString ptr str
298 mkFastStringForeignPtr ptr buf l
300 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
301 mkFastStringByteList :: [Word8] -> FastString
302 mkFastStringByteList str =
304 let l = Prelude.length str
305 buf <- mallocForeignPtrBytes l
306 withForeignPtr buf $ \ptr -> do
307 pokeArray (castPtr ptr) str
308 mkFastStringForeignPtr ptr buf l
310 -- | Creates a Z-encoded 'FastString' from a 'String'
311 mkZFastString :: String -> FastString
314 let l = Prelude.length str
315 buf <- mallocForeignPtrBytes l
316 withForeignPtr buf $ \ptr -> do
317 pokeCAString (castPtr ptr) str
318 mkZFastStringForeignPtr ptr buf l
320 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
321 bucket_match [] _ _ = return Nothing
322 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
324 b <- cmpStringPrefix ptr buf len
325 if b then return (Just v)
326 else bucket_match ls len ptr
328 bucket_match ls len ptr
330 mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
332 mkNewFastString uid ptr fp len = do
333 ref <- newIORef Nothing
334 n_chars <- countUTF8Chars ptr len
335 return (FastString uid len n_chars fp (UTF8Encoded ref))
337 mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
339 mkNewZFastString uid _ fp len = do
340 return (FastString uid len len fp ZEncoded)
342 copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
343 copyNewFastString uid ptr len = do
344 fp <- copyBytesToForeignPtr ptr len
345 ref <- newIORef Nothing
346 n_chars <- countUTF8Chars ptr len
347 return (FastString uid len n_chars fp (UTF8Encoded ref))
349 copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
350 copyNewZFastString uid ptr len = do
351 fp <- copyBytesToForeignPtr ptr len
352 return (FastString uid len len fp ZEncoded)
354 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
355 copyBytesToForeignPtr ptr len = do
356 fp <- mallocForeignPtrBytes len
357 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
360 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
361 cmpStringPrefix ptr fp len =
362 withForeignPtr fp $ \ptr' -> do
363 r <- memcmp ptr ptr' len
367 hashStr :: Ptr Word8 -> Int -> Int
368 -- use the Addr to produce a hash value between 0 & m (inclusive)
369 hashStr (Ptr a#) (I# len#) = loop 0# 0#
371 loop h n | n GHC.Exts.==# len# = I# h
372 | otherwise = loop h2 (n GHC.Exts.+# 1#)
373 where c = ord# (indexCharOffAddr# a# n)
374 h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
377 -- -----------------------------------------------------------------------------
380 -- | Returns the length of the 'FastString' in characters
381 lengthFS :: FastString -> Int
382 lengthFS f = n_chars f
384 -- | Returns 'True' if the 'FastString' is Z-encoded
385 isZEncoded :: FastString -> Bool
386 isZEncoded fs | ZEncoded <- enc fs = True
389 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
390 -- a Z-encoding cached (used in producing stats).
391 hasZEncoding :: FastString -> Bool
392 hasZEncoding (FastString _ _ _ _ enc) =
400 -- | Returns 'True' if the 'FastString' is empty
401 nullFS :: FastString -> Bool
402 nullFS f = n_bytes f == 0
404 -- | unpacks and decodes the FastString
405 unpackFS :: FastString -> String
406 unpackFS (FastString _ n_bytes _ buf enc) =
407 inlinePerformIO $ withForeignPtr buf $ \ptr ->
409 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
410 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
412 bytesFS :: FastString -> [Word8]
413 bytesFS (FastString _ n_bytes _ buf _) =
414 inlinePerformIO $ withForeignPtr buf $ \ptr ->
415 peekArray n_bytes ptr
417 -- | returns a Z-encoded version of a 'FastString'. This might be the
418 -- original, if it was already Z-encoded. The first time this
419 -- function is applied to a particular 'FastString', the results are
422 zEncodeFS :: FastString -> FastString
423 zEncodeFS fs@(FastString _ _ _ _ enc) =
432 let efs = mkZFastString (zEncodeString (unpackFS fs))
433 writeIORef ref (Just efs)
436 appendFS :: FastString -> FastString -> FastString
437 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
439 concatFS :: [FastString] -> FastString
440 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
442 headFS :: FastString -> Char
443 headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
444 headFS (FastString _ _ _ buf enc) =
445 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
448 w <- peek (castPtr ptr)
449 return (castCCharToChar w)
451 return (fst (utf8DecodeChar ptr))
453 tailFS :: FastString -> FastString
454 tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
455 tailFS (FastString _ n_bytes _ buf enc) =
456 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
459 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
461 let (_,ptr') = utf8DecodeChar ptr
462 let off = ptr' `minusPtr` ptr
463 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
465 consFS :: Char -> FastString -> FastString
466 consFS c fs = mkFastString (c : unpackFS fs)
468 uniqueOfFS :: FastString -> FastInt
469 uniqueOfFS (FastString u _ _ _ _) = iUnbox u
472 nilFS = mkFastString ""
474 -- -----------------------------------------------------------------------------
477 getFastStringTable :: IO [[FastString]]
478 getFastStringTable = do
479 tbl <- readIORef string_table
480 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
483 -- -----------------------------------------------------------------------------
484 -- Outputting 'FastString's
486 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
487 -- get the actual bytes in the 'FastString' written to the 'Handle'.
488 hPutFS :: Handle -> FastString -> IO ()
489 hPutFS handle (FastString _ len _ fp _)
490 | len == 0 = return ()
491 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
493 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
494 -- in the current locale's encoding (for error messages and suchlike).
496 -- -----------------------------------------------------------------------------
497 -- LitStrings, here for convenience only.
499 -- hmm, not unboxed (or rather FastPtr), interesting
500 --a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't
501 --really care about C types in naming, where we can help it.
502 type LitString = Ptr Word8
503 --Why do we recalculate length every time it's requested?
504 --If it's commonly needed, we should perhaps have
505 --data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt
507 #if defined(__GLASGOW_HASKELL__)
508 mkLitString# :: Addr# -> LitString
509 mkLitString# a# = Ptr a#
511 --can/should we use FastTypes here?
512 --Is this likely to be memory-preserving if only used on constant strings?
513 --should we inline it? If lucky, that would make a CAF that wouldn't
514 --be computationally repeated... although admittedly we're not
515 --really intending to use mkLitString when __GLASGOW_HASKELL__...
516 --(I wonder, is unicode / multi-byte characters allowed in LitStrings
518 {-# INLINE mkLitString #-}
519 mkLitString :: String -> LitString
522 p <- mallocBytes (length s + 1)
524 loop :: Int -> String -> IO ()
525 loop n cs | n `seq` null cs = pokeByteOff p n (0 :: Word8)
527 pokeByteOff p n (fromIntegral (ord c) :: Word8)
529 -- XXX GHC isn't smart enough to know that we have already covered
531 loop _ [] = panic "mkLitString"
536 unpackLitString :: LitString -> String
537 unpackLitString p_ = case pUnbox p_ of
538 p -> unpack (_ILIT(0))
540 unpack n = case indexWord8OffFastPtrAsFastChar p n of
541 ch -> if ch `eqFastChar` _CLIT('\0')
542 then [] else cBox ch : unpack (n +# _ILIT(1))
544 strLength :: LitString -> Int
545 strLength = ptrStrLength
547 -- for now, use a simple String representation
548 --no, let's not do that right now - it's work in other places
550 type LitString = String
552 mkLitString :: String -> LitString
555 unpackLitString :: LitString -> String
558 strLength :: LitString -> Int
563 -- -----------------------------------------------------------------------------
566 foreign import ccall unsafe "ghc_strlen"
567 ptrStrLength :: Ptr Word8 -> Int
569 -- NB. does *not* add a '\0'-terminator.
570 -- We only use CChar here to be parallel to the imported
571 -- peekC(A)StringLen.
572 pokeCAString :: Ptr CChar -> String -> IO ()
573 pokeCAString ptr str =
576 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
580 {-# NOINLINE sLit #-}
581 sLit :: String -> LitString
582 sLit x = mkLitString x
584 {-# NOINLINE fsLit #-}
585 fsLit :: String -> FastString
586 fsLit x = mkFastString x
589 forall x . sLit (unpackCString# x) = mkLitString# x #-}
591 forall x . fsLit (unpackCString# x) = mkFastString# x #-}