2 % (c) The University of Glasgow, 1997-2006
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
28 mkFastStringForeignPtr,
29 #if defined(__GLASGOW_HASKELL__)
36 unpackFS, -- :: FastString -> String
37 bytesFS, -- :: FastString -> [Word8]
63 #if defined(__GLASGOW_HASKELL__)
76 #include "HsVersions.h"
87 import System.IO.Unsafe ( unsafePerformIO )
88 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
89 import Data.Maybe ( isJust )
90 import Data.Char ( ord )
92 import GHC.IOBase ( IO(..) )
93 import GHC.Ptr ( Ptr(..) )
94 #if defined(__GLASGOW_HASKELL__)
95 import GHC.Base ( unpackCString# )
98 #define hASH_TBL_SIZE 4091
99 #define hASH_TBL_SIZE_UNBOXED 4091#
103 A 'FastString' is an array of bytes, hashed to support fast O(1)
104 comparison. It is also associated with a character encoding, so that
105 we know how to convert a 'FastString' to the local encoding, or to the
106 Z-encoding used by the compiler internally.
108 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
111 data FastString = FastString {
112 uniq :: {-# UNPACK #-} !Int, -- unique id
113 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
114 n_chars :: {-# UNPACK #-} !Int, -- number of chars
115 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
120 -- including strings that don't need any encoding
122 -- A UTF-8 string with a memoized Z-encoding
123 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
125 instance Eq FastString where
126 f1 == f2 = uniq f1 == uniq f2
128 instance Ord FastString where
129 -- Compares lexicographically, not by unique
130 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
131 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
132 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
133 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
138 compare a b = cmpFS a b
140 instance Show FastString where
141 show fs = show (unpackFS fs)
143 cmpFS :: FastString -> FastString -> Ordering
144 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
145 if u1 == u2 then EQ else
146 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
151 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
152 unsafeMemcmp buf1 buf2 l =
154 withForeignPtr buf1 $ \p1 ->
155 withForeignPtr buf2 $ \p2 ->
159 foreign import ccall unsafe "ghc_memcmp"
160 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
163 -- -----------------------------------------------------------------------------
167 Internally, the compiler will maintain a fast string symbol
168 table, providing sharing and fast comparison. Creation of
169 new @FastString@s then covertly does a lookup, re-using the
170 @FastString@ if there was a hit.
173 data FastStringTable =
176 (MutableArray# RealWorld [FastString])
178 {-# NOINLINE string_table #-}
179 string_table :: IORef FastStringTable
182 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
184 (# s2#, FastStringTable 0 arr# #)
187 lookupTbl :: FastStringTable -> Int -> IO [FastString]
188 lookupTbl (FastStringTable _ arr#) (I# i#) =
189 IO $ \ s# -> readArray# arr# i# s#
191 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
192 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
193 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
194 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
196 mkFastString# :: Addr# -> FastString
197 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
200 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
201 mkFastStringBytes ptr len = unsafePerformIO $ do
202 ft@(FastStringTable uid _) <- readIORef string_table
206 fs <- copyNewFastString uid ptr len
207 updTbl string_table ft h (fs:ls)
208 {- _trace ("new: " ++ show f_str) $ -}
211 lookup_result <- lookupTbl ft h
212 case lookup_result of
215 b <- bucket_match ls len ptr
218 Just v -> {- _trace ("re-use: "++show v) $ -} return v
220 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
221 mkZFastStringBytes ptr len = unsafePerformIO $ do
222 ft@(FastStringTable uid _) <- readIORef string_table
226 fs <- copyNewZFastString uid ptr len
227 updTbl string_table ft h (fs:ls)
228 {- _trace ("new: " ++ show f_str) $ -}
231 lookup_result <- lookupTbl ft h
232 case lookup_result of
235 b <- bucket_match ls len ptr
238 Just v -> {- _trace ("re-use: "++show v) $ -} return v
240 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
241 -- between this and 'mkFastStringBytes' is that we don't have to copy
242 -- the bytes if the string is new to the table.
243 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
244 mkFastStringForeignPtr ptr fp len = do
245 ft@(FastStringTable uid _) <- readIORef string_table
246 -- _trace ("hashed: "++show (I# h)) $
250 fs <- mkNewFastString uid ptr fp len
251 updTbl string_table ft h (fs:ls)
252 {- _trace ("new: " ++ show f_str) $ -}
255 lookup_result <- lookupTbl ft h
256 case lookup_result of
259 b <- bucket_match ls len ptr
262 Just v -> {- _trace ("re-use: "++show v) $ -} return v
264 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
265 mkZFastStringForeignPtr ptr fp len = do
266 ft@(FastStringTable uid _) <- readIORef string_table
267 -- _trace ("hashed: "++show (I# h)) $
271 fs <- mkNewZFastString uid ptr fp len
272 updTbl string_table ft h (fs:ls)
273 {- _trace ("new: " ++ show f_str) $ -}
276 lookup_result <- lookupTbl ft h
277 case lookup_result of
280 b <- bucket_match ls len ptr
283 Just v -> {- _trace ("re-use: "++show v) $ -} return v
286 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
287 mkFastString :: String -> FastString
290 let l = utf8EncodedLength str
291 buf <- mallocForeignPtrBytes l
292 withForeignPtr buf $ \ptr -> do
293 utf8EncodeString ptr str
294 mkFastStringForeignPtr ptr buf l
296 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
297 mkFastStringByteList :: [Word8] -> FastString
298 mkFastStringByteList str =
300 let l = Prelude.length str
301 buf <- mallocForeignPtrBytes l
302 withForeignPtr buf $ \ptr -> do
303 pokeArray (castPtr ptr) str
304 mkFastStringForeignPtr ptr buf l
306 -- | Creates a Z-encoded 'FastString' from a 'String'
307 mkZFastString :: String -> FastString
310 let l = Prelude.length str
311 buf <- mallocForeignPtrBytes l
312 withForeignPtr buf $ \ptr -> do
313 pokeCAString (castPtr ptr) str
314 mkZFastStringForeignPtr ptr buf l
316 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
317 bucket_match [] _ _ = return Nothing
318 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
320 b <- cmpStringPrefix ptr buf len
321 if b then return (Just v)
322 else bucket_match ls len ptr
324 bucket_match ls len ptr
326 mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
328 mkNewFastString uid ptr fp len = do
329 ref <- newIORef Nothing
330 n_chars <- countUTF8Chars ptr len
331 return (FastString uid len n_chars fp (UTF8Encoded ref))
333 mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
335 mkNewZFastString uid _ fp len = do
336 return (FastString uid len len fp ZEncoded)
338 copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
339 copyNewFastString uid ptr len = do
340 fp <- copyBytesToForeignPtr ptr len
341 ref <- newIORef Nothing
342 n_chars <- countUTF8Chars ptr len
343 return (FastString uid len n_chars fp (UTF8Encoded ref))
345 copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
346 copyNewZFastString uid ptr len = do
347 fp <- copyBytesToForeignPtr ptr len
348 return (FastString uid len len fp ZEncoded)
350 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
351 copyBytesToForeignPtr ptr len = do
352 fp <- mallocForeignPtrBytes len
353 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
356 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
357 cmpStringPrefix ptr fp len =
358 withForeignPtr fp $ \ptr' -> do
359 r <- memcmp ptr ptr' len
363 hashStr :: Ptr Word8 -> Int -> Int
364 -- use the Addr to produce a hash value between 0 & m (inclusive)
365 hashStr (Ptr a#) (I# len#) = loop 0# 0#
367 loop h n | n GHC.Exts.==# len# = I# h
368 | otherwise = loop h2 (n GHC.Exts.+# 1#)
369 where c = ord# (indexCharOffAddr# a# n)
370 h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
373 -- -----------------------------------------------------------------------------
376 -- | Returns the length of the 'FastString' in characters
377 lengthFS :: FastString -> Int
378 lengthFS f = n_chars f
380 -- | Returns 'True' if the 'FastString' is Z-encoded
381 isZEncoded :: FastString -> Bool
382 isZEncoded fs | ZEncoded <- enc fs = True
385 -- | Returns 'True' if this 'FastString' is not Z-encoded but already has
386 -- a Z-encoding cached (used in producing stats).
387 hasZEncoding :: FastString -> Bool
388 hasZEncoding (FastString _ _ _ _ enc) =
396 -- | Returns 'True' if the 'FastString' is empty
397 nullFS :: FastString -> Bool
398 nullFS f = n_bytes f == 0
400 -- | unpacks and decodes the FastString
401 unpackFS :: FastString -> String
402 unpackFS (FastString _ n_bytes _ buf enc) =
403 inlinePerformIO $ withForeignPtr buf $ \ptr ->
405 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
406 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
408 bytesFS :: FastString -> [Word8]
409 bytesFS (FastString _ n_bytes _ buf _) =
410 inlinePerformIO $ withForeignPtr buf $ \ptr ->
411 peekArray n_bytes ptr
413 -- | returns a Z-encoded version of a 'FastString'. This might be the
414 -- original, if it was already Z-encoded. The first time this
415 -- function is applied to a particular 'FastString', the results are
418 zEncodeFS :: FastString -> FastString
419 zEncodeFS fs@(FastString _ _ _ _ enc) =
428 let efs = mkZFastString (zEncodeString (unpackFS fs))
429 writeIORef ref (Just efs)
432 appendFS :: FastString -> FastString -> FastString
433 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
435 concatFS :: [FastString] -> FastString
436 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
438 headFS :: FastString -> Char
439 headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
440 headFS (FastString _ _ _ buf enc) =
441 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
444 w <- peek (castPtr ptr)
445 return (castCCharToChar w)
447 return (fst (utf8DecodeChar ptr))
449 tailFS :: FastString -> FastString
450 tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
451 tailFS (FastString _ n_bytes _ buf enc) =
452 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
455 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
457 let (_,ptr') = utf8DecodeChar ptr
458 let off = ptr' `minusPtr` ptr
459 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
461 consFS :: Char -> FastString -> FastString
462 consFS c fs = mkFastString (c : unpackFS fs)
464 uniqueOfFS :: FastString -> FastInt
465 uniqueOfFS (FastString u _ _ _ _) = iUnbox u
468 nilFS = mkFastString ""
470 -- -----------------------------------------------------------------------------
473 getFastStringTable :: IO [[FastString]]
474 getFastStringTable = do
475 tbl <- readIORef string_table
476 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
479 -- -----------------------------------------------------------------------------
480 -- Outputting 'FastString's
482 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
483 -- get the actual bytes in the 'FastString' written to the 'Handle'.
484 hPutFS :: Handle -> FastString -> IO ()
485 hPutFS handle (FastString _ len _ fp _)
486 | len == 0 = return ()
487 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
489 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
490 -- in the current locale's encoding (for error messages and suchlike).
492 -- -----------------------------------------------------------------------------
493 -- LitStrings, here for convenience only.
495 -- hmm, not unboxed (or rather FastPtr), interesting
496 --a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't
497 --really care about C types in naming, where we can help it.
498 type LitString = Ptr Word8
499 --Why do we recalculate length every time it's requested?
500 --If it's commonly needed, we should perhaps have
501 --data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt
503 #if defined(__GLASGOW_HASKELL__)
504 mkLitString# :: Addr# -> LitString
505 mkLitString# a# = Ptr a#
507 --can/should we use FastTypes here?
508 --Is this likely to be memory-preserving if only used on constant strings?
509 --should we inline it? If lucky, that would make a CAF that wouldn't
510 --be computationally repeated... although admittedly we're not
511 --really intending to use mkLitString when __GLASGOW_HASKELL__...
512 --(I wonder, is unicode / multi-byte characters allowed in LitStrings
514 {-# INLINE mkLitString #-}
515 mkLitString :: String -> LitString
518 p <- mallocBytes (length s + 1)
520 loop :: Int -> String -> IO ()
521 loop n cs | n `seq` null cs = pokeByteOff p n (0 :: Word8)
523 pokeByteOff p n (fromIntegral (ord c) :: Word8)
525 -- XXX GHC isn't smart enough to know that we have already covered
527 loop _ [] = panic "mkLitString"
532 unpackLitString :: LitString -> String
533 unpackLitString p_ = case pUnbox p_ of
534 p -> unpack (_ILIT(0))
536 unpack n = case indexWord8OffFastPtrAsFastChar p n of
537 ch -> if ch `eqFastChar` _CLIT('\0')
538 then [] else cBox ch : unpack (n +# _ILIT(1))
540 strLength :: LitString -> Int
541 strLength = ptrStrLength
543 -- for now, use a simple String representation
544 --no, let's not do that right now - it's work in other places
546 type LitString = String
548 mkLitString :: String -> LitString
551 unpackLitString :: LitString -> String
554 strLength :: LitString -> Int
559 -- -----------------------------------------------------------------------------
562 foreign import ccall unsafe "ghc_strlen"
563 ptrStrLength :: Ptr Word8 -> Int
565 -- NB. does *not* add a '\0'-terminator.
566 -- We only use CChar here to be parallel to the imported
567 -- peekC(A)StringLen.
568 pokeCAString :: Ptr CChar -> String -> IO ()
569 pokeCAString ptr str =
572 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
576 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 602
577 peekCAStringLen = peekCStringLen
580 {-# NOINLINE sLit #-}
581 sLit :: String -> LitString
582 sLit x = mkLitString x
584 {-# NOINLINE fsLit #-}
585 fsLit :: String -> FastString
586 fsLit x = mkFastString x
589 forall x . sLit (unpackCString# x) = mkLitString# x #-}
591 forall x . fsLit (unpackCString# x) = mkFastString# x #-}