2 % (c) The University of Glasgow, 1997-2006
5 {-# OPTIONS -fno-warn-unused-imports #-}
6 -- XXX GHC 6.9 seems to be confused by unpackCString# being used only in
9 {-# OPTIONS_GHC -O -funbox-strict-fields #-}
10 -- We always optimise this, otherwise performance of a non-optimised
11 -- compiler is severely affected
14 -- There are two principal string types used internally by GHC:
17 -- * A compact, hash-consed, representation of character strings.
18 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
19 -- * Generated by 'fsLit'.
20 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
23 -- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@).
24 -- * Practically no operations.
25 -- * Outputing them is fast.
26 -- * Generated by 'sLit'.
27 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
29 -- Use 'LitString' unless you want the facilities of 'FastString'.
33 FastString(..), -- not abstract, for now.
40 mkFastStringForeignPtr,
41 #if defined(__GLASGOW_HASKELL__)
48 unpackFS, -- :: FastString -> String
49 bytesFS, -- :: FastString -> [Word8]
78 #if defined(__GLASGOW_HASKELL__)
90 #include "HsVersions.h"
101 import System.IO.Unsafe ( unsafePerformIO )
102 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
103 import Data.Maybe ( isJust )
104 import Data.Char ( ord )
106 #if __GLASGOW_HASKELL__ >= 611
107 import GHC.IO ( IO(..) )
109 import GHC.IOBase ( IO(..) )
112 import GHC.Ptr ( Ptr(..) )
113 #if defined(__GLASGOW_HASKELL__)
114 import GHC.Base ( unpackCString# )
117 #define hASH_TBL_SIZE 4091
118 #define hASH_TBL_SIZE_UNBOXED 4091#
122 A 'FastString' is an array of bytes, hashed to support fast O(1)
123 comparison. It is also associated with a character encoding, so that
124 we know how to convert a 'FastString' to the local encoding, or to the
125 Z-encoding used by the compiler internally.
127 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
130 data FastString = FastString {
131 uniq :: {-# UNPACK #-} !Int, -- unique id
132 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
133 n_chars :: {-# UNPACK #-} !Int, -- number of chars
134 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
139 -- including strings that don't need any encoding
141 -- A UTF-8 string with a memoized Z-encoding
142 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
144 instance Eq FastString where
145 f1 == f2 = uniq f1 == uniq f2
147 instance Ord FastString where
148 -- Compares lexicographically, not by unique
149 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
150 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
151 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
152 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
157 compare a b = cmpFS a b
159 instance Show FastString where
160 show fs = show (unpackFS fs)
162 cmpFS :: FastString -> FastString -> Ordering
163 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
164 if u1 == u2 then EQ else
165 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
170 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
171 unsafeMemcmp buf1 buf2 l =
173 withForeignPtr buf1 $ \p1 ->
174 withForeignPtr buf2 $ \p2 ->
178 foreign import ccall unsafe "ghc_memcmp"
179 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
182 -- -----------------------------------------------------------------------------
186 Internally, the compiler will maintain a fast string symbol
187 table, providing sharing and fast comparison. Creation of
188 new @FastString@s then covertly does a lookup, re-using the
189 @FastString@ if there was a hit.
192 data FastStringTable =
195 (MutableArray# RealWorld [FastString])
197 {-# NOINLINE string_table #-}
198 string_table :: IORef FastStringTable
201 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
203 (# s2#, FastStringTable 0 arr# #)
206 lookupTbl :: FastStringTable -> Int -> IO [FastString]
207 lookupTbl (FastStringTable _ arr#) (I# i#) =
208 IO $ \ s# -> readArray# arr# i# s#
210 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
211 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
212 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
213 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
215 mkFastString# :: Addr# -> FastString
216 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
219 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
220 mkFastStringBytes ptr len = unsafePerformIO $ do
221 ft@(FastStringTable uid _) <- readIORef string_table
225 fs <- copyNewFastString uid ptr len
226 updTbl string_table ft h (fs:ls)
227 {- _trace ("new: " ++ show f_str) $ -}
230 lookup_result <- lookupTbl ft h
231 case lookup_result of
234 b <- bucket_match ls len ptr
237 Just v -> {- _trace ("re-use: "++show v) $ -} return v
239 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
240 mkZFastStringBytes ptr len = unsafePerformIO $ do
241 ft@(FastStringTable uid _) <- readIORef string_table
245 fs <- copyNewZFastString uid ptr len
246 updTbl string_table ft h (fs:ls)
247 {- _trace ("new: " ++ show f_str) $ -}
250 lookup_result <- lookupTbl ft h
251 case lookup_result of
254 b <- bucket_match ls len ptr
257 Just v -> {- _trace ("re-use: "++show v) $ -} return v
259 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
260 -- between this and 'mkFastStringBytes' is that we don't have to copy
261 -- the bytes if the string is new to the table.
262 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
263 mkFastStringForeignPtr ptr fp len = do
264 ft@(FastStringTable uid _) <- readIORef string_table
265 -- _trace ("hashed: "++show (I# h)) $
269 fs <- mkNewFastString uid ptr fp len
270 updTbl string_table ft h (fs:ls)
271 {- _trace ("new: " ++ show f_str) $ -}
274 lookup_result <- lookupTbl ft h
275 case lookup_result of
278 b <- bucket_match ls len ptr
281 Just v -> {- _trace ("re-use: "++show v) $ -} return v
283 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
284 mkZFastStringForeignPtr ptr fp len = do
285 ft@(FastStringTable uid _) <- readIORef string_table
286 -- _trace ("hashed: "++show (I# h)) $
290 fs <- mkNewZFastString uid ptr fp len
291 updTbl string_table ft h (fs:ls)
292 {- _trace ("new: " ++ show f_str) $ -}
295 lookup_result <- lookupTbl ft h
296 case lookup_result of
299 b <- bucket_match ls len ptr
302 Just v -> {- _trace ("re-use: "++show v) $ -} return v
305 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
306 mkFastString :: String -> FastString
309 let l = utf8EncodedLength str
310 buf <- mallocForeignPtrBytes l
311 withForeignPtr buf $ \ptr -> do
312 utf8EncodeString ptr str
313 mkFastStringForeignPtr ptr buf l
315 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
316 mkFastStringByteList :: [Word8] -> FastString
317 mkFastStringByteList str =
319 let l = Prelude.length str
320 buf <- mallocForeignPtrBytes l
321 withForeignPtr buf $ \ptr -> do
322 pokeArray (castPtr ptr) str
323 mkFastStringForeignPtr ptr buf l
325 -- | Creates a Z-encoded 'FastString' from a 'String'
326 mkZFastString :: String -> FastString
329 let l = Prelude.length str
330 buf <- mallocForeignPtrBytes l
331 withForeignPtr buf $ \ptr -> do
332 pokeCAString (castPtr ptr) str
333 mkZFastStringForeignPtr ptr buf l
335 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
336 bucket_match [] _ _ = return Nothing
337 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
339 b <- cmpStringPrefix ptr buf len
340 if b then return (Just v)
341 else bucket_match ls len ptr
343 bucket_match ls len ptr
345 mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
347 mkNewFastString uid ptr fp len = do
348 ref <- newIORef Nothing
349 n_chars <- countUTF8Chars ptr len
350 return (FastString uid len n_chars fp (UTF8Encoded ref))
352 mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
354 mkNewZFastString uid _ fp len = do
355 return (FastString uid len len fp ZEncoded)
357 copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
358 copyNewFastString uid ptr len = do
359 fp <- copyBytesToForeignPtr ptr len
360 ref <- newIORef Nothing
361 n_chars <- countUTF8Chars ptr len
362 return (FastString uid len n_chars fp (UTF8Encoded ref))
364 copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
365 copyNewZFastString uid ptr len = do
366 fp <- copyBytesToForeignPtr ptr len
367 return (FastString uid len len fp ZEncoded)
369 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
370 copyBytesToForeignPtr ptr len = do
371 fp <- mallocForeignPtrBytes len
372 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
375 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
376 cmpStringPrefix ptr fp len =
377 withForeignPtr fp $ \ptr' -> do
378 r <- memcmp ptr ptr' len
382 hashStr :: Ptr Word8 -> Int -> Int
383 -- use the Addr to produce a hash value between 0 & m (inclusive)
384 hashStr (Ptr a#) (I# len#) = loop 0# 0#
386 loop h n | n GHC.Exts.==# len# = I# h
387 | otherwise = loop h2 (n GHC.Exts.+# 1#)
388 where !c = ord# (indexCharOffAddr# a# n)
389 !h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
392 -- -----------------------------------------------------------------------------
395 -- | Returns the length of the 'FastString' in characters
396 lengthFS :: FastString -> Int
397 lengthFS f = n_chars f
399 -- | Returns @True@ if the 'FastString' is Z-encoded
400 isZEncoded :: FastString -> Bool
401 isZEncoded fs | ZEncoded <- enc fs = True
404 -- | Returns @True@ if this 'FastString' is not Z-encoded but already has
405 -- a Z-encoding cached (used in producing stats).
406 hasZEncoding :: FastString -> Bool
407 hasZEncoding (FastString _ _ _ _ enc) =
415 -- | Returns @True@ if the 'FastString' is empty
416 nullFS :: FastString -> Bool
417 nullFS f = n_bytes f == 0
419 -- | Unpacks and decodes the FastString
420 unpackFS :: FastString -> String
421 unpackFS (FastString _ n_bytes _ buf enc) =
422 inlinePerformIO $ withForeignPtr buf $ \ptr ->
424 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
425 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
427 bytesFS :: FastString -> [Word8]
428 bytesFS (FastString _ n_bytes _ buf _) =
429 inlinePerformIO $ withForeignPtr buf $ \ptr ->
430 peekArray n_bytes ptr
432 -- | Returns a Z-encoded version of a 'FastString'. This might be the
433 -- original, if it was already Z-encoded. The first time this
434 -- function is applied to a particular 'FastString', the results are
437 zEncodeFS :: FastString -> FastString
438 zEncodeFS fs@(FastString _ _ _ _ enc) =
447 let efs = mkZFastString (zEncodeString (unpackFS fs))
448 writeIORef ref (Just efs)
451 appendFS :: FastString -> FastString -> FastString
452 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
454 concatFS :: [FastString] -> FastString
455 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
457 headFS :: FastString -> Char
458 headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
459 headFS (FastString _ _ _ buf enc) =
460 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
463 w <- peek (castPtr ptr)
464 return (castCCharToChar w)
466 return (fst (utf8DecodeChar ptr))
468 tailFS :: FastString -> FastString
469 tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
470 tailFS (FastString _ n_bytes _ buf enc) =
471 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
474 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
476 let (_,ptr') = utf8DecodeChar ptr
477 let off = ptr' `minusPtr` ptr
478 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
480 consFS :: Char -> FastString -> FastString
481 consFS c fs = mkFastString (c : unpackFS fs)
483 uniqueOfFS :: FastString -> FastInt
484 uniqueOfFS (FastString u _ _ _ _) = iUnbox u
487 nilFS = mkFastString ""
489 -- -----------------------------------------------------------------------------
492 getFastStringTable :: IO [[FastString]]
493 getFastStringTable = do
494 tbl <- readIORef string_table
495 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
498 -- -----------------------------------------------------------------------------
499 -- Outputting 'FastString's
501 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
502 -- get the actual bytes in the 'FastString' written to the 'Handle'.
503 hPutFS :: Handle -> FastString -> IO ()
504 hPutFS handle (FastString _ len _ fp _)
505 | len == 0 = return ()
506 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
508 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
509 -- in the current locale's encoding (for error messages and suchlike).
511 -- -----------------------------------------------------------------------------
512 -- LitStrings, here for convenience only.
514 -- hmm, not unboxed (or rather FastPtr), interesting
515 --a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't
516 --really care about C types in naming, where we can help it.
517 type LitString = Ptr Word8
518 --Why do we recalculate length every time it's requested?
519 --If it's commonly needed, we should perhaps have
520 --data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt
522 #if defined(__GLASGOW_HASKELL__)
523 mkLitString# :: Addr# -> LitString
524 mkLitString# a# = Ptr a#
526 --can/should we use FastTypes here?
527 --Is this likely to be memory-preserving if only used on constant strings?
528 --should we inline it? If lucky, that would make a CAF that wouldn't
529 --be computationally repeated... although admittedly we're not
530 --really intending to use mkLitString when __GLASGOW_HASKELL__...
531 --(I wonder, is unicode / multi-byte characters allowed in LitStrings
533 {-# INLINE mkLitString #-}
534 mkLitString :: String -> LitString
537 p <- mallocBytes (length s + 1)
539 loop :: Int -> String -> IO ()
540 loop n cs | n `seq` null cs = pokeByteOff p n (0 :: Word8)
542 pokeByteOff p n (fromIntegral (ord c) :: Word8)
544 -- XXX GHC isn't smart enough to know that we have already covered
546 loop _ [] = panic "mkLitString"
551 unpackLitString :: LitString -> String
552 unpackLitString p_ = case pUnbox p_ of
553 p -> unpack (_ILIT(0))
555 unpack n = case indexWord8OffFastPtrAsFastChar p n of
556 ch -> if ch `eqFastChar` _CLIT('\0')
557 then [] else cBox ch : unpack (n +# _ILIT(1))
559 lengthLS :: LitString -> Int
560 lengthLS = ptrStrLength
562 -- for now, use a simple String representation
563 --no, let's not do that right now - it's work in other places
565 type LitString = String
567 mkLitString :: String -> LitString
570 unpackLitString :: LitString -> String
573 lengthLS :: LitString -> Int
578 -- -----------------------------------------------------------------------------
581 foreign import ccall unsafe "ghc_strlen"
582 ptrStrLength :: Ptr Word8 -> Int
584 -- NB. does *not* add a '\0'-terminator.
585 -- We only use CChar here to be parallel to the imported
586 -- peekC(A)StringLen.
587 pokeCAString :: Ptr CChar -> String -> IO ()
588 pokeCAString ptr str =
591 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
595 {-# NOINLINE sLit #-}
596 sLit :: String -> LitString
597 sLit x = mkLitString x
599 {-# NOINLINE fsLit #-}
600 fsLit :: String -> FastString
601 fsLit x = mkFastString x
604 forall x . sLit (unpackCString# x) = mkLitString# x #-}
606 forall x . fsLit (unpackCString# x) = mkFastString# x #-}