2 % (c) The University of Glasgow, 1997-2006
5 {-# OPTIONS -fno-warn-unused-imports #-}
6 -- XXX GHC 6.9 seems to be confused by unpackCString# being used only in
9 {-# OPTIONS_GHC -O -funbox-strict-fields #-}
10 -- We always optimise this, otherwise performance of a non-optimised
11 -- compiler is severely affected
14 -- There are two principal string types used internally by GHC:
17 -- * A compact, hash-consed, representation of character strings.
18 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
19 -- * Generated by 'fsLit'.
20 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
23 -- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@).
24 -- * Practically no operations.
25 -- * Outputing them is fast.
26 -- * Generated by 'sLit'.
27 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
29 -- Use 'LitString' unless you want the facilities of 'FastString'.
33 FastString(..), -- not abstract, for now.
40 mkFastStringForeignPtr,
41 #if defined(__GLASGOW_HASKELL__)
48 unpackFS, -- :: FastString -> String
49 bytesFS, -- :: FastString -> [Word8]
78 #if defined(__GLASGOW_HASKELL__)
90 #include "HsVersions.h"
102 import System.IO.Unsafe ( unsafePerformIO )
104 import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
105 import Data.Maybe ( isJust )
106 import Data.Char ( ord )
108 #if __GLASGOW_HASKELL__ >= 611
109 import GHC.IO ( IO(..) )
111 import GHC.IOBase ( IO(..) )
114 import GHC.Ptr ( Ptr(..) )
115 #if defined(__GLASGOW_HASKELL__)
116 import GHC.Base ( unpackCString# )
119 #define hASH_TBL_SIZE 4091
120 #define hASH_TBL_SIZE_UNBOXED 4091#
124 A 'FastString' is an array of bytes, hashed to support fast O(1)
125 comparison. It is also associated with a character encoding, so that
126 we know how to convert a 'FastString' to the local encoding, or to the
127 Z-encoding used by the compiler internally.
129 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
132 data FastString = FastString {
133 uniq :: {-# UNPACK #-} !Int, -- unique id
134 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
135 n_chars :: {-# UNPACK #-} !Int, -- number of chars
136 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
141 -- including strings that don't need any encoding
143 -- A UTF-8 string with a memoized Z-encoding
144 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
146 instance Eq FastString where
147 f1 == f2 = uniq f1 == uniq f2
149 instance Ord FastString where
150 -- Compares lexicographically, not by unique
151 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
152 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
153 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
154 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
159 compare a b = cmpFS a b
161 instance Show FastString where
162 show fs = show (unpackFS fs)
164 instance Data FastString where
166 toConstr _ = abstractConstr "FastString"
167 gunfold _ _ = error "gunfold"
168 dataTypeOf _ = mkNoRepType "FastString"
170 cmpFS :: FastString -> FastString -> Ordering
171 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
172 if u1 == u2 then EQ else
173 case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
178 unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
179 unsafeMemcmp buf1 buf2 l =
181 withForeignPtr buf1 $ \p1 ->
182 withForeignPtr buf2 $ \p2 ->
186 foreign import ccall unsafe "ghc_memcmp"
187 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
190 -- -----------------------------------------------------------------------------
194 Internally, the compiler will maintain a fast string symbol
195 table, providing sharing and fast comparison. Creation of
196 new @FastString@s then covertly does a lookup, re-using the
197 @FastString@ if there was a hit.
200 data FastStringTable =
203 (MutableArray# RealWorld [FastString])
205 {-# NOINLINE string_table #-}
206 string_table :: IORef FastStringTable
209 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
211 (# s2#, FastStringTable 0 arr# #)
214 lookupTbl :: FastStringTable -> Int -> IO [FastString]
215 lookupTbl (FastStringTable _ arr#) (I# i#) =
216 IO $ \ s# -> readArray# arr# i# s#
218 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
219 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
220 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
221 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
223 mkFastString# :: Addr# -> FastString
224 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
227 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
228 mkFastStringBytes ptr len = unsafePerformIO $ do
229 ft@(FastStringTable uid _) <- readIORef string_table
233 fs <- copyNewFastString uid ptr len
234 updTbl string_table ft h (fs:ls)
235 {- _trace ("new: " ++ show f_str) $ -}
238 lookup_result <- lookupTbl ft h
239 case lookup_result of
242 b <- bucket_match ls len ptr
245 Just v -> {- _trace ("re-use: "++show v) $ -} return v
247 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
248 mkZFastStringBytes ptr len = unsafePerformIO $ do
249 ft@(FastStringTable uid _) <- readIORef string_table
253 fs <- copyNewZFastString uid ptr len
254 updTbl string_table ft h (fs:ls)
255 {- _trace ("new: " ++ show f_str) $ -}
258 lookup_result <- lookupTbl ft h
259 case lookup_result of
262 b <- bucket_match ls len ptr
265 Just v -> {- _trace ("re-use: "++show v) $ -} return v
267 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
268 -- between this and 'mkFastStringBytes' is that we don't have to copy
269 -- the bytes if the string is new to the table.
270 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
271 mkFastStringForeignPtr ptr fp len = do
272 ft@(FastStringTable uid _) <- readIORef string_table
273 -- _trace ("hashed: "++show (I# h)) $
277 fs <- mkNewFastString uid ptr fp len
278 updTbl string_table ft h (fs:ls)
279 {- _trace ("new: " ++ show f_str) $ -}
282 lookup_result <- lookupTbl ft h
283 case lookup_result of
286 b <- bucket_match ls len ptr
289 Just v -> {- _trace ("re-use: "++show v) $ -} return v
291 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
292 mkZFastStringForeignPtr ptr fp len = do
293 ft@(FastStringTable uid _) <- readIORef string_table
294 -- _trace ("hashed: "++show (I# h)) $
298 fs <- mkNewZFastString uid ptr fp len
299 updTbl string_table ft h (fs:ls)
300 {- _trace ("new: " ++ show f_str) $ -}
303 lookup_result <- lookupTbl ft h
304 case lookup_result of
307 b <- bucket_match ls len ptr
310 Just v -> {- _trace ("re-use: "++show v) $ -} return v
313 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
314 mkFastString :: String -> FastString
317 let l = utf8EncodedLength str
318 buf <- mallocForeignPtrBytes l
319 withForeignPtr buf $ \ptr -> do
320 utf8EncodeString ptr str
321 mkFastStringForeignPtr ptr buf l
323 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
324 mkFastStringByteList :: [Word8] -> FastString
325 mkFastStringByteList str =
327 let l = Prelude.length str
328 buf <- mallocForeignPtrBytes l
329 withForeignPtr buf $ \ptr -> do
330 pokeArray (castPtr ptr) str
331 mkFastStringForeignPtr ptr buf l
333 -- | Creates a Z-encoded 'FastString' from a 'String'
334 mkZFastString :: String -> FastString
337 let l = Prelude.length str
338 buf <- mallocForeignPtrBytes l
339 withForeignPtr buf $ \ptr -> do
340 pokeCAString (castPtr ptr) str
341 mkZFastStringForeignPtr ptr buf l
343 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
344 bucket_match [] _ _ = return Nothing
345 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
347 b <- cmpStringPrefix ptr buf len
348 if b then return (Just v)
349 else bucket_match ls len ptr
351 bucket_match ls len ptr
353 mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
355 mkNewFastString uid ptr fp len = do
356 ref <- newIORef Nothing
357 n_chars <- countUTF8Chars ptr len
358 return (FastString uid len n_chars fp (UTF8Encoded ref))
360 mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
362 mkNewZFastString uid _ fp len = do
363 return (FastString uid len len fp ZEncoded)
365 copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
366 copyNewFastString uid ptr len = do
367 fp <- copyBytesToForeignPtr ptr len
368 ref <- newIORef Nothing
369 n_chars <- countUTF8Chars ptr len
370 return (FastString uid len n_chars fp (UTF8Encoded ref))
372 copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
373 copyNewZFastString uid ptr len = do
374 fp <- copyBytesToForeignPtr ptr len
375 return (FastString uid len len fp ZEncoded)
377 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
378 copyBytesToForeignPtr ptr len = do
379 fp <- mallocForeignPtrBytes len
380 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
383 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
384 cmpStringPrefix ptr fp len =
385 withForeignPtr fp $ \ptr' -> do
386 r <- memcmp ptr ptr' len
390 hashStr :: Ptr Word8 -> Int -> Int
391 -- use the Addr to produce a hash value between 0 & m (inclusive)
392 hashStr (Ptr a#) (I# len#) = loop 0# 0#
394 loop h n | n GHC.Exts.==# len# = I# h
395 | otherwise = loop h2 (n GHC.Exts.+# 1#)
396 where !c = ord# (indexCharOffAddr# a# n)
397 !h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
400 -- -----------------------------------------------------------------------------
403 -- | Returns the length of the 'FastString' in characters
404 lengthFS :: FastString -> Int
405 lengthFS f = n_chars f
407 -- | Returns @True@ if the 'FastString' is Z-encoded
408 isZEncoded :: FastString -> Bool
409 isZEncoded fs | ZEncoded <- enc fs = True
412 -- | Returns @True@ if this 'FastString' is not Z-encoded but already has
413 -- a Z-encoding cached (used in producing stats).
414 hasZEncoding :: FastString -> Bool
415 hasZEncoding (FastString _ _ _ _ enc) =
423 -- | Returns @True@ if the 'FastString' is empty
424 nullFS :: FastString -> Bool
425 nullFS f = n_bytes f == 0
427 -- | Unpacks and decodes the FastString
428 unpackFS :: FastString -> String
429 unpackFS (FastString _ n_bytes _ buf enc) =
430 inlinePerformIO $ withForeignPtr buf $ \ptr ->
432 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
433 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
435 bytesFS :: FastString -> [Word8]
436 bytesFS (FastString _ n_bytes _ buf _) =
437 inlinePerformIO $ withForeignPtr buf $ \ptr ->
438 peekArray n_bytes ptr
440 -- | Returns a Z-encoded version of a 'FastString'. This might be the
441 -- original, if it was already Z-encoded. The first time this
442 -- function is applied to a particular 'FastString', the results are
445 zEncodeFS :: FastString -> FastString
446 zEncodeFS fs@(FastString _ _ _ _ enc) =
455 let efs = mkZFastString (zEncodeString (unpackFS fs))
456 writeIORef ref (Just efs)
459 appendFS :: FastString -> FastString -> FastString
462 r <- mallocForeignPtrBytes len
463 withForeignPtr r $ \ r' -> do
464 withForeignPtr (buf fs1) $ \ fs1Ptr -> do
465 withForeignPtr (buf fs2) $ \ fs2Ptr -> do
466 copyBytes r' fs1Ptr len1
467 copyBytes (advancePtr r' len1) fs2Ptr len2
468 mkFastStringForeignPtr r' r len
469 where len = len1 + len2
473 concatFS :: [FastString] -> FastString
474 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
476 headFS :: FastString -> Char
477 headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
478 headFS (FastString _ _ _ buf enc) =
479 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
482 w <- peek (castPtr ptr)
483 return (castCCharToChar w)
485 return (fst (utf8DecodeChar ptr))
487 tailFS :: FastString -> FastString
488 tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
489 tailFS (FastString _ n_bytes _ buf enc) =
490 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
493 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
495 let (_,ptr') = utf8DecodeChar ptr
496 let off = ptr' `minusPtr` ptr
497 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
499 consFS :: Char -> FastString -> FastString
500 consFS c fs = mkFastString (c : unpackFS fs)
502 uniqueOfFS :: FastString -> FastInt
503 uniqueOfFS (FastString u _ _ _ _) = iUnbox u
506 nilFS = mkFastString ""
508 -- -----------------------------------------------------------------------------
511 getFastStringTable :: IO [[FastString]]
512 getFastStringTable = do
513 tbl <- readIORef string_table
514 buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
517 -- -----------------------------------------------------------------------------
518 -- Outputting 'FastString's
520 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
521 -- get the actual bytes in the 'FastString' written to the 'Handle'.
522 hPutFS :: Handle -> FastString -> IO ()
523 hPutFS handle (FastString _ len _ fp _)
524 | len == 0 = return ()
525 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
527 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
528 -- in the current locale's encoding (for error messages and suchlike).
530 -- -----------------------------------------------------------------------------
531 -- LitStrings, here for convenience only.
533 -- hmm, not unboxed (or rather FastPtr), interesting
534 --a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't
535 --really care about C types in naming, where we can help it.
536 type LitString = Ptr Word8
537 --Why do we recalculate length every time it's requested?
538 --If it's commonly needed, we should perhaps have
539 --data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt
541 #if defined(__GLASGOW_HASKELL__)
542 mkLitString# :: Addr# -> LitString
543 mkLitString# a# = Ptr a#
545 --can/should we use FastTypes here?
546 --Is this likely to be memory-preserving if only used on constant strings?
547 --should we inline it? If lucky, that would make a CAF that wouldn't
548 --be computationally repeated... although admittedly we're not
549 --really intending to use mkLitString when __GLASGOW_HASKELL__...
550 --(I wonder, is unicode / multi-byte characters allowed in LitStrings
552 {-# INLINE mkLitString #-}
553 mkLitString :: String -> LitString
556 p <- mallocBytes (length s + 1)
558 loop :: Int -> String -> IO ()
559 loop n cs | n `seq` null cs = pokeByteOff p n (0 :: Word8)
561 pokeByteOff p n (fromIntegral (ord c) :: Word8)
563 -- XXX GHC isn't smart enough to know that we have already covered
565 loop _ [] = panic "mkLitString"
570 unpackLitString :: LitString -> String
571 unpackLitString p_ = case pUnbox p_ of
572 p -> unpack (_ILIT(0))
574 unpack n = case indexWord8OffFastPtrAsFastChar p n of
575 ch -> if ch `eqFastChar` _CLIT('\0')
576 then [] else cBox ch : unpack (n +# _ILIT(1))
578 lengthLS :: LitString -> Int
579 lengthLS = ptrStrLength
581 -- for now, use a simple String representation
582 --no, let's not do that right now - it's work in other places
584 type LitString = String
586 mkLitString :: String -> LitString
589 unpackLitString :: LitString -> String
592 lengthLS :: LitString -> Int
597 -- -----------------------------------------------------------------------------
600 foreign import ccall unsafe "ghc_strlen"
601 ptrStrLength :: Ptr Word8 -> Int
603 -- NB. does *not* add a '\0'-terminator.
604 -- We only use CChar here to be parallel to the imported
605 -- peekC(A)StringLen.
606 pokeCAString :: Ptr CChar -> String -> IO ()
607 pokeCAString ptr str =
610 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
614 {-# NOINLINE sLit #-}
615 sLit :: String -> LitString
616 sLit x = mkLitString x
618 {-# NOINLINE fsLit #-}
619 fsLit :: String -> FastString
620 fsLit x = mkFastString x
623 forall x . sLit (unpackCString# x) = mkLitString# x #-}
625 forall x . fsLit (unpackCString# x) = mkFastString# x #-}