2 % (c) The University of Glasgow, 1997-2006
5 {-# OPTIONS -fglasgow-exts -O #-}
8 FastString: A compact, hash-consed, representation of character strings.
9 Comparison is O(1), and you can get a Unique from them.
10 Generated by the FSLIT macro
11 Turn into SDoc with Outputable.ftext
13 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
14 Practically no operations
15 Outputing them is fast
16 Generated by the SLIT macro
17 Turn into SDoc with Outputable.ptext
19 Use LitString unless you want the facilities of FastString
24 FastString(..), -- not abstract, for now.
29 mkFastStringForeignPtr,
35 unpackFS, -- :: FastString -> String
36 bytesFS, -- :: FastString -> [Word8]
62 -- This #define suppresses the "import FastString" that
63 -- HsVersions otherwise produces
64 #define COMPILING_FAST_STRING
65 #include "HsVersions.h"
72 import UNSAFE_IO ( unsafePerformIO )
73 import MONAD_ST ( stToIO )
74 import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
75 import System.IO ( hPutBuf )
77 import GHC.Arr ( STArray(..), newSTArray )
78 import GHC.IOBase ( IO(..) )
82 #define hASH_TBL_SIZE 4091
86 A 'FastString' is an array of bytes, hashed to support fast O(1)
87 comparison. It is also associated with a character encoding, so that
88 we know how to convert a 'FastString' to the local encoding, or to the
89 Z-encoding used by the compiler internally.
91 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
94 data FastString = FastString {
95 uniq :: {-# UNPACK #-} !Int, -- unique id
96 n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
97 n_chars :: {-# UNPACK #-} !Int, -- number of chars
98 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
104 -- including strings that don't need any encoding
105 | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
106 -- A UTF-8 string with a memoized Z-encoding
108 instance Eq FastString where
109 f1 == f2 = uniq f1 == uniq f2
111 instance Ord FastString where
112 -- Compares lexicographically, not by unique
113 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
114 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
115 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
116 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
121 compare a b = cmpFS a b
123 instance Show FastString where
124 show fs = show (unpackFS fs)
126 cmpFS :: FastString -> FastString -> Ordering
127 cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
128 if u1 == u2 then EQ else
129 let l = if l1 <= l2 then l1 else l2 in
131 withForeignPtr buf1 $ \p1 ->
132 withForeignPtr buf2 $ \p2 -> do
133 res <- memcmp p1 p2 l
135 _ | res < 0 -> return LT
136 | res == 0 -> if l1 == l2 then return EQ
137 else if l1 < l2 then return LT
139 | otherwise -> return GT
142 foreign import ccall unsafe "ghc_memcmp"
143 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
146 -- -----------------------------------------------------------------------------
150 Internally, the compiler will maintain a fast string symbol
151 table, providing sharing and fast comparison. Creation of
152 new @FastString@s then covertly does a lookup, re-using the
153 @FastString@ if there was a hit.
156 data FastStringTable =
159 (MutableArray# RealWorld [FastString])
161 string_table :: IORef FastStringTable
164 (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
165 newIORef (FastStringTable 0 arr#)
167 lookupTbl :: FastStringTable -> Int -> IO [FastString]
168 lookupTbl (FastStringTable _ arr#) (I# i#) =
169 IO $ \ s# -> readArray# arr# i# s#
171 updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
172 updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
173 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
174 writeIORef fs_table_var (FastStringTable (uid+1) arr#)
176 mkFastString# :: Addr# -> FastString
177 mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
180 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
181 mkFastStringBytes ptr len = unsafePerformIO $ do
182 ft@(FastStringTable uid tbl#) <- readIORef string_table
186 fs <- copyNewFastString uid ptr len
187 updTbl string_table ft h (fs:ls)
188 {- _trace ("new: " ++ show f_str) $ -}
191 lookup_result <- lookupTbl ft h
192 case lookup_result of
195 b <- bucket_match ls len ptr
198 Just v -> {- _trace ("re-use: "++show v) $ -} return v
200 mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
201 mkZFastStringBytes ptr len = unsafePerformIO $ do
202 ft@(FastStringTable uid tbl#) <- readIORef string_table
206 fs <- copyNewZFastString uid ptr len
207 updTbl string_table ft h (fs:ls)
208 {- _trace ("new: " ++ show f_str) $ -}
211 lookup_result <- lookupTbl ft h
212 case lookup_result of
215 b <- bucket_match ls len ptr
218 Just v -> {- _trace ("re-use: "++show v) $ -} return v
220 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
221 -- between this and 'mkFastStringBytes' is that we don't have to copy
222 -- the bytes if the string is new to the table.
223 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
224 mkFastStringForeignPtr ptr fp len = do
225 ft@(FastStringTable uid tbl#) <- readIORef string_table
226 -- _trace ("hashed: "++show (I# h)) $
230 fs <- mkNewFastString uid ptr fp len
231 updTbl string_table ft h (fs:ls)
232 {- _trace ("new: " ++ show f_str) $ -}
235 lookup_result <- lookupTbl ft h
236 case lookup_result of
239 b <- bucket_match ls len ptr
242 Just v -> {- _trace ("re-use: "++show v) $ -} return v
244 mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
245 mkZFastStringForeignPtr ptr fp len = do
246 ft@(FastStringTable uid tbl#) <- readIORef string_table
247 -- _trace ("hashed: "++show (I# h)) $
251 fs <- mkNewZFastString uid ptr fp len
252 updTbl string_table ft h (fs:ls)
253 {- _trace ("new: " ++ show f_str) $ -}
256 lookup_result <- lookupTbl ft h
257 case lookup_result of
260 b <- bucket_match ls len ptr
263 Just v -> {- _trace ("re-use: "++show v) $ -} return v
266 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
267 mkFastString :: String -> FastString
270 let l = utf8EncodedLength str
271 buf <- mallocForeignPtrBytes l
272 withForeignPtr buf $ \ptr -> do
273 utf8EncodeString ptr str
274 mkFastStringForeignPtr ptr buf l
277 -- | Creates a Z-encoded 'FastString' from a 'String'
278 mkZFastString :: String -> FastString
281 let l = Prelude.length str
282 buf <- mallocForeignPtrBytes l
283 withForeignPtr buf $ \ptr -> do
284 pokeCAString (castPtr ptr) str
285 mkZFastStringForeignPtr ptr buf l
287 bucket_match [] _ _ = return Nothing
288 bucket_match (v@(FastString _ l _ buf _):ls) len ptr
290 b <- cmpStringPrefix ptr buf len
291 if b then return (Just v)
292 else bucket_match ls len ptr
294 bucket_match ls len ptr
296 mkNewFastString uid ptr fp len = do
297 ref <- newIORef Nothing
298 n_chars <- countUTF8Chars ptr len
299 return (FastString uid len n_chars fp (UTF8Encoded ref))
301 mkNewZFastString uid ptr fp len = do
302 return (FastString uid len len fp ZEncoded)
305 copyNewFastString uid ptr len = do
306 fp <- copyBytesToForeignPtr ptr len
307 ref <- newIORef Nothing
308 n_chars <- countUTF8Chars ptr len
309 return (FastString uid len n_chars fp (UTF8Encoded ref))
311 copyNewZFastString uid ptr len = do
312 fp <- copyBytesToForeignPtr ptr len
313 return (FastString uid len len fp ZEncoded)
316 copyBytesToForeignPtr ptr len = do
317 fp <- mallocForeignPtrBytes len
318 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
321 cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
322 cmpStringPrefix ptr fp len =
323 withForeignPtr fp $ \ptr' -> do
324 r <- memcmp ptr ptr' len
328 hashStr :: Ptr Word8 -> Int -> Int
329 -- use the Addr to produce a hash value between 0 & m (inclusive)
330 hashStr (Ptr a#) (I# len#) = loop 0# 0#
332 loop h n | n ==# len# = I# h
333 | otherwise = loop h2 (n +# 1#)
334 where c = ord# (indexCharOffAddr# a# n)
335 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
337 -- -----------------------------------------------------------------------------
340 -- | Returns the length of the 'FastString' in characters
341 lengthFS :: FastString -> Int
342 lengthFS f = n_chars f
344 -- | Returns 'True' if the 'FastString' is Z-encoded
345 isZEncoded :: FastString -> Bool
346 isZEncoded fs | ZEncoded <- enc fs = True
349 -- | Returns 'True' if the 'FastString' is empty
350 nullFS :: FastString -> Bool
351 nullFS f = n_bytes f == 0
353 -- | unpacks and decodes the FastString
354 unpackFS :: FastString -> String
355 unpackFS (FastString _ n_bytes _ buf enc) =
356 inlinePerformIO $ withForeignPtr buf $ \ptr ->
358 ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
359 UTF8Encoded _ -> utf8DecodeString ptr n_bytes
361 bytesFS :: FastString -> [Word8]
362 bytesFS (FastString _ n_bytes _ buf enc) =
363 inlinePerformIO $ withForeignPtr buf $ \ptr ->
364 peekArray n_bytes ptr
366 -- | returns a Z-encoded version of a 'FastString'. This might be the
367 -- original, if it was already Z-encoded. The first time this
368 -- function is applied to a particular 'FastString', the results are
371 zEncodeFS :: FastString -> FastString
372 zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
381 let efs = mkZFastString (zEncodeString (unpackFS fs))
382 writeIORef ref (Just efs)
385 appendFS :: FastString -> FastString -> FastString
386 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
388 concatFS :: [FastString] -> FastString
389 concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
391 headFS :: FastString -> Char
392 headFS (FastString _ n_bytes _ buf enc) =
393 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
396 w <- peek (castPtr ptr)
397 return (castCCharToChar w)
399 return (fst (utf8DecodeChar ptr))
401 tailFS :: FastString -> FastString
402 tailFS (FastString _ n_bytes _ buf enc) =
403 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
406 return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
408 let (_,ptr') = utf8DecodeChar ptr
409 let off = ptr' `minusPtr` ptr
410 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
412 consFS :: Char -> FastString -> FastString
413 consFS c fs = mkFastString (c : unpackFS fs)
415 uniqueOfFS :: FastString -> Int#
416 uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
418 nilFS = mkFastString ""
420 -- -----------------------------------------------------------------------------
421 -- Outputting 'FastString's
423 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
424 -- get the actual bytes in the 'FastString' written to the 'Handle'.
425 hPutFS handle (FastString _ len _ fp _)
426 | len == 0 = return ()
427 | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
429 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
430 -- in the current locale's encoding (for error messages and suchlike).
432 -- -----------------------------------------------------------------------------
433 -- LitStrings, here for convenience only.
435 type LitString = Ptr ()
437 mkLitString# :: Addr# -> LitString
438 mkLitString# a# = Ptr a#
440 foreign import ccall unsafe "ghc_strlen"
441 strLength :: Ptr () -> Int
443 -- -----------------------------------------------------------------------------
446 -- Just like unsafePerformIO, but we inline it.
447 {-# INLINE inlinePerformIO #-}
448 inlinePerformIO :: IO a -> a
449 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
451 pokeCAString :: Ptr CChar -> String -> IO ()
452 pokeCAString ptr str =
454 go [] n = pokeElemOff ptr n 0
455 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)