2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
24 mkFastString, -- :: String -> FastString
25 mkFastStringNarrow, -- :: String -> FastString
26 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
28 mkFastString#, -- :: Addr# -> FastString
29 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
31 mkFastStringInt, -- :: [Int] -> FastString
33 uniqueOfFS, -- :: FastString -> Int#
34 lengthFS, -- :: FastString -> Int
35 nullFastString, -- :: FastString -> Bool
37 unpackFS, -- :: FastString -> String
38 unpackIntFS, -- :: FastString -> [Int]
39 appendFS, -- :: FastString -> FastString -> FastString
40 headFS, -- :: FastString -> Char
41 headIntFS, -- :: FastString -> Int
42 tailFS, -- :: FastString -> FastString
43 concatFS, -- :: [FastString] -> FastString
44 consFS, -- :: Char -> FastString -> FastString
45 indexFS, -- :: FastString -> Int -> Char
46 nilFS, -- :: FastString
48 hPutFS, -- :: Handle -> FastString -> IO ()
51 mkLitString# -- :: Addr# -> LitString
54 -- This #define suppresses the "import FastString" that
55 -- HsVersions otherwise produces
56 #define COMPILING_FAST_STRING
57 #include "HsVersions.h"
59 #if __GLASGOW_HASKELL__ < 503
60 import PrelIOBase ( IO(..) )
62 import GHC.IOBase ( IO(..) )
67 import UNSAFE_IO ( unsafePerformIO )
68 import MONAD_ST ( stToIO )
69 import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
71 #if __GLASGOW_HASKELL__ < 503
72 import PrelArr ( STArray(..), newSTArray )
74 import GHC.Arr ( STArray(..), newSTArray )
77 #if __GLASGOW_HASKELL__ >= 504
82 import IOExts ( hPutBufBAFull )
86 import Char ( chr, ord )
88 #define hASH_TBL_SIZE 993
91 @FastString@s are packed representations of strings
92 with a unique id for fast comparisons. The unique id
93 is assigned when creating the @FastString@, using
94 a hash table to map from the character string representation
99 = FastString -- packed repr. on the heap.
101 -- 0 => string literal, comparison
106 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
108 [Int] -- character numbers
110 instance Eq FastString where
111 -- shortcut for real FastStrings
112 (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
113 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
115 (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
116 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
118 instance Ord FastString where
119 -- Compares lexicographically, not by unique
120 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
121 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
122 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
123 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
128 compare a b = cmpFS a b
130 instance Show FastString where
131 show fs = show (unpackFS fs)
133 lengthFS :: FastString -> Int
134 lengthFS (FastString _ l# _) = I# l#
135 lengthFS (UnicodeStr _ s) = length s
137 nullFastString :: FastString -> Bool
138 nullFastString (FastString _ l# _) = l# ==# 0#
139 nullFastString (UnicodeStr _ []) = True
140 nullFastString (UnicodeStr _ (_:_)) = False
142 unpackFS :: FastString -> String
143 unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
144 unpackFS (UnicodeStr _ s) = map chr s
146 unpackIntFS :: FastString -> [Int]
147 unpackIntFS (UnicodeStr _ s) = s
148 unpackIntFS fs = map ord (unpackFS fs)
150 appendFS :: FastString -> FastString -> FastString
151 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
153 concatFS :: [FastString] -> FastString
154 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
156 headFS :: FastString -> Char
157 headFS (FastString _ l# ba#) =
158 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
159 headFS (UnicodeStr _ (c:_)) = chr c
160 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
162 headIntFS :: FastString -> Int
163 headIntFS (UnicodeStr _ (c:_)) = c
164 headIntFS fs = ord (headFS fs)
166 indexFS :: FastString -> Int -> Char
167 indexFS f i@(I# i#) =
170 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
171 | otherwise -> error (msg (I# l#))
172 UnicodeStr _ s -> chr (s!!i)
174 msg l = "indexFS: out of range: " ++ show (l,i)
176 tailFS :: FastString -> FastString
177 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
178 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
180 consFS :: Char -> FastString -> FastString
181 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
183 uniqueOfFS :: FastString -> Int#
184 uniqueOfFS (FastString u# _ _) = u#
185 uniqueOfFS (UnicodeStr u# _) = u#
187 nilFS = mkFastString ""
190 Internally, the compiler will maintain a fast string symbol
191 table, providing sharing and fast comparison. Creation of
192 new @FastString@s then covertly does a lookup, re-using the
193 @FastString@ if there was a hit.
195 Caution: mkFastStringUnicode assumes that if the string is in the
196 table, it sits under the UnicodeStr constructor. Other mkFastString
197 variants analogously assume the FastString constructor.
200 data FastStringTable =
203 (MutableArray# RealWorld [FastString])
205 type FastStringTableVar = IORef FastStringTable
207 string_table :: FastStringTableVar
210 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
211 >>= \ (STArray _ _ arr#) ->
212 newIORef (FastStringTable 0# arr#))
214 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
215 lookupTbl (FastStringTable _ arr#) i# =
217 readArray# arr# i# s#)
219 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
220 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
221 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
223 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
225 mkFastString# :: Addr# -> FastString
227 case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
229 mkFastStringLen# :: Addr# -> Int# -> FastString
230 mkFastStringLen# a# len# =
232 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
236 -- _trace ("hashed: "++show (I# h)) $
237 lookupTbl ft h >>= \ lookup_result ->
238 case lookup_result of
240 -- no match, add it to table by copying out the
241 -- the string into a ByteArray
242 -- _trace "empty bucket" $
243 case copyPrefixStr a# (I# len#) of
245 let f_str = FastString uid# len# barr# in
246 updTbl string_table ft h [f_str] >>
247 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
249 -- non-empty `bucket', scan the list looking
250 -- entry with same length and compare byte by byte.
251 -- _trace ("non-empty bucket"++show ls) $
252 case bucket_match ls len# a# of
254 case copyPrefixStr a# (I# len#) of
256 let f_str = FastString uid# len# barr# in
257 updTbl string_table ft h (f_str:ls) >>
258 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
259 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
261 bucket_match [] _ _ = Nothing
262 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
263 if len# ==# l# && eqStrPrefix a# ba# l# then
266 bucket_match ls len# a#
267 bucket_match (UnicodeStr _ _ : ls) len# a# =
268 bucket_match ls len# a#
270 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
271 mkFastSubStringBA# barr# start# len# =
273 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
275 h = hashSubStrBA barr# start# len#
277 -- _trace ("hashed(b): "++show (I# h)) $
278 lookupTbl ft h >>= \ lookup_result ->
279 case lookup_result of
281 -- no match, add it to table by copying out the
282 -- the string into a ByteArray
283 -- _trace "empty bucket(b)" $
284 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
286 let f_str = FastString uid# len# ba# in
287 updTbl string_table ft h [f_str] >>
288 -- _trace ("new(b): " ++ show f_str) $
291 -- non-empty `bucket', scan the list looking
292 -- entry with same length and compare byte by byte.
293 -- _trace ("non-empty bucket(b)"++show ls) $
294 case bucket_match ls start# len# barr# of
296 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
298 let f_str = FastString uid# len# ba# in
299 updTbl string_table ft h (f_str:ls) >>
300 -- _trace ("new(b): " ++ show f_str) $
303 -- _trace ("re-use(b): "++show v) $
307 bucket_match [] _ _ _ = Nothing
308 bucket_match (v:ls) start# len# ba# =
310 FastString _ l# barr# ->
311 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
314 bucket_match ls start# len# ba#
315 UnicodeStr _ _ -> bucket_match ls start# len# ba#
317 mkFastStringUnicode :: [Int] -> FastString
318 mkFastStringUnicode s =
320 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
324 -- _trace ("hashed(b): "++show (I# h)) $
325 lookupTbl ft h >>= \ lookup_result ->
326 case lookup_result of
328 -- no match, add it to table by copying out the
329 -- the string into a [Int]
330 let f_str = UnicodeStr uid# s in
331 updTbl string_table ft h [f_str] >>
332 -- _trace ("new(b): " ++ show f_str) $
335 -- non-empty `bucket', scan the list looking
336 -- entry with same length and compare byte by byte.
337 -- _trace ("non-empty bucket(b)"++show ls) $
338 case bucket_match ls of
340 let f_str = UnicodeStr uid# s in
341 updTbl string_table ft h (f_str:ls) >>
342 -- _trace ("new(b): " ++ show f_str) $
345 -- _trace ("re-use(b): "++show v) $
349 bucket_match [] = Nothing
350 bucket_match (v@(UnicodeStr _ s'):ls) =
351 if s' == s then Just v else bucket_match ls
352 bucket_match (FastString _ _ _ : ls) = bucket_match ls
354 mkFastStringNarrow :: String -> FastString
355 mkFastStringNarrow str =
356 case packString str of { (I# len#, BA frozen#) ->
357 mkFastSubStringBA# frozen# 0# len#
359 {- 0-indexed array, len# == index to one beyond end of string,
360 i.e., (0,1) => empty string. -}
362 mkFastString :: String -> FastString
363 mkFastString str = if all good str
364 then mkFastStringNarrow str
365 else mkFastStringUnicode (map ord str)
367 good c = c >= '\1' && c <= '\xFF'
369 mkFastStringInt :: [Int] -> FastString
370 mkFastStringInt str = if all good str
371 then mkFastStringNarrow (map chr str)
372 else mkFastStringUnicode str
374 good c = c >= 1 && c <= 0xFF
376 mkFastSubString :: Addr# -> Int -> Int -> FastString
377 mkFastSubString a# (I# start#) (I# len#) =
378 mkFastStringLen# (a# `plusAddr#` start#) len#
382 hashStr :: Addr# -> Int# -> Int#
383 -- use the Addr to produce a hash value between 0 & m (inclusive)
387 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
388 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
389 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
391 c0 = indexCharOffAddr# a# 0#
392 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
393 c2 = indexCharOffAddr# a# (len# -# 1#)
395 c1 = indexCharOffAddr# a# 1#
396 c2 = indexCharOffAddr# a# 2#
399 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
400 -- use the byte array to produce a hash value between 0 & m (inclusive)
401 hashSubStrBA ba# start# len# =
404 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
405 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
406 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
408 c0 = indexCharArray# ba# (start# +# 0#)
409 c1 = indexCharArray# ba# (start# +# (len# `quotInt#` 2# -# 1#))
410 c2 = indexCharArray# ba# (start# +# (len# -# 1#))
412 -- c1 = indexCharArray# ba# 1#
413 -- c2 = indexCharArray# ba# 2#
415 hashUnicode :: [Int] -> Int#
416 -- use the Addr to produce a hash value between 0 & m (inclusive)
418 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
419 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
420 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
424 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
425 I# c2 = s !! (I# (len# -# 1#))
430 cmpFS :: FastString -> FastString -> Ordering
431 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
433 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
434 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
435 cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
436 if u1# ==# u2# then EQ else
437 let l# = if l1# <=# l2# then l1# else l2# in
439 memcmp b1# b2# l# >>= \ (I# res) ->
442 else if res ==# 0# then
443 if l1# ==# l2# then EQ
444 else if l1# <# l2# then LT else GT
449 foreign import ccall unsafe "ghc_memcmp"
450 memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
453 -- -----------------------------------------------------------------------------
454 -- Outputting 'FastString's
456 #if __GLASGOW_HASKELL__ >= 504
458 -- this is our own version of hPutBuf for FastStrings, because in
459 -- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
460 -- The closest is hPutArray in Data.Array.IO, but that does some extra
461 -- range checks that we want to avoid here.
463 foreign import ccall unsafe "__hscore_memcpy_dst_off"
464 memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
466 hPutFS handle (FastString _ l# ba#)
467 | l# ==# 0# = return ()
469 = do wantWritableHandle "hPutFS" handle $
470 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
472 old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
476 raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
478 -- enough room in handle buffer?
479 if (size - w > count)
480 -- There's enough room in the buffer:
481 -- just copy the data in and update bufWPtr.
482 then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
483 writeIORef ref old_buf{ bufWPtr = w + count }
486 -- else, we have to flush
487 else do flushed_buf <- flushWriteBuffer fd stream old_buf
488 writeIORef ref flushed_buf
490 Buffer{ bufBuf=raw, bufState=WriteBuffer,
491 bufRPtr=0, bufWPtr=count, bufSize=count }
492 flushWriteBuffer fd stream this_buf
497 hPutFS :: Handle -> FastString -> IO ()
498 hPutFS handle (FastString _ l# ba#)
499 | l# ==# 0# = return ()
500 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
501 hPutBufBAFull handle mba (I# l#)
503 bot = error "hPutFS.ba"
507 -- ONLY here for debugging the NCG (so -ddump-stix works for string
508 -- literals); no idea if this is really necessary. JRS, 010131
509 hPutFS handle (UnicodeStr _ is)
510 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
512 -- -----------------------------------------------------------------------------
513 -- LitStrings, here for convenience only.
515 type LitString = Ptr ()
517 mkLitString# :: Addr# -> LitString
518 mkLitString# a# = Ptr a#