2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
24 mkFastString, -- :: String -> FastString
25 mkFastStringNarrow, -- :: String -> FastString
26 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
28 mkFastString#, -- :: Addr# -> FastString
29 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
31 mkFastStringInt, -- :: [Int] -> FastString
33 uniqueOfFS, -- :: FastString -> Int#
34 lengthFS, -- :: FastString -> Int
35 nullFastString, -- :: FastString -> Bool
37 unpackFS, -- :: FastString -> String
38 unpackIntFS, -- :: FastString -> [Int]
39 appendFS, -- :: FastString -> FastString -> FastString
40 headFS, -- :: FastString -> Char
41 headIntFS, -- :: FastString -> Int
42 tailFS, -- :: FastString -> FastString
43 concatFS, -- :: [FastString] -> FastString
44 consFS, -- :: Char -> FastString -> FastString
45 indexFS, -- :: FastString -> Int -> Char
46 nilFS, -- :: FastString
48 hPutFS, -- :: Handle -> FastString -> IO ()
51 mkLitString# -- :: Addr# -> LitString
54 -- This #define suppresses the "import FastString" that
55 -- HsVersions otherwise produces
56 #define COMPILING_FAST_STRING
57 #include "HsVersions.h"
59 #if __GLASGOW_HASKELL__ < 503
60 import PrelIOBase ( IO(..) )
62 import GHC.IOBase ( IO(..) )
67 import UNSAFE_IO ( unsafePerformIO )
68 import MONAD_ST ( stToIO )
69 import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
71 #if __GLASGOW_HASKELL__ < 503
72 import PrelArr ( STArray(..), newSTArray )
74 import GHC.Arr ( STArray(..), newSTArray )
77 #if __GLASGOW_HASKELL__ >= 504
82 import IOExts ( hPutBufBAFull )
86 import Char ( chr, ord )
88 #define hASH_TBL_SIZE 993
91 @FastString@s are packed representations of strings
92 with a unique id for fast comparisons. The unique id
93 is assigned when creating the @FastString@, using
94 a hash table to map from the character string representation
99 = FastString -- packed repr. on the heap.
101 -- 0 => string literal, comparison
106 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
108 [Int] -- character numbers
110 instance Eq FastString where
111 -- shortcut for real FastStrings
112 (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
113 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
115 (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
116 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
118 instance Ord FastString where
119 -- Compares lexicographically, not by unique
120 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
121 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
122 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
123 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
128 compare a b = cmpFS a b
130 lengthFS :: FastString -> Int
131 lengthFS (FastString _ l# _) = I# l#
132 lengthFS (UnicodeStr _ s) = length s
134 nullFastString :: FastString -> Bool
135 nullFastString (FastString _ l# _) = l# ==# 0#
136 nullFastString (UnicodeStr _ []) = True
137 nullFastString (UnicodeStr _ (_:_)) = False
139 unpackFS :: FastString -> String
140 unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
141 unpackFS (UnicodeStr _ s) = map chr s
143 unpackIntFS :: FastString -> [Int]
144 unpackIntFS (UnicodeStr _ s) = s
145 unpackIntFS fs = map ord (unpackFS fs)
147 appendFS :: FastString -> FastString -> FastString
148 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
150 concatFS :: [FastString] -> FastString
151 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
153 headFS :: FastString -> Char
154 headFS (FastString _ l# ba#) =
155 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
156 headFS (UnicodeStr _ (c:_)) = chr c
157 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
159 headIntFS :: FastString -> Int
160 headIntFS (UnicodeStr _ (c:_)) = c
161 headIntFS fs = ord (headFS fs)
163 indexFS :: FastString -> Int -> Char
164 indexFS f i@(I# i#) =
167 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
168 | otherwise -> error (msg (I# l#))
169 UnicodeStr _ s -> chr (s!!i)
171 msg l = "indexFS: out of range: " ++ show (l,i)
173 tailFS :: FastString -> FastString
174 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
175 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
177 consFS :: Char -> FastString -> FastString
178 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
180 uniqueOfFS :: FastString -> Int#
181 uniqueOfFS (FastString u# _ _) = u#
182 uniqueOfFS (UnicodeStr u# _) = u#
184 nilFS = mkFastString ""
187 Internally, the compiler will maintain a fast string symbol
188 table, providing sharing and fast comparison. Creation of
189 new @FastString@s then covertly does a lookup, re-using the
190 @FastString@ if there was a hit.
192 Caution: mkFastStringUnicode assumes that if the string is in the
193 table, it sits under the UnicodeStr constructor. Other mkFastString
194 variants analogously assume the FastString constructor.
197 data FastStringTable =
200 (MutableArray# RealWorld [FastString])
202 type FastStringTableVar = IORef FastStringTable
204 string_table :: FastStringTableVar
207 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
208 >>= \ (STArray _ _ arr#) ->
209 newIORef (FastStringTable 0# arr#))
211 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
212 lookupTbl (FastStringTable _ arr#) i# =
214 readArray# arr# i# s#)
216 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
217 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
218 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
220 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
222 mkFastString# :: Addr# -> FastString
224 case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
226 mkFastStringLen# :: Addr# -> Int# -> FastString
227 mkFastStringLen# a# len# =
229 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
233 -- _trace ("hashed: "++show (I# h)) $
234 lookupTbl ft h >>= \ lookup_result ->
235 case lookup_result of
237 -- no match, add it to table by copying out the
238 -- the string into a ByteArray
239 -- _trace "empty bucket" $
240 case copyPrefixStr a# (I# len#) of
242 let f_str = FastString uid# len# barr# in
243 updTbl string_table ft h [f_str] >>
244 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
246 -- non-empty `bucket', scan the list looking
247 -- entry with same length and compare byte by byte.
248 -- _trace ("non-empty bucket"++show ls) $
249 case bucket_match ls len# a# of
251 case copyPrefixStr a# (I# len#) of
253 let f_str = FastString uid# len# barr# in
254 updTbl string_table ft h (f_str:ls) >>
255 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
256 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
258 bucket_match [] _ _ = Nothing
259 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
260 if len# ==# l# && eqStrPrefix a# ba# l# then
263 bucket_match ls len# a#
264 bucket_match (UnicodeStr _ _ : ls) len# a# =
265 bucket_match ls len# a#
267 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
268 mkFastSubStringBA# barr# start# len# =
270 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
272 h = hashSubStrBA barr# start# len#
274 -- _trace ("hashed(b): "++show (I# h)) $
275 lookupTbl ft h >>= \ lookup_result ->
276 case lookup_result of
278 -- no match, add it to table by copying out the
279 -- the string into a ByteArray
280 -- _trace "empty bucket(b)" $
281 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
283 let f_str = FastString uid# len# ba# in
284 updTbl string_table ft h [f_str] >>
285 -- _trace ("new(b): " ++ show f_str) $
288 -- non-empty `bucket', scan the list looking
289 -- entry with same length and compare byte by byte.
290 -- _trace ("non-empty bucket(b)"++show ls) $
291 case bucket_match ls start# len# barr# of
293 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
295 let f_str = FastString uid# len# ba# in
296 updTbl string_table ft h (f_str:ls) >>
297 -- _trace ("new(b): " ++ show f_str) $
300 -- _trace ("re-use(b): "++show v) $
304 bucket_match [] _ _ _ = Nothing
305 bucket_match (v:ls) start# len# ba# =
307 FastString _ l# barr# ->
308 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
311 bucket_match ls start# len# ba#
312 UnicodeStr _ _ -> bucket_match ls start# len# ba#
314 mkFastStringUnicode :: [Int] -> FastString
315 mkFastStringUnicode s =
317 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
321 -- _trace ("hashed(b): "++show (I# h)) $
322 lookupTbl ft h >>= \ lookup_result ->
323 case lookup_result of
325 -- no match, add it to table by copying out the
326 -- the string into a [Int]
327 let f_str = UnicodeStr uid# s in
328 updTbl string_table ft h [f_str] >>
329 -- _trace ("new(b): " ++ show f_str) $
332 -- non-empty `bucket', scan the list looking
333 -- entry with same length and compare byte by byte.
334 -- _trace ("non-empty bucket(b)"++show ls) $
335 case bucket_match ls of
337 let f_str = UnicodeStr uid# s in
338 updTbl string_table ft h (f_str:ls) >>
339 -- _trace ("new(b): " ++ show f_str) $
342 -- _trace ("re-use(b): "++show v) $
346 bucket_match [] = Nothing
347 bucket_match (v@(UnicodeStr _ s'):ls) =
348 if s' == s then Just v else bucket_match ls
349 bucket_match (FastString _ _ _ : ls) = bucket_match ls
351 mkFastStringNarrow :: String -> FastString
352 mkFastStringNarrow str =
353 case packString str of { (I# len#, BA frozen#) ->
354 mkFastSubStringBA# frozen# 0# len#
356 {- 0-indexed array, len# == index to one beyond end of string,
357 i.e., (0,1) => empty string. -}
359 mkFastString :: String -> FastString
360 mkFastString str = if all good str
361 then mkFastStringNarrow str
362 else mkFastStringUnicode (map ord str)
364 good c = c >= '\1' && c <= '\xFF'
366 mkFastStringInt :: [Int] -> FastString
367 mkFastStringInt str = if all good str
368 then mkFastStringNarrow (map chr str)
369 else mkFastStringUnicode str
371 good c = c >= 1 && c <= 0xFF
373 mkFastSubString :: Addr# -> Int -> Int -> FastString
374 mkFastSubString a# (I# start#) (I# len#) =
375 mkFastStringLen# (a# `plusAddr#` start#) len#
379 hashStr :: Addr# -> Int# -> Int#
380 -- use the Addr to produce a hash value between 0 & m (inclusive)
384 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
385 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
386 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
388 c0 = indexCharOffAddr# a# 0#
389 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
390 c2 = indexCharOffAddr# a# (len# -# 1#)
392 c1 = indexCharOffAddr# a# 1#
393 c2 = indexCharOffAddr# a# 2#
396 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
397 -- use the byte array to produce a hash value between 0 & m (inclusive)
398 hashSubStrBA ba# start# len# =
401 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
402 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
403 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
405 c0 = indexCharArray# ba# (start# +# 0#)
406 c1 = indexCharArray# ba# (start# +# (len# `quotInt#` 2# -# 1#))
407 c2 = indexCharArray# ba# (start# +# (len# -# 1#))
409 -- c1 = indexCharArray# ba# 1#
410 -- c2 = indexCharArray# ba# 2#
412 hashUnicode :: [Int] -> Int#
413 -- use the Addr to produce a hash value between 0 & m (inclusive)
415 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
416 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
417 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
421 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
422 I# c2 = s !! (I# (len# -# 1#))
427 cmpFS :: FastString -> FastString -> Ordering
428 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
430 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
431 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
432 cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
433 if u1# ==# u2# then EQ else
434 let l# = if l1# <=# l2# then l1# else l2# in
436 memcmp b1# b2# l# >>= \ (I# res) ->
439 else if res ==# 0# then
440 if l1# ==# l2# then EQ
441 else if l1# <# l2# then LT else GT
445 foreign import ccall "ghc_memcmp" unsafe
446 memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
448 -- -----------------------------------------------------------------------------
449 -- Outputting 'FastString's
451 #if __GLASGOW_HASKELL__ >= 504
453 -- this is our own version of hPutBuf for FastStrings, because in
454 -- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
455 -- The closest is hPutArray in Data.Array.IO, but that does some extra
456 -- range checks that we want to avoid here.
458 foreign import ccall unsafe "__hscore_memcpy_dst_off"
459 memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
461 hPutFS handle (FastString _ l# ba#)
462 | l# ==# 0# = return ()
464 = do wantWritableHandle "hPutFS" handle $
465 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
467 old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
471 raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
473 -- enough room in handle buffer?
474 if (size - w > count)
475 -- There's enough room in the buffer:
476 -- just copy the data in and update bufWPtr.
477 then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
478 writeIORef ref old_buf{ bufWPtr = w + count }
481 -- else, we have to flush
482 else do flushed_buf <- flushWriteBuffer fd stream old_buf
483 writeIORef ref flushed_buf
485 Buffer{ bufBuf=raw, bufState=WriteBuffer,
486 bufRPtr=0, bufWPtr=count, bufSize=count }
487 flushWriteBuffer fd stream this_buf
492 hPutFS :: Handle -> FastString -> IO ()
493 hPutFS handle (FastString _ l# ba#)
494 | l# ==# 0# = return ()
495 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
496 hPutBufBAFull handle mba (I# l#)
498 bot = error "hPutFS.ba"
502 -- ONLY here for debugging the NCG (so -ddump-stix works for string
503 -- literals); no idea if this is really necessary. JRS, 010131
504 hPutFS handle (UnicodeStr _ is)
505 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
507 -- -----------------------------------------------------------------------------
508 -- LitStrings, here for convenience only.
510 type LitString = Ptr ()
512 mkLitString# :: Addr# -> LitString
513 mkLitString# a# = Ptr a#