2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastStringNarrow, -- :: String -> FastString
17 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
29 mkFastStringInt, -- :: [Int] -> FastString
31 uniqueOfFS, -- :: FastString -> Int#
32 lengthFS, -- :: FastString -> Int
33 nullFastString, -- :: FastString -> Bool
35 unpackFS, -- :: FastString -> String
36 unpackIntFS, -- :: FastString -> [Int]
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 headIntFS, -- :: FastString -> Int
40 tailFS, -- :: FastString -> FastString
41 concatFS, -- :: [FastString] -> FastString
42 consFS, -- :: Char -> FastString -> FastString
43 indexFS, -- :: FastString -> Int -> Char
45 hPutFS -- :: Handle -> FastString -> IO ()
48 -- This #define suppresses the "import FastString" that
49 -- HsVersions otherwise produces
50 #define COMPILING_FAST_STRING
51 #include "HsVersions.h"
54 import PrelIOBase ( IO(..) )
58 #if __GLASGOW_HASKELL__ < 411
59 import PrelAddr ( Addr(..) )
61 import Addr ( Addr(..) )
62 import Ptr ( Ptr(..) )
64 #if __GLASGOW_HASKELL__ < 407
65 import MutableArray ( MutableArray(..) )
67 import PrelArr ( STArray(..), newSTArray )
68 import IOExts ( hPutBufFull, hPutBufBAFull )
71 import IOExts ( IORef, newIORef, readIORef, writeIORef )
73 import Char ( chr, ord )
75 #define hASH_TBL_SIZE 993
78 @FastString@s are packed representations of strings
79 with a unique id for fast comparisons. The unique id
80 is assigned when creating the @FastString@, using
81 a hash table to map from the character string representation
86 = FastString -- packed repr. on the heap.
88 -- 0 => string literal, comparison
93 | CharStr -- external C string
94 Addr# -- pointer to the (null-terminated) bytes in C land.
95 Int# -- length (cached)
97 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
99 [Int] -- character numbers
101 instance Eq FastString where
102 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
103 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
105 instance Ord FastString where
106 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
107 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
108 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
109 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
114 compare a b = cmpFS a b
116 lengthFS :: FastString -> Int
117 lengthFS (FastString _ l# _) = I# l#
118 lengthFS (CharStr a# l#) = I# l#
119 lengthFS (UnicodeStr _ s) = length s
121 nullFastString :: FastString -> Bool
122 nullFastString (FastString _ l# _) = l# ==# 0#
123 nullFastString (CharStr _ l#) = l# ==# 0#
124 nullFastString (UnicodeStr _ []) = True
125 nullFastString (UnicodeStr _ (_:_)) = False
127 unpackFS :: FastString -> String
128 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
129 unpackFS (CharStr addr len#) =
134 | otherwise = C# ch : unpack (nh +# 1#)
136 ch = indexCharOffAddr# addr nh
137 unpackFS (UnicodeStr _ s) = map chr s
139 unpackIntFS :: FastString -> [Int]
140 unpackIntFS (UnicodeStr _ s) = s
141 unpackIntFS fs = map ord (unpackFS fs)
143 appendFS :: FastString -> FastString -> FastString
144 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
146 concatFS :: [FastString] -> FastString
147 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
149 headFS :: FastString -> Char
150 headFS (FastString _ l# ba#) =
151 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
152 headFS (CharStr a# l#) =
153 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
154 headFS (UnicodeStr _ (c:_)) = chr c
155 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
157 headIntFS :: FastString -> Int
158 headIntFS (UnicodeStr _ (c:_)) = c
159 headIntFS fs = ord (headFS fs)
161 indexFS :: FastString -> Int -> Char
162 indexFS f i@(I# i#) =
165 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
166 | otherwise -> error (msg (I# l#))
168 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
169 | otherwise -> error (msg (I# l#))
170 UnicodeStr _ s -> chr (s!!i)
172 msg l = "indexFS: out of range: " ++ show (l,i)
174 tailFS :: FastString -> FastString
175 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
176 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
178 consFS :: Char -> FastString -> FastString
179 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
181 uniqueOfFS :: FastString -> Int#
182 uniqueOfFS (FastString u# _ _) = u#
183 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
185 [A somewhat moby hack]: to avoid entering all sorts
186 of junk into the hash table, all C char strings
187 are by default left out. The benefit of being in
188 the table is that string comparisons are lightning fast,
189 just an Int# comparison.
191 But, if you want to get the Unique of a CharStr, we
192 enter it into the table and return that unique. This
193 works, but causes the CharStr to be looked up in the hash
194 table each time it is accessed..
196 uniqueOfFS (UnicodeStr u# _) = u#
199 Internally, the compiler will maintain a fast string symbol
200 table, providing sharing and fast comparison. Creation of
201 new @FastString@s then covertly does a lookup, re-using the
202 @FastString@ if there was a hit.
204 Caution: mkFastStringUnicode assumes that if the string is in the
205 table, it sits under the UnicodeStr constructor. Other mkFastString
206 variants analogously assume the FastString constructor.
209 data FastStringTable =
212 (MutableArray# RealWorld [FastString])
214 type FastStringTableVar = IORef FastStringTable
216 string_table :: FastStringTableVar
219 #if __GLASGOW_HASKELL__ < 405
220 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
221 >>= \ (MutableArray _ arr#) ->
222 #elif __GLASGOW_HASKELL__ < 407
223 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
224 >>= \ (MutableArray _ _ arr#) ->
226 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
227 >>= \ (STArray _ _ arr#) ->
229 newIORef (FastStringTable 0# arr#))
231 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
232 lookupTbl (FastStringTable _ arr#) i# =
234 readArray# arr# i# s#)
236 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
237 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
238 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
240 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
242 mkFastString# :: Addr# -> Int# -> FastString
243 mkFastString# a# len# =
245 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
249 -- _trace ("hashed: "++show (I# h)) $
250 lookupTbl ft h >>= \ lookup_result ->
251 case lookup_result of
253 -- no match, add it to table by copying out the
254 -- the string into a ByteArray
255 -- _trace "empty bucket" $
256 case copyPrefixStr (A# a#) (I# len#) of
257 #if __GLASGOW_HASKELL__ < 405
258 (ByteArray _ barr#) ->
260 (ByteArray _ _ barr#) ->
262 let f_str = FastString uid# len# barr# in
263 updTbl string_table ft h [f_str] >>
264 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
266 -- non-empty `bucket', scan the list looking
267 -- entry with same length and compare byte by byte.
268 -- _trace ("non-empty bucket"++show ls) $
269 case bucket_match ls len# a# of
271 case copyPrefixStr (A# a#) (I# len#) of
272 #if __GLASGOW_HASKELL__ < 405
273 (ByteArray _ barr#) ->
275 (ByteArray _ _ barr#) ->
277 let f_str = FastString uid# len# barr# in
278 updTbl string_table ft h (f_str:ls) >>
279 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
280 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
282 bucket_match [] _ _ = Nothing
283 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
284 if len# ==# l# && eqStrPrefix a# ba# l# then
287 bucket_match ls len# a#
288 bucket_match (UnicodeStr _ _ : ls) len# a# =
289 bucket_match ls len# a#
291 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
292 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
294 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
295 mkFastSubStringBA# barr# start# len# =
297 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
299 h = hashSubStrBA barr# start# len#
301 -- _trace ("hashed(b): "++show (I# h)) $
302 lookupTbl ft h >>= \ lookup_result ->
303 case lookup_result of
305 -- no match, add it to table by copying out the
306 -- the string into a ByteArray
307 -- _trace "empty bucket(b)" $
308 #if __GLASGOW_HASKELL__ < 405
309 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
312 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
313 (ByteArray _ _ ba#) ->
315 let f_str = FastString uid# len# ba# in
316 updTbl string_table ft h [f_str] >>
317 -- _trace ("new(b): " ++ show f_str) $
320 -- non-empty `bucket', scan the list looking
321 -- entry with same length and compare byte by byte.
322 -- _trace ("non-empty bucket(b)"++show ls) $
323 case bucket_match ls start# len# barr# of
325 #if __GLASGOW_HASKELL__ < 405
326 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
329 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
330 (ByteArray _ _ ba#) ->
332 let f_str = FastString uid# len# ba# in
333 updTbl string_table ft h (f_str:ls) >>
334 -- _trace ("new(b): " ++ show f_str) $
337 -- _trace ("re-use(b): "++show v) $
343 bucket_match [] _ _ _ = Nothing
344 bucket_match (v:ls) start# len# ba# =
346 FastString _ l# barr# ->
347 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
350 bucket_match ls start# len# ba#
351 UnicodeStr _ _ -> bucket_match ls start# len# ba#
353 mkFastStringUnicode :: [Int] -> FastString
354 mkFastStringUnicode s =
356 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
360 -- _trace ("hashed(b): "++show (I# h)) $
361 lookupTbl ft h >>= \ lookup_result ->
362 case lookup_result of
364 -- no match, add it to table by copying out the
365 -- the string into a [Int]
366 let f_str = UnicodeStr uid# s in
367 updTbl string_table ft h [f_str] >>
368 -- _trace ("new(b): " ++ show f_str) $
371 -- non-empty `bucket', scan the list looking
372 -- entry with same length and compare byte by byte.
373 -- _trace ("non-empty bucket(b)"++show ls) $
374 case bucket_match ls of
376 let f_str = UnicodeStr uid# s in
377 updTbl string_table ft h (f_str:ls) >>
378 -- _trace ("new(b): " ++ show f_str) $
381 -- _trace ("re-use(b): "++show v) $
385 bucket_match [] = Nothing
386 bucket_match (v@(UnicodeStr _ s'):ls) =
387 if s' == s then Just v else bucket_match ls
388 bucket_match (FastString _ _ _ : ls) = bucket_match ls
390 mkFastCharString :: Addr -> FastString
391 mkFastCharString a@(A# a#) =
392 case strLength a of{ (I# len#) -> CharStr a# len# }
394 mkFastCharString# :: Addr# -> FastString
395 mkFastCharString# a# =
396 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
398 mkFastCharString2 :: Addr -> Int -> FastString
399 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
401 mkFastStringNarrow :: String -> FastString
402 mkFastStringNarrow str =
403 case packString str of
404 #if __GLASGOW_HASKELL__ < 405
405 (ByteArray (_,I# len#) frozen#) ->
407 (ByteArray _ (I# len#) frozen#) ->
409 mkFastSubStringBA# frozen# 0# len#
410 {- 0-indexed array, len# == index to one beyond end of string,
411 i.e., (0,1) => empty string. -}
413 mkFastString :: String -> FastString
414 mkFastString str = if all good str
415 then mkFastStringNarrow str
416 else mkFastStringUnicode (map ord str)
418 good c = c >= '\1' && c <= '\xFF'
420 mkFastStringInt :: [Int] -> FastString
421 mkFastStringInt str = if all good str
422 then mkFastStringNarrow (map chr str)
423 else mkFastStringUnicode str
425 good c = c >= 1 && c <= 0xFF
427 mkFastSubString :: Addr -> Int -> Int -> FastString
428 mkFastSubString (A# a#) (I# start#) (I# len#) =
429 mkFastString# (addrOffset# a# start#) len#
433 hashStr :: Addr# -> Int# -> Int#
434 -- use the Addr to produce a hash value between 0 & m (inclusive)
438 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
439 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
440 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
442 c0 = indexCharOffAddr# a# 0#
443 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
444 c2 = indexCharOffAddr# a# (len# -# 1#)
446 c1 = indexCharOffAddr# a# 1#
447 c2 = indexCharOffAddr# a# 2#
450 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
451 -- use the byte array to produce a hash value between 0 & m (inclusive)
452 hashSubStrBA ba# start# len# =
455 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
456 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
457 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
459 c0 = indexCharArray# ba# 0#
460 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
461 c2 = indexCharArray# ba# (len# -# 1#)
463 -- c1 = indexCharArray# ba# 1#
464 -- c2 = indexCharArray# ba# 2#
466 hashUnicode :: [Int] -> Int#
467 -- use the Addr to produce a hash value between 0 & m (inclusive)
469 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
470 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
471 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
475 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
476 I# c2 = s !! (I# (len# -# 1#))
481 cmpFS :: FastString -> FastString -> Ordering
482 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
484 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
485 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
486 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
491 #if __GLASGOW_HASKELL__ < 405
492 _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) ->
494 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
498 else if res ==# 0# then EQ
502 #if __GLASGOW_HASKELL__ < 405
508 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
510 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
513 else if res ==# 0# then EQ
519 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
521 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
524 else if res ==# 0# then EQ
528 #if __GLASGOW_HASKELL__ < 405
529 ba1 = ByteArray ((error "")::(Int,Int)) bs1
531 ba1 = ByteArray (error "") ((error "")::Int) bs1
535 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
536 = -- try them the other way 'round
537 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
541 Outputting @FastString@s is quick, just block copying the chunk (using
545 hPutFS :: Handle -> FastString -> IO ()
546 hPutFS handle (FastString _ l# ba#)
547 | l# ==# 0# = return ()
548 #if __GLASGOW_HASKELL__ < 405
549 | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#)
550 #elif __GLASGOW_HASKELL__ < 407
551 | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#)
553 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
554 hPutBufBAFull handle mba (I# l#)
556 bot = error "hPutFS.ba"
558 --ToDo: avoid silly code duplic.
560 hPutFS handle (CharStr a# l#)
561 | l# ==# 0# = return ()
562 #if __GLASGOW_HASKELL__ < 407
563 | otherwise = hPutBuf handle (A# a#) (I# l#)
564 #elif __GLASGOW_HASKELL__ < 411
565 | otherwise = hPutBufFull handle (A# a#) (I# l#)
567 | otherwise = hPutBufFull handle (Ptr a#) (I# l#)
570 -- ONLY here for debugging the NCG (so -ddump-stix works for string
571 -- literals); no idea if this is really necessary. JRS, 010131
572 hPutFS handle (UnicodeStr _ is)
573 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")