2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastStringNarrow, -- :: String -> FastString
17 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
29 mkFastStringInt, -- :: [Int] -> FastString
31 uniqueOfFS, -- :: FastString -> Int#
32 lengthFS, -- :: FastString -> Int
33 nullFastString, -- :: FastString -> Bool
35 unpackFS, -- :: FastString -> String
36 unpackIntFS, -- :: FastString -> [Int]
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 headIntFS, -- :: FastString -> Int
40 tailFS, -- :: FastString -> FastString
41 concatFS, -- :: [FastString] -> FastString
42 consFS, -- :: Char -> FastString -> FastString
43 indexFS, -- :: FastString -> Int -> Char
45 hPutFS -- :: Handle -> FastString -> IO ()
48 -- This #define suppresses the "import FastString" that
49 -- HsVersions otherwise produces
50 #define COMPILING_FAST_STRING
51 #include "HsVersions.h"
53 #if __GLASGOW_HASKELL__ < 301
55 import STBase ( StateAndPtr#(..) )
56 import IOHandle ( filePtr, readHandle, writeHandle )
57 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
63 #if __GLASGOW_HASKELL__ < 400
64 import PrelST ( StateAndPtr#(..) )
67 #if __GLASGOW_HASKELL__ <= 303
68 import PrelHandle ( readHandle,
69 # if __GLASGOW_HASKELL__ < 303
76 import PrelIOBase ( Handle__(..), IOError, IOErrorType(..),
77 #if __GLASGOW_HASKELL__ < 400
81 #if __GLASGOW_HASKELL__ >= 303
90 #if __GLASGOW_HASKELL__ < 411
91 import PrelAddr ( Addr(..) )
93 import Addr ( Addr(..) )
94 import Ptr ( Ptr(..) )
96 #if __GLASGOW_HASKELL__ < 407
97 import MutableArray ( MutableArray(..) )
99 import PrelArr ( STArray(..), newSTArray )
100 import IOExts ( hPutBufFull, hPutBufBAFull )
103 import IOExts ( IORef, newIORef, readIORef, writeIORef )
105 import Char ( chr, ord )
107 #define hASH_TBL_SIZE 993
109 #if __GLASGOW_HASKELL__ >= 400
114 @FastString@s are packed representations of strings
115 with a unique id for fast comparisons. The unique id
116 is assigned when creating the @FastString@, using
117 a hash table to map from the character string representation
122 = FastString -- packed repr. on the heap.
124 -- 0 => string literal, comparison
129 | CharStr -- external C string
130 Addr# -- pointer to the (null-terminated) bytes in C land.
131 Int# -- length (cached)
133 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
135 [Int] -- character numbers
137 instance Eq FastString where
138 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
139 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
141 instance Ord FastString where
142 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
143 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
144 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
145 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
150 compare a b = cmpFS a b
152 lengthFS :: FastString -> Int
153 lengthFS (FastString _ l# _) = I# l#
154 lengthFS (CharStr a# l#) = I# l#
155 lengthFS (UnicodeStr _ s) = length s
157 nullFastString :: FastString -> Bool
158 nullFastString (FastString _ l# _) = l# ==# 0#
159 nullFastString (CharStr _ l#) = l# ==# 0#
160 nullFastString (UnicodeStr _ []) = True
161 nullFastString (UnicodeStr _ (_:_)) = False
163 unpackFS :: FastString -> String
164 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
165 unpackFS (CharStr addr len#) =
170 | otherwise = C# ch : unpack (nh +# 1#)
172 ch = indexCharOffAddr# addr nh
173 unpackFS (UnicodeStr _ s) = map chr s
175 unpackIntFS :: FastString -> [Int]
176 unpackIntFS (UnicodeStr _ s) = s
177 unpackIntFS fs = map ord (unpackFS fs)
179 appendFS :: FastString -> FastString -> FastString
180 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
182 concatFS :: [FastString] -> FastString
183 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
185 headFS :: FastString -> Char
186 headFS (FastString _ l# ba#) =
187 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
188 headFS (CharStr a# l#) =
189 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
190 headFS (UnicodeStr _ (c:_)) = chr c
191 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
193 headIntFS :: FastString -> Int
194 headIntFS (UnicodeStr _ (c:_)) = c
195 headIntFS fs = ord (headFS fs)
197 indexFS :: FastString -> Int -> Char
198 indexFS f i@(I# i#) =
201 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
202 | otherwise -> error (msg (I# l#))
204 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
205 | otherwise -> error (msg (I# l#))
206 UnicodeStr _ s -> chr (s!!i)
208 msg l = "indexFS: out of range: " ++ show (l,i)
210 tailFS :: FastString -> FastString
211 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
212 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
214 consFS :: Char -> FastString -> FastString
215 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
217 uniqueOfFS :: FastString -> Int#
218 uniqueOfFS (FastString u# _ _) = u#
219 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
221 [A somewhat moby hack]: to avoid entering all sorts
222 of junk into the hash table, all C char strings
223 are by default left out. The benefit of being in
224 the table is that string comparisons are lightning fast,
225 just an Int# comparison.
227 But, if you want to get the Unique of a CharStr, we
228 enter it into the table and return that unique. This
229 works, but causes the CharStr to be looked up in the hash
230 table each time it is accessed..
232 uniqueOfFS (UnicodeStr u# _) = u#
235 Internally, the compiler will maintain a fast string symbol
236 table, providing sharing and fast comparison. Creation of
237 new @FastString@s then covertly does a lookup, re-using the
238 @FastString@ if there was a hit.
240 Caution: mkFastStringUnicode assumes that if the string is in the
241 table, it sits under the UnicodeStr constructor. Other mkFastString
242 variants analogously assume the FastString constructor.
245 data FastStringTable =
248 (MutableArray# RealWorld [FastString])
250 type FastStringTableVar = IORef FastStringTable
252 string_table :: FastStringTableVar
255 #if __GLASGOW_HASKELL__ < 405
256 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
257 >>= \ (MutableArray _ arr#) ->
258 #elif __GLASGOW_HASKELL__ < 407
259 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
260 >>= \ (MutableArray _ _ arr#) ->
262 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
263 >>= \ (STArray _ _ arr#) ->
265 newIORef (FastStringTable 0# arr#))
267 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
268 lookupTbl (FastStringTable _ arr#) i# =
270 #if __GLASGOW_HASKELL__ < 400
271 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
274 readArray# arr# i# s#)
277 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
278 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
279 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
280 #if __GLASGOW_HASKELL__ < 400
285 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
287 mkFastString# :: Addr# -> Int# -> FastString
288 mkFastString# a# len# =
290 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
294 -- _trace ("hashed: "++show (I# h)) $
295 lookupTbl ft h >>= \ lookup_result ->
296 case lookup_result of
298 -- no match, add it to table by copying out the
299 -- the string into a ByteArray
300 -- _trace "empty bucket" $
301 case copyPrefixStr (A# a#) (I# len#) of
302 #if __GLASGOW_HASKELL__ < 405
303 (ByteArray _ barr#) ->
305 (ByteArray _ _ barr#) ->
307 let f_str = FastString uid# len# barr# in
308 updTbl string_table ft h [f_str] >>
309 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
311 -- non-empty `bucket', scan the list looking
312 -- entry with same length and compare byte by byte.
313 -- _trace ("non-empty bucket"++show ls) $
314 case bucket_match ls len# a# of
316 case copyPrefixStr (A# a#) (I# len#) of
317 #if __GLASGOW_HASKELL__ < 405
318 (ByteArray _ barr#) ->
320 (ByteArray _ _ barr#) ->
322 let f_str = FastString uid# len# barr# in
323 updTbl string_table ft h (f_str:ls) >>
324 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
325 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
327 bucket_match [] _ _ = Nothing
328 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
329 if len# ==# l# && eqStrPrefix a# ba# l# then
332 bucket_match ls len# a#
333 bucket_match (UnicodeStr _ _ : ls) len# a# =
334 bucket_match ls len# a#
336 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
337 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
339 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
340 mkFastSubStringBA# barr# start# len# =
342 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
344 h = hashSubStrBA barr# start# len#
346 -- _trace ("hashed(b): "++show (I# h)) $
347 lookupTbl ft h >>= \ lookup_result ->
348 case lookup_result of
350 -- no match, add it to table by copying out the
351 -- the string into a ByteArray
352 -- _trace "empty bucket(b)" $
353 #if __GLASGOW_HASKELL__ < 405
354 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
357 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
358 (ByteArray _ _ ba#) ->
360 let f_str = FastString uid# len# ba# in
361 updTbl string_table ft h [f_str] >>
362 -- _trace ("new(b): " ++ show f_str) $
365 -- non-empty `bucket', scan the list looking
366 -- entry with same length and compare byte by byte.
367 -- _trace ("non-empty bucket(b)"++show ls) $
368 case bucket_match ls start# len# barr# of
370 #if __GLASGOW_HASKELL__ < 405
371 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
374 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
375 (ByteArray _ _ ba#) ->
377 let f_str = FastString uid# len# ba# in
378 updTbl string_table ft h (f_str:ls) >>
379 -- _trace ("new(b): " ++ show f_str) $
382 -- _trace ("re-use(b): "++show v) $
388 bucket_match [] _ _ _ = Nothing
389 bucket_match (v:ls) start# len# ba# =
391 FastString _ l# barr# ->
392 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
395 bucket_match ls start# len# ba#
396 UnicodeStr _ _ -> bucket_match ls start# len# ba#
398 mkFastStringUnicode :: [Int] -> FastString
399 mkFastStringUnicode s =
401 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
405 -- _trace ("hashed(b): "++show (I# h)) $
406 lookupTbl ft h >>= \ lookup_result ->
407 case lookup_result of
409 -- no match, add it to table by copying out the
410 -- the string into a [Int]
411 let f_str = UnicodeStr uid# s in
412 updTbl string_table ft h [f_str] >>
413 -- _trace ("new(b): " ++ show f_str) $
416 -- non-empty `bucket', scan the list looking
417 -- entry with same length and compare byte by byte.
418 -- _trace ("non-empty bucket(b)"++show ls) $
419 case bucket_match ls of
421 let f_str = UnicodeStr uid# s in
422 updTbl string_table ft h (f_str:ls) >>
423 -- _trace ("new(b): " ++ show f_str) $
426 -- _trace ("re-use(b): "++show v) $
430 bucket_match [] = Nothing
431 bucket_match (v@(UnicodeStr _ s'):ls) =
432 if s' == s then Just v else bucket_match ls
433 bucket_match (FastString _ _ _ : ls) = bucket_match ls
435 mkFastCharString :: Addr -> FastString
436 mkFastCharString a@(A# a#) =
437 case strLength a of{ (I# len#) -> CharStr a# len# }
439 mkFastCharString# :: Addr# -> FastString
440 mkFastCharString# a# =
441 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
443 mkFastCharString2 :: Addr -> Int -> FastString
444 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
446 mkFastStringNarrow :: String -> FastString
447 mkFastStringNarrow str =
448 case packString str of
449 #if __GLASGOW_HASKELL__ < 405
450 (ByteArray (_,I# len#) frozen#) ->
452 (ByteArray _ (I# len#) frozen#) ->
454 mkFastSubStringBA# frozen# 0# len#
455 {- 0-indexed array, len# == index to one beyond end of string,
456 i.e., (0,1) => empty string. -}
458 mkFastString :: String -> FastString
459 mkFastString str = if all good str
460 then mkFastStringNarrow str
461 else mkFastStringUnicode (map ord str)
463 good c = c >= '\1' && c <= '\xFF'
465 mkFastStringInt :: [Int] -> FastString
466 mkFastStringInt str = if all good str
467 then mkFastStringNarrow (map chr str)
468 else mkFastStringUnicode str
470 good c = c >= 1 && c <= 0xFF
472 mkFastSubString :: Addr -> Int -> Int -> FastString
473 mkFastSubString (A# a#) (I# start#) (I# len#) =
474 mkFastString# (addrOffset# a# start#) len#
478 hashStr :: Addr# -> Int# -> Int#
479 -- use the Addr to produce a hash value between 0 & m (inclusive)
483 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
484 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
485 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
487 c0 = indexCharOffAddr# a# 0#
488 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
489 c2 = indexCharOffAddr# a# (len# -# 1#)
491 c1 = indexCharOffAddr# a# 1#
492 c2 = indexCharOffAddr# a# 2#
495 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
496 -- use the byte array to produce a hash value between 0 & m (inclusive)
497 hashSubStrBA ba# start# len# =
500 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
501 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
502 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
504 c0 = indexCharArray# ba# 0#
505 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
506 c2 = indexCharArray# ba# (len# -# 1#)
508 -- c1 = indexCharArray# ba# 1#
509 -- c2 = indexCharArray# ba# 2#
511 hashUnicode :: [Int] -> Int#
512 -- use the Addr to produce a hash value between 0 & m (inclusive)
514 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
515 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
516 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
520 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
521 I# c2 = s !! (I# (len# -# 1#))
526 cmpFS :: FastString -> FastString -> Ordering
527 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
529 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
530 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
531 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
536 #if __GLASGOW_HASKELL__ < 405
537 _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) ->
539 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
543 else if res ==# 0# then EQ
547 #if __GLASGOW_HASKELL__ < 405
553 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
555 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
558 else if res ==# 0# then EQ
564 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
566 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
569 else if res ==# 0# then EQ
573 #if __GLASGOW_HASKELL__ < 405
574 ba1 = ByteArray ((error "")::(Int,Int)) bs1
576 ba1 = ByteArray (error "") ((error "")::Int) bs1
580 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
581 = -- try them the other way 'round
582 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
586 Outputting @FastString@s is quick, just block copying the chunk (using
590 hPutFS :: Handle -> FastString -> IO ()
591 #if __GLASGOW_HASKELL__ <= 302
592 hPutFS handle (FastString _ l# ba#) =
596 readHandle handle >>= \ htype ->
598 ErrorHandle ioError ->
599 writeHandle handle htype >>
602 writeHandle handle htype >>
603 fail MkIOError(handle,IllegalOperation,"handle is closed")
604 SemiClosedHandle _ _ ->
605 writeHandle handle htype >>
606 fail MkIOError(handle,IllegalOperation,"handle is closed")
608 writeHandle handle htype >>
609 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
611 let fp = filePtr htype in
613 #if __GLASGOW_HASKELL__ < 405
614 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
616 _ccall_ writeFile (ByteArray ((error "")::Int) ((error "")::Int) ba#) fp (I# l#) >>= \rc ->
621 constructError "hPutFS" >>= \ err ->
623 hPutFS handle (CharStr a# l#) =
627 readHandle handle >>= \ htype ->
629 ErrorHandle ioError ->
630 writeHandle handle htype >>
633 writeHandle handle htype >>
634 fail MkIOError(handle,IllegalOperation,"handle is closed")
635 SemiClosedHandle _ _ ->
636 writeHandle handle htype >>
637 fail MkIOError(handle,IllegalOperation,"handle is closed")
639 writeHandle handle htype >>
640 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
642 let fp = filePtr htype in
644 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
648 constructError "hPutFS" >>= \ err ->
653 hPutFS handle (FastString _ l# ba#)
654 | l# ==# 0# = return ()
655 #if __GLASGOW_HASKELL__ < 405
656 | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#)
657 #elif __GLASGOW_HASKELL__ < 407
658 | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#)
660 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
661 hPutBufBAFull handle mba (I# l#)
664 bot = error "hPutFS.ba"
666 --ToDo: avoid silly code duplic.
668 hPutFS handle (CharStr a# l#)
669 | l# ==# 0# = return ()
670 #if __GLASGOW_HASKELL__ < 407
671 | otherwise = hPutBuf handle (A# a#) (I# l#)
672 #elif __GLASGOW_HASKELL__ < 411
673 | otherwise = hPutBufFull handle (A# a#) (I# l#)
675 | otherwise = hPutBufFull handle (Ptr a#) (I# l#)
678 -- ONLY here for debugging the NCG (so -ddump-stix works for string
679 -- literals); no idea if this is really necessary. JRS, 010131
680 hPutFS handle (UnicodeStr _ is)
681 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")