2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastStringNarrow, -- :: String -> FastString
17 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
29 mkFastStringInt, -- :: [Int] -> FastString
31 uniqueOfFS, -- :: FastString -> Int#
32 lengthFS, -- :: FastString -> Int
33 nullFastString, -- :: FastString -> Bool
35 unpackFS, -- :: FastString -> String
36 unpackIntFS, -- :: FastString -> [Int]
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 headIntFS, -- :: FastString -> Int
40 tailFS, -- :: FastString -> FastString
41 concatFS, -- :: [FastString] -> FastString
42 consFS, -- :: Char -> FastString -> FastString
43 indexFS, -- :: FastString -> Int -> Char
45 hPutFS -- :: Handle -> FastString -> IO ()
48 -- This #define suppresses the "import FastString" that
49 -- HsVersions otherwise produces
50 #define COMPILING_FAST_STRING
51 #include "HsVersions.h"
53 #if __GLASGOW_HASKELL__ < 301
55 import STBase ( StateAndPtr#(..) )
56 import IOHandle ( filePtr, readHandle, writeHandle )
57 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
63 #if __GLASGOW_HASKELL__ < 400
64 import PrelST ( StateAndPtr#(..) )
67 #if __GLASGOW_HASKELL__ <= 303
68 import PrelHandle ( readHandle,
69 # if __GLASGOW_HASKELL__ < 303
76 import PrelIOBase ( Handle__(..), IOError, IOErrorType(..),
77 #if __GLASGOW_HASKELL__ < 400
81 #if __GLASGOW_HASKELL__ >= 301 && __GLASGOW_HASKELL__ <= 302
89 #if __GLASGOW_HASKELL__ < 411
90 import PrelAddr ( Addr(..) )
92 import Addr ( Addr(..) )
93 import Ptr ( Ptr(..) )
95 #if __GLASGOW_HASKELL__ < 407
96 import MutableArray ( MutableArray(..) )
98 import PrelArr ( STArray(..), newSTArray )
99 import IOExts ( hPutBufFull, hPutBufBAFull )
102 import IOExts ( IORef, newIORef, readIORef, writeIORef )
104 import Char ( chr, ord )
106 #define hASH_TBL_SIZE 993
108 #if __GLASGOW_HASKELL__ >= 400
113 @FastString@s are packed representations of strings
114 with a unique id for fast comparisons. The unique id
115 is assigned when creating the @FastString@, using
116 a hash table to map from the character string representation
121 = FastString -- packed repr. on the heap.
123 -- 0 => string literal, comparison
128 | CharStr -- external C string
129 Addr# -- pointer to the (null-terminated) bytes in C land.
130 Int# -- length (cached)
132 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
134 [Int] -- character numbers
136 instance Eq FastString where
137 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
138 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
140 instance Ord FastString where
141 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
142 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
143 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
144 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
149 compare a b = cmpFS a b
151 lengthFS :: FastString -> Int
152 lengthFS (FastString _ l# _) = I# l#
153 lengthFS (CharStr a# l#) = I# l#
154 lengthFS (UnicodeStr _ s) = length s
156 nullFastString :: FastString -> Bool
157 nullFastString (FastString _ l# _) = l# ==# 0#
158 nullFastString (CharStr _ l#) = l# ==# 0#
159 nullFastString (UnicodeStr _ []) = True
160 nullFastString (UnicodeStr _ (_:_)) = False
162 unpackFS :: FastString -> String
163 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
164 unpackFS (CharStr addr len#) =
169 | otherwise = C# ch : unpack (nh +# 1#)
171 ch = indexCharOffAddr# addr nh
172 unpackFS (UnicodeStr _ s) = map chr s
174 unpackIntFS :: FastString -> [Int]
175 unpackIntFS (UnicodeStr _ s) = s
176 unpackIntFS fs = map ord (unpackFS fs)
178 appendFS :: FastString -> FastString -> FastString
179 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
181 concatFS :: [FastString] -> FastString
182 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
184 headFS :: FastString -> Char
185 headFS (FastString _ l# ba#) =
186 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
187 headFS (CharStr a# l#) =
188 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
189 headFS (UnicodeStr _ (c:_)) = chr c
190 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
192 headIntFS :: FastString -> Int
193 headIntFS (UnicodeStr _ (c:_)) = c
194 headIntFS fs = ord (headFS fs)
196 indexFS :: FastString -> Int -> Char
197 indexFS f i@(I# i#) =
200 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
201 | otherwise -> error (msg (I# l#))
203 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
204 | otherwise -> error (msg (I# l#))
205 UnicodeStr _ s -> chr (s!!i)
207 msg l = "indexFS: out of range: " ++ show (l,i)
209 tailFS :: FastString -> FastString
210 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
211 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
213 consFS :: Char -> FastString -> FastString
214 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
216 uniqueOfFS :: FastString -> Int#
217 uniqueOfFS (FastString u# _ _) = u#
218 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
220 [A somewhat moby hack]: to avoid entering all sorts
221 of junk into the hash table, all C char strings
222 are by default left out. The benefit of being in
223 the table is that string comparisons are lightning fast,
224 just an Int# comparison.
226 But, if you want to get the Unique of a CharStr, we
227 enter it into the table and return that unique. This
228 works, but causes the CharStr to be looked up in the hash
229 table each time it is accessed..
231 uniqueOfFS (UnicodeStr u# _) = u#
234 Internally, the compiler will maintain a fast string symbol
235 table, providing sharing and fast comparison. Creation of
236 new @FastString@s then covertly does a lookup, re-using the
237 @FastString@ if there was a hit.
239 Caution: mkFastStringUnicode assumes that if the string is in the
240 table, it sits under the UnicodeStr constructor. Other mkFastString
241 variants analogously assume the FastString constructor.
244 data FastStringTable =
247 (MutableArray# RealWorld [FastString])
249 type FastStringTableVar = IORef FastStringTable
251 string_table :: FastStringTableVar
254 #if __GLASGOW_HASKELL__ < 405
255 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
256 >>= \ (MutableArray _ arr#) ->
257 #elif __GLASGOW_HASKELL__ < 407
258 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
259 >>= \ (MutableArray _ _ arr#) ->
261 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
262 >>= \ (STArray _ _ arr#) ->
264 newIORef (FastStringTable 0# arr#))
266 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
267 lookupTbl (FastStringTable _ arr#) i# =
269 #if __GLASGOW_HASKELL__ < 400
270 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
273 readArray# arr# i# s#)
276 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
277 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
278 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
279 #if __GLASGOW_HASKELL__ < 400
284 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
286 mkFastString# :: Addr# -> Int# -> FastString
287 mkFastString# a# len# =
289 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
293 -- _trace ("hashed: "++show (I# h)) $
294 lookupTbl ft h >>= \ lookup_result ->
295 case lookup_result of
297 -- no match, add it to table by copying out the
298 -- the string into a ByteArray
299 -- _trace "empty bucket" $
300 case copyPrefixStr (A# a#) (I# len#) of
301 #if __GLASGOW_HASKELL__ < 405
302 (ByteArray _ barr#) ->
304 (ByteArray _ _ barr#) ->
306 let f_str = FastString uid# len# barr# in
307 updTbl string_table ft h [f_str] >>
308 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
310 -- non-empty `bucket', scan the list looking
311 -- entry with same length and compare byte by byte.
312 -- _trace ("non-empty bucket"++show ls) $
313 case bucket_match ls len# a# of
315 case copyPrefixStr (A# a#) (I# len#) of
316 #if __GLASGOW_HASKELL__ < 405
317 (ByteArray _ barr#) ->
319 (ByteArray _ _ barr#) ->
321 let f_str = FastString uid# len# barr# in
322 updTbl string_table ft h (f_str:ls) >>
323 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
324 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
326 bucket_match [] _ _ = Nothing
327 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
328 if len# ==# l# && eqStrPrefix a# ba# l# then
331 bucket_match ls len# a#
332 bucket_match (UnicodeStr _ _ : ls) len# a# =
333 bucket_match ls len# a#
335 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
336 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
338 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
339 mkFastSubStringBA# barr# start# len# =
341 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
343 h = hashSubStrBA barr# start# len#
345 -- _trace ("hashed(b): "++show (I# h)) $
346 lookupTbl ft h >>= \ lookup_result ->
347 case lookup_result of
349 -- no match, add it to table by copying out the
350 -- the string into a ByteArray
351 -- _trace "empty bucket(b)" $
352 #if __GLASGOW_HASKELL__ < 405
353 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
356 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
357 (ByteArray _ _ ba#) ->
359 let f_str = FastString uid# len# ba# in
360 updTbl string_table ft h [f_str] >>
361 -- _trace ("new(b): " ++ show f_str) $
364 -- non-empty `bucket', scan the list looking
365 -- entry with same length and compare byte by byte.
366 -- _trace ("non-empty bucket(b)"++show ls) $
367 case bucket_match ls start# len# barr# of
369 #if __GLASGOW_HASKELL__ < 405
370 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
373 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
374 (ByteArray _ _ ba#) ->
376 let f_str = FastString uid# len# ba# in
377 updTbl string_table ft h (f_str:ls) >>
378 -- _trace ("new(b): " ++ show f_str) $
381 -- _trace ("re-use(b): "++show v) $
387 bucket_match [] _ _ _ = Nothing
388 bucket_match (v:ls) start# len# ba# =
390 FastString _ l# barr# ->
391 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
394 bucket_match ls start# len# ba#
395 UnicodeStr _ _ -> bucket_match ls start# len# ba#
397 mkFastStringUnicode :: [Int] -> FastString
398 mkFastStringUnicode s =
400 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
404 -- _trace ("hashed(b): "++show (I# h)) $
405 lookupTbl ft h >>= \ lookup_result ->
406 case lookup_result of
408 -- no match, add it to table by copying out the
409 -- the string into a [Int]
410 let f_str = UnicodeStr uid# s in
411 updTbl string_table ft h [f_str] >>
412 -- _trace ("new(b): " ++ show f_str) $
415 -- non-empty `bucket', scan the list looking
416 -- entry with same length and compare byte by byte.
417 -- _trace ("non-empty bucket(b)"++show ls) $
418 case bucket_match ls of
420 let f_str = UnicodeStr uid# s in
421 updTbl string_table ft h (f_str:ls) >>
422 -- _trace ("new(b): " ++ show f_str) $
425 -- _trace ("re-use(b): "++show v) $
429 bucket_match [] = Nothing
430 bucket_match (v@(UnicodeStr _ s'):ls) =
431 if s' == s then Just v else bucket_match ls
432 bucket_match (FastString _ _ _ : ls) = bucket_match ls
434 mkFastCharString :: Addr -> FastString
435 mkFastCharString a@(A# a#) =
436 case strLength a of{ (I# len#) -> CharStr a# len# }
438 mkFastCharString# :: Addr# -> FastString
439 mkFastCharString# a# =
440 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
442 mkFastCharString2 :: Addr -> Int -> FastString
443 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
445 mkFastStringNarrow :: String -> FastString
446 mkFastStringNarrow str =
447 case packString str of
448 #if __GLASGOW_HASKELL__ < 405
449 (ByteArray (_,I# len#) frozen#) ->
451 (ByteArray _ (I# len#) frozen#) ->
453 mkFastSubStringBA# frozen# 0# len#
454 {- 0-indexed array, len# == index to one beyond end of string,
455 i.e., (0,1) => empty string. -}
457 mkFastString :: String -> FastString
458 mkFastString str = if all good str
459 then mkFastStringNarrow str
460 else mkFastStringUnicode (map ord str)
462 good c = c >= '\1' && c <= '\xFF'
464 mkFastStringInt :: [Int] -> FastString
465 mkFastStringInt str = if all good str
466 then mkFastStringNarrow (map chr str)
467 else mkFastStringUnicode str
469 good c = c >= 1 && c <= 0xFF
471 mkFastSubString :: Addr -> Int -> Int -> FastString
472 mkFastSubString (A# a#) (I# start#) (I# len#) =
473 mkFastString# (addrOffset# a# start#) len#
477 hashStr :: Addr# -> Int# -> Int#
478 -- use the Addr to produce a hash value between 0 & m (inclusive)
482 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
483 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
484 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
486 c0 = indexCharOffAddr# a# 0#
487 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
488 c2 = indexCharOffAddr# a# (len# -# 1#)
490 c1 = indexCharOffAddr# a# 1#
491 c2 = indexCharOffAddr# a# 2#
494 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
495 -- use the byte array to produce a hash value between 0 & m (inclusive)
496 hashSubStrBA ba# start# len# =
499 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
500 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
501 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
503 c0 = indexCharArray# ba# 0#
504 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
505 c2 = indexCharArray# ba# (len# -# 1#)
507 -- c1 = indexCharArray# ba# 1#
508 -- c2 = indexCharArray# ba# 2#
510 hashUnicode :: [Int] -> Int#
511 -- use the Addr to produce a hash value between 0 & m (inclusive)
513 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
514 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
515 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
519 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
520 I# c2 = s !! (I# (len# -# 1#))
525 cmpFS :: FastString -> FastString -> Ordering
526 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
528 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
529 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
530 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
535 #if __GLASGOW_HASKELL__ < 405
536 _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) ->
538 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
542 else if res ==# 0# then EQ
546 #if __GLASGOW_HASKELL__ < 405
552 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
554 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
557 else if res ==# 0# then EQ
563 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
565 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
568 else if res ==# 0# then EQ
572 #if __GLASGOW_HASKELL__ < 405
573 ba1 = ByteArray ((error "")::(Int,Int)) bs1
575 ba1 = ByteArray (error "") ((error "")::Int) bs1
579 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
580 = -- try them the other way 'round
581 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
585 Outputting @FastString@s is quick, just block copying the chunk (using
589 hPutFS :: Handle -> FastString -> IO ()
590 #if __GLASGOW_HASKELL__ <= 302
591 hPutFS handle (FastString _ l# ba#) =
595 readHandle handle >>= \ htype ->
597 ErrorHandle ioError ->
598 writeHandle handle htype >>
601 writeHandle handle htype >>
602 fail MkIOError(handle,IllegalOperation,"handle is closed")
603 SemiClosedHandle _ _ ->
604 writeHandle handle htype >>
605 fail MkIOError(handle,IllegalOperation,"handle is closed")
607 writeHandle handle htype >>
608 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
610 let fp = filePtr htype in
612 #if __GLASGOW_HASKELL__ < 405
613 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
615 _ccall_ writeFile (ByteArray ((error "")::Int) ((error "")::Int) ba#) fp (I# l#) >>= \rc ->
620 constructError "hPutFS" >>= \ err ->
622 hPutFS handle (CharStr a# l#) =
626 readHandle handle >>= \ htype ->
628 ErrorHandle ioError ->
629 writeHandle handle htype >>
632 writeHandle handle htype >>
633 fail MkIOError(handle,IllegalOperation,"handle is closed")
634 SemiClosedHandle _ _ ->
635 writeHandle handle htype >>
636 fail MkIOError(handle,IllegalOperation,"handle is closed")
638 writeHandle handle htype >>
639 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
641 let fp = filePtr htype in
643 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
647 constructError "hPutFS" >>= \ err ->
652 hPutFS handle (FastString _ l# ba#)
653 | l# ==# 0# = return ()
654 #if __GLASGOW_HASKELL__ < 405
655 | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#)
656 #elif __GLASGOW_HASKELL__ < 407
657 | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#)
659 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
660 hPutBufBAFull handle mba (I# l#)
663 bot = error "hPutFS.ba"
665 --ToDo: avoid silly code duplic.
667 hPutFS handle (CharStr a# l#)
668 | l# ==# 0# = return ()
669 #if __GLASGOW_HASKELL__ < 407
670 | otherwise = hPutBuf handle (A# a#) (I# l#)
671 #elif __GLASGOW_HASKELL__ < 411
672 | otherwise = hPutBufFull handle (A# a#) (I# l#)
674 | otherwise = hPutBufFull handle (Ptr a#) (I# l#)
677 -- ONLY here for debugging the NCG (so -ddump-stix works for string
678 -- literals); no idea if this is really necessary. JRS, 010131
679 hPutFS handle (UnicodeStr _ is)
680 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")