2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastStringNarrow, -- :: String -> FastString
17 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
29 mkFastStringInt, -- :: [Int] -> FastString
31 uniqueOfFS, -- :: FastString -> Int#
32 lengthFS, -- :: FastString -> Int
33 nullFastString, -- :: FastString -> Bool
35 unpackFS, -- :: FastString -> String
36 unpackIntFS, -- :: FastString -> [Int]
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 headIntFS, -- :: FastString -> Int
40 tailFS, -- :: FastString -> FastString
41 concatFS, -- :: [FastString] -> FastString
42 consFS, -- :: Char -> FastString -> FastString
43 indexFS, -- :: FastString -> Int -> Char
45 hPutFS -- :: Handle -> FastString -> IO ()
48 -- This #define suppresses the "import FastString" that
49 -- HsVersions otherwise produces
50 #define COMPILING_FAST_STRING
51 #include "HsVersions.h"
53 #if __GLASGOW_HASKELL__ < 503
55 import PrelIOBase ( IO(..) )
58 import GHC.IOBase ( IO(..) )
63 #if __GLASGOW_HASKELL__ < 411
64 import PrelAddr ( Addr(..) )
66 import Addr ( Addr(..) )
67 import Ptr ( Ptr(..) )
69 #if __GLASGOW_HASKELL__ < 503
70 import PrelArr ( STArray(..), newSTArray )
71 import IOExts ( hPutBufFull, hPutBufBAFull )
73 import GHC.Arr ( STArray(..), newSTArray )
74 import System.IO ( hPutBuf )
75 import IOExts ( hPutBufBA )
76 import CString ( unpackNBytesBA# )
79 import IOExts ( IORef, newIORef, readIORef, writeIORef )
81 import Char ( chr, ord )
83 #define hASH_TBL_SIZE 993
85 #if __GLASGOW_HASKELL__ < 503
87 hPutBufBA = hPutBufBAFull
91 @FastString@s are packed representations of strings
92 with a unique id for fast comparisons. The unique id
93 is assigned when creating the @FastString@, using
94 a hash table to map from the character string representation
99 = FastString -- packed repr. on the heap.
101 -- 0 => string literal, comparison
106 | CharStr -- external C string
107 Addr# -- pointer to the (null-terminated) bytes in C land.
108 Int# -- length (cached)
110 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
112 [Int] -- character numbers
114 instance Eq FastString where
115 -- shortcut for real FastStrings
116 (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
119 trace ("slow FastString comparison: " ++
120 unpackFS a ++ "/" ++ unpackFS b) $
122 case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
124 (FastString u1 _ _) == (FastString u2 _ _) = u1 /=# u2
127 trace ("slow FastString comparison: " ++
128 unpackFS a ++ "/" ++ unpackFS b) $
130 case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
132 instance Ord FastString where
133 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
134 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
135 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
136 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
141 compare a b = cmpFS a b
143 lengthFS :: FastString -> Int
144 lengthFS (FastString _ l# _) = I# l#
145 lengthFS (CharStr a# l#) = I# l#
146 lengthFS (UnicodeStr _ s) = length s
148 nullFastString :: FastString -> Bool
149 nullFastString (FastString _ l# _) = l# ==# 0#
150 nullFastString (CharStr _ l#) = l# ==# 0#
151 nullFastString (UnicodeStr _ []) = True
152 nullFastString (UnicodeStr _ (_:_)) = False
154 unpackFS :: FastString -> String
155 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
156 unpackFS (CharStr addr len#) =
161 | otherwise = C# ch : unpack (nh +# 1#)
163 ch = indexCharOffAddr# addr nh
164 unpackFS (UnicodeStr _ s) = map chr s
166 unpackIntFS :: FastString -> [Int]
167 unpackIntFS (UnicodeStr _ s) = s
168 unpackIntFS fs = map ord (unpackFS fs)
170 appendFS :: FastString -> FastString -> FastString
171 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
173 concatFS :: [FastString] -> FastString
174 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
176 headFS :: FastString -> Char
177 headFS (FastString _ l# ba#) =
178 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
179 headFS (CharStr a# l#) =
180 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
181 headFS (UnicodeStr _ (c:_)) = chr c
182 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
184 headIntFS :: FastString -> Int
185 headIntFS (UnicodeStr _ (c:_)) = c
186 headIntFS fs = ord (headFS fs)
188 indexFS :: FastString -> Int -> Char
189 indexFS f i@(I# i#) =
192 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
193 | otherwise -> error (msg (I# l#))
195 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
196 | otherwise -> error (msg (I# l#))
197 UnicodeStr _ s -> chr (s!!i)
199 msg l = "indexFS: out of range: " ++ show (l,i)
201 tailFS :: FastString -> FastString
202 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
203 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
205 consFS :: Char -> FastString -> FastString
206 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
208 uniqueOfFS :: FastString -> Int#
209 uniqueOfFS (FastString u# _ _) = u#
210 uniqueOfFS (CharStr a# l#) = case mkFastStringLen# a# l# of { FastString u# _ _ -> u#} -- Ugh!
212 [A somewhat moby hack]: to avoid entering all sorts
213 of junk into the hash table, all C char strings
214 are by default left out. The benefit of being in
215 the table is that string comparisons are lightning fast,
216 just an Int# comparison.
218 But, if you want to get the Unique of a CharStr, we
219 enter it into the table and return that unique. This
220 works, but causes the CharStr to be looked up in the hash
221 table each time it is accessed..
223 uniqueOfFS (UnicodeStr u# _) = u#
226 Internally, the compiler will maintain a fast string symbol
227 table, providing sharing and fast comparison. Creation of
228 new @FastString@s then covertly does a lookup, re-using the
229 @FastString@ if there was a hit.
231 Caution: mkFastStringUnicode assumes that if the string is in the
232 table, it sits under the UnicodeStr constructor. Other mkFastString
233 variants analogously assume the FastString constructor.
236 data FastStringTable =
239 (MutableArray# RealWorld [FastString])
241 type FastStringTableVar = IORef FastStringTable
243 string_table :: FastStringTableVar
246 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
247 >>= \ (STArray _ _ arr#) ->
248 newIORef (FastStringTable 0# arr#))
250 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
251 lookupTbl (FastStringTable _ arr#) i# =
253 readArray# arr# i# s#)
255 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
256 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
257 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
259 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
261 mkFastString# :: Addr# -> FastString
263 case strLength (A# a#) of { (I# len#) -> mkFastStringLen# a# len# }
265 mkFastStringLen# :: Addr# -> Int# -> FastString
266 mkFastStringLen# a# len# =
268 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
272 -- _trace ("hashed: "++show (I# h)) $
273 lookupTbl ft h >>= \ lookup_result ->
274 case lookup_result of
276 -- no match, add it to table by copying out the
277 -- the string into a ByteArray
278 -- _trace "empty bucket" $
279 case copyPrefixStr (A# a#) (I# len#) of
280 (ByteArray _ _ barr#) ->
281 let f_str = FastString uid# len# barr# in
282 updTbl string_table ft h [f_str] >>
283 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
285 -- non-empty `bucket', scan the list looking
286 -- entry with same length and compare byte by byte.
287 -- _trace ("non-empty bucket"++show ls) $
288 case bucket_match ls len# a# of
290 case copyPrefixStr (A# a#) (I# len#) of
291 (ByteArray _ _ barr#) ->
292 let f_str = FastString uid# len# barr# in
293 updTbl string_table ft h (f_str:ls) >>
294 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
295 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
297 bucket_match [] _ _ = Nothing
298 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
299 if len# ==# l# && eqStrPrefix a# ba# l# then
302 bucket_match ls len# a#
303 bucket_match (UnicodeStr _ _ : ls) len# a# =
304 bucket_match ls len# a#
306 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
307 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
309 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
310 mkFastSubStringBA# barr# start# len# =
312 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
314 h = hashSubStrBA barr# start# len#
316 -- _trace ("hashed(b): "++show (I# h)) $
317 lookupTbl ft h >>= \ lookup_result ->
318 case lookup_result of
320 -- no match, add it to table by copying out the
321 -- the string into a ByteArray
322 -- _trace "empty bucket(b)" $
323 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
324 (ByteArray _ _ ba#) ->
325 let f_str = FastString uid# len# ba# in
326 updTbl string_table ft h [f_str] >>
327 -- _trace ("new(b): " ++ show f_str) $
330 -- non-empty `bucket', scan the list looking
331 -- entry with same length and compare byte by byte.
332 -- _trace ("non-empty bucket(b)"++show ls) $
333 case bucket_match ls start# len# barr# of
335 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
336 (ByteArray _ _ ba#) ->
337 let f_str = FastString uid# len# ba# in
338 updTbl string_table ft h (f_str:ls) >>
339 -- _trace ("new(b): " ++ show f_str) $
342 -- _trace ("re-use(b): "++show v) $
348 bucket_match [] _ _ _ = Nothing
349 bucket_match (v:ls) start# len# ba# =
351 FastString _ l# barr# ->
352 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
355 bucket_match ls start# len# ba#
356 UnicodeStr _ _ -> bucket_match ls start# len# ba#
358 mkFastStringUnicode :: [Int] -> FastString
359 mkFastStringUnicode s =
361 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
365 -- _trace ("hashed(b): "++show (I# h)) $
366 lookupTbl ft h >>= \ lookup_result ->
367 case lookup_result of
369 -- no match, add it to table by copying out the
370 -- the string into a [Int]
371 let f_str = UnicodeStr uid# s in
372 updTbl string_table ft h [f_str] >>
373 -- _trace ("new(b): " ++ show f_str) $
376 -- non-empty `bucket', scan the list looking
377 -- entry with same length and compare byte by byte.
378 -- _trace ("non-empty bucket(b)"++show ls) $
379 case bucket_match ls of
381 let f_str = UnicodeStr uid# s in
382 updTbl string_table ft h (f_str:ls) >>
383 -- _trace ("new(b): " ++ show f_str) $
386 -- _trace ("re-use(b): "++show v) $
390 bucket_match [] = Nothing
391 bucket_match (v@(UnicodeStr _ s'):ls) =
392 if s' == s then Just v else bucket_match ls
393 bucket_match (FastString _ _ _ : ls) = bucket_match ls
395 mkFastCharString :: Addr -> FastString
396 mkFastCharString a@(A# a#) =
397 case strLength a of{ (I# len#) -> CharStr a# len# }
399 mkFastCharString# :: Addr# -> FastString
400 mkFastCharString# a# =
401 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
403 mkFastCharString2 :: Addr -> Int -> FastString
404 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
406 mkFastStringNarrow :: String -> FastString
407 mkFastStringNarrow str =
408 case packString str of
409 (ByteArray _ (I# len#) frozen#) ->
410 mkFastSubStringBA# frozen# 0# len#
411 {- 0-indexed array, len# == index to one beyond end of string,
412 i.e., (0,1) => empty string. -}
414 mkFastString :: String -> FastString
415 mkFastString str = if all good str
416 then mkFastStringNarrow str
417 else mkFastStringUnicode (map ord str)
419 good c = c >= '\1' && c <= '\xFF'
421 mkFastStringInt :: [Int] -> FastString
422 mkFastStringInt str = if all good str
423 then mkFastStringNarrow (map chr str)
424 else mkFastStringUnicode str
426 good c = c >= 1 && c <= 0xFF
428 mkFastSubString :: Addr -> Int -> Int -> FastString
429 mkFastSubString (A# a#) (I# start#) (I# len#) =
430 mkFastStringLen# (addrOffset# a# start#) len#
434 hashStr :: Addr# -> Int# -> Int#
435 -- use the Addr to produce a hash value between 0 & m (inclusive)
439 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
440 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
441 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
443 c0 = indexCharOffAddr# a# 0#
444 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
445 c2 = indexCharOffAddr# a# (len# -# 1#)
447 c1 = indexCharOffAddr# a# 1#
448 c2 = indexCharOffAddr# a# 2#
451 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
452 -- use the byte array to produce a hash value between 0 & m (inclusive)
453 hashSubStrBA ba# start# len# =
456 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
457 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
458 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
460 c0 = indexCharArray# ba# 0#
461 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
462 c2 = indexCharArray# ba# (len# -# 1#)
464 -- c1 = indexCharArray# ba# 1#
465 -- c2 = indexCharArray# ba# 2#
467 hashUnicode :: [Int] -> Int#
468 -- use the Addr to produce a hash value between 0 & m (inclusive)
470 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
471 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
472 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
476 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
477 I# c2 = s !! (I# (len# -# 1#))
482 cmpFS :: FastString -> FastString -> Ordering
483 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
485 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
486 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
487 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
492 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
495 else if res ==# 0# then EQ
501 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
503 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
506 else if res ==# 0# then EQ
512 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
514 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
517 else if res ==# 0# then EQ
521 ba1 = ByteArray (error "") ((error "")::Int) bs1
524 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
525 = -- try them the other way 'round
526 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
530 Outputting @FastString@s is quick, just block copying the chunk (using
534 hPutFS :: Handle -> FastString -> IO ()
535 hPutFS handle (FastString _ l# ba#)
536 | l# ==# 0# = return ()
537 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
538 hPutBufBA handle mba (I# l#)
540 bot = error "hPutFS.ba"
542 --ToDo: avoid silly code duplic.
544 hPutFS handle (CharStr a# l#)
545 | l# ==# 0# = return ()
546 #if __GLASGOW_HASKELL__ < 411
547 | otherwise = hPutBuf handle (A# a#) (I# l#)
549 | otherwise = hPutBuf handle (Ptr a#) (I# l#)
552 -- ONLY here for debugging the NCG (so -ddump-stix works for string
553 -- literals); no idea if this is really necessary. JRS, 010131
554 hPutFS handle (UnicodeStr _ is)
555 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")