2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastStringNarrow, -- :: String -> FastString
17 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
29 mkFastStringInt, -- :: [Int] -> FastString
31 uniqueOfFS, -- :: FastString -> Int#
32 lengthFS, -- :: FastString -> Int
33 nullFastString, -- :: FastString -> Bool
35 unpackFS, -- :: FastString -> String
36 unpackIntFS, -- :: FastString -> [Int]
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 headIntFS, -- :: FastString -> Int
40 tailFS, -- :: FastString -> FastString
41 concatFS, -- :: [FastString] -> FastString
42 consFS, -- :: Char -> FastString -> FastString
43 indexFS, -- :: FastString -> Int -> Char
45 hPutFS -- :: Handle -> FastString -> IO ()
48 -- This #define suppresses the "import FastString" that
49 -- HsVersions otherwise produces
50 #define COMPILING_FAST_STRING
51 #include "HsVersions.h"
53 #if __GLASGOW_HASKELL__ < 503
55 import PrelIOBase ( IO(..) )
58 import GHC.IOBase ( IO(..) )
63 #if __GLASGOW_HASKELL__ < 411
64 import PrelAddr ( Addr(..) )
66 import Addr ( Addr(..) )
67 import Ptr ( Ptr(..) )
69 #if __GLASGOW_HASKELL__ < 503
70 import PrelArr ( STArray(..), newSTArray )
71 import IOExts ( hPutBufFull, hPutBufBAFull )
73 import GHC.Arr ( STArray(..), newSTArray )
74 import System.IO ( hPutBuf )
75 import IOExts ( hPutBufBA )
76 import CString ( unpackNBytesBA# )
79 import IOExts ( IORef, newIORef, readIORef, writeIORef )
81 import Char ( chr, ord )
83 #define hASH_TBL_SIZE 993
85 #if __GLASGOW_HASKELL__ < 503
87 hPutBufBA = hPutBufBAFull
91 @FastString@s are packed representations of strings
92 with a unique id for fast comparisons. The unique id
93 is assigned when creating the @FastString@, using
94 a hash table to map from the character string representation
99 = FastString -- packed repr. on the heap.
101 -- 0 => string literal, comparison
106 | CharStr -- external C string
107 Addr# -- pointer to the (null-terminated) bytes in C land.
108 Int# -- length (cached)
110 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
112 [Int] -- character numbers
114 instance Eq FastString where
115 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
116 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
118 instance Ord FastString where
119 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
120 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
121 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
122 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
127 compare a b = cmpFS a b
129 lengthFS :: FastString -> Int
130 lengthFS (FastString _ l# _) = I# l#
131 lengthFS (CharStr a# l#) = I# l#
132 lengthFS (UnicodeStr _ s) = length s
134 nullFastString :: FastString -> Bool
135 nullFastString (FastString _ l# _) = l# ==# 0#
136 nullFastString (CharStr _ l#) = l# ==# 0#
137 nullFastString (UnicodeStr _ []) = True
138 nullFastString (UnicodeStr _ (_:_)) = False
140 unpackFS :: FastString -> String
141 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
142 unpackFS (CharStr addr len#) =
147 | otherwise = C# ch : unpack (nh +# 1#)
149 ch = indexCharOffAddr# addr nh
150 unpackFS (UnicodeStr _ s) = map chr s
152 unpackIntFS :: FastString -> [Int]
153 unpackIntFS (UnicodeStr _ s) = s
154 unpackIntFS fs = map ord (unpackFS fs)
156 appendFS :: FastString -> FastString -> FastString
157 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
159 concatFS :: [FastString] -> FastString
160 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
162 headFS :: FastString -> Char
163 headFS (FastString _ l# ba#) =
164 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
165 headFS (CharStr a# l#) =
166 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
167 headFS (UnicodeStr _ (c:_)) = chr c
168 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
170 headIntFS :: FastString -> Int
171 headIntFS (UnicodeStr _ (c:_)) = c
172 headIntFS fs = ord (headFS fs)
174 indexFS :: FastString -> Int -> Char
175 indexFS f i@(I# i#) =
178 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
179 | otherwise -> error (msg (I# l#))
181 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
182 | otherwise -> error (msg (I# l#))
183 UnicodeStr _ s -> chr (s!!i)
185 msg l = "indexFS: out of range: " ++ show (l,i)
187 tailFS :: FastString -> FastString
188 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
189 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
191 consFS :: Char -> FastString -> FastString
192 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
194 uniqueOfFS :: FastString -> Int#
195 uniqueOfFS (FastString u# _ _) = u#
196 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
198 [A somewhat moby hack]: to avoid entering all sorts
199 of junk into the hash table, all C char strings
200 are by default left out. The benefit of being in
201 the table is that string comparisons are lightning fast,
202 just an Int# comparison.
204 But, if you want to get the Unique of a CharStr, we
205 enter it into the table and return that unique. This
206 works, but causes the CharStr to be looked up in the hash
207 table each time it is accessed..
209 uniqueOfFS (UnicodeStr u# _) = u#
212 Internally, the compiler will maintain a fast string symbol
213 table, providing sharing and fast comparison. Creation of
214 new @FastString@s then covertly does a lookup, re-using the
215 @FastString@ if there was a hit.
217 Caution: mkFastStringUnicode assumes that if the string is in the
218 table, it sits under the UnicodeStr constructor. Other mkFastString
219 variants analogously assume the FastString constructor.
222 data FastStringTable =
225 (MutableArray# RealWorld [FastString])
227 type FastStringTableVar = IORef FastStringTable
229 string_table :: FastStringTableVar
232 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
233 >>= \ (STArray _ _ arr#) ->
234 newIORef (FastStringTable 0# arr#))
236 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
237 lookupTbl (FastStringTable _ arr#) i# =
239 readArray# arr# i# s#)
241 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
242 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
243 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
245 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
247 mkFastString# :: Addr# -> Int# -> FastString
248 mkFastString# a# len# =
250 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
254 -- _trace ("hashed: "++show (I# h)) $
255 lookupTbl ft h >>= \ lookup_result ->
256 case lookup_result of
258 -- no match, add it to table by copying out the
259 -- the string into a ByteArray
260 -- _trace "empty bucket" $
261 case copyPrefixStr (A# a#) (I# len#) of
262 (ByteArray _ _ barr#) ->
263 let f_str = FastString uid# len# barr# in
264 updTbl string_table ft h [f_str] >>
265 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
267 -- non-empty `bucket', scan the list looking
268 -- entry with same length and compare byte by byte.
269 -- _trace ("non-empty bucket"++show ls) $
270 case bucket_match ls len# a# of
272 case copyPrefixStr (A# a#) (I# len#) of
273 (ByteArray _ _ barr#) ->
274 let f_str = FastString uid# len# barr# in
275 updTbl string_table ft h (f_str:ls) >>
276 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
277 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
279 bucket_match [] _ _ = Nothing
280 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
281 if len# ==# l# && eqStrPrefix a# ba# l# then
284 bucket_match ls len# a#
285 bucket_match (UnicodeStr _ _ : ls) len# a# =
286 bucket_match ls len# a#
288 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
289 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
291 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
292 mkFastSubStringBA# barr# start# len# =
294 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
296 h = hashSubStrBA barr# start# len#
298 -- _trace ("hashed(b): "++show (I# h)) $
299 lookupTbl ft h >>= \ lookup_result ->
300 case lookup_result of
302 -- no match, add it to table by copying out the
303 -- the string into a ByteArray
304 -- _trace "empty bucket(b)" $
305 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
306 (ByteArray _ _ ba#) ->
307 let f_str = FastString uid# len# ba# in
308 updTbl string_table ft h [f_str] >>
309 -- _trace ("new(b): " ++ show f_str) $
312 -- non-empty `bucket', scan the list looking
313 -- entry with same length and compare byte by byte.
314 -- _trace ("non-empty bucket(b)"++show ls) $
315 case bucket_match ls start# len# barr# of
317 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
318 (ByteArray _ _ ba#) ->
319 let f_str = FastString uid# len# ba# in
320 updTbl string_table ft h (f_str:ls) >>
321 -- _trace ("new(b): " ++ show f_str) $
324 -- _trace ("re-use(b): "++show v) $
330 bucket_match [] _ _ _ = Nothing
331 bucket_match (v:ls) start# len# ba# =
333 FastString _ l# barr# ->
334 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
337 bucket_match ls start# len# ba#
338 UnicodeStr _ _ -> bucket_match ls start# len# ba#
340 mkFastStringUnicode :: [Int] -> FastString
341 mkFastStringUnicode s =
343 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
347 -- _trace ("hashed(b): "++show (I# h)) $
348 lookupTbl ft h >>= \ lookup_result ->
349 case lookup_result of
351 -- no match, add it to table by copying out the
352 -- the string into a [Int]
353 let f_str = UnicodeStr uid# s in
354 updTbl string_table ft h [f_str] >>
355 -- _trace ("new(b): " ++ show f_str) $
358 -- non-empty `bucket', scan the list looking
359 -- entry with same length and compare byte by byte.
360 -- _trace ("non-empty bucket(b)"++show ls) $
361 case bucket_match ls of
363 let f_str = UnicodeStr uid# s in
364 updTbl string_table ft h (f_str:ls) >>
365 -- _trace ("new(b): " ++ show f_str) $
368 -- _trace ("re-use(b): "++show v) $
372 bucket_match [] = Nothing
373 bucket_match (v@(UnicodeStr _ s'):ls) =
374 if s' == s then Just v else bucket_match ls
375 bucket_match (FastString _ _ _ : ls) = bucket_match ls
377 mkFastCharString :: Addr -> FastString
378 mkFastCharString a@(A# a#) =
379 case strLength a of{ (I# len#) -> CharStr a# len# }
381 mkFastCharString# :: Addr# -> FastString
382 mkFastCharString# a# =
383 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
385 mkFastCharString2 :: Addr -> Int -> FastString
386 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
388 mkFastStringNarrow :: String -> FastString
389 mkFastStringNarrow str =
390 case packString str of
391 (ByteArray _ (I# len#) frozen#) ->
392 mkFastSubStringBA# frozen# 0# len#
393 {- 0-indexed array, len# == index to one beyond end of string,
394 i.e., (0,1) => empty string. -}
396 mkFastString :: String -> FastString
397 mkFastString str = if all good str
398 then mkFastStringNarrow str
399 else mkFastStringUnicode (map ord str)
401 good c = c >= '\1' && c <= '\xFF'
403 mkFastStringInt :: [Int] -> FastString
404 mkFastStringInt str = if all good str
405 then mkFastStringNarrow (map chr str)
406 else mkFastStringUnicode str
408 good c = c >= 1 && c <= 0xFF
410 mkFastSubString :: Addr -> Int -> Int -> FastString
411 mkFastSubString (A# a#) (I# start#) (I# len#) =
412 mkFastString# (addrOffset# a# start#) len#
416 hashStr :: Addr# -> Int# -> Int#
417 -- use the Addr to produce a hash value between 0 & m (inclusive)
421 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
422 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
423 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
425 c0 = indexCharOffAddr# a# 0#
426 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
427 c2 = indexCharOffAddr# a# (len# -# 1#)
429 c1 = indexCharOffAddr# a# 1#
430 c2 = indexCharOffAddr# a# 2#
433 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
434 -- use the byte array to produce a hash value between 0 & m (inclusive)
435 hashSubStrBA ba# start# len# =
438 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
439 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
440 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
442 c0 = indexCharArray# ba# 0#
443 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
444 c2 = indexCharArray# ba# (len# -# 1#)
446 -- c1 = indexCharArray# ba# 1#
447 -- c2 = indexCharArray# ba# 2#
449 hashUnicode :: [Int] -> Int#
450 -- use the Addr to produce a hash value between 0 & m (inclusive)
452 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
453 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
454 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
458 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
459 I# c2 = s !! (I# (len# -# 1#))
464 cmpFS :: FastString -> FastString -> Ordering
465 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
467 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
468 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
469 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
474 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
477 else if res ==# 0# then EQ
483 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
485 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
488 else if res ==# 0# then EQ
494 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
496 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
499 else if res ==# 0# then EQ
503 ba1 = ByteArray (error "") ((error "")::Int) bs1
506 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
507 = -- try them the other way 'round
508 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
512 Outputting @FastString@s is quick, just block copying the chunk (using
516 hPutFS :: Handle -> FastString -> IO ()
517 hPutFS handle (FastString _ l# ba#)
518 | l# ==# 0# = return ()
519 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
520 hPutBufBA handle mba (I# l#)
522 bot = error "hPutFS.ba"
524 --ToDo: avoid silly code duplic.
526 hPutFS handle (CharStr a# l#)
527 | l# ==# 0# = return ()
528 #if __GLASGOW_HASKELL__ < 411
529 | otherwise = hPutBuf handle (A# a#) (I# l#)
531 | otherwise = hPutBuf handle (Ptr a#) (I# l#)
534 -- ONLY here for debugging the NCG (so -ddump-stix works for string
535 -- literals); no idea if this is really necessary. JRS, 010131
536 hPutFS handle (UnicodeStr _ is)
537 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")