2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
14 mkFastString, -- :: String -> FastString
15 mkFastStringNarrow, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
18 mkFastString#, -- :: Addr# -> FastString
19 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
21 mkFastStringInt, -- :: [Int] -> FastString
23 uniqueOfFS, -- :: FastString -> Int#
24 lengthFS, -- :: FastString -> Int
25 nullFastString, -- :: FastString -> Bool
27 unpackFS, -- :: FastString -> String
28 unpackIntFS, -- :: FastString -> [Int]
29 appendFS, -- :: FastString -> FastString -> FastString
30 headFS, -- :: FastString -> Char
31 headIntFS, -- :: FastString -> Int
32 tailFS, -- :: FastString -> FastString
33 concatFS, -- :: [FastString] -> FastString
34 consFS, -- :: Char -> FastString -> FastString
35 indexFS, -- :: FastString -> Int -> Char
36 nilFS, -- :: FastString
38 hPutFS, -- :: Handle -> FastString -> IO ()
41 mkLitString# -- :: Addr# -> Addr
44 -- This #define suppresses the "import FastString" that
45 -- HsVersions otherwise produces
46 #define COMPILING_FAST_STRING
47 #include "HsVersions.h"
49 #if __GLASGOW_HASKELL__ < 503
51 import PrelIOBase ( IO(..) )
54 import GHC.IOBase ( IO(..) )
59 #if __GLASGOW_HASKELL__ < 411
60 import PrelAddr ( Addr(..) )
62 import Addr ( Addr(..) )
63 import Ptr ( Ptr(..) )
65 #if __GLASGOW_HASKELL__ < 503
66 import PrelArr ( STArray(..), newSTArray )
67 import IOExts ( hPutBufBAFull )
69 import GHC.Arr ( STArray(..), newSTArray )
70 import IOExts ( hPutBufBA )
71 import CString ( unpackNBytesBA# )
74 import IOExts ( IORef, newIORef, readIORef, writeIORef )
76 import Char ( chr, ord )
78 #define hASH_TBL_SIZE 993
80 #if __GLASGOW_HASKELL__ < 503
81 hPutBufBA = hPutBufBAFull
85 @FastString@s are packed representations of strings
86 with a unique id for fast comparisons. The unique id
87 is assigned when creating the @FastString@, using
88 a hash table to map from the character string representation
93 = FastString -- packed repr. on the heap.
95 -- 0 => string literal, comparison
100 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
102 [Int] -- character numbers
104 instance Eq FastString where
105 -- shortcut for real FastStrings
106 (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
107 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
109 (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
110 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
112 instance Ord FastString where
113 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
114 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
115 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
116 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
121 compare a b = cmpFS a b
123 lengthFS :: FastString -> Int
124 lengthFS (FastString _ l# _) = I# l#
125 lengthFS (UnicodeStr _ s) = length s
127 nullFastString :: FastString -> Bool
128 nullFastString (FastString _ l# _) = l# ==# 0#
129 nullFastString (UnicodeStr _ []) = True
130 nullFastString (UnicodeStr _ (_:_)) = False
132 unpackFS :: FastString -> String
133 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
134 unpackFS (UnicodeStr _ s) = map chr s
136 unpackIntFS :: FastString -> [Int]
137 unpackIntFS (UnicodeStr _ s) = s
138 unpackIntFS fs = map ord (unpackFS fs)
140 appendFS :: FastString -> FastString -> FastString
141 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
143 concatFS :: [FastString] -> FastString
144 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
146 headFS :: FastString -> Char
147 headFS (FastString _ l# ba#) =
148 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
149 headFS (UnicodeStr _ (c:_)) = chr c
150 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
152 headIntFS :: FastString -> Int
153 headIntFS (UnicodeStr _ (c:_)) = c
154 headIntFS fs = ord (headFS fs)
156 indexFS :: FastString -> Int -> Char
157 indexFS f i@(I# i#) =
160 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
161 | otherwise -> error (msg (I# l#))
162 UnicodeStr _ s -> chr (s!!i)
164 msg l = "indexFS: out of range: " ++ show (l,i)
166 tailFS :: FastString -> FastString
167 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
168 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
170 consFS :: Char -> FastString -> FastString
171 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
173 uniqueOfFS :: FastString -> Int#
174 uniqueOfFS (FastString u# _ _) = u#
175 uniqueOfFS (UnicodeStr u# _) = u#
177 nilFS = mkFastString ""
180 Internally, the compiler will maintain a fast string symbol
181 table, providing sharing and fast comparison. Creation of
182 new @FastString@s then covertly does a lookup, re-using the
183 @FastString@ if there was a hit.
185 Caution: mkFastStringUnicode assumes that if the string is in the
186 table, it sits under the UnicodeStr constructor. Other mkFastString
187 variants analogously assume the FastString constructor.
190 data FastStringTable =
193 (MutableArray# RealWorld [FastString])
195 type FastStringTableVar = IORef FastStringTable
197 string_table :: FastStringTableVar
200 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
201 >>= \ (STArray _ _ arr#) ->
202 newIORef (FastStringTable 0# arr#))
204 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
205 lookupTbl (FastStringTable _ arr#) i# =
207 readArray# arr# i# s#)
209 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
210 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
211 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
213 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
215 mkFastString# :: Addr# -> FastString
217 case strLength (A# a#) of { (I# len#) -> mkFastStringLen# a# len# }
219 mkFastStringLen# :: Addr# -> Int# -> FastString
220 mkFastStringLen# a# len# =
222 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
226 -- _trace ("hashed: "++show (I# h)) $
227 lookupTbl ft h >>= \ lookup_result ->
228 case lookup_result of
230 -- no match, add it to table by copying out the
231 -- the string into a ByteArray
232 -- _trace "empty bucket" $
233 case copyPrefixStr (A# a#) (I# len#) of
234 (ByteArray _ _ barr#) ->
235 let f_str = FastString uid# len# barr# in
236 updTbl string_table ft h [f_str] >>
237 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
239 -- non-empty `bucket', scan the list looking
240 -- entry with same length and compare byte by byte.
241 -- _trace ("non-empty bucket"++show ls) $
242 case bucket_match ls len# a# of
244 case copyPrefixStr (A# a#) (I# len#) of
245 (ByteArray _ _ barr#) ->
246 let f_str = FastString uid# len# barr# in
247 updTbl string_table ft h (f_str:ls) >>
248 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
249 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
251 bucket_match [] _ _ = Nothing
252 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
253 if len# ==# l# && eqStrPrefix a# ba# l# then
256 bucket_match ls len# a#
257 bucket_match (UnicodeStr _ _ : ls) len# a# =
258 bucket_match ls len# a#
260 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
261 mkFastSubStringBA# barr# start# len# =
263 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
265 h = hashSubStrBA barr# start# len#
267 -- _trace ("hashed(b): "++show (I# h)) $
268 lookupTbl ft h >>= \ lookup_result ->
269 case lookup_result of
271 -- no match, add it to table by copying out the
272 -- the string into a ByteArray
273 -- _trace "empty bucket(b)" $
274 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
275 (ByteArray _ _ ba#) ->
276 let f_str = FastString uid# len# ba# in
277 updTbl string_table ft h [f_str] >>
278 -- _trace ("new(b): " ++ show f_str) $
281 -- non-empty `bucket', scan the list looking
282 -- entry with same length and compare byte by byte.
283 -- _trace ("non-empty bucket(b)"++show ls) $
284 case bucket_match ls start# len# barr# of
286 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
287 (ByteArray _ _ ba#) ->
288 let f_str = FastString uid# len# ba# in
289 updTbl string_table ft h (f_str:ls) >>
290 -- _trace ("new(b): " ++ show f_str) $
293 -- _trace ("re-use(b): "++show v) $
299 bucket_match [] _ _ _ = Nothing
300 bucket_match (v:ls) start# len# ba# =
302 FastString _ l# barr# ->
303 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
306 bucket_match ls start# len# ba#
307 UnicodeStr _ _ -> bucket_match ls start# len# ba#
309 mkFastStringUnicode :: [Int] -> FastString
310 mkFastStringUnicode s =
312 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
316 -- _trace ("hashed(b): "++show (I# h)) $
317 lookupTbl ft h >>= \ lookup_result ->
318 case lookup_result of
320 -- no match, add it to table by copying out the
321 -- the string into a [Int]
322 let f_str = UnicodeStr uid# s in
323 updTbl string_table ft h [f_str] >>
324 -- _trace ("new(b): " ++ show f_str) $
327 -- non-empty `bucket', scan the list looking
328 -- entry with same length and compare byte by byte.
329 -- _trace ("non-empty bucket(b)"++show ls) $
330 case bucket_match ls of
332 let f_str = UnicodeStr uid# s in
333 updTbl string_table ft h (f_str:ls) >>
334 -- _trace ("new(b): " ++ show f_str) $
337 -- _trace ("re-use(b): "++show v) $
341 bucket_match [] = Nothing
342 bucket_match (v@(UnicodeStr _ s'):ls) =
343 if s' == s then Just v else bucket_match ls
344 bucket_match (FastString _ _ _ : ls) = bucket_match ls
346 mkFastStringNarrow :: String -> FastString
347 mkFastStringNarrow str =
348 case packString str of
349 (ByteArray _ (I# len#) frozen#) ->
350 mkFastSubStringBA# frozen# 0# len#
351 {- 0-indexed array, len# == index to one beyond end of string,
352 i.e., (0,1) => empty string. -}
354 mkFastString :: String -> FastString
355 mkFastString str = if all good str
356 then mkFastStringNarrow str
357 else mkFastStringUnicode (map ord str)
359 good c = c >= '\1' && c <= '\xFF'
361 mkFastStringInt :: [Int] -> FastString
362 mkFastStringInt str = if all good str
363 then mkFastStringNarrow (map chr str)
364 else mkFastStringUnicode str
366 good c = c >= 1 && c <= 0xFF
368 mkFastSubString :: Addr -> Int -> Int -> FastString
369 mkFastSubString (A# a#) (I# start#) (I# len#) =
370 mkFastStringLen# (addrOffset# a# start#) len#
374 hashStr :: Addr# -> Int# -> Int#
375 -- use the Addr to produce a hash value between 0 & m (inclusive)
379 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
380 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
381 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
383 c0 = indexCharOffAddr# a# 0#
384 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
385 c2 = indexCharOffAddr# a# (len# -# 1#)
387 c1 = indexCharOffAddr# a# 1#
388 c2 = indexCharOffAddr# a# 2#
391 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
392 -- use the byte array to produce a hash value between 0 & m (inclusive)
393 hashSubStrBA ba# start# len# =
396 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
397 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
398 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
400 c0 = indexCharArray# ba# 0#
401 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
402 c2 = indexCharArray# ba# (len# -# 1#)
404 -- c1 = indexCharArray# ba# 1#
405 -- c2 = indexCharArray# ba# 2#
407 hashUnicode :: [Int] -> Int#
408 -- use the Addr to produce a hash value between 0 & m (inclusive)
410 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
411 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
412 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
416 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
417 I# c2 = s !! (I# (len# -# 1#))
422 cmpFS :: FastString -> FastString -> Ordering
423 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
425 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
426 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
427 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
432 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
435 else if res ==# 0# then EQ
443 Outputting @FastString@s is quick, just block copying the chunk (using
447 hPutFS :: Handle -> FastString -> IO ()
448 hPutFS handle (FastString _ l# ba#)
449 | l# ==# 0# = return ()
450 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
451 hPutBufBA handle mba (I# l#)
453 bot = error "hPutFS.ba"
455 -- ONLY here for debugging the NCG (so -ddump-stix works for string
456 -- literals); no idea if this is really necessary. JRS, 010131
457 hPutFS handle (UnicodeStr _ is)
458 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
461 Here for convenience only.
464 type LitString = Addr
465 -- ToDo: make it a Ptr when we don't have to support 4.08 any more
467 mkLitString# :: Addr# -> LitString
468 mkLitString# a# = A# a#