2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
42 indexFS, -- :: FastString -> Int -> Char
44 hPutFS -- :: Handle -> FastString -> IO ()
47 -- This #define suppresses the "import FastString" that
48 -- HsVersions otherwise produces
49 #define COMPILING_FAST_STRING
50 #include "HsVersions.h"
52 #if __GLASGOW_HASKELL__ < 301
54 import STBase ( StateAndPtr#(..) )
55 import IOHandle ( filePtr, readHandle, writeHandle )
56 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
62 #if __GLASGOW_HASKELL__ < 400
63 import PrelST ( StateAndPtr#(..) )
66 #if __GLASGOW_HASKELL__ <= 303
67 import PrelHandle ( readHandle,
68 # if __GLASGOW_HASKELL__ < 303
75 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
76 #if __GLASGOW_HASKELL__ < 400
80 #if __GLASGOW_HASKELL__ >= 303
89 import Addr ( Addr(..) )
90 import MutableArray ( MutableArray(..) )
92 -- ForeignObj is now exported abstractly.
93 #if __GLASGOW_HASKELL__ >= 303
94 import qualified PrelForeign as Foreign ( ForeignObj(..) )
96 import Foreign ( ForeignObj(..) )
99 import IOExts ( IORef, newIORef, readIORef, writeIORef )
102 #define hASH_TBL_SIZE 993
104 #if __GLASGOW_HASKELL__ >= 400
109 @FastString@s are packed representations of strings
110 with a unique id for fast comparisons. The unique id
111 is assigned when creating the @FastString@, using
112 a hash table to map from the character string representation
117 = FastString -- packed repr. on the heap.
119 -- 0 => string literal, comparison
124 | CharStr -- external C string
125 Addr# -- pointer to the (null-terminated) bytes in C land.
126 Int# -- length (cached)
128 instance Eq FastString where
129 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
130 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
132 instance Ord FastString where
133 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
134 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
135 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
136 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
141 compare a b = cmpFS a b
143 getByteArray# :: FastString -> ByteArray#
144 getByteArray# (FastString _ _ ba#) = ba#
146 getByteArray :: FastString -> ByteArray Int
147 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
149 lengthFS :: FastString -> Int
150 lengthFS (FastString _ l# _) = I# l#
151 lengthFS (CharStr a# l#) = I# l#
153 nullFastString :: FastString -> Bool
154 nullFastString (FastString _ l# _) = l# ==# 0#
155 nullFastString (CharStr _ l#) = l# ==# 0#
157 unpackFS :: FastString -> String
158 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
159 unpackFS (CharStr addr len#) =
164 | otherwise = C# ch : unpack (nh +# 1#)
166 ch = indexCharOffAddr# addr nh
168 appendFS :: FastString -> FastString -> FastString
169 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
171 concatFS :: [FastString] -> FastString
172 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
174 headFS :: FastString -> Char
175 headFS f@(FastString _ l# ba#) =
176 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
177 headFS f@(CharStr a# l#) =
178 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
180 indexFS :: FastString -> Int -> Char
181 indexFS f i@(I# i#) =
184 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
185 | otherwise -> error (msg (I# l#))
187 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
188 | otherwise -> error (msg (I# l#))
190 msg l = "indexFS: out of range: " ++ show (l,i)
192 tailFS :: FastString -> FastString
193 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
195 consFS :: Char -> FastString -> FastString
196 consFS c fs = mkFastString (c:unpackFS fs)
198 uniqueOfFS :: FastString -> Int#
199 uniqueOfFS (FastString u# _ _) = u#
200 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
202 [A somewhat moby hack]: to avoid entering all sorts
203 of junk into the hash table, all C char strings
204 are by default left out. The benefit of being in
205 the table is that string comparisons are lightning fast,
206 just an Int# comparison.
208 But, if you want to get the Unique of a CharStr, we
209 enter it into the table and return that unique. This
210 works, but causes the CharStr to be looked up in the hash
211 table each time it is accessed..
215 Internally, the compiler will maintain a fast string symbol
216 table, providing sharing and fast comparison. Creation of
217 new @FastString@s then covertly does a lookup, re-using the
218 @FastString@ if there was a hit.
221 data FastStringTable =
224 (MutableArray# RealWorld [FastString])
226 type FastStringTableVar = IORef FastStringTable
228 string_table :: FastStringTableVar
231 stToIO (newArray (0::Int,hASH_TBL_SIZE) []) >>= \ (MutableArray _ arr#) ->
232 newIORef (FastStringTable 0# arr#))
234 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
235 lookupTbl (FastStringTable _ arr#) i# =
237 #if __GLASGOW_HASKELL__ < 400
238 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
241 readArray# arr# i# s#)
244 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
245 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
246 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
247 #if __GLASGOW_HASKELL__ < 400
252 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
254 mkFastString# :: Addr# -> Int# -> FastString
255 mkFastString# a# len# =
257 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
261 -- _trace ("hashed: "++show (I# h)) $
262 lookupTbl ft h >>= \ lookup_result ->
263 case lookup_result of
265 -- no match, add it to table by copying out the
266 -- the string into a ByteArray
267 -- _trace "empty bucket" $
268 case copyPrefixStr (A# a#) (I# len#) of
269 (ByteArray _ barr#) ->
270 let f_str = FastString uid# len# barr# in
271 updTbl string_table ft h [f_str] >>
272 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
274 -- non-empty `bucket', scan the list looking
275 -- entry with same length and compare byte by byte.
276 -- _trace ("non-empty bucket"++show ls) $
277 case bucket_match ls len# a# of
279 case copyPrefixStr (A# a#) (I# len#) of
280 (ByteArray _ barr#) ->
281 let f_str = FastString uid# len# barr# in
282 updTbl string_table ft h (f_str:ls) >>
283 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
284 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
286 bucket_match [] _ _ = Nothing
287 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
288 if len# ==# l# && eqStrPrefix a# ba# l# then
291 bucket_match ls len# a#
293 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
294 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
296 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
297 mkFastSubStringFO# fo# start# len# =
299 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
301 h = hashSubStrFO fo# start# len#
303 lookupTbl ft h >>= \ lookup_result ->
304 case lookup_result of
306 -- no match, add it to table by copying out the
307 -- the string into a ByteArray
308 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
309 (ByteArray _ barr#) ->
310 let f_str = FastString uid# len# barr# in
311 updTbl string_table ft h [f_str] >>
314 -- non-empty `bucket', scan the list looking
315 -- entry with same length and compare byte by byte.
316 case bucket_match ls start# len# fo# of
318 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
319 (ByteArray _ barr#) ->
320 let f_str = FastString uid# len# barr# in
321 updTbl string_table ft h (f_str:ls) >>
322 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
323 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
325 bucket_match [] _ _ _ = Nothing
326 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
327 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
330 bucket_match ls start# len# fo#
333 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
334 mkFastSubStringBA# barr# start# len# =
336 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
338 h = hashSubStrBA barr# start# len#
340 -- _trace ("hashed(b): "++show (I# h)) $
341 lookupTbl ft h >>= \ lookup_result ->
342 case lookup_result of
344 -- no match, add it to table by copying out the
345 -- the string into a ByteArray
346 -- _trace "empty bucket(b)" $
347 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
349 let f_str = FastString uid# len# ba# in
350 updTbl string_table ft h [f_str] >>
351 -- _trace ("new(b): " ++ show f_str) $
354 -- non-empty `bucket', scan the list looking
355 -- entry with same length and compare byte by byte.
356 -- _trace ("non-empty bucket(b)"++show ls) $
357 case bucket_match ls start# len# barr# of
359 case copySubStrBA (ByteArray (error "") barr#) (I# start#) (I# len#) of
361 let f_str = FastString uid# len# ba# in
362 updTbl string_table ft h (f_str:ls) >>
363 -- _trace ("new(b): " ++ show f_str) $
366 -- _trace ("re-use(b): "++show v) $
372 bucket_match [] _ _ _ = Nothing
373 bucket_match (v:ls) start# len# ba# =
375 FastString _ l# barr# ->
376 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
379 bucket_match ls start# len# ba#
381 mkFastCharString :: Addr -> FastString
382 mkFastCharString a@(A# a#) =
383 case strLength a of{ (I# len#) -> CharStr a# len# }
385 mkFastCharString# :: Addr# -> FastString
386 mkFastCharString# a# =
387 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
389 mkFastCharString2 :: Addr -> Int -> FastString
390 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
392 mkFastString :: String -> FastString
394 case packString str of
395 (ByteArray (_,I# len#) frozen#) ->
396 mkFastSubStringBA# frozen# 0# len#
397 {- 0-indexed array, len# == index to one beyond end of string,
398 i.e., (0,1) => empty string. -}
400 mkFastSubString :: Addr -> Int -> Int -> FastString
401 mkFastSubString (A# a#) (I# start#) (I# len#) =
402 mkFastString# (addrOffset# a# start#) len#
404 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
405 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
406 mkFastSubStringFO# fo# start# len#
410 hashStr :: Addr# -> Int# -> Int#
411 -- use the Addr to produce a hash value between 0 & m (inclusive)
415 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
416 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
417 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
419 c0 = indexCharOffAddr# a# 0#
420 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
421 c2 = indexCharOffAddr# a# (len# -# 1#)
423 c1 = indexCharOffAddr# a# 1#
424 c2 = indexCharOffAddr# a# 2#
427 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
428 -- use the FO to produce a hash value between 0 & m (inclusive)
429 hashSubStrFO fo# start# len# =
432 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
433 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
434 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
436 c0 = indexCharOffForeignObj# fo# 0#
437 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
438 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
440 -- c1 = indexCharOffFO# fo# 1#
441 -- c2 = indexCharOffFO# fo# 2#
444 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
445 -- use the byte array to produce a hash value between 0 & m (inclusive)
446 hashSubStrBA ba# start# len# =
449 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
450 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
451 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
453 c0 = indexCharArray# ba# 0#
454 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
455 c2 = indexCharArray# ba# (len# -# 1#)
457 -- c1 = indexCharArray# ba# 1#
458 -- c2 = indexCharArray# ba# 2#
463 cmpFS :: FastString -> FastString -> Ordering
464 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
469 _ccall_ strcmp (ByteArray bottom b1#) (ByteArray bottom b2#) >>= \ (I# res) ->
472 else if res ==# 0# then EQ
477 bottom = error "tagCmp"
478 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
480 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
483 else if res ==# 0# then EQ
489 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
491 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
494 else if res ==# 0# then EQ
498 ba1 = ByteArray ((error "")::(Int,Int)) bs1
501 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
502 = -- try them the other way 'round
503 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
507 Outputting @FastString@s is quick, just block copying the chunk (using
511 hPutFS :: Handle -> FastString -> IO ()
512 #if __GLASGOW_HASKELL__ <= 302
513 hPutFS handle (FastString _ l# ba#) =
517 readHandle handle >>= \ htype ->
519 ErrorHandle ioError ->
520 writeHandle handle htype >>
523 writeHandle handle htype >>
524 fail MkIOError(handle,IllegalOperation,"handle is closed")
525 SemiClosedHandle _ _ ->
526 writeHandle handle htype >>
527 fail MkIOError(handle,IllegalOperation,"handle is closed")
529 writeHandle handle htype >>
530 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
532 let fp = filePtr htype in
534 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
538 constructError "hPutFS" >>= \ err ->
540 hPutFS handle (CharStr a# l#) =
544 readHandle handle >>= \ htype ->
546 ErrorHandle ioError ->
547 writeHandle handle htype >>
550 writeHandle handle htype >>
551 fail MkIOError(handle,IllegalOperation,"handle is closed")
552 SemiClosedHandle _ _ ->
553 writeHandle handle htype >>
554 fail MkIOError(handle,IllegalOperation,"handle is closed")
556 writeHandle handle htype >>
557 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
559 let fp = filePtr htype in
561 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
565 constructError "hPutFS" >>= \ err ->
570 hPutFS handle (FastString _ l# ba#)
571 | l# ==# 0# = return ()
572 | otherwise = hPutBufBA handle (ByteArray bottom ba#) (I# l#)
574 bottom = error "hPutFS.ba"
576 --ToDo: avoid silly code duplic.
578 hPutFS handle (CharStr a# l#)
579 | l# ==# 0# = return ()
580 | otherwise = hPutBuf handle (A# a#) (I# l#)