2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
43 hPutFS -- :: Handle -> FastString -> IO ()
46 -- This #define suppresses the "import FastString" that
47 -- HsVersions otherwise produces
48 #define COMPILING_FAST_STRING
49 #include "HsVersions.h"
51 #if __GLASGOW_HASKELL__ < 301
53 import STBase ( StateAndPtr#(..) )
54 import IOHandle ( filePtr, readHandle, writeHandle )
55 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
61 #if __GLASGOW_HASKELL__ < 400
62 import PrelST ( StateAndPtr#(..) )
65 #if __GLASGOW_HASKELL__ <= 303
66 import PrelHandle ( readHandle,
67 # if __GLASGOW_HASKELL__ < 303
74 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
75 #if __GLASGOW_HASKELL__ < 400
79 #if __GLASGOW_HASKELL__ >= 303
88 import Addr ( Addr(..) )
89 import MutableArray ( MutableArray(..) )
91 -- ForeignObj is now exported abstractly.
92 #if __GLASGOW_HASKELL__ >= 303
93 import qualified PrelForeign as Foreign ( ForeignObj(..) )
95 import Foreign ( ForeignObj(..) )
98 import IOExts ( IORef, newIORef, readIORef, writeIORef )
101 #define hASH_TBL_SIZE 993
103 #if __GLASGOW_HASKELL__ >= 400
108 @FastString@s are packed representations of strings
109 with a unique id for fast comparisons. The unique id
110 is assigned when creating the @FastString@, using
111 a hash table to map from the character string representation
116 = FastString -- packed repr. on the heap.
118 -- 0 => string literal, comparison
123 | CharStr -- external C string
124 Addr# -- pointer to the (null-terminated) bytes in C land.
125 Int# -- length (cached)
127 instance Eq FastString where
128 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
129 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
131 instance Ord FastString where
132 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
133 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
134 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
135 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
140 compare a b = cmpFS a b
142 getByteArray# :: FastString -> ByteArray#
143 getByteArray# (FastString _ _ ba#) = ba#
145 getByteArray :: FastString -> ByteArray Int
146 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
148 lengthFS :: FastString -> Int
149 lengthFS (FastString _ l# _) = I# l#
150 lengthFS (CharStr a# l#) = I# l#
152 nullFastString :: FastString -> Bool
153 nullFastString (FastString _ l# _) = l# ==# 0#
154 nullFastString (CharStr _ l#) = l# ==# 0#
156 unpackFS :: FastString -> String
157 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
158 unpackFS (CharStr addr len#) =
163 | otherwise = C# ch : unpack (nh +# 1#)
165 ch = indexCharOffAddr# addr nh
167 appendFS :: FastString -> FastString -> FastString
168 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
170 concatFS :: [FastString] -> FastString
171 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
173 headFS :: FastString -> Char
174 headFS f@(FastString _ l# ba#) =
175 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
176 headFS f@(CharStr a# l#) =
177 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
179 tailFS :: FastString -> FastString
180 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
182 consFS :: Char -> FastString -> FastString
183 consFS c fs = mkFastString (c:unpackFS fs)
185 uniqueOfFS :: FastString -> Int#
186 uniqueOfFS (FastString u# _ _) = u#
187 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
189 [A somewhat moby hack]: to avoid entering all sorts
190 of junk into the hash table, all C char strings
191 are by default left out. The benefit of being in
192 the table is that string comparisons are lightning fast,
193 just an Int# comparison.
195 But, if you want to get the Unique of a CharStr, we
196 enter it into the table and return that unique. This
197 works, but causes the CharStr to be looked up in the hash
198 table each time it is accessed..
202 Internally, the compiler will maintain a fast string symbol
203 table, providing sharing and fast comparison. Creation of
204 new @FastString@s then covertly does a lookup, re-using the
205 @FastString@ if there was a hit.
208 data FastStringTable =
211 (MutableArray# RealWorld [FastString])
213 type FastStringTableVar = IORef FastStringTable
215 string_table :: FastStringTableVar
218 stToIO (newArray (0::Int,hASH_TBL_SIZE) []) >>= \ (MutableArray _ arr#) ->
219 newIORef (FastStringTable 0# arr#))
221 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
222 lookupTbl (FastStringTable _ arr#) i# =
224 #if __GLASGOW_HASKELL__ < 400
225 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
228 readArray# arr# i# s#)
231 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
232 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
233 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
234 #if __GLASGOW_HASKELL__ < 400
239 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
241 mkFastString# :: Addr# -> Int# -> FastString
242 mkFastString# a# len# =
244 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
248 -- _trace ("hashed: "++show (I# h)) $
249 lookupTbl ft h >>= \ lookup_result ->
250 case lookup_result of
252 -- no match, add it to table by copying out the
253 -- the string into a ByteArray
254 -- _trace "empty bucket" $
255 case copyPrefixStr (A# a#) (I# len#) of
256 (ByteArray _ barr#) ->
257 let f_str = FastString uid# len# barr# in
258 updTbl string_table ft h [f_str] >>
259 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
261 -- non-empty `bucket', scan the list looking
262 -- entry with same length and compare byte by byte.
263 -- _trace ("non-empty bucket"++show ls) $
264 case bucket_match ls len# a# of
266 case copyPrefixStr (A# a#) (I# len#) of
267 (ByteArray _ barr#) ->
268 let f_str = FastString uid# len# barr# in
269 updTbl string_table ft h (f_str:ls) >>
270 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
271 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
273 bucket_match [] _ _ = Nothing
274 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
275 if len# ==# l# && eqStrPrefix a# ba# l# then
278 bucket_match ls len# a#
280 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
281 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
283 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
284 mkFastSubStringFO# fo# start# len# =
286 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
288 h = hashSubStrFO fo# start# len#
290 lookupTbl ft h >>= \ lookup_result ->
291 case lookup_result of
293 -- no match, add it to table by copying out the
294 -- the string into a ByteArray
295 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
296 (ByteArray _ barr#) ->
297 let f_str = FastString uid# len# barr# in
298 updTbl string_table ft h [f_str] >>
301 -- non-empty `bucket', scan the list looking
302 -- entry with same length and compare byte by byte.
303 case bucket_match ls start# len# fo# of
305 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
306 (ByteArray _ barr#) ->
307 let f_str = FastString uid# len# barr# in
308 updTbl string_table ft h (f_str:ls) >>
309 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
310 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
312 bucket_match [] _ _ _ = Nothing
313 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
314 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
317 bucket_match ls start# len# fo#
320 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
321 mkFastSubStringBA# barr# start# len# =
323 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
325 h = hashSubStrBA barr# start# len#
327 -- _trace ("hashed(b): "++show (I# h)) $
328 lookupTbl ft h >>= \ lookup_result ->
329 case lookup_result of
331 -- no match, add it to table by copying out the
332 -- the string into a ByteArray
333 -- _trace "empty bucket(b)" $
334 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
336 let f_str = FastString uid# len# ba# in
337 updTbl string_table ft h [f_str] >>
338 -- _trace ("new(b): " ++ show f_str) $
341 -- non-empty `bucket', scan the list looking
342 -- entry with same length and compare byte by byte.
343 -- _trace ("non-empty bucket(b)"++show ls) $
344 case bucket_match ls start# len# barr# of
346 case copySubStrBA (ByteArray (error "") barr#) (I# start#) (I# len#) of
348 let f_str = FastString uid# len# ba# in
349 updTbl string_table ft h (f_str:ls) >>
350 -- _trace ("new(b): " ++ show f_str) $
353 -- _trace ("re-use(b): "++show v) $
359 bucket_match [] _ _ _ = Nothing
360 bucket_match (v:ls) start# len# ba# =
362 FastString _ l# barr# ->
363 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
366 bucket_match ls start# len# ba#
368 mkFastCharString :: Addr -> FastString
369 mkFastCharString a@(A# a#) =
370 case strLength a of{ (I# len#) -> CharStr a# len# }
372 mkFastCharString# :: Addr# -> FastString
373 mkFastCharString# a# =
374 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
376 mkFastCharString2 :: Addr -> Int -> FastString
377 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
379 mkFastString :: String -> FastString
381 case packString str of
382 (ByteArray (_,I# len#) frozen#) ->
383 mkFastSubStringBA# frozen# 0# len#
384 {- 0-indexed array, len# == index to one beyond end of string,
385 i.e., (0,1) => empty string. -}
387 mkFastSubString :: Addr -> Int -> Int -> FastString
388 mkFastSubString (A# a#) (I# start#) (I# len#) =
389 mkFastString# (addrOffset# a# start#) len#
391 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
392 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
393 mkFastSubStringFO# fo# start# len#
397 hashStr :: Addr# -> Int# -> Int#
398 -- use the Addr to produce a hash value between 0 & m (inclusive)
402 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
403 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
404 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
406 c0 = indexCharOffAddr# a# 0#
407 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
408 c2 = indexCharOffAddr# a# (len# -# 1#)
410 c1 = indexCharOffAddr# a# 1#
411 c2 = indexCharOffAddr# a# 2#
414 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
415 -- use the FO to produce a hash value between 0 & m (inclusive)
416 hashSubStrFO fo# start# len# =
419 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
420 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
421 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
423 c0 = indexCharOffForeignObj# fo# 0#
424 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
425 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
427 -- c1 = indexCharOffFO# fo# 1#
428 -- c2 = indexCharOffFO# fo# 2#
431 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
432 -- use the byte array to produce a hash value between 0 & m (inclusive)
433 hashSubStrBA ba# start# len# =
436 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
437 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
438 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
440 c0 = indexCharArray# ba# 0#
441 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
442 c2 = indexCharArray# ba# (len# -# 1#)
444 -- c1 = indexCharArray# ba# 1#
445 -- c2 = indexCharArray# ba# 2#
450 cmpFS :: FastString -> FastString -> Ordering
451 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
456 _ccall_ strcmp (ByteArray bottom b1#) (ByteArray bottom b2#) >>= \ (I# res) ->
459 else if res ==# 0# then EQ
464 bottom = error "tagCmp"
465 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
467 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
470 else if res ==# 0# then EQ
476 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
478 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
481 else if res ==# 0# then EQ
485 ba1 = ByteArray ((error "")::(Int,Int)) bs1
488 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
489 = -- try them the other way 'round
490 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
494 Outputting @FastString@s is quick, just block copying the chunk (using
498 hPutFS :: Handle -> FastString -> IO ()
499 #if __GLASGOW_HASKELL__ <= 302
500 hPutFS handle (FastString _ l# ba#) =
504 readHandle handle >>= \ htype ->
506 ErrorHandle ioError ->
507 writeHandle handle htype >>
510 writeHandle handle htype >>
511 fail MkIOError(handle,IllegalOperation,"handle is closed")
512 SemiClosedHandle _ _ ->
513 writeHandle handle htype >>
514 fail MkIOError(handle,IllegalOperation,"handle is closed")
516 writeHandle handle htype >>
517 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
519 let fp = filePtr htype in
521 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
525 constructError "hPutFS" >>= \ err ->
527 hPutFS handle (CharStr a# l#) =
531 readHandle handle >>= \ htype ->
533 ErrorHandle ioError ->
534 writeHandle handle htype >>
537 writeHandle handle htype >>
538 fail MkIOError(handle,IllegalOperation,"handle is closed")
539 SemiClosedHandle _ _ ->
540 writeHandle handle htype >>
541 fail MkIOError(handle,IllegalOperation,"handle is closed")
543 writeHandle handle htype >>
544 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
546 let fp = filePtr htype in
548 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
552 constructError "hPutFS" >>= \ err ->
557 hPutFS handle (FastString _ l# ba#)
558 | l# ==# 0# = return ()
559 | otherwise = hPutBufBA handle (ByteArray bottom ba#) (I# l#)
561 bottom = error "hPutFS.ba"
563 --ToDo: avoid silly code duplic.
565 hPutFS handle (CharStr a# l#)
566 | l# ==# 0# = return ()
567 | otherwise = hPutBuf handle (A# a#) (I# l#)