2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
42 indexFS, -- :: FastString -> Int -> Char
44 hPutFS -- :: Handle -> FastString -> IO ()
47 -- This #define suppresses the "import FastString" that
48 -- HsVersions otherwise produces
49 #define COMPILING_FAST_STRING
50 #include "HsVersions.h"
52 #if __GLASGOW_HASKELL__ < 301
54 import STBase ( StateAndPtr#(..) )
55 import IOHandle ( filePtr, readHandle, writeHandle )
56 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
62 #if __GLASGOW_HASKELL__ < 400
63 import PrelST ( StateAndPtr#(..) )
66 #if __GLASGOW_HASKELL__ <= 303
67 import PrelHandle ( readHandle,
68 # if __GLASGOW_HASKELL__ < 303
75 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
76 #if __GLASGOW_HASKELL__ < 400
80 #if __GLASGOW_HASKELL__ >= 303
89 import Addr ( Addr(..) )
90 import MutableArray ( MutableArray(..) )
92 -- ForeignObj is now exported abstractly.
93 #if __GLASGOW_HASKELL__ >= 303
94 import qualified PrelForeign as Foreign ( ForeignObj(..) )
96 import Foreign ( ForeignObj(..) )
99 import IOExts ( IORef, newIORef, readIORef, writeIORef )
102 #define hASH_TBL_SIZE 993
104 #if __GLASGOW_HASKELL__ >= 400
109 @FastString@s are packed representations of strings
110 with a unique id for fast comparisons. The unique id
111 is assigned when creating the @FastString@, using
112 a hash table to map from the character string representation
117 = FastString -- packed repr. on the heap.
119 -- 0 => string literal, comparison
124 | CharStr -- external C string
125 Addr# -- pointer to the (null-terminated) bytes in C land.
126 Int# -- length (cached)
128 instance Eq FastString where
129 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
130 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
132 instance Ord FastString where
133 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
134 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
135 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
136 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
141 compare a b = cmpFS a b
143 getByteArray# :: FastString -> ByteArray#
144 getByteArray# (FastString _ _ ba#) = ba#
146 getByteArray :: FastString -> ByteArray Int
147 #if __GLASGOW_HASKELL__ < 405
148 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
150 getByteArray (FastString _ l# ba#) = ByteArray 0 (I# l#) ba#
153 lengthFS :: FastString -> Int
154 lengthFS (FastString _ l# _) = I# l#
155 lengthFS (CharStr a# l#) = I# l#
157 nullFastString :: FastString -> Bool
158 nullFastString (FastString _ l# _) = l# ==# 0#
159 nullFastString (CharStr _ l#) = l# ==# 0#
161 unpackFS :: FastString -> String
162 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
163 unpackFS (CharStr addr len#) =
168 | otherwise = C# ch : unpack (nh +# 1#)
170 ch = indexCharOffAddr# addr nh
172 appendFS :: FastString -> FastString -> FastString
173 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
175 concatFS :: [FastString] -> FastString
176 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
178 headFS :: FastString -> Char
179 headFS f@(FastString _ l# ba#) =
180 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
181 headFS f@(CharStr a# l#) =
182 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
184 indexFS :: FastString -> Int -> Char
185 indexFS f i@(I# i#) =
188 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
189 | otherwise -> error (msg (I# l#))
191 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
192 | otherwise -> error (msg (I# l#))
194 msg l = "indexFS: out of range: " ++ show (l,i)
196 tailFS :: FastString -> FastString
197 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
199 consFS :: Char -> FastString -> FastString
200 consFS c fs = mkFastString (c:unpackFS fs)
202 uniqueOfFS :: FastString -> Int#
203 uniqueOfFS (FastString u# _ _) = u#
204 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
206 [A somewhat moby hack]: to avoid entering all sorts
207 of junk into the hash table, all C char strings
208 are by default left out. The benefit of being in
209 the table is that string comparisons are lightning fast,
210 just an Int# comparison.
212 But, if you want to get the Unique of a CharStr, we
213 enter it into the table and return that unique. This
214 works, but causes the CharStr to be looked up in the hash
215 table each time it is accessed..
219 Internally, the compiler will maintain a fast string symbol
220 table, providing sharing and fast comparison. Creation of
221 new @FastString@s then covertly does a lookup, re-using the
222 @FastString@ if there was a hit.
225 data FastStringTable =
228 (MutableArray# RealWorld [FastString])
230 type FastStringTableVar = IORef FastStringTable
232 string_table :: FastStringTableVar
235 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
236 #if __GLASGOW_HASKELL__ < 405
237 >>= \ (MutableArray _ arr#) ->
239 >>= \ (MutableArray _ _ arr#) ->
241 newIORef (FastStringTable 0# arr#))
243 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
244 lookupTbl (FastStringTable _ arr#) i# =
246 #if __GLASGOW_HASKELL__ < 400
247 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
250 readArray# arr# i# s#)
253 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
254 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
255 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
256 #if __GLASGOW_HASKELL__ < 400
261 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
263 mkFastString# :: Addr# -> Int# -> FastString
264 mkFastString# a# len# =
266 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
270 -- _trace ("hashed: "++show (I# h)) $
271 lookupTbl ft h >>= \ lookup_result ->
272 case lookup_result of
274 -- no match, add it to table by copying out the
275 -- the string into a ByteArray
276 -- _trace "empty bucket" $
277 case copyPrefixStr (A# a#) (I# len#) of
278 #if __GLASGOW_HASKELL__ < 405
279 (ByteArray _ barr#) ->
281 (ByteArray _ _ barr#) ->
283 let f_str = FastString uid# len# barr# in
284 updTbl string_table ft h [f_str] >>
285 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
287 -- non-empty `bucket', scan the list looking
288 -- entry with same length and compare byte by byte.
289 -- _trace ("non-empty bucket"++show ls) $
290 case bucket_match ls len# a# of
292 case copyPrefixStr (A# a#) (I# len#) of
293 #if __GLASGOW_HASKELL__ < 405
294 (ByteArray _ barr#) ->
296 (ByteArray _ _ barr#) ->
298 let f_str = FastString uid# len# barr# in
299 updTbl string_table ft h (f_str:ls) >>
300 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
301 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
303 bucket_match [] _ _ = Nothing
304 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
305 if len# ==# l# && eqStrPrefix a# ba# l# then
308 bucket_match ls len# a#
310 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
311 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
313 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
314 mkFastSubStringFO# fo# start# len# =
316 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
318 h = hashSubStrFO fo# start# len#
320 lookupTbl ft h >>= \ lookup_result ->
321 case lookup_result of
323 -- no match, add it to table by copying out the
324 -- the string into a ByteArray
325 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
326 #if __GLASGOW_HASKELL__ < 405
327 (ByteArray _ barr#) ->
329 (ByteArray _ _ barr#) ->
331 let f_str = FastString uid# len# barr# in
332 updTbl string_table ft h [f_str] >>
335 -- non-empty `bucket', scan the list looking
336 -- entry with same length and compare byte by byte.
337 case bucket_match ls start# len# fo# of
339 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
340 #if __GLASGOW_HASKELL__ < 405
341 (ByteArray _ barr#) ->
343 (ByteArray _ _ barr#) ->
345 let f_str = FastString uid# len# barr# in
346 updTbl string_table ft h (f_str:ls) >>
347 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
348 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
350 bucket_match [] _ _ _ = Nothing
351 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
352 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
355 bucket_match ls start# len# fo#
358 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
359 mkFastSubStringBA# barr# start# len# =
361 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
363 h = hashSubStrBA barr# start# len#
365 -- _trace ("hashed(b): "++show (I# h)) $
366 lookupTbl ft h >>= \ lookup_result ->
367 case lookup_result of
369 -- no match, add it to table by copying out the
370 -- the string into a ByteArray
371 -- _trace "empty bucket(b)" $
372 #if __GLASGOW_HASKELL__ < 405
373 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
376 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
377 (ByteArray _ _ ba#) ->
379 let f_str = FastString uid# len# ba# in
380 updTbl string_table ft h [f_str] >>
381 -- _trace ("new(b): " ++ show f_str) $
384 -- non-empty `bucket', scan the list looking
385 -- entry with same length and compare byte by byte.
386 -- _trace ("non-empty bucket(b)"++show ls) $
387 case bucket_match ls start# len# barr# of
389 #if __GLASGOW_HASKELL__ < 405
390 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
393 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
394 (ByteArray _ _ ba#) ->
396 let f_str = FastString uid# len# ba# in
397 updTbl string_table ft h (f_str:ls) >>
398 -- _trace ("new(b): " ++ show f_str) $
401 -- _trace ("re-use(b): "++show v) $
407 bucket_match [] _ _ _ = Nothing
408 bucket_match (v:ls) start# len# ba# =
410 FastString _ l# barr# ->
411 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
414 bucket_match ls start# len# ba#
416 mkFastCharString :: Addr -> FastString
417 mkFastCharString a@(A# a#) =
418 case strLength a of{ (I# len#) -> CharStr a# len# }
420 mkFastCharString# :: Addr# -> FastString
421 mkFastCharString# a# =
422 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
424 mkFastCharString2 :: Addr -> Int -> FastString
425 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
427 mkFastString :: String -> FastString
429 case packString str of
430 #if __GLASGOW_HASKELL__ < 405
431 (ByteArray (_,I# len#) frozen#) ->
433 (ByteArray _ (I# len#) frozen#) ->
435 mkFastSubStringBA# frozen# 0# len#
436 {- 0-indexed array, len# == index to one beyond end of string,
437 i.e., (0,1) => empty string. -}
439 mkFastSubString :: Addr -> Int -> Int -> FastString
440 mkFastSubString (A# a#) (I# start#) (I# len#) =
441 mkFastString# (addrOffset# a# start#) len#
443 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
444 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
445 mkFastSubStringFO# fo# start# len#
449 hashStr :: Addr# -> Int# -> Int#
450 -- use the Addr to produce a hash value between 0 & m (inclusive)
454 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
455 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
456 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
458 c0 = indexCharOffAddr# a# 0#
459 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
460 c2 = indexCharOffAddr# a# (len# -# 1#)
462 c1 = indexCharOffAddr# a# 1#
463 c2 = indexCharOffAddr# a# 2#
466 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
467 -- use the FO to produce a hash value between 0 & m (inclusive)
468 hashSubStrFO fo# start# len# =
471 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
472 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
473 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
475 c0 = indexCharOffForeignObj# fo# 0#
476 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
477 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
479 -- c1 = indexCharOffFO# fo# 1#
480 -- c2 = indexCharOffFO# fo# 2#
483 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
484 -- use the byte array to produce a hash value between 0 & m (inclusive)
485 hashSubStrBA ba# start# len# =
488 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
489 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
490 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
492 c0 = indexCharArray# ba# 0#
493 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
494 c2 = indexCharArray# ba# (len# -# 1#)
496 -- c1 = indexCharArray# ba# 1#
497 -- c2 = indexCharArray# ba# 2#
502 cmpFS :: FastString -> FastString -> Ordering
503 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
508 #if __GLASGOW_HASKELL__ < 405
509 _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) ->
511 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
515 else if res ==# 0# then EQ
519 #if __GLASGOW_HASKELL__ < 405
525 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
527 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
530 else if res ==# 0# then EQ
536 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
538 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
541 else if res ==# 0# then EQ
545 #if __GLASGOW_HASKELL__ < 405
546 ba1 = ByteArray ((error "")::(Int,Int)) bs1
548 ba1 = ByteArray (error "") ((error "")::Int) bs1
552 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
553 = -- try them the other way 'round
554 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
558 Outputting @FastString@s is quick, just block copying the chunk (using
562 hPutFS :: Handle -> FastString -> IO ()
563 #if __GLASGOW_HASKELL__ <= 302
564 hPutFS handle (FastString _ l# ba#) =
568 readHandle handle >>= \ htype ->
570 ErrorHandle ioError ->
571 writeHandle handle htype >>
574 writeHandle handle htype >>
575 fail MkIOError(handle,IllegalOperation,"handle is closed")
576 SemiClosedHandle _ _ ->
577 writeHandle handle htype >>
578 fail MkIOError(handle,IllegalOperation,"handle is closed")
580 writeHandle handle htype >>
581 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
583 let fp = filePtr htype in
585 #if __GLASGOW_HASKELL__ < 405
586 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
588 _ccall_ writeFile (ByteArray ((error "")::Int) ((error "")::Int) ba#) fp (I# l#) >>= \rc ->
593 constructError "hPutFS" >>= \ err ->
595 hPutFS handle (CharStr a# l#) =
599 readHandle handle >>= \ htype ->
601 ErrorHandle ioError ->
602 writeHandle handle htype >>
605 writeHandle handle htype >>
606 fail MkIOError(handle,IllegalOperation,"handle is closed")
607 SemiClosedHandle _ _ ->
608 writeHandle handle htype >>
609 fail MkIOError(handle,IllegalOperation,"handle is closed")
611 writeHandle handle htype >>
612 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
614 let fp = filePtr htype in
616 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
620 constructError "hPutFS" >>= \ err ->
625 hPutFS handle (FastString _ l# ba#)
626 | l# ==# 0# = return ()
627 #if __GLASGOW_HASKELL__ < 405
628 | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#)
629 #elif __GLASGOW_HASKELL__ < 407
630 | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#)
632 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
633 hPutBufBA handle mba (I# l#)
636 bot = error "hPutFS.ba"
638 --ToDo: avoid silly code duplic.
640 hPutFS handle (CharStr a# l#)
641 | l# ==# 0# = return ()
642 | otherwise = hPutBuf handle (A# a#) (I# l#)