2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
42 indexFS, -- :: FastString -> Int -> Char
44 hPutFS -- :: Handle -> FastString -> IO ()
47 -- This #define suppresses the "import FastString" that
48 -- HsVersions otherwise produces
49 #define COMPILING_FAST_STRING
50 #include "HsVersions.h"
52 #if __GLASGOW_HASKELL__ < 301
54 import STBase ( StateAndPtr#(..) )
55 import IOHandle ( filePtr, readHandle, writeHandle )
56 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
62 #if __GLASGOW_HASKELL__ < 400
63 import PrelST ( StateAndPtr#(..) )
66 #if __GLASGOW_HASKELL__ <= 303
67 import PrelHandle ( readHandle,
68 # if __GLASGOW_HASKELL__ < 303
75 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
76 #if __GLASGOW_HASKELL__ < 400
80 #if __GLASGOW_HASKELL__ >= 303
89 import Addr ( Addr(..) )
90 #if __GLASGOW_HASKELL__ < 407
91 import MutableArray ( MutableArray(..) )
93 import PrelArr ( STArray(..), newSTArray )
96 -- ForeignObj is now exported abstractly.
97 #if __GLASGOW_HASKELL__ >= 303
98 import qualified PrelForeign as Foreign ( ForeignObj(..) )
100 import Foreign ( ForeignObj(..) )
103 import IOExts ( IORef, newIORef, readIORef, writeIORef )
106 #define hASH_TBL_SIZE 993
108 #if __GLASGOW_HASKELL__ >= 400
113 @FastString@s are packed representations of strings
114 with a unique id for fast comparisons. The unique id
115 is assigned when creating the @FastString@, using
116 a hash table to map from the character string representation
121 = FastString -- packed repr. on the heap.
123 -- 0 => string literal, comparison
128 | CharStr -- external C string
129 Addr# -- pointer to the (null-terminated) bytes in C land.
130 Int# -- length (cached)
132 instance Eq FastString where
133 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
134 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
136 instance Ord FastString where
137 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
138 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
139 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
140 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
145 compare a b = cmpFS a b
147 getByteArray# :: FastString -> ByteArray#
148 getByteArray# (FastString _ _ ba#) = ba#
150 getByteArray :: FastString -> ByteArray Int
151 #if __GLASGOW_HASKELL__ < 405
152 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
154 getByteArray (FastString _ l# ba#) = ByteArray 0 (I# l#) ba#
157 lengthFS :: FastString -> Int
158 lengthFS (FastString _ l# _) = I# l#
159 lengthFS (CharStr a# l#) = I# l#
161 nullFastString :: FastString -> Bool
162 nullFastString (FastString _ l# _) = l# ==# 0#
163 nullFastString (CharStr _ l#) = l# ==# 0#
165 unpackFS :: FastString -> String
166 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
167 unpackFS (CharStr addr len#) =
172 | otherwise = C# ch : unpack (nh +# 1#)
174 ch = indexCharOffAddr# addr nh
176 appendFS :: FastString -> FastString -> FastString
177 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
179 concatFS :: [FastString] -> FastString
180 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
182 headFS :: FastString -> Char
183 headFS f@(FastString _ l# ba#) =
184 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
185 headFS f@(CharStr a# l#) =
186 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
188 indexFS :: FastString -> Int -> Char
189 indexFS f i@(I# i#) =
192 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
193 | otherwise -> error (msg (I# l#))
195 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
196 | otherwise -> error (msg (I# l#))
198 msg l = "indexFS: out of range: " ++ show (l,i)
200 tailFS :: FastString -> FastString
201 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
203 consFS :: Char -> FastString -> FastString
204 consFS c fs = mkFastString (c:unpackFS fs)
206 uniqueOfFS :: FastString -> Int#
207 uniqueOfFS (FastString u# _ _) = u#
208 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
210 [A somewhat moby hack]: to avoid entering all sorts
211 of junk into the hash table, all C char strings
212 are by default left out. The benefit of being in
213 the table is that string comparisons are lightning fast,
214 just an Int# comparison.
216 But, if you want to get the Unique of a CharStr, we
217 enter it into the table and return that unique. This
218 works, but causes the CharStr to be looked up in the hash
219 table each time it is accessed..
223 Internally, the compiler will maintain a fast string symbol
224 table, providing sharing and fast comparison. Creation of
225 new @FastString@s then covertly does a lookup, re-using the
226 @FastString@ if there was a hit.
229 data FastStringTable =
232 (MutableArray# RealWorld [FastString])
234 type FastStringTableVar = IORef FastStringTable
236 string_table :: FastStringTableVar
239 #if __GLASGOW_HASKELL__ < 405
240 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
241 >>= \ (MutableArray _ arr#) ->
242 #elif __GLASGOW_HASKELL__ < 407
243 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
244 >>= \ (MutableArray _ _ arr#) ->
246 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
247 >>= \ (STArray _ _ arr#) ->
249 newIORef (FastStringTable 0# arr#))
251 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
252 lookupTbl (FastStringTable _ arr#) i# =
254 #if __GLASGOW_HASKELL__ < 400
255 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
258 readArray# arr# i# s#)
261 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
262 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
263 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
264 #if __GLASGOW_HASKELL__ < 400
269 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
271 mkFastString# :: Addr# -> Int# -> FastString
272 mkFastString# a# len# =
274 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
278 -- _trace ("hashed: "++show (I# h)) $
279 lookupTbl ft h >>= \ lookup_result ->
280 case lookup_result of
282 -- no match, add it to table by copying out the
283 -- the string into a ByteArray
284 -- _trace "empty bucket" $
285 case copyPrefixStr (A# a#) (I# len#) of
286 #if __GLASGOW_HASKELL__ < 405
287 (ByteArray _ barr#) ->
289 (ByteArray _ _ barr#) ->
291 let f_str = FastString uid# len# barr# in
292 updTbl string_table ft h [f_str] >>
293 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
295 -- non-empty `bucket', scan the list looking
296 -- entry with same length and compare byte by byte.
297 -- _trace ("non-empty bucket"++show ls) $
298 case bucket_match ls len# a# of
300 case copyPrefixStr (A# a#) (I# len#) of
301 #if __GLASGOW_HASKELL__ < 405
302 (ByteArray _ barr#) ->
304 (ByteArray _ _ barr#) ->
306 let f_str = FastString uid# len# barr# in
307 updTbl string_table ft h (f_str:ls) >>
308 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
309 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
311 bucket_match [] _ _ = Nothing
312 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
313 if len# ==# l# && eqStrPrefix a# ba# l# then
316 bucket_match ls len# a#
318 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
319 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
321 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
322 mkFastSubStringFO# fo# start# len# =
324 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
326 h = hashSubStrFO fo# start# len#
328 lookupTbl ft h >>= \ lookup_result ->
329 case lookup_result of
331 -- no match, add it to table by copying out the
332 -- the string into a ByteArray
333 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
334 #if __GLASGOW_HASKELL__ < 405
335 (ByteArray _ barr#) ->
337 (ByteArray _ _ barr#) ->
339 let f_str = FastString uid# len# barr# in
340 updTbl string_table ft h [f_str] >>
343 -- non-empty `bucket', scan the list looking
344 -- entry with same length and compare byte by byte.
345 case bucket_match ls start# len# fo# of
347 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
348 #if __GLASGOW_HASKELL__ < 405
349 (ByteArray _ barr#) ->
351 (ByteArray _ _ barr#) ->
353 let f_str = FastString uid# len# barr# in
354 updTbl string_table ft h (f_str:ls) >>
355 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
356 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
358 bucket_match [] _ _ _ = Nothing
359 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
360 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
363 bucket_match ls start# len# fo#
366 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
367 mkFastSubStringBA# barr# start# len# =
369 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
371 h = hashSubStrBA barr# start# len#
373 -- _trace ("hashed(b): "++show (I# h)) $
374 lookupTbl ft h >>= \ lookup_result ->
375 case lookup_result of
377 -- no match, add it to table by copying out the
378 -- the string into a ByteArray
379 -- _trace "empty bucket(b)" $
380 #if __GLASGOW_HASKELL__ < 405
381 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
384 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
385 (ByteArray _ _ ba#) ->
387 let f_str = FastString uid# len# ba# in
388 updTbl string_table ft h [f_str] >>
389 -- _trace ("new(b): " ++ show f_str) $
392 -- non-empty `bucket', scan the list looking
393 -- entry with same length and compare byte by byte.
394 -- _trace ("non-empty bucket(b)"++show ls) $
395 case bucket_match ls start# len# barr# of
397 #if __GLASGOW_HASKELL__ < 405
398 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
401 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
402 (ByteArray _ _ ba#) ->
404 let f_str = FastString uid# len# ba# in
405 updTbl string_table ft h (f_str:ls) >>
406 -- _trace ("new(b): " ++ show f_str) $
409 -- _trace ("re-use(b): "++show v) $
415 bucket_match [] _ _ _ = Nothing
416 bucket_match (v:ls) start# len# ba# =
418 FastString _ l# barr# ->
419 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
422 bucket_match ls start# len# ba#
424 mkFastCharString :: Addr -> FastString
425 mkFastCharString a@(A# a#) =
426 case strLength a of{ (I# len#) -> CharStr a# len# }
428 mkFastCharString# :: Addr# -> FastString
429 mkFastCharString# a# =
430 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
432 mkFastCharString2 :: Addr -> Int -> FastString
433 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
435 mkFastString :: String -> FastString
437 case packString str of
438 #if __GLASGOW_HASKELL__ < 405
439 (ByteArray (_,I# len#) frozen#) ->
441 (ByteArray _ (I# len#) frozen#) ->
443 mkFastSubStringBA# frozen# 0# len#
444 {- 0-indexed array, len# == index to one beyond end of string,
445 i.e., (0,1) => empty string. -}
447 mkFastSubString :: Addr -> Int -> Int -> FastString
448 mkFastSubString (A# a#) (I# start#) (I# len#) =
449 mkFastString# (addrOffset# a# start#) len#
451 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
452 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
453 mkFastSubStringFO# fo# start# len#
457 hashStr :: Addr# -> Int# -> Int#
458 -- use the Addr to produce a hash value between 0 & m (inclusive)
462 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
463 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
464 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
466 c0 = indexCharOffAddr# a# 0#
467 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
468 c2 = indexCharOffAddr# a# (len# -# 1#)
470 c1 = indexCharOffAddr# a# 1#
471 c2 = indexCharOffAddr# a# 2#
474 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
475 -- use the FO to produce a hash value between 0 & m (inclusive)
476 hashSubStrFO fo# start# len# =
479 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
480 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
481 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
483 c0 = indexCharOffForeignObj# fo# 0#
484 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
485 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
487 -- c1 = indexCharOffFO# fo# 1#
488 -- c2 = indexCharOffFO# fo# 2#
491 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
492 -- use the byte array to produce a hash value between 0 & m (inclusive)
493 hashSubStrBA ba# start# len# =
496 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
497 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
498 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
500 c0 = indexCharArray# ba# 0#
501 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
502 c2 = indexCharArray# ba# (len# -# 1#)
504 -- c1 = indexCharArray# ba# 1#
505 -- c2 = indexCharArray# ba# 2#
510 cmpFS :: FastString -> FastString -> Ordering
511 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
516 #if __GLASGOW_HASKELL__ < 405
517 _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) ->
519 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
523 else if res ==# 0# then EQ
527 #if __GLASGOW_HASKELL__ < 405
533 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
535 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
538 else if res ==# 0# then EQ
544 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
546 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
549 else if res ==# 0# then EQ
553 #if __GLASGOW_HASKELL__ < 405
554 ba1 = ByteArray ((error "")::(Int,Int)) bs1
556 ba1 = ByteArray (error "") ((error "")::Int) bs1
560 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
561 = -- try them the other way 'round
562 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
566 Outputting @FastString@s is quick, just block copying the chunk (using
570 hPutFS :: Handle -> FastString -> IO ()
571 #if __GLASGOW_HASKELL__ <= 302
572 hPutFS handle (FastString _ l# ba#) =
576 readHandle handle >>= \ htype ->
578 ErrorHandle ioError ->
579 writeHandle handle htype >>
582 writeHandle handle htype >>
583 fail MkIOError(handle,IllegalOperation,"handle is closed")
584 SemiClosedHandle _ _ ->
585 writeHandle handle htype >>
586 fail MkIOError(handle,IllegalOperation,"handle is closed")
588 writeHandle handle htype >>
589 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
591 let fp = filePtr htype in
593 #if __GLASGOW_HASKELL__ < 405
594 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
596 _ccall_ writeFile (ByteArray ((error "")::Int) ((error "")::Int) ba#) fp (I# l#) >>= \rc ->
601 constructError "hPutFS" >>= \ err ->
603 hPutFS handle (CharStr a# l#) =
607 readHandle handle >>= \ htype ->
609 ErrorHandle ioError ->
610 writeHandle handle htype >>
613 writeHandle handle htype >>
614 fail MkIOError(handle,IllegalOperation,"handle is closed")
615 SemiClosedHandle _ _ ->
616 writeHandle handle htype >>
617 fail MkIOError(handle,IllegalOperation,"handle is closed")
619 writeHandle handle htype >>
620 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
622 let fp = filePtr htype in
624 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
628 constructError "hPutFS" >>= \ err ->
633 hPutFS handle (FastString _ l# ba#)
634 | l# ==# 0# = return ()
635 #if __GLASGOW_HASKELL__ < 405
636 | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#)
637 #elif __GLASGOW_HASKELL__ < 407
638 | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#)
640 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
641 hPutBufBA handle mba (I# l#)
644 bot = error "hPutFS.ba"
646 --ToDo: avoid silly code duplic.
648 hPutFS handle (CharStr a# l#)
649 | l# ==# 0# = return ()
650 | otherwise = hPutBuf handle (A# a#) (I# l#)