2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
42 indexFS, -- :: FastString -> Int -> Char
44 hPutFS -- :: Handle -> FastString -> IO ()
47 -- This #define suppresses the "import FastString" that
48 -- HsVersions otherwise produces
49 #define COMPILING_FAST_STRING
50 #include "HsVersions.h"
52 #if __GLASGOW_HASKELL__ < 301
54 import STBase ( StateAndPtr#(..) )
55 import IOHandle ( filePtr, readHandle, writeHandle )
56 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
62 #if __GLASGOW_HASKELL__ < 400
63 import PrelST ( StateAndPtr#(..) )
66 #if __GLASGOW_HASKELL__ <= 303
67 import PrelHandle ( readHandle,
68 # if __GLASGOW_HASKELL__ < 303
75 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
76 #if __GLASGOW_HASKELL__ < 400
80 #if __GLASGOW_HASKELL__ >= 303
89 import PrelAddr ( Addr(..) )
90 #if __GLASGOW_HASKELL__ < 407
91 import MutableArray ( MutableArray(..) )
93 import PrelArr ( STArray(..), newSTArray )
94 import IOExts ( hPutBufFull, hPutBufBAFull )
97 -- ForeignObj is now exported abstractly.
98 #if __GLASGOW_HASKELL__ >= 303
99 import PrelForeign ( ForeignObj(..) )
101 import Foreign ( ForeignObj(..) )
104 import IOExts ( IORef, newIORef, readIORef, writeIORef )
107 #define hASH_TBL_SIZE 993
109 #if __GLASGOW_HASKELL__ >= 400
114 @FastString@s are packed representations of strings
115 with a unique id for fast comparisons. The unique id
116 is assigned when creating the @FastString@, using
117 a hash table to map from the character string representation
122 = FastString -- packed repr. on the heap.
124 -- 0 => string literal, comparison
129 | CharStr -- external C string
130 Addr# -- pointer to the (null-terminated) bytes in C land.
131 Int# -- length (cached)
133 instance Eq FastString where
134 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
135 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
137 instance Ord FastString where
138 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
139 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
140 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
141 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
146 compare a b = cmpFS a b
148 getByteArray# :: FastString -> ByteArray#
149 getByteArray# (FastString _ _ ba#) = ba#
151 getByteArray :: FastString -> ByteArray Int
152 #if __GLASGOW_HASKELL__ < 405
153 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
155 getByteArray (FastString _ l# ba#) = ByteArray 0 (I# l#) ba#
158 lengthFS :: FastString -> Int
159 lengthFS (FastString _ l# _) = I# l#
160 lengthFS (CharStr a# l#) = I# l#
162 nullFastString :: FastString -> Bool
163 nullFastString (FastString _ l# _) = l# ==# 0#
164 nullFastString (CharStr _ l#) = l# ==# 0#
166 unpackFS :: FastString -> String
167 unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
168 unpackFS (CharStr addr len#) =
173 | otherwise = C# ch : unpack (nh +# 1#)
175 ch = indexCharOffAddr# addr nh
177 appendFS :: FastString -> FastString -> FastString
178 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
180 concatFS :: [FastString] -> FastString
181 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
183 headFS :: FastString -> Char
184 headFS f@(FastString _ l# ba#) =
185 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
186 headFS f@(CharStr a# l#) =
187 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
189 indexFS :: FastString -> Int -> Char
190 indexFS f i@(I# i#) =
193 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
194 | otherwise -> error (msg (I# l#))
196 | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#)
197 | otherwise -> error (msg (I# l#))
199 msg l = "indexFS: out of range: " ++ show (l,i)
201 tailFS :: FastString -> FastString
202 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
204 consFS :: Char -> FastString -> FastString
205 consFS c fs = mkFastString (c:unpackFS fs)
207 uniqueOfFS :: FastString -> Int#
208 uniqueOfFS (FastString u# _ _) = u#
209 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
211 [A somewhat moby hack]: to avoid entering all sorts
212 of junk into the hash table, all C char strings
213 are by default left out. The benefit of being in
214 the table is that string comparisons are lightning fast,
215 just an Int# comparison.
217 But, if you want to get the Unique of a CharStr, we
218 enter it into the table and return that unique. This
219 works, but causes the CharStr to be looked up in the hash
220 table each time it is accessed..
224 Internally, the compiler will maintain a fast string symbol
225 table, providing sharing and fast comparison. Creation of
226 new @FastString@s then covertly does a lookup, re-using the
227 @FastString@ if there was a hit.
230 data FastStringTable =
233 (MutableArray# RealWorld [FastString])
235 type FastStringTableVar = IORef FastStringTable
237 string_table :: FastStringTableVar
240 #if __GLASGOW_HASKELL__ < 405
241 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
242 >>= \ (MutableArray _ arr#) ->
243 #elif __GLASGOW_HASKELL__ < 407
244 stToIO (newArray (0::Int,hASH_TBL_SIZE) [])
245 >>= \ (MutableArray _ _ arr#) ->
247 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
248 >>= \ (STArray _ _ arr#) ->
250 newIORef (FastStringTable 0# arr#))
252 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
253 lookupTbl (FastStringTable _ arr#) i# =
255 #if __GLASGOW_HASKELL__ < 400
256 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
259 readArray# arr# i# s#)
262 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
263 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
264 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
265 #if __GLASGOW_HASKELL__ < 400
270 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
272 mkFastString# :: Addr# -> Int# -> FastString
273 mkFastString# a# len# =
275 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
279 -- _trace ("hashed: "++show (I# h)) $
280 lookupTbl ft h >>= \ lookup_result ->
281 case lookup_result of
283 -- no match, add it to table by copying out the
284 -- the string into a ByteArray
285 -- _trace "empty bucket" $
286 case copyPrefixStr (A# a#) (I# len#) of
287 #if __GLASGOW_HASKELL__ < 405
288 (ByteArray _ barr#) ->
290 (ByteArray _ _ barr#) ->
292 let f_str = FastString uid# len# barr# in
293 updTbl string_table ft h [f_str] >>
294 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
296 -- non-empty `bucket', scan the list looking
297 -- entry with same length and compare byte by byte.
298 -- _trace ("non-empty bucket"++show ls) $
299 case bucket_match ls len# a# of
301 case copyPrefixStr (A# a#) (I# len#) of
302 #if __GLASGOW_HASKELL__ < 405
303 (ByteArray _ barr#) ->
305 (ByteArray _ _ barr#) ->
307 let f_str = FastString uid# len# barr# in
308 updTbl string_table ft h (f_str:ls) >>
309 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
310 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
312 bucket_match [] _ _ = Nothing
313 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
314 if len# ==# l# && eqStrPrefix a# ba# l# then
317 bucket_match ls len# a#
319 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
320 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
322 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
323 mkFastSubStringFO# fo# start# len# =
325 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
327 h = hashSubStrFO fo# start# len#
329 lookupTbl ft h >>= \ lookup_result ->
330 case lookup_result of
332 -- no match, add it to table by copying out the
333 -- the string into a ByteArray
334 case copySubStrFO (ForeignObj fo#) (I# start#) (I# len#) of
335 #if __GLASGOW_HASKELL__ < 405
336 (ByteArray _ barr#) ->
338 (ByteArray _ _ barr#) ->
340 let f_str = FastString uid# len# barr# in
341 updTbl string_table ft h [f_str] >>
344 -- non-empty `bucket', scan the list looking
345 -- entry with same length and compare byte by byte.
346 case bucket_match ls start# len# fo# of
348 case copySubStrFO (ForeignObj fo#) (I# start#) (I# len#) of
349 #if __GLASGOW_HASKELL__ < 405
350 (ByteArray _ barr#) ->
352 (ByteArray _ _ barr#) ->
354 let f_str = FastString uid# len# barr# in
355 updTbl string_table ft h (f_str:ls) >>
356 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
357 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
359 bucket_match [] _ _ _ = Nothing
360 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
361 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
364 bucket_match ls start# len# fo#
367 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
368 mkFastSubStringBA# barr# start# len# =
370 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
372 h = hashSubStrBA barr# start# len#
374 -- _trace ("hashed(b): "++show (I# h)) $
375 lookupTbl ft h >>= \ lookup_result ->
376 case lookup_result of
378 -- no match, add it to table by copying out the
379 -- the string into a ByteArray
380 -- _trace "empty bucket(b)" $
381 #if __GLASGOW_HASKELL__ < 405
382 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
385 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
386 (ByteArray _ _ ba#) ->
388 let f_str = FastString uid# len# ba# in
389 updTbl string_table ft h [f_str] >>
390 -- _trace ("new(b): " ++ show f_str) $
393 -- non-empty `bucket', scan the list looking
394 -- entry with same length and compare byte by byte.
395 -- _trace ("non-empty bucket(b)"++show ls) $
396 case bucket_match ls start# len# barr# of
398 #if __GLASGOW_HASKELL__ < 405
399 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
402 case copySubStrBA (ByteArray btm btm barr#) (I# start#) (I# len#) of
403 (ByteArray _ _ ba#) ->
405 let f_str = FastString uid# len# ba# in
406 updTbl string_table ft h (f_str:ls) >>
407 -- _trace ("new(b): " ++ show f_str) $
410 -- _trace ("re-use(b): "++show v) $
416 bucket_match [] _ _ _ = Nothing
417 bucket_match (v:ls) start# len# ba# =
419 FastString _ l# barr# ->
420 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
423 bucket_match ls start# len# ba#
425 mkFastCharString :: Addr -> FastString
426 mkFastCharString a@(A# a#) =
427 case strLength a of{ (I# len#) -> CharStr a# len# }
429 mkFastCharString# :: Addr# -> FastString
430 mkFastCharString# a# =
431 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
433 mkFastCharString2 :: Addr -> Int -> FastString
434 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
436 mkFastString :: String -> FastString
438 case packString str of
439 #if __GLASGOW_HASKELL__ < 405
440 (ByteArray (_,I# len#) frozen#) ->
442 (ByteArray _ (I# len#) frozen#) ->
444 mkFastSubStringBA# frozen# 0# len#
445 {- 0-indexed array, len# == index to one beyond end of string,
446 i.e., (0,1) => empty string. -}
448 mkFastSubString :: Addr -> Int -> Int -> FastString
449 mkFastSubString (A# a#) (I# start#) (I# len#) =
450 mkFastString# (addrOffset# a# start#) len#
452 mkFastSubStringFO :: ForeignObj -> Int -> Int -> FastString
453 mkFastSubStringFO (ForeignObj fo#) (I# start#) (I# len#) =
454 mkFastSubStringFO# fo# start# len#
458 hashStr :: Addr# -> Int# -> Int#
459 -- use the Addr to produce a hash value between 0 & m (inclusive)
463 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
464 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
465 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
467 c0 = indexCharOffAddr# a# 0#
468 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
469 c2 = indexCharOffAddr# a# (len# -# 1#)
471 c1 = indexCharOffAddr# a# 1#
472 c2 = indexCharOffAddr# a# 2#
475 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
476 -- use the FO to produce a hash value between 0 & m (inclusive)
477 hashSubStrFO fo# start# len# =
480 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
481 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
482 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
484 c0 = indexCharOffForeignObj# fo# 0#
485 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
486 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
488 -- c1 = indexCharOffFO# fo# 1#
489 -- c2 = indexCharOffFO# fo# 2#
492 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
493 -- use the byte array to produce a hash value between 0 & m (inclusive)
494 hashSubStrBA ba# start# len# =
497 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
498 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
499 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
501 c0 = indexCharArray# ba# 0#
502 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
503 c2 = indexCharArray# ba# (len# -# 1#)
505 -- c1 = indexCharArray# ba# 1#
506 -- c2 = indexCharArray# ba# 2#
511 cmpFS :: FastString -> FastString -> Ordering
512 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
517 #if __GLASGOW_HASKELL__ < 405
518 _ccall_ strcmp (ByteArray bot b1#) (ByteArray bot b2#) >>= \ (I# res) ->
520 _ccall_ strcmp (ByteArray bot bot b1#) (ByteArray bot bot b2#) >>= \ (I# res) ->
524 else if res ==# 0# then EQ
528 #if __GLASGOW_HASKELL__ < 405
534 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
536 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
539 else if res ==# 0# then EQ
545 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
547 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
550 else if res ==# 0# then EQ
554 #if __GLASGOW_HASKELL__ < 405
555 ba1 = ByteArray ((error "")::(Int,Int)) bs1
557 ba1 = ByteArray (error "") ((error "")::Int) bs1
561 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
562 = -- try them the other way 'round
563 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
567 Outputting @FastString@s is quick, just block copying the chunk (using
571 hPutFS :: Handle -> FastString -> IO ()
572 #if __GLASGOW_HASKELL__ <= 302
573 hPutFS handle (FastString _ l# ba#) =
577 readHandle handle >>= \ htype ->
579 ErrorHandle ioError ->
580 writeHandle handle htype >>
583 writeHandle handle htype >>
584 fail MkIOError(handle,IllegalOperation,"handle is closed")
585 SemiClosedHandle _ _ ->
586 writeHandle handle htype >>
587 fail MkIOError(handle,IllegalOperation,"handle is closed")
589 writeHandle handle htype >>
590 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
592 let fp = filePtr htype in
594 #if __GLASGOW_HASKELL__ < 405
595 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
597 _ccall_ writeFile (ByteArray ((error "")::Int) ((error "")::Int) ba#) fp (I# l#) >>= \rc ->
602 constructError "hPutFS" >>= \ err ->
604 hPutFS handle (CharStr a# l#) =
608 readHandle handle >>= \ htype ->
610 ErrorHandle ioError ->
611 writeHandle handle htype >>
614 writeHandle handle htype >>
615 fail MkIOError(handle,IllegalOperation,"handle is closed")
616 SemiClosedHandle _ _ ->
617 writeHandle handle htype >>
618 fail MkIOError(handle,IllegalOperation,"handle is closed")
620 writeHandle handle htype >>
621 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
623 let fp = filePtr htype in
625 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
629 constructError "hPutFS" >>= \ err ->
634 hPutFS handle (FastString _ l# ba#)
635 | l# ==# 0# = return ()
636 #if __GLASGOW_HASKELL__ < 405
637 | otherwise = hPutBufBA handle (ByteArray bot ba#) (I# l#)
638 #elif __GLASGOW_HASKELL__ < 407
639 | otherwise = hPutBufBA handle (ByteArray bot bot ba#) (I# l#)
641 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
642 hPutBufBAFull handle mba (I# l#)
645 bot = error "hPutFS.ba"
647 --ToDo: avoid silly code duplic.
649 hPutFS handle (CharStr a# l#)
650 | l# ==# 0# = return ()
651 #if __GLASGOW_HASKELL__ < 407
652 | otherwise = hPutBuf handle (A# a#) (I# l#)
654 | otherwise = hPutBufFull handle (A# a#) (I# l#)