2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
43 hPutFS -- :: Handle -> FastString -> IO ()
46 -- This #define suppresses the "import FastString" that
47 -- HsVersions otherwise produces
48 #define COMPILING_FAST_STRING
49 #include "HsVersions.h"
51 #if __GLASGOW_HASKELL__ < 301
53 import STBase ( StateAndPtr#(..) )
54 import IOHandle ( filePtr, readHandle, writeHandle )
55 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
61 import PrelST ( StateAndPtr#(..) )
62 import PrelHandle ( readHandle,
63 #if __GLASGOW_HASKELL__ < 303
68 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
70 #if __GLASGOW_HASKELL__ >= 303
79 import Addr ( Addr(..) )
80 import MutableArray ( MutableArray(..) )
82 -- ForeignObj is now exported abstractly.
83 #if __GLASGOW_HASKELL__ >= 303
84 import qualified PrelForeign as Foreign ( ForeignObj(..) )
86 import Foreign ( ForeignObj(..) )
89 import IOExts ( IORef, newIORef, readIORef, writeIORef )
92 #define hASH_TBL_SIZE 993
95 @FastString@s are packed representations of strings
96 with a unique id for fast comparisons. The unique id
97 is assigned when creating the @FastString@, using
98 a hash table to map from the character string representation
103 = FastString -- packed repr. on the heap.
105 -- 0 => string literal, comparison
110 | CharStr -- external C string
111 Addr# -- pointer to the (null-terminated) bytes in C land.
112 Int# -- length (cached)
114 instance Eq FastString where
115 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
116 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
118 instance Ord FastString where
119 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
120 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
121 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
122 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
127 compare a b = cmpFS a b
129 instance Text FastString where
130 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
131 showsPrec p ps r = showsPrec p (unpackFS ps) r
133 getByteArray# :: FastString -> ByteArray#
134 getByteArray# (FastString _ _ ba#) = ba#
136 getByteArray :: FastString -> ByteArray Int
137 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
139 lengthFS :: FastString -> Int
140 lengthFS (FastString _ l# _) = I# l#
141 lengthFS (CharStr a# l#) = I# l#
143 nullFastString :: FastString -> Bool
144 nullFastString (FastString _ l# _) = l# ==# 0#
145 nullFastString (CharStr _ l#) = l# ==# 0#
147 unpackFS :: FastString -> String
148 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
149 unpackFS (CharStr addr len#) =
154 | otherwise = C# ch : unpack (nh +# 1#)
156 ch = indexCharOffAddr# addr nh
158 appendFS :: FastString -> FastString -> FastString
159 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
161 concatFS :: [FastString] -> FastString
162 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
164 headFS :: FastString -> Char
165 headFS f@(FastString _ l# ba#) =
166 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
167 headFS f@(CharStr a# l#) =
168 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
170 tailFS :: FastString -> FastString
171 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
173 consFS :: Char -> FastString -> FastString
174 consFS c fs = mkFastString (c:unpackFS fs)
176 uniqueOfFS :: FastString -> Int#
177 uniqueOfFS (FastString u# _ _) = u#
178 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
180 [A somewhat moby hack]: to avoid entering all sorts
181 of junk into the hash table, all C char strings
182 are by default left out. The benefit of being in
183 the table is that string comparisons are lightning fast,
184 just an Int# comparison.
186 But, if you want to get the Unique of a CharStr, we
187 enter it into the table and return that unique. This
188 works, but causes the CharStr to be looked up in the hash
189 table each time it is accessed..
193 Internally, the compiler will maintain a fast string symbol
194 table, providing sharing and fast comparison. Creation of
195 new @FastString@s then covertly does a lookup, re-using the
196 @FastString@ if there was a hit.
199 data FastStringTable =
202 (MutableArray# RealWorld [FastString])
204 type FastStringTableVar = IORef FastStringTable
206 string_table :: FastStringTableVar
209 stToIO (newArray (0::Int,hASH_TBL_SIZE) []) >>= \ (MutableArray _ arr#) ->
210 newIORef (FastStringTable 0# arr#))
212 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
213 lookupTbl (FastStringTable _ arr#) i# =
215 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
218 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
219 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
220 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> IOok s2# () }) >>
221 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
223 mkFastString# :: Addr# -> Int# -> FastString
224 mkFastString# a# len# =
226 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
230 -- _trace ("hashed: "++show (I# h)) $
231 lookupTbl ft h >>= \ lookup_result ->
232 case lookup_result of
234 -- no match, add it to table by copying out the
235 -- the string into a ByteArray
236 -- _trace "empty bucket" $
237 case copyPrefixStr (A# a#) (I# len#) of
238 (ByteArray _ barr#) ->
239 let f_str = FastString uid# len# barr# in
240 updTbl string_table ft h [f_str] >>
241 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
243 -- non-empty `bucket', scan the list looking
244 -- entry with same length and compare byte by byte.
245 -- _trace ("non-empty bucket"++show ls) $
246 case bucket_match ls len# a# of
248 case copyPrefixStr (A# a#) (I# len#) of
249 (ByteArray _ barr#) ->
250 let f_str = FastString uid# len# barr# in
251 updTbl string_table ft h (f_str:ls) >>
252 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
253 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
255 bucket_match [] _ _ = Nothing
256 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
257 if len# ==# l# && eqStrPrefix a# ba# l# then
260 bucket_match ls len# a#
262 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
263 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
265 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
266 mkFastSubStringFO# fo# start# len# =
268 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
270 h = hashSubStrFO fo# start# len#
272 lookupTbl ft h >>= \ lookup_result ->
273 case lookup_result of
275 -- no match, add it to table by copying out the
276 -- the string into a ByteArray
277 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
278 (ByteArray _ barr#) ->
279 let f_str = FastString uid# len# barr# in
280 updTbl string_table ft h [f_str] >>
283 -- non-empty `bucket', scan the list looking
284 -- entry with same length and compare byte by byte.
285 case bucket_match ls start# len# fo# of
287 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
288 (ByteArray _ barr#) ->
289 let f_str = FastString uid# len# barr# in
290 updTbl string_table ft h (f_str:ls) >>
291 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
292 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
294 bucket_match [] _ _ _ = Nothing
295 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
296 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
299 bucket_match ls start# len# fo#
302 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
303 mkFastSubStringBA# barr# start# len# =
305 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
307 h = hashSubStrBA barr# start# len#
309 -- _trace ("hashed(b): "++show (I# h)) $
310 lookupTbl ft h >>= \ lookup_result ->
311 case lookup_result of
313 -- no match, add it to table by copying out the
314 -- the string into a ByteArray
315 -- _trace "empty bucket(b)" $
316 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
318 let f_str = FastString uid# len# ba# in
319 updTbl string_table ft h [f_str] >>
320 -- _trace ("new(b): " ++ show f_str) $
323 -- non-empty `bucket', scan the list looking
324 -- entry with same length and compare byte by byte.
325 -- _trace ("non-empty bucket(b)"++show ls) $
326 case bucket_match ls start# len# barr# of
328 case copySubStrBA (ByteArray (error "") barr#) (I# start#) (I# len#) of
330 let f_str = FastString uid# len# ba# in
331 updTbl string_table ft h (f_str:ls) >>
332 -- _trace ("new(b): " ++ show f_str) $
335 -- _trace ("re-use(b): "++show v) $
341 bucket_match [] _ _ _ = Nothing
342 bucket_match (v:ls) start# len# ba# =
344 FastString _ l# barr# ->
345 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
348 bucket_match ls start# len# ba#
350 mkFastCharString :: Addr -> FastString
351 mkFastCharString a@(A# a#) =
352 case strLength a of{ (I# len#) -> CharStr a# len# }
354 mkFastCharString# :: Addr# -> FastString
355 mkFastCharString# a# =
356 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
358 mkFastCharString2 :: Addr -> Int -> FastString
359 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
361 mkFastString :: String -> FastString
363 case packString str of
364 (ByteArray (_,I# len#) frozen#) ->
365 mkFastSubStringBA# frozen# 0# len#
366 {- 0-indexed array, len# == index to one beyond end of string,
367 i.e., (0,1) => empty string. -}
369 mkFastSubString :: Addr -> Int -> Int -> FastString
370 mkFastSubString (A# a#) (I# start#) (I# len#) =
371 mkFastString# (addrOffset# a# start#) len#
373 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
374 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
375 mkFastSubStringFO# fo# start# len#
379 hashStr :: Addr# -> Int# -> Int#
380 -- use the Addr to produce a hash value between 0 & m (inclusive)
384 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
385 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
386 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
388 c0 = indexCharOffAddr# a# 0#
389 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
390 c2 = indexCharOffAddr# a# (len# -# 1#)
392 c1 = indexCharOffAddr# a# 1#
393 c2 = indexCharOffAddr# a# 2#
396 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
397 -- use the FO to produce a hash value between 0 & m (inclusive)
398 hashSubStrFO fo# start# len# =
401 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
402 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
403 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
405 c0 = indexCharOffForeignObj# fo# 0#
406 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
407 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
409 -- c1 = indexCharOffFO# fo# 1#
410 -- c2 = indexCharOffFO# fo# 2#
413 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
414 -- use the byte array to produce a hash value between 0 & m (inclusive)
415 hashSubStrBA ba# start# len# =
418 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
419 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
420 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
422 c0 = indexCharArray# ba# 0#
423 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
424 c2 = indexCharArray# ba# (len# -# 1#)
426 -- c1 = indexCharArray# ba# 1#
427 -- c2 = indexCharArray# ba# 2#
432 cmpFS :: FastString -> FastString -> Ordering
433 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
438 _ccall_ strcmp (ByteArray bottom b1#) (ByteArray bottom b2#) >>= \ (I# res) ->
441 else if res ==# 0# then EQ
446 bottom = error "tagCmp"
447 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
449 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
452 else if res ==# 0# then EQ
458 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
460 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
463 else if res ==# 0# then EQ
467 ba1 = ByteArray ((error "")::(Int,Int)) bs1
470 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
471 = -- try them the other way 'round
472 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
476 Outputting @FastString@s is quick, just block copying the chunk (using
480 hPutFS :: Handle -> FastString -> IO ()
481 #if __GLASGOW_HASKELL__ <= 302
482 hPutFS handle (FastString _ l# ba#) =
486 readHandle handle >>= \ htype ->
488 ErrorHandle ioError ->
489 writeHandle handle htype >>
492 writeHandle handle htype >>
493 fail MkIOError(handle,IllegalOperation,"handle is closed")
494 SemiClosedHandle _ _ ->
495 writeHandle handle htype >>
496 fail MkIOError(handle,IllegalOperation,"handle is closed")
498 writeHandle handle htype >>
499 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
501 let fp = filePtr htype in
503 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
507 constructError "hPutFS" >>= \ err ->
509 hPutFS handle (CharStr a# l#) =
513 readHandle handle >>= \ htype ->
515 ErrorHandle ioError ->
516 writeHandle handle htype >>
519 writeHandle handle htype >>
520 fail MkIOError(handle,IllegalOperation,"handle is closed")
521 SemiClosedHandle _ _ ->
522 writeHandle handle htype >>
523 fail MkIOError(handle,IllegalOperation,"handle is closed")
525 writeHandle handle htype >>
526 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
528 let fp = filePtr htype in
530 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
534 constructError "hPutFS" >>= \ err ->
537 hPutFS handle (FastString _ l# ba#)
538 | l# ==# 0# = return ()
539 | otherwise = hPutBufBA handle (ByteArray bottom ba#) (I# l#)
541 bottom = error "hPutFS.ba"
543 hPutFS handle (CharStr a# l#)
544 | l# ==# 0# = return ()
545 | otherwise = hPutBuf handle (A# a#) (I# l#)