2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
43 hPutFS -- :: Handle -> FastString -> IO ()
46 -- This #define suppresses the "import FastString" that
47 -- HsVersions otherwise produces
48 #define COMPILING_FAST_STRING
49 #include "HsVersions.h"
51 #if __GLASGOW_HASKELL__ < 301
53 import STBase ( StateAndPtr#(..) )
54 import IOHandle ( filePtr, readHandle, writeHandle )
55 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
61 import PrelST ( StateAndPtr#(..) )
62 import PrelHandle ( filePtr, readHandle, writeHandle )
63 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
71 import Addr ( Addr(..) )
72 import MutableArray ( MutableArray(..) )
73 import Foreign ( ForeignObj(..) )
74 import IOExts ( IORef, newIORef, readIORef, writeIORef )
77 #define hASH_TBL_SIZE 993
80 @FastString@s are packed representations of strings
81 with a unique id for fast comparisons. The unique id
82 is assigned when creating the @FastString@, using
83 a hash table to map from the character string representation
88 = FastString -- packed repr. on the heap.
90 -- 0 => string literal, comparison
95 | CharStr -- external C string
96 Addr# -- pointer to the (null-terminated) bytes in C land.
97 Int# -- length (cached)
99 instance Eq FastString where
100 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
101 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
103 instance Ord FastString where
104 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
105 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
106 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
107 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
112 compare a b = cmpFS a b
114 instance Text FastString where
115 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
116 showsPrec p ps r = showsPrec p (unpackFS ps) r
118 getByteArray# :: FastString -> ByteArray#
119 getByteArray# (FastString _ _ ba#) = ba#
121 getByteArray :: FastString -> ByteArray Int
122 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
124 lengthFS :: FastString -> Int
125 lengthFS (FastString _ l# _) = I# l#
126 lengthFS (CharStr a# l#) = I# l#
128 nullFastString :: FastString -> Bool
129 nullFastString (FastString _ l# _) = l# ==# 0#
130 nullFastString (CharStr _ l#) = l# ==# 0#
132 unpackFS :: FastString -> String
133 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
134 unpackFS (CharStr addr len#) =
139 | otherwise = C# ch : unpack (nh +# 1#)
141 ch = indexCharOffAddr# addr nh
143 appendFS :: FastString -> FastString -> FastString
144 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
146 concatFS :: [FastString] -> FastString
147 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
149 headFS :: FastString -> Char
150 headFS f@(FastString _ l# ba#) =
151 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
152 headFS f@(CharStr a# l#) =
153 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
155 tailFS :: FastString -> FastString
156 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
158 consFS :: Char -> FastString -> FastString
159 consFS c fs = mkFastString (c:unpackFS fs)
161 uniqueOfFS :: FastString -> Int#
162 uniqueOfFS (FastString u# _ _) = u#
163 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
165 [A somewhat moby hack]: to avoid entering all sorts
166 of junk into the hash table, all C char strings
167 are by default left out. The benefit of being in
168 the table is that string comparisons are lightning fast,
169 just an Int# comparison.
171 But, if you want to get the Unique of a CharStr, we
172 enter it into the table and return that unique. This
173 works, but causes the CharStr to be looked up in the hash
174 table each time it is accessed..
178 Internally, the compiler will maintain a fast string symbol
179 table, providing sharing and fast comparison. Creation of
180 new @FastString@s then covertly does a lookup, re-using the
181 @FastString@ if there was a hit.
184 data FastStringTable =
187 (MutableArray# RealWorld [FastString])
189 type FastStringTableVar = IORef FastStringTable
191 string_table :: FastStringTableVar
194 stToIO (newArray (0::Int,hASH_TBL_SIZE) []) >>= \ (MutableArray _ arr#) ->
195 newIORef (FastStringTable 0# arr#))
197 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
198 lookupTbl (FastStringTable _ arr#) i# =
200 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
203 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
204 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
205 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> IOok s2# () }) >>
206 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
208 mkFastString# :: Addr# -> Int# -> FastString
209 mkFastString# a# len# =
211 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
215 -- _trace ("hashed: "++show (I# h)) $
216 lookupTbl ft h >>= \ lookup_result ->
217 case lookup_result of
219 -- no match, add it to table by copying out the
220 -- the string into a ByteArray
221 -- _trace "empty bucket" $
222 case copyPrefixStr (A# a#) (I# len#) of
223 (ByteArray _ barr#) ->
224 let f_str = FastString uid# len# barr# in
225 updTbl string_table ft h [f_str] >>
226 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
228 -- non-empty `bucket', scan the list looking
229 -- entry with same length and compare byte by byte.
230 -- _trace ("non-empty bucket"++show ls) $
231 case bucket_match ls len# a# of
233 case copyPrefixStr (A# a#) (I# len#) of
234 (ByteArray _ barr#) ->
235 let f_str = FastString uid# len# barr# in
236 updTbl string_table ft h (f_str:ls) >>
237 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
238 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
240 bucket_match [] _ _ = Nothing
241 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
242 if len# ==# l# && eqStrPrefix a# ba# l# then
245 bucket_match ls len# a#
247 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
248 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
250 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
251 mkFastSubStringFO# fo# start# len# =
253 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
255 h = hashSubStrFO fo# start# len#
257 lookupTbl ft h >>= \ lookup_result ->
258 case lookup_result of
260 -- no match, add it to table by copying out the
261 -- the string into a ByteArray
262 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
263 (ByteArray _ barr#) ->
264 let f_str = FastString uid# len# barr# in
265 updTbl string_table ft h [f_str] >>
268 -- non-empty `bucket', scan the list looking
269 -- entry with same length and compare byte by byte.
270 case bucket_match ls start# len# fo# of
272 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
273 (ByteArray _ barr#) ->
274 let f_str = FastString uid# len# barr# in
275 updTbl string_table ft h (f_str:ls) >>
276 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
277 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
279 bucket_match [] _ _ _ = Nothing
280 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
281 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
284 bucket_match ls start# len# fo#
287 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
288 mkFastSubStringBA# barr# start# len# =
290 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
292 h = hashSubStrBA barr# start# len#
294 -- _trace ("hashed(b): "++show (I# h)) $
295 lookupTbl ft h >>= \ lookup_result ->
296 case lookup_result of
298 -- no match, add it to table by copying out the
299 -- the string into a ByteArray
300 -- _trace "empty bucket(b)" $
301 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
303 let f_str = FastString uid# len# ba# in
304 updTbl string_table ft h [f_str] >>
305 -- _trace ("new(b): " ++ show f_str) $
308 -- non-empty `bucket', scan the list looking
309 -- entry with same length and compare byte by byte.
310 -- _trace ("non-empty bucket(b)"++show ls) $
311 case bucket_match ls start# len# barr# of
313 case copySubStrBA (ByteArray (error "") barr#) (I# start#) (I# len#) of
315 let f_str = FastString uid# len# ba# in
316 updTbl string_table ft h (f_str:ls) >>
317 -- _trace ("new(b): " ++ show f_str) $
320 -- _trace ("re-use(b): "++show v) $
326 bucket_match [] _ _ _ = Nothing
327 bucket_match (v:ls) start# len# ba# =
329 FastString _ l# barr# ->
330 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
333 bucket_match ls start# len# ba#
335 mkFastCharString :: Addr -> FastString
336 mkFastCharString a@(A# a#) =
337 case strLength a of{ (I# len#) -> CharStr a# len# }
339 mkFastCharString# :: Addr# -> FastString
340 mkFastCharString# a# =
341 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
343 mkFastCharString2 :: Addr -> Int -> FastString
344 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
346 mkFastString :: String -> FastString
348 case packString str of
349 (ByteArray (_,I# len#) frozen#) ->
350 mkFastSubStringBA# frozen# 0# len#
351 {- 0-indexed array, len# == index to one beyond end of string,
352 i.e., (0,1) => empty string. -}
354 mkFastSubString :: Addr -> Int -> Int -> FastString
355 mkFastSubString (A# a#) (I# start#) (I# len#) =
356 mkFastString# (addrOffset# a# start#) len#
358 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
359 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
360 mkFastSubStringFO# fo# start# len#
364 hashStr :: Addr# -> Int# -> Int#
365 -- use the Addr to produce a hash value between 0 & m (inclusive)
369 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
370 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
371 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
373 c0 = indexCharOffAddr# a# 0#
374 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
375 c2 = indexCharOffAddr# a# (len# -# 1#)
377 c1 = indexCharOffAddr# a# 1#
378 c2 = indexCharOffAddr# a# 2#
381 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
382 -- use the FO to produce a hash value between 0 & m (inclusive)
383 hashSubStrFO fo# start# len# =
386 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
387 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
388 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
390 c0 = indexCharOffForeignObj# fo# 0#
391 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
392 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
394 -- c1 = indexCharOffFO# fo# 1#
395 -- c2 = indexCharOffFO# fo# 2#
398 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
399 -- use the byte array to produce a hash value between 0 & m (inclusive)
400 hashSubStrBA ba# start# len# =
403 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
404 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
405 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
407 c0 = indexCharArray# ba# 0#
408 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
409 c2 = indexCharArray# ba# (len# -# 1#)
411 -- c1 = indexCharArray# ba# 1#
412 -- c2 = indexCharArray# ba# 2#
417 cmpFS :: FastString -> FastString -> Ordering
418 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
423 _ccall_ strcmp (ByteArray bottom b1#) (ByteArray bottom b2#) >>= \ (I# res) ->
426 else if res ==# 0# then EQ
431 bottom = error "tagCmp"
432 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
434 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
437 else if res ==# 0# then EQ
443 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
445 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
448 else if res ==# 0# then EQ
452 ba1 = ByteArray ((error "")::(Int,Int)) bs1
455 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
456 = -- try them the other way 'round
457 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
461 Outputting @FastString@s is quick, just block copying the chunk (using
465 hPutFS :: Handle -> FastString -> IO ()
466 hPutFS handle (FastString _ l# ba#) =
470 readHandle handle >>= \ htype ->
472 ErrorHandle ioError ->
473 writeHandle handle htype >>
476 writeHandle handle htype >>
477 fail MkIOError(handle,IllegalOperation,"handle is closed")
478 SemiClosedHandle _ _ ->
479 writeHandle handle htype >>
480 fail MkIOError(handle,IllegalOperation,"handle is closed")
482 writeHandle handle htype >>
483 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
485 let fp = filePtr htype in
487 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
491 constructError "hPutFS" >>= \ err ->
493 hPutFS handle (CharStr a# l#) =
497 readHandle handle >>= \ htype ->
499 ErrorHandle ioError ->
500 writeHandle handle htype >>
503 writeHandle handle htype >>
504 fail MkIOError(handle,IllegalOperation,"handle is closed")
505 SemiClosedHandle _ _ ->
506 writeHandle handle htype >>
507 fail MkIOError(handle,IllegalOperation,"handle is closed")
509 writeHandle handle htype >>
510 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
512 let fp = filePtr htype in
514 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
518 constructError "hPutFS" >>= \ err ->
521 --ToDo: avoid silly code duplic.