2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
43 hPutFS -- :: Handle -> FastString -> IO ()
46 -- This #define suppresses the "import FastString" that
47 -- HsVersions otherwise produces
48 #define COMPILING_FAST_STRING
49 #include "HsVersions.h"
54 import Addr ( Addr(..) )
55 import STBase ( StateAndPtr#(..) )
56 import ArrBase ( MutableArray(..) )
57 import Foreign ( ForeignObj(..) )
58 import IOExts ( IOArray(..), newIOArray,
59 IORef, newIORef, readIORef, writeIORef
62 import IOHandle ( filePtr, readHandle, writeHandle )
63 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
68 #define hASH_TBL_SIZE 993
71 @FastString@s are packed representations of strings
72 with a unique id for fast comparisons. The unique id
73 is assigned when creating the @FastString@, using
74 a hash table to map from the character string representation
79 = FastString -- packed repr. on the heap.
81 -- 0 => string literal, comparison
86 | CharStr -- external C string
87 Addr# -- pointer to the (null-terminated) bytes in C land.
88 Int# -- length (cached)
90 instance Eq FastString where
91 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
92 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
94 instance Ord FastString where
95 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
96 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
97 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
98 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
103 compare a b = cmpFS a b
105 instance Text FastString where
106 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
107 showsPrec p ps r = showsPrec p (unpackFS ps) r
109 getByteArray# :: FastString -> ByteArray#
110 getByteArray# (FastString _ _ ba#) = ba#
112 getByteArray :: FastString -> ByteArray Int
113 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
115 lengthFS :: FastString -> Int
116 lengthFS (FastString _ l# _) = I# l#
117 lengthFS (CharStr a# l#) = I# l#
119 nullFastString :: FastString -> Bool
120 nullFastString (FastString _ l# _) = l# ==# 0#
121 nullFastString (CharStr _ l#) = l# ==# 0#
123 unpackFS :: FastString -> String
124 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
125 unpackFS (CharStr addr len#) =
130 | otherwise = C# ch : unpack (nh +# 1#)
132 ch = indexCharOffAddr# addr nh
134 appendFS :: FastString -> FastString -> FastString
135 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
137 concatFS :: [FastString] -> FastString
138 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
140 headFS :: FastString -> Char
141 headFS f@(FastString _ l# ba#) =
142 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
143 headFS f@(CharStr a# l#) =
144 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
146 tailFS :: FastString -> FastString
147 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
149 consFS :: Char -> FastString -> FastString
150 consFS c fs = mkFastString (c:unpackFS fs)
152 uniqueOfFS :: FastString -> Int#
153 uniqueOfFS (FastString u# _ _) = u#
154 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
156 [A somewhat moby hack]: to avoid entering all sorts
157 of junk into the hash table, all C char strings
158 are by default left out. The benefit of being in
159 the table is that string comparisons are lightning fast,
160 just an Int# comparison.
162 But, if you want to get the Unique of a CharStr, we
163 enter it into the table and return that unique. This
164 works, but causes the CharStr to be looked up in the hash
165 table each time it is accessed..
169 Internally, the compiler will maintain a fast string symbol
170 table, providing sharing and fast comparison. Creation of
171 new @FastString@s then covertly does a lookup, re-using the
172 @FastString@ if there was a hit.
175 data FastStringTable =
178 (MutableArray# RealWorld [FastString])
180 type FastStringTableVar = IORef FastStringTable
182 string_table :: FastStringTableVar
185 stToIO (newArray (0::Int,hASH_TBL_SIZE) []) >>= \ (MutableArray _ arr#) ->
186 newIORef (FastStringTable 0# arr#))
188 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
189 lookupTbl (FastStringTable _ arr#) i# =
191 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
194 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
195 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
196 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> IOok s2# () }) >>
197 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
199 mkFastString# :: Addr# -> Int# -> FastString
200 mkFastString# a# len# =
202 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
206 -- _trace ("hashed: "++show (I# h)) $
207 lookupTbl ft h >>= \ lookup_result ->
208 case lookup_result of
210 -- no match, add it to table by copying out the
211 -- the string into a ByteArray
212 -- _trace "empty bucket" $
213 case copyPrefixStr (A# a#) (I# len#) of
214 (ByteArray _ barr#) ->
215 let f_str = FastString uid# len# barr# in
216 updTbl string_table ft h [f_str] >>
217 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
219 -- non-empty `bucket', scan the list looking
220 -- entry with same length and compare byte by byte.
221 -- _trace ("non-empty bucket"++show ls) $
222 case bucket_match ls len# a# of
224 case copyPrefixStr (A# a#) (I# len#) of
225 (ByteArray _ barr#) ->
226 let f_str = FastString uid# len# barr# in
227 updTbl string_table ft h (f_str:ls) >>
228 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
229 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
231 bucket_match [] _ _ = Nothing
232 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
233 if len# ==# l# && eqStrPrefix a# ba# l# then
236 bucket_match ls len# a#
238 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
239 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
241 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
242 mkFastSubStringFO# fo# start# len# =
244 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
246 h = hashSubStrFO fo# start# len#
248 lookupTbl ft h >>= \ lookup_result ->
249 case lookup_result of
251 -- no match, add it to table by copying out the
252 -- the string into a ByteArray
253 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
254 (ByteArray _ barr#) ->
255 let f_str = FastString uid# len# barr# in
256 updTbl string_table ft h [f_str] >>
259 -- non-empty `bucket', scan the list looking
260 -- entry with same length and compare byte by byte.
261 case bucket_match ls start# len# fo# of
263 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
264 (ByteArray _ barr#) ->
265 let f_str = FastString uid# len# barr# in
266 updTbl string_table ft h (f_str:ls) >>
267 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
268 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
270 bucket_match [] _ _ _ = Nothing
271 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
272 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
275 bucket_match ls start# len# fo#
278 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
279 mkFastSubStringBA# barr# start# len# =
281 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
283 h = hashSubStrBA barr# start# len#
285 -- _trace ("hashed(b): "++show (I# h)) $
286 lookupTbl ft h >>= \ lookup_result ->
287 case lookup_result of
289 -- no match, add it to table by copying out the
290 -- the string into a ByteArray
291 -- _trace "empty bucket(b)" $
292 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
294 let f_str = FastString uid# len# ba# in
295 updTbl string_table ft h [f_str] >>
296 -- _trace ("new(b): " ++ show f_str) $
299 -- non-empty `bucket', scan the list looking
300 -- entry with same length and compare byte by byte.
301 -- _trace ("non-empty bucket(b)"++show ls) $
302 case bucket_match ls start# len# barr# of
304 case copySubStrBA (ByteArray (error "") barr#) (I# start#) (I# len#) of
306 let f_str = FastString uid# len# ba# in
307 updTbl string_table ft h (f_str:ls) >>
308 -- _trace ("new(b): " ++ show f_str) $
311 -- _trace ("re-use(b): "++show v) $
317 bucket_match [] _ _ _ = Nothing
318 bucket_match (v:ls) start# len# ba# =
320 FastString _ l# barr# ->
321 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
324 bucket_match ls start# len# ba#
326 mkFastCharString :: Addr -> FastString
327 mkFastCharString a@(A# a#) =
328 case strLength a of{ (I# len#) -> CharStr a# len# }
330 mkFastCharString# :: Addr# -> FastString
331 mkFastCharString# a# =
332 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
334 mkFastCharString2 :: Addr -> Int -> FastString
335 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
337 mkFastString :: String -> FastString
339 case packString str of
340 (ByteArray (_,I# len#) frozen#) ->
341 mkFastSubStringBA# frozen# 0# len#
342 {- 0-indexed array, len# == index to one beyond end of string,
343 i.e., (0,1) => empty string. -}
345 mkFastSubString :: Addr -> Int -> Int -> FastString
346 mkFastSubString (A# a#) (I# start#) (I# len#) =
347 mkFastString# (addrOffset# a# start#) len#
349 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
350 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
351 mkFastSubStringFO# fo# start# len#
355 hashStr :: Addr# -> Int# -> Int#
356 -- use the Addr to produce a hash value between 0 & m (inclusive)
360 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
361 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
362 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
364 c0 = indexCharOffAddr# a# 0#
365 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
366 c2 = indexCharOffAddr# a# (len# -# 1#)
368 c1 = indexCharOffAddr# a# 1#
369 c2 = indexCharOffAddr# a# 2#
372 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
373 -- use the FO to produce a hash value between 0 & m (inclusive)
374 hashSubStrFO fo# start# len# =
377 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
378 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
379 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
381 c0 = indexCharOffFO# fo# 0#
382 c1 = indexCharOffFO# fo# (len# `quotInt#` 2# -# 1#)
383 c2 = indexCharOffFO# fo# (len# -# 1#)
385 -- c1 = indexCharOffFO# fo# 1#
386 -- c2 = indexCharOffFO# fo# 2#
389 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
390 -- use the byte array to produce a hash value between 0 & m (inclusive)
391 hashSubStrBA ba# start# len# =
394 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
395 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
396 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
398 c0 = indexCharArray# ba# 0#
399 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
400 c2 = indexCharArray# ba# (len# -# 1#)
402 -- c1 = indexCharArray# ba# 1#
403 -- c2 = indexCharArray# ba# 2#
408 cmpFS :: FastString -> FastString -> Ordering
409 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
414 _ccall_ strcmp (ByteArray bottom b1#) (ByteArray bottom b2#) >>= \ (I# res) ->
417 else if res ==# 0# then EQ
422 bottom = error "tagCmp"
423 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
425 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
428 else if res ==# 0# then EQ
434 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
436 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
439 else if res ==# 0# then EQ
443 ba1 = ByteArray ((error "")::(Int,Int)) bs1
446 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
447 = -- try them the other way 'round
448 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
452 Outputting @FastString@s is quick, just block copying the chunk (using
456 hPutFS :: Handle -> FastString -> IO ()
457 hPutFS handle (FastString _ l# ba#) =
461 _readHandle handle >>= \ htype ->
463 ErrorHandle ioError ->
464 _writeHandle handle htype >>
467 _writeHandle handle htype >>
468 fail MkIOError(handle,IllegalOperation,"handle is closed")
469 SemiClosedHandle _ _ ->
470 _writeHandle handle htype >>
471 fail MkIOError(handle,IllegalOperation,"handle is closed")
473 _writeHandle handle htype >>
474 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
476 let fp = filePtr htype in
478 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
482 constructError "hPutFS" >>= \ err ->
484 hPutFS handle (CharStr a# l#) =
488 _readHandle handle >>= \ htype ->
490 ErrorHandle ioError ->
491 _writeHandle handle htype >>
494 _writeHandle handle htype >>
495 fail MkIOError(handle,IllegalOperation,"handle is closed")
496 SemiClosedHandle _ _ ->
497 _writeHandle handle htype >>
498 fail MkIOError(handle,IllegalOperation,"handle is closed")
500 _writeHandle handle htype >>
501 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
503 let fp = filePtr htype in
505 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
509 constructError "hPutFS" >>= \ err ->
512 --ToDo: avoid silly code duplic.