2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
17 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
19 -- These ones hold on to the Addr after they return, and aren't hashed;
20 -- they are used for literals
21 mkFastCharString, -- :: Addr -> FastString
22 mkFastCharString#, -- :: Addr# -> FastString
23 mkFastCharString2, -- :: Addr -> Int -> FastString
25 mkFastString#, -- :: Addr# -> Int# -> FastString
26 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
27 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
28 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
30 uniqueOfFS, -- :: FastString -> Int#
31 lengthFS, -- :: FastString -> Int
32 nullFastString, -- :: FastString -> Bool
34 getByteArray#, -- :: FastString -> ByteArray#
35 getByteArray, -- :: FastString -> _ByteArray Int
36 unpackFS, -- :: FastString -> String
37 appendFS, -- :: FastString -> FastString -> FastString
38 headFS, -- :: FastString -> Char
39 tailFS, -- :: FastString -> FastString
40 concatFS, -- :: [FastString] -> FastString
41 consFS, -- :: Char -> FastString -> FastString
43 hPutFS -- :: Handle -> FastString -> IO ()
46 -- This #define suppresses the "import FastString" that
47 -- HsVersions otherwise produces
48 #define COMPILING_FAST_STRING
49 #include "HsVersions.h"
51 #if __GLASGOW_HASKELL__ < 301
53 import STBase ( StateAndPtr#(..) )
54 import IOHandle ( filePtr, readHandle, writeHandle )
55 import IOBase ( Handle__(..), IOError(..), IOErrorType(..),
61 import PrelST ( StateAndPtr#(..) )
62 import PrelHandle ( readHandle,
63 #if __GLASGOW_HASKELL__ < 303
68 import PrelIOBase ( Handle__(..), IOError(..), IOErrorType(..),
70 #if __GLASGOW_HASKELL__ >= 303
79 import Addr ( Addr(..) )
80 import MutableArray ( MutableArray(..) )
81 import Foreign ( ForeignObj(..) )
82 import IOExts ( IORef, newIORef, readIORef, writeIORef )
85 #define hASH_TBL_SIZE 993
88 @FastString@s are packed representations of strings
89 with a unique id for fast comparisons. The unique id
90 is assigned when creating the @FastString@, using
91 a hash table to map from the character string representation
96 = FastString -- packed repr. on the heap.
98 -- 0 => string literal, comparison
103 | CharStr -- external C string
104 Addr# -- pointer to the (null-terminated) bytes in C land.
105 Int# -- length (cached)
107 instance Eq FastString where
108 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
109 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
111 instance Ord FastString where
112 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
113 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
114 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
115 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
120 compare a b = cmpFS a b
122 instance Text FastString where
123 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
124 showsPrec p ps r = showsPrec p (unpackFS ps) r
126 getByteArray# :: FastString -> ByteArray#
127 getByteArray# (FastString _ _ ba#) = ba#
129 getByteArray :: FastString -> ByteArray Int
130 getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
132 lengthFS :: FastString -> Int
133 lengthFS (FastString _ l# _) = I# l#
134 lengthFS (CharStr a# l#) = I# l#
136 nullFastString :: FastString -> Bool
137 nullFastString (FastString _ l# _) = l# ==# 0#
138 nullFastString (CharStr _ l#) = l# ==# 0#
140 unpackFS :: FastString -> String
141 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
142 unpackFS (CharStr addr len#) =
147 | otherwise = C# ch : unpack (nh +# 1#)
149 ch = indexCharOffAddr# addr nh
151 appendFS :: FastString -> FastString -> FastString
152 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
154 concatFS :: [FastString] -> FastString
155 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
157 headFS :: FastString -> Char
158 headFS f@(FastString _ l# ba#) =
159 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
160 headFS f@(CharStr a# l#) =
161 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
163 tailFS :: FastString -> FastString
164 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
166 consFS :: Char -> FastString -> FastString
167 consFS c fs = mkFastString (c:unpackFS fs)
169 uniqueOfFS :: FastString -> Int#
170 uniqueOfFS (FastString u# _ _) = u#
171 uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ -> u#} -- Ugh!
173 [A somewhat moby hack]: to avoid entering all sorts
174 of junk into the hash table, all C char strings
175 are by default left out. The benefit of being in
176 the table is that string comparisons are lightning fast,
177 just an Int# comparison.
179 But, if you want to get the Unique of a CharStr, we
180 enter it into the table and return that unique. This
181 works, but causes the CharStr to be looked up in the hash
182 table each time it is accessed..
186 Internally, the compiler will maintain a fast string symbol
187 table, providing sharing and fast comparison. Creation of
188 new @FastString@s then covertly does a lookup, re-using the
189 @FastString@ if there was a hit.
192 data FastStringTable =
195 (MutableArray# RealWorld [FastString])
197 type FastStringTableVar = IORef FastStringTable
199 string_table :: FastStringTableVar
202 stToIO (newArray (0::Int,hASH_TBL_SIZE) []) >>= \ (MutableArray _ arr#) ->
203 newIORef (FastStringTable 0# arr#))
205 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
206 lookupTbl (FastStringTable _ arr#) i# =
208 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
211 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
212 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
213 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> IOok s2# () }) >>
214 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
216 mkFastString# :: Addr# -> Int# -> FastString
217 mkFastString# a# len# =
219 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
223 -- _trace ("hashed: "++show (I# h)) $
224 lookupTbl ft h >>= \ lookup_result ->
225 case lookup_result of
227 -- no match, add it to table by copying out the
228 -- the string into a ByteArray
229 -- _trace "empty bucket" $
230 case copyPrefixStr (A# a#) (I# len#) of
231 (ByteArray _ barr#) ->
232 let f_str = FastString uid# len# barr# in
233 updTbl string_table ft h [f_str] >>
234 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
236 -- non-empty `bucket', scan the list looking
237 -- entry with same length and compare byte by byte.
238 -- _trace ("non-empty bucket"++show ls) $
239 case bucket_match ls len# a# of
241 case copyPrefixStr (A# a#) (I# len#) of
242 (ByteArray _ barr#) ->
243 let f_str = FastString uid# len# barr# in
244 updTbl string_table ft h (f_str:ls) >>
245 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
246 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
248 bucket_match [] _ _ = Nothing
249 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
250 if len# ==# l# && eqStrPrefix a# ba# l# then
253 bucket_match ls len# a#
255 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
256 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
258 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
259 mkFastSubStringFO# fo# start# len# =
261 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
263 h = hashSubStrFO fo# start# len#
265 lookupTbl ft h >>= \ lookup_result ->
266 case lookup_result of
268 -- no match, add it to table by copying out the
269 -- the string into a ByteArray
270 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
271 (ByteArray _ barr#) ->
272 let f_str = FastString uid# len# barr# in
273 updTbl string_table ft h [f_str] >>
276 -- non-empty `bucket', scan the list looking
277 -- entry with same length and compare byte by byte.
278 case bucket_match ls start# len# fo# of
280 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
281 (ByteArray _ barr#) ->
282 let f_str = FastString uid# len# barr# in
283 updTbl string_table ft h (f_str:ls) >>
284 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
285 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
287 bucket_match [] _ _ _ = Nothing
288 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
289 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
292 bucket_match ls start# len# fo#
295 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
296 mkFastSubStringBA# barr# start# len# =
298 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
300 h = hashSubStrBA barr# start# len#
302 -- _trace ("hashed(b): "++show (I# h)) $
303 lookupTbl ft h >>= \ lookup_result ->
304 case lookup_result of
306 -- no match, add it to table by copying out the
307 -- the string into a ByteArray
308 -- _trace "empty bucket(b)" $
309 case copySubStrBA (ByteArray btm barr#) (I# start#) (I# len#) of
311 let f_str = FastString uid# len# ba# in
312 updTbl string_table ft h [f_str] >>
313 -- _trace ("new(b): " ++ show f_str) $
316 -- non-empty `bucket', scan the list looking
317 -- entry with same length and compare byte by byte.
318 -- _trace ("non-empty bucket(b)"++show ls) $
319 case bucket_match ls start# len# barr# of
321 case copySubStrBA (ByteArray (error "") barr#) (I# start#) (I# len#) of
323 let f_str = FastString uid# len# ba# in
324 updTbl string_table ft h (f_str:ls) >>
325 -- _trace ("new(b): " ++ show f_str) $
328 -- _trace ("re-use(b): "++show v) $
334 bucket_match [] _ _ _ = Nothing
335 bucket_match (v:ls) start# len# ba# =
337 FastString _ l# barr# ->
338 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
341 bucket_match ls start# len# ba#
343 mkFastCharString :: Addr -> FastString
344 mkFastCharString a@(A# a#) =
345 case strLength a of{ (I# len#) -> CharStr a# len# }
347 mkFastCharString# :: Addr# -> FastString
348 mkFastCharString# a# =
349 case strLength (A# a#) of { (I# len#) -> CharStr a# len# }
351 mkFastCharString2 :: Addr -> Int -> FastString
352 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
354 mkFastString :: String -> FastString
356 case packString str of
357 (ByteArray (_,I# len#) frozen#) ->
358 mkFastSubStringBA# frozen# 0# len#
359 {- 0-indexed array, len# == index to one beyond end of string,
360 i.e., (0,1) => empty string. -}
362 mkFastSubString :: Addr -> Int -> Int -> FastString
363 mkFastSubString (A# a#) (I# start#) (I# len#) =
364 mkFastString# (addrOffset# a# start#) len#
366 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
367 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
368 mkFastSubStringFO# fo# start# len#
372 hashStr :: Addr# -> Int# -> Int#
373 -- use the Addr to produce a hash value between 0 & m (inclusive)
377 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
378 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
379 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
381 c0 = indexCharOffAddr# a# 0#
382 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
383 c2 = indexCharOffAddr# a# (len# -# 1#)
385 c1 = indexCharOffAddr# a# 1#
386 c2 = indexCharOffAddr# a# 2#
389 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
390 -- use the FO to produce a hash value between 0 & m (inclusive)
391 hashSubStrFO fo# start# len# =
394 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
395 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
396 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
398 c0 = indexCharOffForeignObj# fo# 0#
399 c1 = indexCharOffForeignObj# fo# (len# `quotInt#` 2# -# 1#)
400 c2 = indexCharOffForeignObj# fo# (len# -# 1#)
402 -- c1 = indexCharOffFO# fo# 1#
403 -- c2 = indexCharOffFO# fo# 2#
406 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
407 -- use the byte array to produce a hash value between 0 & m (inclusive)
408 hashSubStrBA ba# start# len# =
411 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
412 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
413 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
415 c0 = indexCharArray# ba# 0#
416 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
417 c2 = indexCharArray# ba# (len# -# 1#)
419 -- c1 = indexCharArray# ba# 1#
420 -- c2 = indexCharArray# ba# 2#
425 cmpFS :: FastString -> FastString -> Ordering
426 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
431 _ccall_ strcmp (ByteArray bottom b1#) (ByteArray bottom b2#) >>= \ (I# res) ->
434 else if res ==# 0# then EQ
439 bottom = error "tagCmp"
440 cmpFS (CharStr bs1 len1) (CharStr bs2 len2)
442 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
445 else if res ==# 0# then EQ
451 cmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
453 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
456 else if res ==# 0# then EQ
460 ba1 = ByteArray ((error "")::(Int,Int)) bs1
463 cmpFS a@(CharStr _ _) b@(FastString _ _ _)
464 = -- try them the other way 'round
465 case (cmpFS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
469 Outputting @FastString@s is quick, just block copying the chunk (using
473 hPutFS :: Handle -> FastString -> IO ()
474 #if __GLASGOW_HASKELL__ <= 302
475 hPutFS handle (FastString _ l# ba#) =
479 readHandle handle >>= \ htype ->
481 ErrorHandle ioError ->
482 writeHandle handle htype >>
485 writeHandle handle htype >>
486 fail MkIOError(handle,IllegalOperation,"handle is closed")
487 SemiClosedHandle _ _ ->
488 writeHandle handle htype >>
489 fail MkIOError(handle,IllegalOperation,"handle is closed")
491 writeHandle handle htype >>
492 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
494 let fp = filePtr htype in
496 _ccall_ writeFile (ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) >>= \rc ->
500 constructError "hPutFS" >>= \ err ->
502 hPutFS handle (CharStr a# l#) =
506 readHandle handle >>= \ htype ->
508 ErrorHandle ioError ->
509 writeHandle handle htype >>
512 writeHandle handle htype >>
513 fail MkIOError(handle,IllegalOperation,"handle is closed")
514 SemiClosedHandle _ _ ->
515 writeHandle handle htype >>
516 fail MkIOError(handle,IllegalOperation,"handle is closed")
518 writeHandle handle htype >>
519 fail MkIOError(handle,IllegalOperation,"handle is not open for writing")
521 let fp = filePtr htype in
523 _ccall_ writeFile (A# a#) fp (I# l#) >>= \rc ->
527 constructError "hPutFS" >>= \ err ->
530 hPutFS handle (FastString _ l# ba#)
531 | l# ==# 0# = return ()
532 | otherwise = hPutBufBA handle (ByteArray bottom ba#) (I# l#)
534 bottom = error "hPutFS.ba"
536 hPutFS handle (CharStr a# l#)
537 | l# ==# 0# = return ()
538 | otherwise = hPutBuf handle (A# a#) (I# l#)