2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastCharString, -- :: _Addr -> FastString
17 mkFastCharString2, -- :: _Addr -> Int -> FastString
18 mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
19 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
21 mkFastString#, -- :: Addr# -> Int# -> FastString
22 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
23 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
24 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
26 lengthFS, -- :: FastString -> Int
27 nullFastString, -- :: FastString -> Bool
29 getByteArray#, -- :: FastString -> ByteArray#
30 getByteArray, -- :: FastString -> _ByteArray Int
31 unpackFS, -- :: FastString -> String
32 appendFS, -- :: FastString -> FastString -> FastString
33 headFS, -- :: FastString -> Char
34 tailFS, -- :: FastString -> FastString
35 concatFS, -- :: [FastString] -> FastString
36 consFS, -- :: Char -> FastString -> FastString
38 hPutFS, -- :: Handle -> FastString -> IO ()
39 tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
49 #define hASH_TBL_SIZE 993
53 @FastString@s are packed representations of strings
54 with a unique id for fast comparisons. The unique id
55 is assigned when creating the @FastString@, using
56 a hash table to map from the character string representation
61 = FastString -- packed repr. on the heap.
63 -- 0 => string literal, comparison
68 | CharStr -- external C string
69 Addr# -- pointer to the (null-terminated) bytes in C land.
70 Int# -- length (cached)
72 instance Eq FastString where
73 a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
74 a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
77 (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
80 instance Uniquable FastString where
81 uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
82 uniqueOf (CharStr a# l#) =
84 [A somewhat moby hack]: to avoid entering all sorts
85 of junk into the hash table, all C char strings
86 are by default left out. The benefit of being in
87 the table is that string comparisons are lightning fast,
88 just an Int# comparison.
90 But, if you want to get the Unique of a CharStr, we
91 enter it into the table and return that unique. This
92 works, but causes the CharStr to be looked up in the hash
93 table each time it is accessed..
95 mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
97 instance Uniquable Int where
98 uniqueOf (I# i#) = mkUniqueGrimily i#
100 instance Text FastString where
101 readsPrec p = error "readsPrec: FastString: ToDo"
102 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
103 showsPrec p ps r = showsPrec p (unpackFS ps) r
105 getByteArray# :: FastString -> ByteArray#
106 getByteArray# (FastString _ _ ba#) = ba#
108 getByteArray :: FastString -> _ByteArray Int
109 getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
111 lengthFS :: FastString -> Int
112 lengthFS (FastString _ l# _) = I# l#
113 lengthFS (CharStr a# l#) = I# l#
115 nullFastString :: FastString -> Bool
116 nullFastString (FastString _ l# _) = l# ==# 0#
117 nullFastString (CharStr _ l#) = l# ==# 0#
119 unpackFS :: FastString -> String
120 unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
121 unpackFS (CharStr addr len#) =
126 | otherwise = C# ch : unpack (nh +# 1#)
128 ch = indexCharOffAddr# addr nh
130 appendFS :: FastString -> FastString -> FastString
131 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
133 concatFS :: [FastString] -> FastString
134 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
136 headFS :: FastString -> Char
137 headFS f@(FastString _ l# ba#) =
138 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
139 headFS f@(CharStr a# l#) =
140 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
142 tailFS :: FastString -> FastString
143 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
145 consFS :: Char -> FastString -> FastString
146 consFS c fs = mkFastString (c:unpackFS fs)
150 Internally, the compiler will maintain a fast string symbol
151 table, providing sharing and fast comparison. Creation of
152 new @FastString@s then covertly does a lookup, re-using the
153 @FastString@ if there was a hit.
156 data FastStringTable =
159 (MutableArray# _RealWorld [FastString])
161 type FastStringTableVar = MutableVar _RealWorld FastStringTable
163 string_table :: FastStringTableVar
165 unsafePerformPrimIO (
166 newArray (0::Int,hASH_TBL_SIZE) [] `thenPrimIO` \ (_MutableArray _ arr#) ->
167 newVar (FastStringTable 0# arr#))
169 lookupTbl :: FastStringTable -> Int# -> [FastString]
170 lookupTbl (FastStringTable _ arr#) i# =
171 unsafePerformPrimIO ( \ (S# s#) ->
172 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
175 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
176 updTbl (_MutableArray _ var#) (FastStringTable uid# arr#) i# ls (S# s#) =
177 case writeArray# arr# i# ls s# of { s2# ->
178 case writeArray# var# 0# (FastStringTable (uid# +# 1#) arr#) s2# of { s3# ->
181 mkFastString# :: Addr# -> Int# -> FastString
182 mkFastString# a# len# =
183 unsafePerformPrimIO (
184 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
188 -- _trace ("hashed: "++show (I# h)) $
189 case lookupTbl ft h of
191 -- no match, add it to table by copying out the
192 -- the string into a ByteArray
193 -- _trace "empty bucket" $
194 case copyPrefixStr (A# a#) (I# len#) of
195 (_ByteArray _ barr#) ->
196 let f_str = FastString uid# len# barr# in
197 updTbl string_table ft h [f_str] `seqPrimIO`
198 ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
200 -- non-empty `bucket', scan the list looking
201 -- entry with same length and compare byte by byte.
202 -- _trace ("non-empty bucket"++show ls) $
203 case bucket_match ls len# a# of
205 case copyPrefixStr (A# a#) (I# len#) of
206 (_ByteArray _ barr#) ->
207 let f_str = FastString uid# len# barr# in
208 updTbl string_table ft h (f_str:ls) `seqPrimIO`
209 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
210 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
212 bucket_match [] _ _ = Nothing
213 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
214 if len# ==# l# && eqStrPrefix a# ba# l# then
217 bucket_match ls len# a#
219 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
220 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
222 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
223 mkFastSubStringFO# fo# start# len# =
224 unsafePerformPrimIO (
225 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
226 let h = hashSubStrFO fo# start# len# in
227 case lookupTbl ft h of
229 -- no match, add it to table by copying out the
230 -- the string into a ByteArray
231 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
232 (_ByteArray _ barr#) ->
233 let f_str = FastString uid# len# barr# in
234 updTbl string_table ft h [f_str] `seqPrimIO`
237 -- non-empty `bucket', scan the list looking
238 -- entry with same length and compare byte by byte.
239 case bucket_match ls start# len# fo# of
241 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
242 (_ByteArray _ barr#) ->
243 let f_str = FastString uid# len# barr# in
244 updTbl string_table ft h (f_str:ls) `seqPrimIO`
245 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
246 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
248 bucket_match [] _ _ _ = Nothing
249 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
250 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
253 bucket_match ls start# len# fo#
256 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
257 mkFastSubStringBA# barr# start# len# =
258 unsafePerformPrimIO (
259 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
260 let h = hashSubStrBA barr# start# len# in
261 -- _trace ("hashed(b): "++show (I# h)) $
262 case lookupTbl ft h of
264 -- no match, add it to table by copying out the
265 -- the string into a ByteArray
266 -- _trace "empty bucket(b)" $
267 case copySubStrBA (_ByteArray btm barr#) (I# start#) (I# len#) of
268 (_ByteArray _ ba#) ->
269 let f_str = FastString uid# len# ba# in
270 updTbl string_table ft h [f_str] `seqPrimIO`
271 -- _trace ("new(b): " ++ show f_str) $
274 -- non-empty `bucket', scan the list looking
275 -- entry with same length and compare byte by byte.
276 -- _trace ("non-empty bucket(b)"++show ls) $
277 case bucket_match ls start# len# barr# of
279 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
280 (_ByteArray _ ba#) ->
281 let f_str = FastString uid# len# ba# in
282 updTbl string_table ft h (f_str:ls) `seqPrimIO`
283 -- _trace ("new(b): " ++ show f_str) $
286 -- _trace ("re-use(b): "++show v) $
292 bucket_match [] _ _ _ = Nothing
293 bucket_match (v:ls) start# len# ba# =
295 FastString _ l# barr# ->
296 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
299 bucket_match ls start# len# ba#
301 mkFastCharString :: _Addr -> FastString
302 mkFastCharString a@(A# a#) =
303 case strLength a of{ (I# len#) -> CharStr a# len# }
305 mkFastCharString2 :: _Addr -> Int -> FastString
306 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
308 mkFastString :: String -> FastString
310 case stringToByteArray str of
311 (_ByteArray (_,I# len#) frozen#) ->
312 mkFastSubStringBA# frozen# 0# len#
313 {- 0-indexed array, len# == index to one beyond end of string,
314 i.e., (0,1) => empty string. -}
316 mkFastSubString :: _Addr -> Int -> Int -> FastString
317 mkFastSubString (A# a#) (I# start#) (I# len#) =
318 mkFastString# (addrOffset# a# start#) len#
320 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
321 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
322 mkFastSubStringFO# fo# start# len#
327 hashStr :: Addr# -> Int# -> Int#
328 -- use the Addr to produce a hash value between 0 & m (inclusive)
332 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
333 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
334 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
339 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
340 `remInt#` hASH_TBL_SIZE#
343 c0 = indexCharOffAddr# a# 0#
344 c1 = indexCharOffAddr# a# 1# --(len# `quotInt#` 2# -# 1#)
345 c2 = indexCharOffAddr# a# 2# --(len# -# 1#)
347 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
348 -- use the Addr to produce a hash value between 0 & m (inclusive)
349 hashSubStrFO fo# start# len# =
352 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
353 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
354 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
359 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
360 `remInt#` hASH_TBL_SIZE#
363 c0 = indexCharOffFO# fo# 0#
364 c1 = indexCharOffFO# fo# 1# --(len# `quotInt#` 2# -# 1#)
365 c2 = indexCharOffFO# fo# 2# --(len# -# 1#)
368 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
369 -- use the Addr to produce a hash value between 0 & m (inclusive)
370 hashSubStrBA ba# start# len# =
373 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
374 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
375 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
380 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
381 `remInt#` hASH_TBL_SIZE#
384 c0 = indexCharArray# ba# 0#
385 c1 = indexCharArray# ba# 1# --(len# `quotInt#` 2# -# 1#)
386 c2 = indexCharArray# ba# 2# --(len# -# 1#)
391 tagCmpFS :: FastString -> FastString -> _CMP_TAG
392 tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
396 unsafePerformPrimIO (
397 _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
399 if res <# 0# then _LT
400 else if res ==# 0# then _EQ
404 bottom = error "tagCmp"
405 tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
406 = unsafePerformPrimIO (
407 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
409 if res <# 0# then _LT
410 else if res ==# 0# then _EQ
416 tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
417 = unsafePerformPrimIO (
418 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
420 if res <# 0# then _LT
421 else if res ==# 0# then _EQ
425 ba1 = _ByteArray (error "") bs1
428 tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
429 = -- try them the other way 'round
430 case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
432 instance Ord FastString where
433 a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
434 a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
435 a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
436 a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
441 _tagCmp a b = tagCmpFS a b
445 Outputting @FastString@s is quick, just block copying the chunk (using
449 hPutFS :: Handle -> FastString -> IO ()
450 hPutFS handle (FastString _ l# ba#) =
454 _readHandle handle >>= \ htype ->
456 _ErrorHandle ioError ->
457 _writeHandle handle htype >>
460 _writeHandle handle htype >>
461 failWith (IllegalOperation "handle is closed")
462 _SemiClosedHandle _ _ ->
463 _writeHandle handle htype >>
464 failWith (IllegalOperation "handle is closed")
466 _writeHandle handle htype >>
467 failWith (IllegalOperation "handle is not open for writing")
469 let fp = _filePtr htype in
471 _ccall_ writeFile (_ByteArray (error "") ba#) fp (I# l#) `thenPrimIO` \rc ->
475 _constructError "hPutFS" `thenPrimIO` \ err ->
477 hPutFS handle (CharStr a# l#) =
481 _readHandle handle >>= \ htype ->
483 _ErrorHandle ioError ->
484 _writeHandle handle htype >>
487 _writeHandle handle htype >>
488 failWith (IllegalOperation "handle is closed")
489 _SemiClosedHandle _ _ ->
490 _writeHandle handle htype >>
491 failWith (IllegalOperation "handle is closed")
493 _writeHandle handle htype >>
494 failWith (IllegalOperation "handle is not open for writing")
496 let fp = _filePtr htype in
498 _ccall_ writeFile (A# a#) fp (I# l#) `thenPrimIO` \rc ->
502 _constructError "hPutFS" `thenPrimIO` \ err ->
505 --ToDo: avoid silly code duplic.