2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
12 FastString(..), -- not abstract, for now.
15 mkFastString, -- :: String -> FastString
16 mkFastCharString, -- :: _Addr -> FastString
17 mkFastCharString2, -- :: _Addr -> Int -> FastString
18 mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
19 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
21 mkFastString#, -- :: Addr# -> Int# -> FastString
22 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
23 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
24 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
26 lengthFS, -- :: FastString -> Int
27 nullFastString, -- :: FastString -> Bool
29 getByteArray#, -- :: FastString -> ByteArray#
30 getByteArray, -- :: FastString -> _ByteArray Int
31 unpackFS, -- :: FastString -> String
32 appendFS, -- :: FastString -> FastString -> FastString
33 headFS, -- :: FastString -> Char
34 tailFS, -- :: FastString -> FastString
35 concatFS, -- :: [FastString] -> FastString
36 consFS, -- :: Char -> FastString -> FastString
38 hPutFS, -- :: Handle -> FastString -> IO ()
39 tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
49 #define hASH_TBL_SIZE 993
53 @FastString@s are packed representations of strings
54 with a unique id for fast comparisons. The unique id
55 is assigned when creating the @FastString@, using
56 a hash table to map from the character string representation
61 = FastString -- packed repr. on the heap.
63 -- 0 => string literal, comparison
68 | CharStr -- external C string
69 Addr# -- pointer to the (null-terminated) bytes in C land.
70 Int# -- length (cached)
72 instance Eq FastString where
73 a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
74 a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
77 (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
80 instance Uniquable FastString where
81 uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
82 uniqueOf (CharStr a# l#) =
84 [A somewhat moby hack]: to avoid entering all sorts
85 of junk into the hash table, all C char strings
86 are by default left out. The benefit of being in
87 the table is that string comparisons are lightning fast,
88 just an Int# comparison.
90 But, if you want to get the Unique of a CharStr, we
91 enter it into the table and return that unique. This
92 works, but causes the CharStr to be looked up in the hash
93 table each time it is accessed..
95 mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
97 instance Uniquable Int where
98 uniqueOf (I# i#) = mkUniqueGrimily i#
100 instance Text FastString where
101 readsPrec p = error "readsPrec: FastString: ToDo"
102 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
103 showsPrec p ps r = showsPrec p (unpackFS ps) r
105 getByteArray# :: FastString -> ByteArray#
106 getByteArray# (FastString _ _ ba#) = ba#
108 getByteArray :: FastString -> _ByteArray Int
109 getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
111 lengthFS :: FastString -> Int
112 lengthFS (FastString _ l# _) = I# l#
113 lengthFS (CharStr a# l#) = I# l#
115 nullFastString :: FastString -> Bool
116 nullFastString (FastString _ l# _) = l# ==# 0#
117 nullFastString (CharStr _ l#) = l# ==# 0#
119 unpackFS :: FastString -> String
120 unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
121 unpackFS (CharStr addr len#) =
126 | otherwise = C# ch : unpack (nh +# 1#)
128 ch = indexCharOffAddr# addr nh
130 appendFS :: FastString -> FastString -> FastString
131 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
133 concatFS :: [FastString] -> FastString
134 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
136 headFS :: FastString -> Char
137 headFS f@(FastString _ l# ba#) =
138 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
139 headFS f@(CharStr a# l#) =
140 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
142 tailFS :: FastString -> FastString
143 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
145 consFS :: Char -> FastString -> FastString
146 consFS c fs = mkFastString (c:unpackFS fs)
150 Internally, the compiler will maintain a fast string symbol
151 table, providing sharing and fast comparison. Creation of
152 new @FastString@s then covertly does a lookup, re-using the
153 @FastString@ if there was a hit.
156 data FastStringTable =
159 (MutableArray# _RealWorld [FastString])
161 type FastStringTableVar = MutableVar _RealWorld FastStringTable
163 string_table :: FastStringTableVar
165 unsafePerformPrimIO (
166 newArray (0::Int,hASH_TBL_SIZE) [] `thenPrimIO` \ (_MutableArray _ arr#) ->
167 newVar (FastStringTable 0# arr#))
169 lookupTbl :: FastStringTable -> Int# -> [FastString]
170 lookupTbl (FastStringTable _ arr#) i# =
171 unsafePerformPrimIO ( \ (S# s#) ->
172 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
175 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
176 updTbl (_MutableArray _ var#) (FastStringTable uid# arr#) i# ls (S# s#) =
177 case writeArray# arr# i# ls s# of { s2# ->
178 case writeArray# var# 0# (FastStringTable (uid# +# 1#) arr#) s2# of { s3# ->
181 mkFastString# :: Addr# -> Int# -> FastString
182 mkFastString# a# len# =
183 unsafePerformPrimIO (
184 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
188 -- _trace ("hashed: "++show (I# h)) $
189 case lookupTbl ft h of
191 -- no match, add it to table by copying out the
192 -- the string into a ByteArray
193 -- _trace "empty bucket" $
194 case copyPrefixStr (A# a#) (I# len#) of
195 (_ByteArray _ barr#) ->
196 let f_str = FastString uid# len# barr# in
197 updTbl string_table ft h [f_str] `seqPrimIO`
198 ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
200 -- non-empty `bucket', scan the list looking
201 -- entry with same length and compare byte by byte.
202 -- _trace ("non-empty bucket"++show ls) $
203 case bucket_match ls len# a# of
205 case copyPrefixStr (A# a#) (I# len#) of
206 (_ByteArray _ barr#) ->
207 let f_str = FastString uid# len# barr# in
208 updTbl string_table ft h (f_str:ls) `seqPrimIO`
209 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
210 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
212 bucket_match [] _ _ = Nothing
213 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
214 if len# ==# l# && eqStrPrefix a# ba# l# then
217 bucket_match ls len# a#
219 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
220 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
222 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
223 mkFastSubStringFO# fo# start# len# =
224 unsafePerformPrimIO (
225 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
227 h = hashSubStrFO fo# start# len#
229 case lookupTbl ft h of
231 -- no match, add it to table by copying out the
232 -- the string into a ByteArray
233 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
234 (_ByteArray _ barr#) ->
235 let f_str = FastString uid# len# barr# in
236 updTbl string_table ft h [f_str] `seqPrimIO`
239 -- non-empty `bucket', scan the list looking
240 -- entry with same length and compare byte by byte.
241 case bucket_match ls start# len# fo# of
243 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
244 (_ByteArray _ barr#) ->
245 let f_str = FastString uid# len# barr# in
246 updTbl string_table ft h (f_str:ls) `seqPrimIO`
247 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
248 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
250 bucket_match [] _ _ _ = Nothing
251 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
252 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
255 bucket_match ls start# len# fo#
258 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
259 mkFastSubStringBA# barr# start# len# =
260 unsafePerformPrimIO (
261 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
263 h = hashSubStrBA barr# start# len#
265 -- _trace ("hashed(b): "++show (I# h)) $
266 case lookupTbl ft h of
268 -- no match, add it to table by copying out the
269 -- the string into a ByteArray
270 -- _trace "empty bucket(b)" $
271 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
272 (_ByteArray _ ba#) ->
273 let f_str = FastString uid# len# ba# in
274 updTbl string_table ft h [f_str] `seqPrimIO`
275 ({- _trace ("new(b): " ++ show f_str) $ -} returnPrimIO f_str)
277 -- non-empty `bucket', scan the list looking
278 -- entry with same length and compare byte by byte.
279 -- _trace ("non-empty bucket(b)"++show ls) $
280 case bucket_match ls start# len# barr# of
282 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
283 (_ByteArray _ ba#) ->
284 let f_str = FastString uid# len# ba# in
285 updTbl string_table ft h (f_str:ls) `seqPrimIO`
286 ({- _trace ("new(b): " ++ show f_str) $ -} returnPrimIO f_str)
287 Just v -> {- _trace ("re-use(b): "++show v) $ -} returnPrimIO v)
289 bucket_match [] _ _ _ = Nothing
290 bucket_match (v:ls) start# len# ba# =
292 FastString _ l# barr# ->
293 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
296 bucket_match ls len# start# ba#
298 mkFastCharString :: _Addr -> FastString
299 mkFastCharString a@(A# a#) =
300 case strLength a of{ (I# len#) -> CharStr a# len# }
302 mkFastCharString2 :: _Addr -> Int -> FastString
303 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
305 mkFastString :: String -> FastString
307 (case stringToByteArray str of
308 (_ByteArray (_,I# len#) frozen#) ->
310 -- 0-indexed array, len# == index to one beyond end of string,
311 -- i.e., (0,1) => empty string.
313 {- _trace (show (str,I# len#)) $ -} mkFastSubStringBA# frozen# 0# len#)
315 mkFastSubString :: _Addr -> Int -> Int -> FastString
316 mkFastSubString (A# a#) (I# start#) (I# len#)
317 = mkFastString# (addrOffset# a# start#) len#
319 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
320 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
321 mkFastSubStringFO# fo# start# len#
326 hashStr :: Addr# -> Int# -> Int#
327 -- use the Addr to produce a hash value between 0 & m (inclusive)
331 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
332 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
333 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
338 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
339 `remInt#` hASH_TBL_SIZE#
342 c0 = indexCharOffAddr# a# 0#
343 c1 = indexCharOffAddr# a# 1# --(len# `quotInt#` 2# -# 1#)
344 c2 = indexCharOffAddr# a# 2# --(len# -# 1#)
346 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
347 -- use the Addr to produce a hash value between 0 & m (inclusive)
348 hashSubStrFO fo# start# len# =
351 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
352 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
353 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
358 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
359 `remInt#` hASH_TBL_SIZE#
362 c0 = indexCharOffFO# fo# 0#
363 c1 = indexCharOffFO# fo# 1# --(len# `quotInt#` 2# -# 1#)
364 c2 = indexCharOffFO# fo# 2# --(len# -# 1#)
367 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
368 -- use the Addr to produce a hash value between 0 & m (inclusive)
369 hashSubStrBA ba# start# len# =
372 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
373 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
374 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
379 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
380 `remInt#` hASH_TBL_SIZE#
383 c0 = indexCharArray# ba# 0#
384 c1 = indexCharArray# ba# 1# --(len# `quotInt#` 2# -# 1#)
385 c2 = indexCharArray# ba# 2# --(len# -# 1#)
390 tagCmpFS :: FastString -> FastString -> _CMP_TAG
391 tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
395 unsafePerformPrimIO (
396 _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
398 if res <# 0# then _LT
399 else if res ==# 0# then _EQ
403 bottom = error "tagCmp"
404 tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
405 = unsafePerformPrimIO (
406 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
408 if res <# 0# then _LT
409 else if res ==# 0# then _EQ
415 tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
416 = unsafePerformPrimIO (
417 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
419 if res <# 0# then _LT
420 else if res ==# 0# then _EQ
424 ba1 = _ByteArray (error "") bs1
427 tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
428 = -- try them the other way 'round
429 case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
431 instance Ord FastString where
432 a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
433 a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
434 a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
435 a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
440 _tagCmp a b = tagCmpFS a b
444 Outputting @FastString@s is quick, just block copying the chunk (using
448 hPutFS :: Handle -> FastString -> IO ()
449 hPutFS handle (FastString _ l# ba#) =
453 _readHandle handle >>= \ htype ->
455 _ErrorHandle ioError ->
456 _writeHandle handle htype >>
459 _writeHandle handle htype >>
460 failWith (IllegalOperation "handle is closed")
461 _SemiClosedHandle _ _ ->
462 _writeHandle handle htype >>
463 failWith (IllegalOperation "handle is closed")
465 _writeHandle handle htype >>
466 failWith (IllegalOperation "handle is not open for writing")
468 let fp = _filePtr htype in
470 _ccall_ writeFile (_ByteArray (error "") ba#) fp (I# l#) `thenPrimIO` \rc ->
474 _constructError "hPutFS" `thenPrimIO` \ err ->
476 hPutFS handle (CharStr a# l#) =
480 _readHandle handle >>= \ htype ->
482 _ErrorHandle ioError ->
483 _writeHandle handle htype >>
486 _writeHandle handle htype >>
487 failWith (IllegalOperation "handle is closed")
488 _SemiClosedHandle _ _ ->
489 _writeHandle handle htype >>
490 failWith (IllegalOperation "handle is closed")
492 _writeHandle handle htype >>
493 failWith (IllegalOperation "handle is not open for writing")
495 let fp = _filePtr htype in
497 _ccall_ writeFile (A# a#) fp (I# l#) `thenPrimIO` \rc ->
501 _constructError "hPutFS" `thenPrimIO` \ err ->
504 --ToDo: avoid silly code duplic.