2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
10 #include "HsVersions.h"
14 FastString(..), -- not abstract, for now.
17 mkFastString, -- :: String -> FastString
18 mkFastCharString, -- :: _Addr -> FastString
19 mkFastCharString2, -- :: _Addr -> Int -> FastString
20 mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
21 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
23 mkFastString#, -- :: Addr# -> Int# -> FastString
24 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
25 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
26 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
28 lengthFS, -- :: FastString -> Int
29 nullFastString, -- :: FastString -> Bool
31 getByteArray#, -- :: FastString -> ByteArray#
32 getByteArray, -- :: FastString -> _ByteArray Int
33 unpackFS, -- :: FastString -> String
34 appendFS, -- :: FastString -> FastString -> FastString
35 headFS, -- :: FastString -> Char
36 tailFS, -- :: FastString -> FastString
37 concatFS, -- :: [FastString] -> FastString
38 consFS, -- :: Char -> FastString -> FastString
40 hPutFS, -- :: Handle -> FastString -> IO ()
41 tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
44 #if __GLASGOW_HASKELL__ <= 201
60 #define hASH_TBL_SIZE 993
64 @FastString@s are packed representations of strings
65 with a unique id for fast comparisons. The unique id
66 is assigned when creating the @FastString@, using
67 a hash table to map from the character string representation
72 = FastString -- packed repr. on the heap.
74 -- 0 => string literal, comparison
79 | CharStr -- external C string
80 Addr# -- pointer to the (null-terminated) bytes in C land.
81 Int# -- length (cached)
83 instance Eq FastString where
84 a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
85 a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
88 (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
91 instance Uniquable FastString where
92 uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
93 uniqueOf (CharStr a# l#) =
95 [A somewhat moby hack]: to avoid entering all sorts
96 of junk into the hash table, all C char strings
97 are by default left out. The benefit of being in
98 the table is that string comparisons are lightning fast,
99 just an Int# comparison.
101 But, if you want to get the Unique of a CharStr, we
102 enter it into the table and return that unique. This
103 works, but causes the CharStr to be looked up in the hash
104 table each time it is accessed..
106 mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
108 instance Uniquable Int where
109 uniqueOf (I# i#) = mkUniqueGrimily i#
111 instance Text FastString where
112 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
113 showsPrec p ps r = showsPrec p (unpackFS ps) r
115 getByteArray# :: FastString -> ByteArray#
116 getByteArray# (FastString _ _ ba#) = ba#
118 getByteArray :: FastString -> _ByteArray Int
119 getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
121 lengthFS :: FastString -> Int
122 lengthFS (FastString _ l# _) = I# l#
123 lengthFS (CharStr a# l#) = I# l#
125 nullFastString :: FastString -> Bool
126 nullFastString (FastString _ l# _) = l# ==# 0#
127 nullFastString (CharStr _ l#) = l# ==# 0#
129 unpackFS :: FastString -> String
130 unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
131 unpackFS (CharStr addr len#) =
136 | otherwise = C# ch : unpack (nh +# 1#)
138 ch = indexCharOffAddr# addr nh
140 appendFS :: FastString -> FastString -> FastString
141 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
143 concatFS :: [FastString] -> FastString
144 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
146 headFS :: FastString -> Char
147 headFS f@(FastString _ l# ba#) =
148 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
149 headFS f@(CharStr a# l#) =
150 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
152 tailFS :: FastString -> FastString
153 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
155 consFS :: Char -> FastString -> FastString
156 consFS c fs = mkFastString (c:unpackFS fs)
160 Internally, the compiler will maintain a fast string symbol
161 table, providing sharing and fast comparison. Creation of
162 new @FastString@s then covertly does a lookup, re-using the
163 @FastString@ if there was a hit.
166 data FastStringTable =
169 (MutableArray# _RealWorld [FastString])
171 type FastStringTableVar = MutableVar _RealWorld FastStringTable
173 string_table :: FastStringTableVar
175 unsafePerformPrimIO (
176 newArray (0::Int,hASH_TBL_SIZE) [] `thenPrimIO` \ (_MutableArray _ arr#) ->
177 newVar (FastStringTable 0# arr#))
179 lookupTbl :: FastStringTable -> Int# -> PrimIO [FastString]
180 lookupTbl (FastStringTable _ arr#) i# =
182 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
185 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
186 updTbl (_MutableArray _ var#) (FastStringTable uid# arr#) i# ls =
188 case writeArray# arr# i# ls s# of { s2# ->
189 case writeArray# var# 0# (FastStringTable (uid# +# 1#) arr#) s2# of { s3# ->
192 mkFastString# :: Addr# -> Int# -> FastString
193 mkFastString# a# len# =
194 unsafePerformPrimIO (
195 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
199 -- _trace ("hashed: "++show (I# h)) $
200 lookupTbl ft h `thenPrimIO` \ lookup_result ->
201 case lookup_result of
203 -- no match, add it to table by copying out the
204 -- the string into a ByteArray
205 -- _trace "empty bucket" $
206 case copyPrefixStr (A# a#) (I# len#) of
207 (_ByteArray _ barr#) ->
208 let f_str = FastString uid# len# barr# in
209 updTbl string_table ft h [f_str] `seqPrimIO`
210 ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
212 -- non-empty `bucket', scan the list looking
213 -- entry with same length and compare byte by byte.
214 -- _trace ("non-empty bucket"++show ls) $
215 case bucket_match ls len# a# of
217 case copyPrefixStr (A# a#) (I# len#) of
218 (_ByteArray _ barr#) ->
219 let f_str = FastString uid# len# barr# in
220 updTbl string_table ft h (f_str:ls) `seqPrimIO`
221 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
222 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
224 bucket_match [] _ _ = Nothing
225 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
226 if len# ==# l# && eqStrPrefix a# ba# l# then
229 bucket_match ls len# a#
231 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
232 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
234 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
235 mkFastSubStringFO# fo# start# len# =
236 unsafePerformPrimIO (
237 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
239 h = hashSubStrFO fo# start# len#
241 lookupTbl ft h `thenPrimIO` \ lookup_result ->
242 case lookup_result of
244 -- no match, add it to table by copying out the
245 -- the string into a ByteArray
246 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
247 (_ByteArray _ barr#) ->
248 let f_str = FastString uid# len# barr# in
249 updTbl string_table ft h [f_str] `seqPrimIO`
252 -- non-empty `bucket', scan the list looking
253 -- entry with same length and compare byte by byte.
254 case bucket_match ls start# len# fo# of
256 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
257 (_ByteArray _ barr#) ->
258 let f_str = FastString uid# len# barr# in
259 updTbl string_table ft h (f_str:ls) `seqPrimIO`
260 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
261 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
263 bucket_match [] _ _ _ = Nothing
264 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
265 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
268 bucket_match ls start# len# fo#
271 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
272 mkFastSubStringBA# barr# start# len# =
273 unsafePerformPrimIO (
274 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
276 h = hashSubStrBA barr# start# len#
278 -- _trace ("hashed(b): "++show (I# h)) $
279 lookupTbl ft h `thenPrimIO` \ lookup_result ->
280 case lookup_result of
282 -- no match, add it to table by copying out the
283 -- the string into a ByteArray
284 -- _trace "empty bucket(b)" $
285 case copySubStrBA (_ByteArray btm barr#) (I# start#) (I# len#) of
286 (_ByteArray _ ba#) ->
287 let f_str = FastString uid# len# ba# in
288 updTbl string_table ft h [f_str] `seqPrimIO`
289 -- _trace ("new(b): " ++ show f_str) $
292 -- non-empty `bucket', scan the list looking
293 -- entry with same length and compare byte by byte.
294 -- _trace ("non-empty bucket(b)"++show ls) $
295 case bucket_match ls start# len# barr# of
297 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
298 (_ByteArray _ ba#) ->
299 let f_str = FastString uid# len# ba# in
300 updTbl string_table ft h (f_str:ls) `seqPrimIO`
301 -- _trace ("new(b): " ++ show f_str) $
304 -- _trace ("re-use(b): "++show v) $
310 bucket_match [] _ _ _ = Nothing
311 bucket_match (v:ls) start# len# ba# =
313 FastString _ l# barr# ->
314 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
317 bucket_match ls start# len# ba#
319 mkFastCharString :: _Addr -> FastString
320 mkFastCharString a@(A# a#) =
321 case strLength a of{ (I# len#) -> CharStr a# len# }
323 mkFastCharString2 :: _Addr -> Int -> FastString
324 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
326 mkFastString :: String -> FastString
328 case stringToByteArray str of
329 (_ByteArray (_,I# len#) frozen#) ->
330 mkFastSubStringBA# frozen# 0# len#
331 {- 0-indexed array, len# == index to one beyond end of string,
332 i.e., (0,1) => empty string. -}
334 mkFastSubString :: _Addr -> Int -> Int -> FastString
335 mkFastSubString (A# a#) (I# start#) (I# len#) =
336 mkFastString# (addrOffset# a# start#) len#
338 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
339 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
340 mkFastSubStringFO# fo# start# len#
345 hashStr :: Addr# -> Int# -> Int#
346 -- use the Addr to produce a hash value between 0 & m (inclusive)
350 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
351 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
352 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
357 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
358 `remInt#` hASH_TBL_SIZE#
361 c0 = indexCharOffAddr# a# 0#
362 c1 = indexCharOffAddr# a# 1# --(len# `quotInt#` 2# -# 1#)
363 c2 = indexCharOffAddr# a# 2# --(len# -# 1#)
365 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
366 -- use the Addr to produce a hash value between 0 & m (inclusive)
367 hashSubStrFO fo# start# len# =
370 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
371 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
372 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
377 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
378 `remInt#` hASH_TBL_SIZE#
381 c0 = indexCharOffFO# fo# 0#
382 c1 = indexCharOffFO# fo# 1# --(len# `quotInt#` 2# -# 1#)
383 c2 = indexCharOffFO# fo# 2# --(len# -# 1#)
386 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
387 -- use the Addr to produce a hash value between 0 & m (inclusive)
388 hashSubStrBA ba# start# len# =
391 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
392 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
393 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
398 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
399 `remInt#` hASH_TBL_SIZE#
402 c0 = indexCharArray# ba# 0#
403 c1 = indexCharArray# ba# 1# --(len# `quotInt#` 2# -# 1#)
404 c2 = indexCharArray# ba# 2# --(len# -# 1#)
409 tagCmpFS :: FastString -> FastString -> _CMP_TAG
410 tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
414 unsafePerformPrimIO (
415 _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
417 if res <# 0# then _LT
418 else if res ==# 0# then _EQ
423 bottom = error "tagCmp"
424 tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
425 = unsafePerformPrimIO (
426 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
428 if res <# 0# then _LT
429 else if res ==# 0# then _EQ
435 tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
436 = unsafePerformPrimIO (
437 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
439 if res <# 0# then _LT
440 else if res ==# 0# then _EQ
444 ba1 = _ByteArray ((error "")::(Int,Int)) bs1
447 tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
448 = -- try them the other way 'round
449 case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
451 instance Ord FastString where
452 a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
453 a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
454 a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
455 a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
460 _tagCmp a b = tagCmpFS a b
464 Outputting @FastString@s is quick, just block copying the chunk (using
468 #if __GLASGOW_HASKELL__ >= 201
469 #define _ErrorHandle IOBase.ErrorHandle
470 #define _ReadHandle IOBase.ReadHandle
471 #define _ClosedHandle IOBase.ClosedHandle
472 #define _SemiClosedHandle IOBase.SemiClosedHandle
473 #define _constructError IOBase.constructError
474 #define _filePtr IOHandle.filePtr
475 #define failWith fail
478 hPutFS :: Handle -> FastString -> IO ()
479 hPutFS handle (FastString _ l# ba#) =
483 _readHandle handle >>= \ htype ->
485 _ErrorHandle ioError ->
486 _writeHandle handle htype >>
489 _writeHandle handle htype >>
490 failWith MkIOError(handle,IllegalOperation,"handle is closed")
491 _SemiClosedHandle _ _ ->
492 _writeHandle handle htype >>
493 failWith MkIOError(handle,IllegalOperation,"handle is closed")
495 _writeHandle handle htype >>
496 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
498 let fp = _filePtr htype in
500 _ccall_ writeFile (_ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) `CCALL_THEN` \rc ->
504 _constructError "hPutFS" `CCALL_THEN` \ err ->
506 hPutFS handle (CharStr a# l#) =
510 _readHandle handle >>= \ htype ->
512 _ErrorHandle ioError ->
513 _writeHandle handle htype >>
516 _writeHandle handle htype >>
517 failWith MkIOError(handle,IllegalOperation,"handle is closed")
518 _SemiClosedHandle _ _ ->
519 _writeHandle handle htype >>
520 failWith MkIOError(handle,IllegalOperation,"handle is closed")
522 _writeHandle handle htype >>
523 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
525 let fp = _filePtr htype in
527 _ccall_ writeFile (A# a#) fp (I# l#) `CCALL_THEN` \rc ->
531 _constructError "hPutFS" `CCALL_THEN` \ err ->
534 --ToDo: avoid silly code duplic.