2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
10 #include "HsVersions.h"
14 FastString(..), -- not abstract, for now.
17 mkFastString, -- :: String -> FastString
18 mkFastCharString, -- :: _Addr -> FastString
19 mkFastCharString2, -- :: _Addr -> Int -> FastString
20 mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
21 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
23 mkFastString#, -- :: Addr# -> Int# -> FastString
24 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
25 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
26 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
28 lengthFS, -- :: FastString -> Int
29 nullFastString, -- :: FastString -> Bool
31 getByteArray#, -- :: FastString -> ByteArray#
32 getByteArray, -- :: FastString -> _ByteArray Int
33 unpackFS, -- :: FastString -> String
34 appendFS, -- :: FastString -> FastString -> FastString
35 headFS, -- :: FastString -> Char
36 tailFS, -- :: FastString -> FastString
37 concatFS, -- :: [FastString] -> FastString
38 consFS, -- :: Char -> FastString -> FastString
40 hPutFS, -- :: Handle -> FastString -> IO ()
41 tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
44 #if __GLASGOW_HASKELL__ <= 201
56 import {-# SOURCE #-} Unique ( mkUniqueGrimily, Unique, Uniquable(..) )
57 #if __GLASGOW_HASKELL__ == 202
58 import PrelBase ( Char (..) )
64 #define hASH_TBL_SIZE 993
68 @FastString@s are packed representations of strings
69 with a unique id for fast comparisons. The unique id
70 is assigned when creating the @FastString@, using
71 a hash table to map from the character string representation
76 = FastString -- packed repr. on the heap.
78 -- 0 => string literal, comparison
83 | CharStr -- external C string
84 Addr# -- pointer to the (null-terminated) bytes in C land.
85 Int# -- length (cached)
87 instance Eq FastString where
88 a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
89 a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
92 (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
95 instance Uniquable FastString where
96 uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
97 uniqueOf (CharStr a# l#) =
99 [A somewhat moby hack]: to avoid entering all sorts
100 of junk into the hash table, all C char strings
101 are by default left out. The benefit of being in
102 the table is that string comparisons are lightning fast,
103 just an Int# comparison.
105 But, if you want to get the Unique of a CharStr, we
106 enter it into the table and return that unique. This
107 works, but causes the CharStr to be looked up in the hash
108 table each time it is accessed..
110 mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
112 instance Uniquable Int where
113 uniqueOf (I# i#) = mkUniqueGrimily i#
115 instance Text FastString where
116 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
117 showsPrec p ps r = showsPrec p (unpackFS ps) r
119 getByteArray# :: FastString -> ByteArray#
120 getByteArray# (FastString _ _ ba#) = ba#
122 getByteArray :: FastString -> _ByteArray Int
123 getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
125 lengthFS :: FastString -> Int
126 lengthFS (FastString _ l# _) = I# l#
127 lengthFS (CharStr a# l#) = I# l#
129 nullFastString :: FastString -> Bool
130 nullFastString (FastString _ l# _) = l# ==# 0#
131 nullFastString (CharStr _ l#) = l# ==# 0#
133 unpackFS :: FastString -> String
134 unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
135 unpackFS (CharStr addr len#) =
140 | otherwise = C# ch : unpack (nh +# 1#)
142 ch = indexCharOffAddr# addr nh
144 appendFS :: FastString -> FastString -> FastString
145 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
147 concatFS :: [FastString] -> FastString
148 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
150 headFS :: FastString -> Char
151 headFS f@(FastString _ l# ba#) =
152 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
153 headFS f@(CharStr a# l#) =
154 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
156 tailFS :: FastString -> FastString
157 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
159 consFS :: Char -> FastString -> FastString
160 consFS c fs = mkFastString (c:unpackFS fs)
164 Internally, the compiler will maintain a fast string symbol
165 table, providing sharing and fast comparison. Creation of
166 new @FastString@s then covertly does a lookup, re-using the
167 @FastString@ if there was a hit.
170 data FastStringTable =
173 (MutableArray# _RealWorld [FastString])
175 type FastStringTableVar = MutableVar _RealWorld FastStringTable
177 string_table :: FastStringTableVar
179 unsafePerformPrimIO (
180 newArray (0::Int,hASH_TBL_SIZE) [] `thenPrimIO` \ (_MutableArray _ arr#) ->
181 newVar (FastStringTable 0# arr#))
183 lookupTbl :: FastStringTable -> Int# -> PrimIO [FastString]
184 lookupTbl (FastStringTable _ arr#) i# =
186 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
189 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
190 updTbl (_MutableArray _ var#) (FastStringTable uid# arr#) i# ls =
192 case writeArray# arr# i# ls s# of { s2# ->
193 case writeArray# var# 0# (FastStringTable (uid# +# 1#) arr#) s2# of { s3# ->
196 mkFastString# :: Addr# -> Int# -> FastString
197 mkFastString# a# len# =
198 unsafePerformPrimIO (
199 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
203 -- _trace ("hashed: "++show (I# h)) $
204 lookupTbl ft h `thenPrimIO` \ lookup_result ->
205 case lookup_result of
207 -- no match, add it to table by copying out the
208 -- the string into a ByteArray
209 -- _trace "empty bucket" $
210 case copyPrefixStr (A# a#) (I# len#) of
211 (_ByteArray _ barr#) ->
212 let f_str = FastString uid# len# barr# in
213 updTbl string_table ft h [f_str] `seqPrimIO`
214 ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
216 -- non-empty `bucket', scan the list looking
217 -- entry with same length and compare byte by byte.
218 -- _trace ("non-empty bucket"++show ls) $
219 case bucket_match ls len# a# of
221 case copyPrefixStr (A# a#) (I# len#) of
222 (_ByteArray _ barr#) ->
223 let f_str = FastString uid# len# barr# in
224 updTbl string_table ft h (f_str:ls) `seqPrimIO`
225 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
226 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
228 bucket_match [] _ _ = Nothing
229 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
230 if len# ==# l# && eqStrPrefix a# ba# l# then
233 bucket_match ls len# a#
235 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
236 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
238 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
239 mkFastSubStringFO# fo# start# len# =
240 unsafePerformPrimIO (
241 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
243 h = hashSubStrFO fo# start# len#
245 lookupTbl ft h `thenPrimIO` \ lookup_result ->
246 case lookup_result of
248 -- no match, add it to table by copying out the
249 -- the string into a ByteArray
250 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
251 (_ByteArray _ barr#) ->
252 let f_str = FastString uid# len# barr# in
253 updTbl string_table ft h [f_str] `seqPrimIO`
256 -- non-empty `bucket', scan the list looking
257 -- entry with same length and compare byte by byte.
258 case bucket_match ls start# len# fo# of
260 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
261 (_ByteArray _ barr#) ->
262 let f_str = FastString uid# len# barr# in
263 updTbl string_table ft h (f_str:ls) `seqPrimIO`
264 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
265 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
267 bucket_match [] _ _ _ = Nothing
268 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
269 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
272 bucket_match ls start# len# fo#
275 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
276 mkFastSubStringBA# barr# start# len# =
277 unsafePerformPrimIO (
278 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
280 h = hashSubStrBA barr# start# len#
282 -- _trace ("hashed(b): "++show (I# h)) $
283 lookupTbl ft h `thenPrimIO` \ lookup_result ->
284 case lookup_result of
286 -- no match, add it to table by copying out the
287 -- the string into a ByteArray
288 -- _trace "empty bucket(b)" $
289 case copySubStrBA (_ByteArray btm barr#) (I# start#) (I# len#) of
290 (_ByteArray _ ba#) ->
291 let f_str = FastString uid# len# ba# in
292 updTbl string_table ft h [f_str] `seqPrimIO`
293 -- _trace ("new(b): " ++ show f_str) $
296 -- non-empty `bucket', scan the list looking
297 -- entry with same length and compare byte by byte.
298 -- _trace ("non-empty bucket(b)"++show ls) $
299 case bucket_match ls start# len# barr# of
301 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
302 (_ByteArray _ ba#) ->
303 let f_str = FastString uid# len# ba# in
304 updTbl string_table ft h (f_str:ls) `seqPrimIO`
305 -- _trace ("new(b): " ++ show f_str) $
308 -- _trace ("re-use(b): "++show v) $
314 bucket_match [] _ _ _ = Nothing
315 bucket_match (v:ls) start# len# ba# =
317 FastString _ l# barr# ->
318 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
321 bucket_match ls start# len# ba#
323 mkFastCharString :: _Addr -> FastString
324 mkFastCharString a@(A# a#) =
325 case strLength a of{ (I# len#) -> CharStr a# len# }
327 mkFastCharString2 :: _Addr -> Int -> FastString
328 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
330 mkFastString :: String -> FastString
332 case stringToByteArray str of
333 (_ByteArray (_,I# len#) frozen#) ->
334 mkFastSubStringBA# frozen# 0# len#
335 {- 0-indexed array, len# == index to one beyond end of string,
336 i.e., (0,1) => empty string. -}
338 mkFastSubString :: _Addr -> Int -> Int -> FastString
339 mkFastSubString (A# a#) (I# start#) (I# len#) =
340 mkFastString# (addrOffset# a# start#) len#
342 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
343 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
344 mkFastSubStringFO# fo# start# len#
349 hashStr :: Addr# -> Int# -> Int#
350 -- use the Addr to produce a hash value between 0 & m (inclusive)
354 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
355 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
356 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
361 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
362 `remInt#` hASH_TBL_SIZE#
365 c0 = indexCharOffAddr# a# 0#
366 c1 = indexCharOffAddr# a# 1# --(len# `quotInt#` 2# -# 1#)
367 c2 = indexCharOffAddr# a# 2# --(len# -# 1#)
369 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
370 -- use the Addr to produce a hash value between 0 & m (inclusive)
371 hashSubStrFO fo# start# len# =
374 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
375 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
376 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
381 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
382 `remInt#` hASH_TBL_SIZE#
385 c0 = indexCharOffFO# fo# 0#
386 c1 = indexCharOffFO# fo# 1# --(len# `quotInt#` 2# -# 1#)
387 c2 = indexCharOffFO# fo# 2# --(len# -# 1#)
390 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
391 -- use the Addr to produce a hash value between 0 & m (inclusive)
392 hashSubStrBA ba# start# len# =
395 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
396 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
397 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
402 ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#)
403 `remInt#` hASH_TBL_SIZE#
406 c0 = indexCharArray# ba# 0#
407 c1 = indexCharArray# ba# 1# --(len# `quotInt#` 2# -# 1#)
408 c2 = indexCharArray# ba# 2# --(len# -# 1#)
413 tagCmpFS :: FastString -> FastString -> _CMP_TAG
414 tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
418 unsafePerformPrimIO (
419 _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
421 if res <# 0# then _LT
422 else if res ==# 0# then _EQ
427 bottom = error "tagCmp"
428 tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
429 = unsafePerformPrimIO (
430 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
432 if res <# 0# then _LT
433 else if res ==# 0# then _EQ
439 tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
440 = unsafePerformPrimIO (
441 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
443 if res <# 0# then _LT
444 else if res ==# 0# then _EQ
448 ba1 = _ByteArray ((error "")::(Int,Int)) bs1
451 tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
452 = -- try them the other way 'round
453 case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
455 instance Ord FastString where
456 a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
457 a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
458 a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
459 a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
464 _tagCmp a b = tagCmpFS a b
468 Outputting @FastString@s is quick, just block copying the chunk (using
472 #if __GLASGOW_HASKELL__ >= 201
473 #define _ErrorHandle IOBase.ErrorHandle
474 #define _ReadHandle IOBase.ReadHandle
475 #define _ClosedHandle IOBase.ClosedHandle
476 #define _SemiClosedHandle IOBase.SemiClosedHandle
477 #define _constructError IOBase.constructError
478 #define _filePtr IOHandle.filePtr
479 #define failWith fail
482 hPutFS :: Handle -> FastString -> IO ()
483 hPutFS handle (FastString _ l# ba#) =
487 _readHandle handle >>= \ htype ->
489 _ErrorHandle ioError ->
490 _writeHandle handle htype >>
493 _writeHandle handle htype >>
494 failWith MkIOError(handle,IllegalOperation,"handle is closed")
495 _SemiClosedHandle _ _ ->
496 _writeHandle handle htype >>
497 failWith MkIOError(handle,IllegalOperation,"handle is closed")
499 _writeHandle handle htype >>
500 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
502 let fp = _filePtr htype in
504 _ccall_ writeFile (_ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) `CCALL_THEN` \rc ->
508 _constructError "hPutFS" `CCALL_THEN` \ err ->
510 hPutFS handle (CharStr a# l#) =
514 _readHandle handle >>= \ htype ->
516 _ErrorHandle ioError ->
517 _writeHandle handle htype >>
520 _writeHandle handle htype >>
521 failWith MkIOError(handle,IllegalOperation,"handle is closed")
522 _SemiClosedHandle _ _ ->
523 _writeHandle handle htype >>
524 failWith MkIOError(handle,IllegalOperation,"handle is closed")
526 _writeHandle handle htype >>
527 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
529 let fp = _filePtr htype in
531 _ccall_ writeFile (A# a#) fp (I# l#) `CCALL_THEN` \rc ->
535 _constructError "hPutFS" `CCALL_THEN` \ err ->
538 --ToDo: avoid silly code duplic.