2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
10 #include "HsVersions.h"
14 FastString(..), -- not abstract, for now.
17 mkFastString, -- :: String -> FastString
18 mkFastCharString, -- :: _Addr -> FastString
19 mkFastCharString2, -- :: _Addr -> Int -> FastString
20 mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
21 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
23 mkFastString#, -- :: Addr# -> Int# -> FastString
24 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
25 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
26 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
28 lengthFS, -- :: FastString -> Int
29 nullFastString, -- :: FastString -> Bool
31 getByteArray#, -- :: FastString -> ByteArray#
32 getByteArray, -- :: FastString -> _ByteArray Int
33 unpackFS, -- :: FastString -> String
34 appendFS, -- :: FastString -> FastString -> FastString
35 headFS, -- :: FastString -> Char
36 tailFS, -- :: FastString -> FastString
37 concatFS, -- :: [FastString] -> FastString
38 consFS, -- :: Char -> FastString -> FastString
40 hPutFS, -- :: Handle -> FastString -> IO ()
41 tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
44 #if __GLASGOW_HASKELL__ <= 201
56 import {-# SOURCE #-} Unique ( mkUniqueGrimily, Unique, Uniquable(..) )
57 #if __GLASGOW_HASKELL__ == 202
58 import PrelBase ( Char (..) )
60 #if __GLASGOW_HASKELL__ >= 206
67 #define hASH_TBL_SIZE 993
71 @FastString@s are packed representations of strings
72 with a unique id for fast comparisons. The unique id
73 is assigned when creating the @FastString@, using
74 a hash table to map from the character string representation
79 = FastString -- packed repr. on the heap.
81 -- 0 => string literal, comparison
86 | CharStr -- external C string
87 Addr# -- pointer to the (null-terminated) bytes in C land.
88 Int# -- length (cached)
90 instance Eq FastString where
91 a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
92 a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
95 (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
98 instance Uniquable FastString where
99 uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
100 uniqueOf (CharStr a# l#) =
102 [A somewhat moby hack]: to avoid entering all sorts
103 of junk into the hash table, all C char strings
104 are by default left out. The benefit of being in
105 the table is that string comparisons are lightning fast,
106 just an Int# comparison.
108 But, if you want to get the Unique of a CharStr, we
109 enter it into the table and return that unique. This
110 works, but causes the CharStr to be looked up in the hash
111 table each time it is accessed..
113 mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
115 instance Uniquable Int where
116 uniqueOf (I# i#) = mkUniqueGrimily i#
118 instance Text FastString where
119 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
120 showsPrec p ps r = showsPrec p (unpackFS ps) r
122 getByteArray# :: FastString -> ByteArray#
123 getByteArray# (FastString _ _ ba#) = ba#
125 getByteArray :: FastString -> _ByteArray Int
126 getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
128 lengthFS :: FastString -> Int
129 lengthFS (FastString _ l# _) = I# l#
130 lengthFS (CharStr a# l#) = I# l#
132 nullFastString :: FastString -> Bool
133 nullFastString (FastString _ l# _) = l# ==# 0#
134 nullFastString (CharStr _ l#) = l# ==# 0#
136 unpackFS :: FastString -> String
137 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
138 unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
140 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
142 unpackFS (CharStr addr len#) =
147 | otherwise = C# ch : unpack (nh +# 1#)
149 ch = indexCharOffAddr# addr nh
151 appendFS :: FastString -> FastString -> FastString
152 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
154 concatFS :: [FastString] -> FastString
155 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
157 headFS :: FastString -> Char
158 headFS f@(FastString _ l# ba#) =
159 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
160 headFS f@(CharStr a# l#) =
161 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
163 tailFS :: FastString -> FastString
164 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
166 consFS :: Char -> FastString -> FastString
167 consFS c fs = mkFastString (c:unpackFS fs)
171 Internally, the compiler will maintain a fast string symbol
172 table, providing sharing and fast comparison. Creation of
173 new @FastString@s then covertly does a lookup, re-using the
174 @FastString@ if there was a hit.
177 data FastStringTable =
180 (MutableArray# _RealWorld [FastString])
182 type FastStringTableVar = MutableVar _RealWorld FastStringTable
184 string_table :: FastStringTableVar
186 unsafePerformPrimIO (
187 newArray (0::Int,hASH_TBL_SIZE) [] `thenPrimIO` \ (_MutableArray _ arr#) ->
188 newVar (FastStringTable 0# arr#))
190 lookupTbl :: FastStringTable -> Int# -> PrimIO [FastString]
191 lookupTbl (FastStringTable _ arr#) i# =
193 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
196 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
197 updTbl (_MutableArray _ var#) (FastStringTable uid# arr#) i# ls =
199 case writeArray# arr# i# ls s# of { s2# ->
200 case writeArray# var# 0# (FastStringTable (uid# +# 1#) arr#) s2# of { s3# ->
203 mkFastString# :: Addr# -> Int# -> FastString
204 mkFastString# a# len# =
205 unsafePerformPrimIO (
206 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
210 -- _trace ("hashed: "++show (I# h)) $
211 lookupTbl ft h `thenPrimIO` \ lookup_result ->
212 case lookup_result of
214 -- no match, add it to table by copying out the
215 -- the string into a ByteArray
216 -- _trace "empty bucket" $
217 case copyPrefixStr (A# a#) (I# len#) of
218 (_ByteArray _ barr#) ->
219 let f_str = FastString uid# len# barr# in
220 updTbl string_table ft h [f_str] `seqPrimIO`
221 ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
223 -- non-empty `bucket', scan the list looking
224 -- entry with same length and compare byte by byte.
225 -- _trace ("non-empty bucket"++show ls) $
226 case bucket_match ls len# a# of
228 case copyPrefixStr (A# a#) (I# len#) of
229 (_ByteArray _ barr#) ->
230 let f_str = FastString uid# len# barr# in
231 updTbl string_table ft h (f_str:ls) `seqPrimIO`
232 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
233 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
235 bucket_match [] _ _ = Nothing
236 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
237 if len# ==# l# && eqStrPrefix a# ba# l# then
240 bucket_match ls len# a#
242 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
243 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
245 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
246 mkFastSubStringFO# fo# start# len# =
247 unsafePerformPrimIO (
248 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
250 h = hashSubStrFO fo# start# len#
252 lookupTbl ft h `thenPrimIO` \ lookup_result ->
253 case lookup_result of
255 -- no match, add it to table by copying out the
256 -- the string into a ByteArray
257 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
258 (_ByteArray _ barr#) ->
259 let f_str = FastString uid# len# barr# in
260 updTbl string_table ft h [f_str] `seqPrimIO`
263 -- non-empty `bucket', scan the list looking
264 -- entry with same length and compare byte by byte.
265 case bucket_match ls start# len# fo# of
267 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
268 (_ByteArray _ barr#) ->
269 let f_str = FastString uid# len# barr# in
270 updTbl string_table ft h (f_str:ls) `seqPrimIO`
271 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
272 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
274 bucket_match [] _ _ _ = Nothing
275 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
276 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
279 bucket_match ls start# len# fo#
282 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
283 mkFastSubStringBA# barr# start# len# =
284 unsafePerformPrimIO (
285 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
287 h = hashSubStrBA barr# start# len#
289 -- _trace ("hashed(b): "++show (I# h)) $
290 lookupTbl ft h `thenPrimIO` \ lookup_result ->
291 case lookup_result of
293 -- no match, add it to table by copying out the
294 -- the string into a ByteArray
295 -- _trace "empty bucket(b)" $
296 case copySubStrBA (_ByteArray btm barr#) (I# start#) (I# len#) of
297 (_ByteArray _ ba#) ->
298 let f_str = FastString uid# len# ba# in
299 updTbl string_table ft h [f_str] `seqPrimIO`
300 -- _trace ("new(b): " ++ show f_str) $
303 -- non-empty `bucket', scan the list looking
304 -- entry with same length and compare byte by byte.
305 -- _trace ("non-empty bucket(b)"++show ls) $
306 case bucket_match ls start# len# barr# of
308 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
309 (_ByteArray _ ba#) ->
310 let f_str = FastString uid# len# ba# in
311 updTbl string_table ft h (f_str:ls) `seqPrimIO`
312 -- _trace ("new(b): " ++ show f_str) $
315 -- _trace ("re-use(b): "++show v) $
321 bucket_match [] _ _ _ = Nothing
322 bucket_match (v:ls) start# len# ba# =
324 FastString _ l# barr# ->
325 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
328 bucket_match ls start# len# ba#
330 mkFastCharString :: _Addr -> FastString
331 mkFastCharString a@(A# a#) =
332 case strLength a of{ (I# len#) -> CharStr a# len# }
334 mkFastCharString2 :: _Addr -> Int -> FastString
335 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
337 mkFastString :: String -> FastString
339 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
340 case stringToByteArray str of
342 case packString str of
344 (_ByteArray (_,I# len#) frozen#) ->
345 mkFastSubStringBA# frozen# 0# len#
346 {- 0-indexed array, len# == index to one beyond end of string,
347 i.e., (0,1) => empty string. -}
349 mkFastSubString :: _Addr -> Int -> Int -> FastString
350 mkFastSubString (A# a#) (I# start#) (I# len#) =
351 mkFastString# (addrOffset# a# start#) len#
353 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
354 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
355 mkFastSubStringFO# fo# start# len#
360 hashStr :: Addr# -> Int# -> Int#
361 -- use the Addr to produce a hash value between 0 & m (inclusive)
365 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
366 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
367 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
369 c0 = indexCharOffAddr# a# 0#
370 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
371 c2 = indexCharOffAddr# a# (len# -# 1#)
373 c1 = indexCharOffAddr# a# 1#
374 c2 = indexCharOffAddr# a# 2#
377 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
378 -- use the FO to produce a hash value between 0 & m (inclusive)
379 hashSubStrFO fo# start# len# =
382 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
383 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
384 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
386 c0 = indexCharOffFO# fo# 0#
387 c1 = indexCharOffFO# fo# (len# `quotInt#` 2# -# 1#)
388 c2 = indexCharOffFO# fo# (len# -# 1#)
390 -- c1 = indexCharOffFO# fo# 1#
391 -- c2 = indexCharOffFO# fo# 2#
394 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
395 -- use the byte array to produce a hash value between 0 & m (inclusive)
396 hashSubStrBA ba# start# len# =
399 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
400 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
401 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
403 c0 = indexCharArray# ba# 0#
404 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
405 c2 = indexCharArray# ba# (len# -# 1#)
407 -- c1 = indexCharArray# ba# 1#
408 -- c2 = indexCharArray# ba# 2#
413 tagCmpFS :: FastString -> FastString -> _CMP_TAG
414 tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
418 unsafePerformPrimIO (
419 _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
421 if res <# 0# then _LT
422 else if res ==# 0# then _EQ
427 bottom = error "tagCmp"
428 tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
429 = unsafePerformPrimIO (
430 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
432 if res <# 0# then _LT
433 else if res ==# 0# then _EQ
439 tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
440 = unsafePerformPrimIO (
441 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
443 if res <# 0# then _LT
444 else if res ==# 0# then _EQ
448 ba1 = _ByteArray ((error "")::(Int,Int)) bs1
451 tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
452 = -- try them the other way 'round
453 case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
455 instance Ord FastString where
456 a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
457 a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
458 a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
459 a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
464 _tagCmp a b = tagCmpFS a b
468 Outputting @FastString@s is quick, just block copying the chunk (using
472 #if __GLASGOW_HASKELL__ >= 201
473 #define _ErrorHandle IOBase.ErrorHandle
474 #define _ReadHandle IOBase.ReadHandle
475 #define _ClosedHandle IOBase.ClosedHandle
476 #define _SemiClosedHandle IOBase.SemiClosedHandle
477 #define _constructError IOBase.constructError
478 #define _filePtr IOHandle.filePtr
479 #define failWith fail
482 hPutFS :: Handle -> FastString -> IO ()
483 hPutFS handle (FastString _ l# ba#) =
487 _readHandle handle >>= \ htype ->
489 _ErrorHandle ioError ->
490 _writeHandle handle htype >>
493 _writeHandle handle htype >>
494 failWith MkIOError(handle,IllegalOperation,"handle is closed")
495 _SemiClosedHandle _ _ ->
496 _writeHandle handle htype >>
497 failWith MkIOError(handle,IllegalOperation,"handle is closed")
499 _writeHandle handle htype >>
500 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
502 let fp = _filePtr htype in
504 _ccall_ writeFile (_ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) `CCALL_THEN` \rc ->
508 _constructError "hPutFS" `CCALL_THEN` \ err ->
510 hPutFS handle (CharStr a# l#) =
514 _readHandle handle >>= \ htype ->
516 _ErrorHandle ioError ->
517 _writeHandle handle htype >>
520 _writeHandle handle htype >>
521 failWith MkIOError(handle,IllegalOperation,"handle is closed")
522 _SemiClosedHandle _ _ ->
523 _writeHandle handle htype >>
524 failWith MkIOError(handle,IllegalOperation,"handle is closed")
526 _writeHandle handle htype >>
527 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
529 let fp = _filePtr htype in
531 _ccall_ writeFile (A# a#) fp (I# l#) `CCALL_THEN` \rc ->
535 _constructError "hPutFS" `CCALL_THEN` \ err ->
538 --ToDo: avoid silly code duplic.