2 % (c) The GRASP/AQUA Project, Glasgow University, 1997
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
10 #include "HsVersions.h"
14 FastString(..), -- not abstract, for now.
17 mkFastString, -- :: String -> FastString
18 mkFastCharString, -- :: _Addr -> FastString
19 mkFastCharString2, -- :: _Addr -> Int -> FastString
20 mkFastSubString, -- :: _Addr -> Int -> Int -> FastString
21 mkFastSubStringFO, -- :: ForeignObj -> Int -> Int -> FastString
23 mkFastString#, -- :: Addr# -> Int# -> FastString
24 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
25 mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString
26 mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
28 lengthFS, -- :: FastString -> Int
29 nullFastString, -- :: FastString -> Bool
31 getByteArray#, -- :: FastString -> ByteArray#
32 getByteArray, -- :: FastString -> _ByteArray Int
33 unpackFS, -- :: FastString -> String
34 appendFS, -- :: FastString -> FastString -> FastString
35 headFS, -- :: FastString -> Char
36 tailFS, -- :: FastString -> FastString
37 concatFS, -- :: [FastString] -> FastString
38 consFS, -- :: Char -> FastString -> FastString
40 hPutFS, -- :: Handle -> FastString -> IO ()
41 tagCmpFS -- :: FastString -> FastString -> _CMP_TAG
44 #if __GLASGOW_HASKELL__ <= 201
56 import {-# SOURCE #-} Unique ( mkUniqueGrimily, Unique, Uniquable(..) )
57 #if __GLASGOW_HASKELL__ == 202
58 import PrelBase ( Char (..) )
60 #if __GLASGOW_HASKELL__ >= 206
63 #if __GLASGOW_HASKELL__ >= 209
66 # define newVar newIORef
67 # define readVar readIORef
68 # define writeVar writeIORef
75 #define hASH_TBL_SIZE 993
79 @FastString@s are packed representations of strings
80 with a unique id for fast comparisons. The unique id
81 is assigned when creating the @FastString@, using
82 a hash table to map from the character string representation
87 = FastString -- packed repr. on the heap.
89 -- 0 => string literal, comparison
94 | CharStr -- external C string
95 Addr# -- pointer to the (null-terminated) bytes in C land.
96 Int# -- length (cached)
98 instance Eq FastString where
99 a == b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> False }
100 a /= b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> True }
103 (FastString u1# _ _) == (FastString u2# _ _) = u1# ==# u2#
106 instance Uniquable FastString where
107 uniqueOf (FastString u# _ _) = mkUniqueGrimily u#
108 uniqueOf (CharStr a# l#) =
110 [A somewhat moby hack]: to avoid entering all sorts
111 of junk into the hash table, all C char strings
112 are by default left out. The benefit of being in
113 the table is that string comparisons are lightning fast,
114 just an Int# comparison.
116 But, if you want to get the Unique of a CharStr, we
117 enter it into the table and return that unique. This
118 works, but causes the CharStr to be looked up in the hash
119 table each time it is accessed..
121 mkUniqueGrimily (case mkFastString# a# l# of { FastString u# _ _ -> u#}) -- Ugh!
123 instance Uniquable Int where
124 uniqueOf (I# i#) = mkUniqueGrimily i#
126 instance Text FastString where
127 showsPrec p ps@(FastString u# _ _) r = showsPrec p (unpackFS ps) r
128 showsPrec p ps r = showsPrec p (unpackFS ps) r
130 getByteArray# :: FastString -> ByteArray#
131 getByteArray# (FastString _ _ ba#) = ba#
133 getByteArray :: FastString -> _ByteArray Int
134 getByteArray (FastString _ l# ba#) = _ByteArray (0,I# l#) ba#
136 lengthFS :: FastString -> Int
137 lengthFS (FastString _ l# _) = I# l#
138 lengthFS (CharStr a# l#) = I# l#
140 nullFastString :: FastString -> Bool
141 nullFastString (FastString _ l# _) = l# ==# 0#
142 nullFastString (CharStr _ l#) = l# ==# 0#
144 unpackFS :: FastString -> String
145 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
146 unpackFS (FastString _ l# ba#) = byteArrayToString (_ByteArray (0,I# l#) ba#)
148 unpackFS (FastString _ l# ba#) = unpackCStringBA# ba# l#
150 unpackFS (CharStr addr len#) =
155 | otherwise = C# ch : unpack (nh +# 1#)
157 ch = indexCharOffAddr# addr nh
159 appendFS :: FastString -> FastString -> FastString
160 appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
162 concatFS :: [FastString] -> FastString
163 concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
165 headFS :: FastString -> Char
166 headFS f@(FastString _ l# ba#) =
167 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
168 headFS f@(CharStr a# l#) =
169 if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
171 tailFS :: FastString -> FastString
172 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
174 consFS :: Char -> FastString -> FastString
175 consFS c fs = mkFastString (c:unpackFS fs)
179 Internally, the compiler will maintain a fast string symbol
180 table, providing sharing and fast comparison. Creation of
181 new @FastString@s then covertly does a lookup, re-using the
182 @FastString@ if there was a hit.
185 data FastStringTable =
188 (MutableArray# _RealWorld [FastString])
190 #if __GLASGOW_HASKELL__ < 209
191 type FastStringTableVar = MutableVar _RealWorld FastStringTable
193 type FastStringTableVar = IORef FastStringTable
196 string_table :: FastStringTableVar
198 unsafePerformPrimIO (
199 ST_TO_PrimIO (newArray (0::Int,hASH_TBL_SIZE) []) `thenPrimIO` \ (_MutableArray _ arr#) ->
200 newVar (FastStringTable 0# arr#))
202 lookupTbl :: FastStringTable -> Int# -> PrimIO [FastString]
203 lookupTbl (FastStringTable _ arr#) i# =
205 MkST ( \ STATE_TOK(s#) ->
206 case readArray# arr# i# s# of { StateAndPtr# s2# r ->
207 ST_RET(r, STATE_TOK(s2#)) }))
209 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> PrimIO ()
210 updTbl ref (FastStringTable uid# arr#) i# ls =
212 MkST ( \ STATE_TOK(s#) ->
213 case writeArray# arr# i# ls s# of { s2# ->
214 ST_RET((), STATE_TOK(s2#)) })) `thenPrimIO` \ _ ->
215 writeVar ref (FastStringTable (uid# +# 1#) arr#)
217 mkFastString# :: Addr# -> Int# -> FastString
218 mkFastString# a# len# =
219 unsafePerformPrimIO (
220 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
224 -- _trace ("hashed: "++show (I# h)) $
225 lookupTbl ft h `thenPrimIO` \ lookup_result ->
226 case lookup_result of
228 -- no match, add it to table by copying out the
229 -- the string into a ByteArray
230 -- _trace "empty bucket" $
231 case copyPrefixStr (A# a#) (I# len#) of
232 (_ByteArray _ barr#) ->
233 let f_str = FastString uid# len# barr# in
234 updTbl string_table ft h [f_str] `seqPrimIO`
235 ({- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
237 -- non-empty `bucket', scan the list looking
238 -- entry with same length and compare byte by byte.
239 -- _trace ("non-empty bucket"++show ls) $
240 case bucket_match ls len# a# of
242 case copyPrefixStr (A# a#) (I# len#) of
243 (_ByteArray _ barr#) ->
244 let f_str = FastString uid# len# barr# in
245 updTbl string_table ft h (f_str:ls) `seqPrimIO`
246 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
247 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
249 bucket_match [] _ _ = Nothing
250 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
251 if len# ==# l# && eqStrPrefix a# ba# l# then
254 bucket_match ls len# a#
256 mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
257 mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
259 mkFastSubStringFO# :: ForeignObj# -> Int# -> Int# -> FastString
260 mkFastSubStringFO# fo# start# len# =
261 unsafePerformPrimIO (
262 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
264 h = hashSubStrFO fo# start# len#
266 lookupTbl ft h `thenPrimIO` \ lookup_result ->
267 case lookup_result of
269 -- no match, add it to table by copying out the
270 -- the string into a ByteArray
271 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
272 (_ByteArray _ barr#) ->
273 let f_str = FastString uid# len# barr# in
274 updTbl string_table ft h [f_str] `seqPrimIO`
277 -- non-empty `bucket', scan the list looking
278 -- entry with same length and compare byte by byte.
279 case bucket_match ls start# len# fo# of
281 case copySubStrFO (_ForeignObj fo#) (I# start#) (I# len#) of
282 (_ByteArray _ barr#) ->
283 let f_str = FastString uid# len# barr# in
284 updTbl string_table ft h (f_str:ls) `seqPrimIO`
285 ( {- _trace ("new: " ++ show f_str) $ -} returnPrimIO f_str)
286 Just v -> {- _trace ("re-use: "++show v) $ -} returnPrimIO v)
288 bucket_match [] _ _ _ = Nothing
289 bucket_match (v@(FastString _ l# barr#):ls) start# len# fo# =
290 if len# ==# l# && eqStrPrefixFO fo# barr# start# len# then
293 bucket_match ls start# len# fo#
296 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
297 mkFastSubStringBA# barr# start# len# =
298 unsafePerformPrimIO (
299 readVar string_table `thenPrimIO` \ ft@(FastStringTable uid# tbl#) ->
301 h = hashSubStrBA barr# start# len#
303 -- _trace ("hashed(b): "++show (I# h)) $
304 lookupTbl ft h `thenPrimIO` \ lookup_result ->
305 case lookup_result of
307 -- no match, add it to table by copying out the
308 -- the string into a ByteArray
309 -- _trace "empty bucket(b)" $
310 case copySubStrBA (_ByteArray btm barr#) (I# start#) (I# len#) of
311 (_ByteArray _ ba#) ->
312 let f_str = FastString uid# len# ba# in
313 updTbl string_table ft h [f_str] `seqPrimIO`
314 -- _trace ("new(b): " ++ show f_str) $
317 -- non-empty `bucket', scan the list looking
318 -- entry with same length and compare byte by byte.
319 -- _trace ("non-empty bucket(b)"++show ls) $
320 case bucket_match ls start# len# barr# of
322 case copySubStrBA (_ByteArray (error "") barr#) (I# start#) (I# len#) of
323 (_ByteArray _ ba#) ->
324 let f_str = FastString uid# len# ba# in
325 updTbl string_table ft h (f_str:ls) `seqPrimIO`
326 -- _trace ("new(b): " ++ show f_str) $
329 -- _trace ("re-use(b): "++show v) $
335 bucket_match [] _ _ _ = Nothing
336 bucket_match (v:ls) start# len# ba# =
338 FastString _ l# barr# ->
339 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
342 bucket_match ls start# len# ba#
344 mkFastCharString :: _Addr -> FastString
345 mkFastCharString a@(A# a#) =
346 case strLength a of{ (I# len#) -> CharStr a# len# }
348 mkFastCharString2 :: _Addr -> Int -> FastString
349 mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
351 mkFastString :: String -> FastString
353 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
354 case stringToByteArray str of
356 case packString str of
358 (_ByteArray (_,I# len#) frozen#) ->
359 mkFastSubStringBA# frozen# 0# len#
360 {- 0-indexed array, len# == index to one beyond end of string,
361 i.e., (0,1) => empty string. -}
363 mkFastSubString :: _Addr -> Int -> Int -> FastString
364 mkFastSubString (A# a#) (I# start#) (I# len#) =
365 mkFastString# (addrOffset# a# start#) len#
367 mkFastSubStringFO :: _ForeignObj -> Int -> Int -> FastString
368 mkFastSubStringFO (_ForeignObj fo#) (I# start#) (I# len#) =
369 mkFastSubStringFO# fo# start# len#
374 hashStr :: Addr# -> Int# -> Int#
375 -- use the Addr to produce a hash value between 0 & m (inclusive)
379 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
380 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
381 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
383 c0 = indexCharOffAddr# a# 0#
384 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
385 c2 = indexCharOffAddr# a# (len# -# 1#)
387 c1 = indexCharOffAddr# a# 1#
388 c2 = indexCharOffAddr# a# 2#
391 hashSubStrFO :: ForeignObj# -> Int# -> Int# -> Int#
392 -- use the FO to produce a hash value between 0 & m (inclusive)
393 hashSubStrFO fo# start# len# =
396 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
397 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
398 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
400 c0 = indexCharOffFO# fo# 0#
401 c1 = indexCharOffFO# fo# (len# `quotInt#` 2# -# 1#)
402 c2 = indexCharOffFO# fo# (len# -# 1#)
404 -- c1 = indexCharOffFO# fo# 1#
405 -- c2 = indexCharOffFO# fo# 2#
408 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
409 -- use the byte array to produce a hash value between 0 & m (inclusive)
410 hashSubStrBA ba# start# len# =
413 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
414 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
415 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
417 c0 = indexCharArray# ba# 0#
418 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
419 c2 = indexCharArray# ba# (len# -# 1#)
421 -- c1 = indexCharArray# ba# 1#
422 -- c2 = indexCharArray# ba# 2#
427 tagCmpFS :: FastString -> FastString -> _CMP_TAG
428 tagCmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
432 unsafePerformPrimIO (
433 _ccall_ strcmp (_ByteArray bottom b1#) (_ByteArray bottom b2#) `thenPrimIO` \ (I# res) ->
435 if res <# 0# then _LT
436 else if res ==# 0# then _EQ
441 bottom = error "tagCmp"
442 tagCmpFS (CharStr bs1 len1) (CharStr bs2 len2)
443 = unsafePerformPrimIO (
444 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
446 if res <# 0# then _LT
447 else if res ==# 0# then _EQ
453 tagCmpFS (FastString _ len1 bs1) (CharStr bs2 len2)
454 = unsafePerformPrimIO (
455 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
457 if res <# 0# then _LT
458 else if res ==# 0# then _EQ
462 ba1 = _ByteArray ((error "")::(Int,Int)) bs1
465 tagCmpFS a@(CharStr _ _) b@(FastString _ _ _)
466 = -- try them the other way 'round
467 case (tagCmpFS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
469 instance Ord FastString where
470 a <= b = case tagCmpFS a b of { _LT -> True; _EQ -> True; _GT -> False }
471 a < b = case tagCmpFS a b of { _LT -> True; _EQ -> False; _GT -> False }
472 a >= b = case tagCmpFS a b of { _LT -> False; _EQ -> True; _GT -> True }
473 a > b = case tagCmpFS a b of { _LT -> False; _EQ -> False; _GT -> True }
478 _tagCmp a b = tagCmpFS a b
482 Outputting @FastString@s is quick, just block copying the chunk (using
486 #if __GLASGOW_HASKELL__ >= 201
487 #define _ErrorHandle IOBase.ErrorHandle
488 #define _ReadHandle IOBase.ReadHandle
489 #define _ClosedHandle IOBase.ClosedHandle
490 #define _SemiClosedHandle IOBase.SemiClosedHandle
491 #define _constructError IOBase.constructError
492 #define _filePtr IOHandle.filePtr
493 #define failWith fail
496 hPutFS :: Handle -> FastString -> IO ()
497 hPutFS handle (FastString _ l# ba#) =
501 _readHandle handle >>= \ htype ->
503 _ErrorHandle ioError ->
504 _writeHandle handle htype >>
507 _writeHandle handle htype >>
508 failWith MkIOError(handle,IllegalOperation,"handle is closed")
509 _SemiClosedHandle _ _ ->
510 _writeHandle handle htype >>
511 failWith MkIOError(handle,IllegalOperation,"handle is closed")
513 _writeHandle handle htype >>
514 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
516 let fp = _filePtr htype in
518 _ccall_ writeFile (_ByteArray ((error "")::(Int,Int)) ba#) fp (I# l#) `CCALL_THEN` \rc ->
522 _constructError "hPutFS" `CCALL_THEN` \ err ->
524 hPutFS handle (CharStr a# l#) =
528 _readHandle handle >>= \ htype ->
530 _ErrorHandle ioError ->
531 _writeHandle handle htype >>
534 _writeHandle handle htype >>
535 failWith MkIOError(handle,IllegalOperation,"handle is closed")
536 _SemiClosedHandle _ _ ->
537 _writeHandle handle htype >>
538 failWith MkIOError(handle,IllegalOperation,"handle is closed")
540 _writeHandle handle htype >>
541 failWith MkIOError(handle,IllegalOperation,"handle is not open for writing")
543 let fp = _filePtr htype in
545 _ccall_ writeFile (A# a#) fp (I# l#) `CCALL_THEN` \rc ->
549 _constructError "hPutFS" `CCALL_THEN` \ err ->
552 --ToDo: avoid silly code duplic.