2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 Compact representations of character strings with
7 unique identifiers (hash-cons'ish).
12 FastString(..), -- not abstract, for now.
14 mkFastString, -- :: String -> FastString
15 mkFastStringNarrow, -- :: String -> FastString
16 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
18 mkFastString#, -- :: Addr# -> FastString
19 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
21 mkFastStringInt, -- :: [Int] -> FastString
23 uniqueOfFS, -- :: FastString -> Int#
24 lengthFS, -- :: FastString -> Int
25 nullFastString, -- :: FastString -> Bool
27 unpackFS, -- :: FastString -> String
28 unpackIntFS, -- :: FastString -> [Int]
29 appendFS, -- :: FastString -> FastString -> FastString
30 headFS, -- :: FastString -> Char
31 headIntFS, -- :: FastString -> Int
32 tailFS, -- :: FastString -> FastString
33 concatFS, -- :: [FastString] -> FastString
34 consFS, -- :: Char -> FastString -> FastString
35 indexFS, -- :: FastString -> Int -> Char
36 nilFS, -- :: FastString
38 hPutFS, -- :: Handle -> FastString -> IO ()
41 mkLitString# -- :: Addr# -> LitString
44 -- This #define suppresses the "import FastString" that
45 -- HsVersions otherwise produces
46 #define COMPILING_FAST_STRING
47 #include "HsVersions.h"
49 #if __GLASGOW_HASKELL__ < 503
50 import PrelIOBase ( IO(..) )
52 import GHC.IOBase ( IO(..) )
57 import UNSAFE_IO ( unsafePerformIO )
58 import MONAD_ST ( stToIO )
59 import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
61 #if __GLASGOW_HASKELL__ < 503
62 import PrelArr ( STArray(..), newSTArray )
64 import GHC.Arr ( STArray(..), newSTArray )
67 #if __GLASGOW_HASKELL__ >= 504
72 import IOExts ( hPutBufBAFull )
76 import Char ( chr, ord )
78 #define hASH_TBL_SIZE 993
81 @FastString@s are packed representations of strings
82 with a unique id for fast comparisons. The unique id
83 is assigned when creating the @FastString@, using
84 a hash table to map from the character string representation
89 = FastString -- packed repr. on the heap.
91 -- 0 => string literal, comparison
96 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
98 [Int] -- character numbers
100 instance Eq FastString where
101 -- shortcut for real FastStrings
102 (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
103 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
105 (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
106 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
108 instance Ord FastString where
109 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
110 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
111 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
112 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
117 compare a b = cmpFS a b
119 lengthFS :: FastString -> Int
120 lengthFS (FastString _ l# _) = I# l#
121 lengthFS (UnicodeStr _ s) = length s
123 nullFastString :: FastString -> Bool
124 nullFastString (FastString _ l# _) = l# ==# 0#
125 nullFastString (UnicodeStr _ []) = True
126 nullFastString (UnicodeStr _ (_:_)) = False
128 unpackFS :: FastString -> String
129 unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
130 unpackFS (UnicodeStr _ s) = map chr s
132 unpackIntFS :: FastString -> [Int]
133 unpackIntFS (UnicodeStr _ s) = s
134 unpackIntFS fs = map ord (unpackFS fs)
136 appendFS :: FastString -> FastString -> FastString
137 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
139 concatFS :: [FastString] -> FastString
140 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
142 headFS :: FastString -> Char
143 headFS (FastString _ l# ba#) =
144 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
145 headFS (UnicodeStr _ (c:_)) = chr c
146 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
148 headIntFS :: FastString -> Int
149 headIntFS (UnicodeStr _ (c:_)) = c
150 headIntFS fs = ord (headFS fs)
152 indexFS :: FastString -> Int -> Char
153 indexFS f i@(I# i#) =
156 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
157 | otherwise -> error (msg (I# l#))
158 UnicodeStr _ s -> chr (s!!i)
160 msg l = "indexFS: out of range: " ++ show (l,i)
162 tailFS :: FastString -> FastString
163 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
164 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
166 consFS :: Char -> FastString -> FastString
167 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
169 uniqueOfFS :: FastString -> Int#
170 uniqueOfFS (FastString u# _ _) = u#
171 uniqueOfFS (UnicodeStr u# _) = u#
173 nilFS = mkFastString ""
176 Internally, the compiler will maintain a fast string symbol
177 table, providing sharing and fast comparison. Creation of
178 new @FastString@s then covertly does a lookup, re-using the
179 @FastString@ if there was a hit.
181 Caution: mkFastStringUnicode assumes that if the string is in the
182 table, it sits under the UnicodeStr constructor. Other mkFastString
183 variants analogously assume the FastString constructor.
186 data FastStringTable =
189 (MutableArray# RealWorld [FastString])
191 type FastStringTableVar = IORef FastStringTable
193 string_table :: FastStringTableVar
196 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
197 >>= \ (STArray _ _ arr#) ->
198 newIORef (FastStringTable 0# arr#))
200 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
201 lookupTbl (FastStringTable _ arr#) i# =
203 readArray# arr# i# s#)
205 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
206 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
207 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
209 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
211 mkFastString# :: Addr# -> FastString
213 case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
215 mkFastStringLen# :: Addr# -> Int# -> FastString
216 mkFastStringLen# a# len# =
218 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
222 -- _trace ("hashed: "++show (I# h)) $
223 lookupTbl ft h >>= \ lookup_result ->
224 case lookup_result of
226 -- no match, add it to table by copying out the
227 -- the string into a ByteArray
228 -- _trace "empty bucket" $
229 case copyPrefixStr a# (I# len#) of
231 let f_str = FastString uid# len# barr# in
232 updTbl string_table ft h [f_str] >>
233 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
235 -- non-empty `bucket', scan the list looking
236 -- entry with same length and compare byte by byte.
237 -- _trace ("non-empty bucket"++show ls) $
238 case bucket_match ls len# a# of
240 case copyPrefixStr a# (I# len#) of
242 let f_str = FastString uid# len# barr# in
243 updTbl string_table ft h (f_str:ls) >>
244 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
245 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
247 bucket_match [] _ _ = Nothing
248 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
249 if len# ==# l# && eqStrPrefix a# ba# l# then
252 bucket_match ls len# a#
253 bucket_match (UnicodeStr _ _ : ls) len# a# =
254 bucket_match ls len# a#
256 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
257 mkFastSubStringBA# barr# start# len# =
259 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
261 h = hashSubStrBA barr# start# len#
263 -- _trace ("hashed(b): "++show (I# h)) $
264 lookupTbl ft h >>= \ lookup_result ->
265 case lookup_result of
267 -- no match, add it to table by copying out the
268 -- the string into a ByteArray
269 -- _trace "empty bucket(b)" $
270 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
272 let f_str = FastString uid# len# ba# in
273 updTbl string_table ft h [f_str] >>
274 -- _trace ("new(b): " ++ show f_str) $
277 -- non-empty `bucket', scan the list looking
278 -- entry with same length and compare byte by byte.
279 -- _trace ("non-empty bucket(b)"++show ls) $
280 case bucket_match ls start# len# barr# of
282 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
284 let f_str = FastString uid# len# ba# in
285 updTbl string_table ft h (f_str:ls) >>
286 -- _trace ("new(b): " ++ show f_str) $
289 -- _trace ("re-use(b): "++show v) $
293 bucket_match [] _ _ _ = Nothing
294 bucket_match (v:ls) start# len# ba# =
296 FastString _ l# barr# ->
297 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
300 bucket_match ls start# len# ba#
301 UnicodeStr _ _ -> bucket_match ls start# len# ba#
303 mkFastStringUnicode :: [Int] -> FastString
304 mkFastStringUnicode s =
306 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
310 -- _trace ("hashed(b): "++show (I# h)) $
311 lookupTbl ft h >>= \ lookup_result ->
312 case lookup_result of
314 -- no match, add it to table by copying out the
315 -- the string into a [Int]
316 let f_str = UnicodeStr uid# s in
317 updTbl string_table ft h [f_str] >>
318 -- _trace ("new(b): " ++ show f_str) $
321 -- non-empty `bucket', scan the list looking
322 -- entry with same length and compare byte by byte.
323 -- _trace ("non-empty bucket(b)"++show ls) $
324 case bucket_match ls of
326 let f_str = UnicodeStr uid# s in
327 updTbl string_table ft h (f_str:ls) >>
328 -- _trace ("new(b): " ++ show f_str) $
331 -- _trace ("re-use(b): "++show v) $
335 bucket_match [] = Nothing
336 bucket_match (v@(UnicodeStr _ s'):ls) =
337 if s' == s then Just v else bucket_match ls
338 bucket_match (FastString _ _ _ : ls) = bucket_match ls
340 mkFastStringNarrow :: String -> FastString
341 mkFastStringNarrow str =
342 case packString str of { (I# len#, BA frozen#) ->
343 mkFastSubStringBA# frozen# 0# len#
345 {- 0-indexed array, len# == index to one beyond end of string,
346 i.e., (0,1) => empty string. -}
348 mkFastString :: String -> FastString
349 mkFastString str = if all good str
350 then mkFastStringNarrow str
351 else mkFastStringUnicode (map ord str)
353 good c = c >= '\1' && c <= '\xFF'
355 mkFastStringInt :: [Int] -> FastString
356 mkFastStringInt str = if all good str
357 then mkFastStringNarrow (map chr str)
358 else mkFastStringUnicode str
360 good c = c >= 1 && c <= 0xFF
362 mkFastSubString :: Addr# -> Int -> Int -> FastString
363 mkFastSubString a# (I# start#) (I# len#) =
364 mkFastStringLen# (a# `plusAddr#` start#) len#
368 hashStr :: Addr# -> Int# -> Int#
369 -- use the Addr to produce a hash value between 0 & m (inclusive)
373 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
374 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
375 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
377 c0 = indexCharOffAddr# a# 0#
378 c1 = indexCharOffAddr# a# (len# `quotInt#` 2# -# 1#)
379 c2 = indexCharOffAddr# a# (len# -# 1#)
381 c1 = indexCharOffAddr# a# 1#
382 c2 = indexCharOffAddr# a# 2#
385 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
386 -- use the byte array to produce a hash value between 0 & m (inclusive)
387 hashSubStrBA ba# start# len# =
390 1# -> ((ord# c0 *# 631#) +# len#) `remInt#` hASH_TBL_SIZE#
391 2# -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# len#) `remInt#` hASH_TBL_SIZE#
392 _ -> ((ord# c0 *# 631#) +# (ord# c1 *# 217#) +# (ord# c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
394 c0 = indexCharArray# ba# 0#
395 c1 = indexCharArray# ba# (len# `quotInt#` 2# -# 1#)
396 c2 = indexCharArray# ba# (len# -# 1#)
398 -- c1 = indexCharArray# ba# 1#
399 -- c2 = indexCharArray# ba# 2#
401 hashUnicode :: [Int] -> Int#
402 -- use the Addr to produce a hash value between 0 & m (inclusive)
404 hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
405 hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
406 hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
410 I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
411 I# c2 = s !! (I# (len# -# 1#))
416 cmpFS :: FastString -> FastString -> Ordering
417 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
419 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
420 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
421 cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
422 if u1# ==# u2# then EQ else
423 let l# = if l1# <=# l2# then l1# else l2# in
425 memcmp b1# b2# l# >>= \ (I# res) ->
428 else if res ==# 0# then
429 if l1# ==# l2# then EQ
430 else if l1# <# l2# then LT else GT
434 foreign import ccall "ghc_memcmp" unsafe
435 memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
437 -- -----------------------------------------------------------------------------
438 -- Outputting 'FastString's
440 #if __GLASGOW_HASKELL__ >= 504
442 -- this is our own version of hPutBuf for FastStrings, because in
443 -- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
444 -- The closest is hPutArray in Data.Array.IO, but that does some extra
445 -- range checks that we want to avoid here.
447 foreign import ccall unsafe "__hscore_memcpy_dst_off"
448 memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
450 hPutFS handle (FastString _ l# ba#)
451 | l# ==# 0# = return ()
453 = do wantWritableHandle "hPutFS" handle $
454 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
456 old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
460 raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
462 -- enough room in handle buffer?
463 if (size - w > count)
464 -- There's enough room in the buffer:
465 -- just copy the data in and update bufWPtr.
466 then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
467 writeIORef ref old_buf{ bufWPtr = w + count }
470 -- else, we have to flush
471 else do flushed_buf <- flushWriteBuffer fd stream old_buf
472 writeIORef ref flushed_buf
474 Buffer{ bufBuf=raw, bufState=WriteBuffer,
475 bufRPtr=0, bufWPtr=count, bufSize=count }
476 flushWriteBuffer fd stream this_buf
481 hPutFS :: Handle -> FastString -> IO ()
482 hPutFS handle (FastString _ l# ba#)
483 | l# ==# 0# = return ()
484 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
485 hPutBufBAFull handle mba (I# l#)
487 bot = error "hPutFS.ba"
491 -- ONLY here for debugging the NCG (so -ddump-stix works for string
492 -- literals); no idea if this is really necessary. JRS, 010131
493 hPutFS handle (UnicodeStr _ is)
494 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
496 -- -----------------------------------------------------------------------------
497 -- LitStrings, here for convenience only.
499 type LitString = Ptr ()
500 -- ToDo: make it a Ptr when we don't have to support 4.08 any more
502 mkLitString# :: Addr# -> LitString
503 mkLitString# a# = Ptr a#