2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
6 FastString: A compact, hash-consed, representation of character strings.
7 Comparison is O(1), and you can get a Unique from them.
8 Generated by the FSLIT macro
9 Turn into SDoc with Outputable.ftext
11 LitString: Just a wrapper for the Addr# of a C string (Ptr CChar).
12 Practically no operations
13 Outputing them is fast
14 Generated by the SLIT macro
15 Turn into SDoc with Outputable.ptext
17 Use LitString unless you want the facilities of FastString
22 FastString(..), -- not abstract, for now.
24 mkFastString, -- :: String -> FastString
25 mkFastStringNarrow, -- :: String -> FastString
26 mkFastSubString, -- :: Addr -> Int -> Int -> FastString
28 mkFastString#, -- :: Addr# -> FastString
29 mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
31 mkFastStringInt, -- :: [Int] -> FastString
33 uniqueOfFS, -- :: FastString -> Int#
34 lengthFS, -- :: FastString -> Int
35 nullFastString, -- :: FastString -> Bool
37 unpackFS, -- :: FastString -> String
38 unpackIntFS, -- :: FastString -> [Int]
39 appendFS, -- :: FastString -> FastString -> FastString
40 headFS, -- :: FastString -> Char
41 headIntFS, -- :: FastString -> Int
42 tailFS, -- :: FastString -> FastString
43 concatFS, -- :: [FastString] -> FastString
44 consFS, -- :: Char -> FastString -> FastString
45 indexFS, -- :: FastString -> Int -> Char
46 nilFS, -- :: FastString
48 hPutFS, -- :: Handle -> FastString -> IO ()
51 mkLitString# -- :: Addr# -> LitString
54 -- This #define suppresses the "import FastString" that
55 -- HsVersions otherwise produces
56 #define COMPILING_FAST_STRING
57 #include "HsVersions.h"
59 #if __GLASGOW_HASKELL__ < 503
60 import PrelIOBase ( IO(..) )
62 import GHC.IOBase ( IO(..) )
67 import UNSAFE_IO ( unsafePerformIO )
68 import MONAD_ST ( stToIO )
69 import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
71 #if __GLASGOW_HASKELL__ < 503
72 import PrelArr ( STArray(..), newSTArray )
74 import GHC.Arr ( STArray(..), newSTArray )
77 #if __GLASGOW_HASKELL__ >= 504
82 import IOExts ( hPutBufBAFull )
86 import Char ( chr, ord )
88 #define hASH_TBL_SIZE 4091
91 @FastString@s are packed representations of strings
92 with a unique id for fast comparisons. The unique id
93 is assigned when creating the @FastString@, using
94 a hash table to map from the character string representation
99 = FastString -- packed repr. on the heap.
101 -- 0 => string literal, comparison
106 | UnicodeStr -- if contains characters outside '\1'..'\xFF'
108 [Int] -- character numbers
110 instance Eq FastString where
111 -- shortcut for real FastStrings
112 (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
113 a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
115 (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
116 a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
118 instance Ord FastString where
119 -- Compares lexicographically, not by unique
120 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
121 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
122 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
123 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
128 compare a b = cmpFS a b
130 instance Show FastString where
131 show fs = show (unpackFS fs)
133 lengthFS :: FastString -> Int
134 lengthFS (FastString _ l# _) = I# l#
135 lengthFS (UnicodeStr _ s) = length s
137 nullFastString :: FastString -> Bool
138 nullFastString (FastString _ l# _) = l# ==# 0#
139 nullFastString (UnicodeStr _ []) = True
140 nullFastString (UnicodeStr _ (_:_)) = False
142 unpackFS :: FastString -> String
143 unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
144 unpackFS (UnicodeStr _ s) = map chr s
146 unpackIntFS :: FastString -> [Int]
147 unpackIntFS (UnicodeStr _ s) = s
148 unpackIntFS fs = map ord (unpackFS fs)
150 appendFS :: FastString -> FastString -> FastString
151 appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
153 concatFS :: [FastString] -> FastString
154 concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
156 headFS :: FastString -> Char
157 headFS (FastString _ l# ba#) =
158 if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
159 headFS (UnicodeStr _ (c:_)) = chr c
160 headFS (UnicodeStr _ []) = error ("headFS: empty FS")
162 headIntFS :: FastString -> Int
163 headIntFS (UnicodeStr _ (c:_)) = c
164 headIntFS fs = ord (headFS fs)
166 indexFS :: FastString -> Int -> Char
167 indexFS f i@(I# i#) =
170 | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
171 | otherwise -> error (msg (I# l#))
172 UnicodeStr _ s -> chr (s!!i)
174 msg l = "indexFS: out of range: " ++ show (l,i)
176 tailFS :: FastString -> FastString
177 tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
178 tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
180 consFS :: Char -> FastString -> FastString
181 consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
183 uniqueOfFS :: FastString -> Int#
184 uniqueOfFS (FastString u# _ _) = u#
185 uniqueOfFS (UnicodeStr u# _) = u#
187 nilFS = mkFastString ""
190 Internally, the compiler will maintain a fast string symbol
191 table, providing sharing and fast comparison. Creation of
192 new @FastString@s then covertly does a lookup, re-using the
193 @FastString@ if there was a hit.
195 Caution: mkFastStringUnicode assumes that if the string is in the
196 table, it sits under the UnicodeStr constructor. Other mkFastString
197 variants analogously assume the FastString constructor.
200 data FastStringTable =
203 (MutableArray# RealWorld [FastString])
205 type FastStringTableVar = IORef FastStringTable
207 string_table :: FastStringTableVar
210 stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
211 >>= \ (STArray _ _ arr#) ->
212 newIORef (FastStringTable 0# arr#))
214 lookupTbl :: FastStringTable -> Int# -> IO [FastString]
215 lookupTbl (FastStringTable _ arr#) i# =
217 readArray# arr# i# s#)
219 updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
220 updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
221 IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
223 writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
225 mkFastString# :: Addr# -> FastString
227 case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
229 mkFastStringLen# :: Addr# -> Int# -> FastString
230 mkFastStringLen# a# len# =
232 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
236 -- _trace ("hashed: "++show (I# h)) $
237 lookupTbl ft h >>= \ lookup_result ->
238 case lookup_result of
240 -- no match, add it to table by copying out the
241 -- the string into a ByteArray
242 -- _trace "empty bucket" $
243 case copyPrefixStr a# (I# len#) of
245 let f_str = FastString uid# len# barr# in
246 updTbl string_table ft h [f_str] >>
247 ({- _trace ("new: " ++ show f_str) $ -} return f_str)
249 -- non-empty `bucket', scan the list looking
250 -- entry with same length and compare byte by byte.
251 -- _trace ("non-empty bucket"++show ls) $
252 case bucket_match ls len# a# of
254 case copyPrefixStr a# (I# len#) of
256 let f_str = FastString uid# len# barr# in
257 updTbl string_table ft h (f_str:ls) >>
258 ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
259 Just v -> {- _trace ("re-use: "++show v) $ -} return v)
261 bucket_match [] _ _ = Nothing
262 bucket_match (v@(FastString _ l# ba#):ls) len# a# =
263 if len# ==# l# && eqStrPrefix a# ba# l# then
266 bucket_match ls len# a#
267 bucket_match (UnicodeStr _ _ : ls) len# a# =
268 bucket_match ls len# a#
270 mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
271 mkFastSubStringBA# barr# start# len# =
273 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
275 h = hashSubStrBA barr# start# len#
277 -- _trace ("hashed(b): "++show (I# h)) $
278 lookupTbl ft h >>= \ lookup_result ->
279 case lookup_result of
281 -- no match, add it to table by copying out the
282 -- the string into a ByteArray
283 -- _trace "empty bucket(b)" $
284 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
286 let f_str = FastString uid# len# ba# in
287 updTbl string_table ft h [f_str] >>
288 -- _trace ("new(b): " ++ show f_str) $
291 -- non-empty `bucket', scan the list looking
292 -- entry with same length and compare byte by byte.
293 -- _trace ("non-empty bucket(b)"++show ls) $
294 case bucket_match ls start# len# barr# of
296 case copySubStrBA (BA barr#) (I# start#) (I# len#) of
298 let f_str = FastString uid# len# ba# in
299 updTbl string_table ft h (f_str:ls) >>
300 -- _trace ("new(b): " ++ show f_str) $
303 -- _trace ("re-use(b): "++show v) $
307 bucket_match [] _ _ _ = Nothing
308 bucket_match (v:ls) start# len# ba# =
310 FastString _ l# barr# ->
311 if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
314 bucket_match ls start# len# ba#
315 UnicodeStr _ _ -> bucket_match ls start# len# ba#
317 mkFastStringUnicode :: [Int] -> FastString
318 mkFastStringUnicode s =
320 readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
324 -- _trace ("hashed(b): "++show (I# h)) $
325 lookupTbl ft h >>= \ lookup_result ->
326 case lookup_result of
328 -- no match, add it to table by copying out the
329 -- the string into a [Int]
330 let f_str = UnicodeStr uid# s in
331 updTbl string_table ft h [f_str] >>
332 -- _trace ("new(b): " ++ show f_str) $
335 -- non-empty `bucket', scan the list looking
336 -- entry with same length and compare byte by byte.
337 -- _trace ("non-empty bucket(b)"++show ls) $
338 case bucket_match ls of
340 let f_str = UnicodeStr uid# s in
341 updTbl string_table ft h (f_str:ls) >>
342 -- _trace ("new(b): " ++ show f_str) $
345 -- _trace ("re-use(b): "++show v) $
349 bucket_match [] = Nothing
350 bucket_match (v@(UnicodeStr _ s'):ls) =
351 if s' == s then Just v else bucket_match ls
352 bucket_match (FastString _ _ _ : ls) = bucket_match ls
354 mkFastStringNarrow :: String -> FastString
355 mkFastStringNarrow str =
356 case packString str of { (I# len#, BA frozen#) ->
357 mkFastSubStringBA# frozen# 0# len#
359 {- 0-indexed array, len# == index to one beyond end of string,
360 i.e., (0,1) => empty string. -}
362 mkFastString :: String -> FastString
363 mkFastString str = if all good str
364 then mkFastStringNarrow str
365 else mkFastStringUnicode (map ord str)
367 good c = c >= '\1' && c <= '\xFF'
369 mkFastStringInt :: [Int] -> FastString
370 mkFastStringInt str = if all good str
371 then mkFastStringNarrow (map chr str)
372 else mkFastStringUnicode str
374 good c = c >= 1 && c <= 0xFF
376 mkFastSubString :: Addr# -> Int -> Int -> FastString
377 mkFastSubString a# (I# start#) (I# len#) =
378 mkFastStringLen# (a# `plusAddr#` start#) len#
382 hashStr :: Addr# -> Int# -> Int#
383 -- use the Addr to produce a hash value between 0 & m (inclusive)
384 hashStr a# len# = loop 0# 0#
386 loop h n | n ==# len# = h
387 | otherwise = loop h2 (n +# 1#)
388 where c = ord# (indexCharOffAddr# a# n)
389 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
391 hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
392 -- use the byte array to produce a hash value between 0 & m (inclusive)
393 hashSubStrBA ba# start# len# = loop 0# 0#
395 loop h n | n ==# len# = h
396 | otherwise = loop h2 (n +# 1#)
397 where c = ord# (indexCharArray# ba# (start# +# n))
398 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
400 hashUnicode :: [Int] -> Int# -> Int#
402 hashUnicode (I# c : cs) h = hashUnicode cs ((ord# c + (h *# 128)) `remInt#` hASH_TBL_SIZE#)
406 cmpFS :: FastString -> FastString -> Ordering
407 cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
409 cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
410 cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
411 cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
412 if u1# ==# u2# then EQ else
413 let l# = if l1# <=# l2# then l1# else l2# in
415 memcmp b1# b2# l# >>= \ (I# res) ->
418 else if res ==# 0# then
419 if l1# ==# l2# then EQ
420 else if l1# <# l2# then LT else GT
425 foreign import ccall unsafe "ghc_memcmp"
426 memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
429 -- -----------------------------------------------------------------------------
430 -- Outputting 'FastString's
432 #if __GLASGOW_HASKELL__ >= 504
434 -- this is our own version of hPutBuf for FastStrings, because in
435 -- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
436 -- The closest is hPutArray in Data.Array.IO, but that does some extra
437 -- range checks that we want to avoid here.
439 foreign import ccall unsafe "__hscore_memcpy_dst_off"
440 memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
442 hPutFS handle (FastString _ l# ba#)
443 | l# ==# 0# = return ()
445 = do wantWritableHandle "hPutFS" handle $
446 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
448 old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
452 raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
454 -- enough room in handle buffer?
455 if (size - w > count)
456 -- There's enough room in the buffer:
457 -- just copy the data in and update bufWPtr.
458 then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
459 writeIORef ref old_buf{ bufWPtr = w + count }
462 -- else, we have to flush
463 else do flushed_buf <- flushWriteBuffer fd stream old_buf
464 writeIORef ref flushed_buf
466 Buffer{ bufBuf=raw, bufState=WriteBuffer,
467 bufRPtr=0, bufWPtr=count, bufSize=count }
468 flushWriteBuffer fd stream this_buf
473 hPutFS :: Handle -> FastString -> IO ()
474 hPutFS handle (FastString _ l# ba#)
475 | l# ==# 0# = return ()
476 | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
477 hPutBufBAFull handle mba (I# l#)
479 bot = error "hPutFS.ba"
483 -- ONLY here for debugging the NCG (so -ddump-stix works for string
484 -- literals); no idea if this is really necessary. JRS, 010131
485 hPutFS handle (UnicodeStr _ is)
486 = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
488 -- -----------------------------------------------------------------------------
489 -- LitStrings, here for convenience only.
491 type LitString = Ptr ()
493 mkLitString# :: Addr# -> LitString
494 mkLitString# a# = Ptr a#