2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1994
4 \section[PrelPS]{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
11 module PreludePS{-yes, a Prelude module!-} (
58 -- to make interface self-sufficient
59 _PackedString, -- abstract!
64 import Stdio ( _FILE )
65 import TyArray ( _ByteArray(..) )
72 import Prel ( otherwise, (&&), (||), chr, ($), not, (.), isSpace, flip )
73 import List ( length, (++), map, filter, foldl, foldr,
74 lines, words, reverse, null, foldr1
76 import TyArray ( Array(..) )
80 %************************************************************************
82 \subsection{@_PackedString@ type declaration and interface (signatures)}
84 %************************************************************************
89 = _PS ByteArray# -- the bytes
90 Int# -- length (*not* including NUL at the end)
91 Bool -- True <=> contains a NUL
92 | _CPS Addr# -- pointer to the (null-terminated) bytes in C land
93 Int# -- length, as per strlen
94 -- definitely doesn't contain a NUL
96 _packString :: [Char] -> _PackedString
97 _packStringST :: [Char] -> _ST s _PackedString
98 _packCString :: _Addr -> _PackedString
99 _packCBytes :: Int -> _Addr -> _PackedString
100 _packCBytesST :: Int -> _Addr -> _ST s _PackedString
101 _packStringForC :: [Char] -> ByteArray# -- calls injected by compiler
102 _packBytesForC :: [Char] -> _ByteArray Int
103 _packBytesForCST :: [Char] -> _ST s (_ByteArray Int)
104 _nilPS :: _PackedString
105 _consPS :: Char -> _PackedString -> _PackedString
106 _byteArrayToPS :: _ByteArray Int -> _PackedString
107 _psToByteArray :: _PackedString -> _ByteArray Int
109 --OLD: packString# :: [Char] -> ByteArray#
110 --OLD: packToCString :: [Char] -> _ByteArray Int -- hmmm... weird name
112 _unpackPS :: _PackedString -> [Char]
113 unpackPS# :: Addr# -> [Char] -- calls injected by compiler
114 --???toCString :: _PackedString -> ByteArray#
115 _putPS :: _FILE -> _PackedString -> PrimIO () -- ToDo: more sensible type
119 _headPS :: _PackedString -> Char
120 _tailPS :: _PackedString -> _PackedString
121 _nullPS :: _PackedString -> Bool
122 _appendPS :: _PackedString -> _PackedString -> _PackedString
123 _lengthPS :: _PackedString -> Int
124 _indexPS :: _PackedString -> Int -> Char
125 -- 0-origin indexing into the string
126 _mapPS :: (Char -> Char) -> _PackedString -> _PackedString {-or String?-}
127 _filterPS :: (Char -> Bool) -> _PackedString -> _PackedString {-or String?-}
128 _foldlPS :: (a -> Char -> a) -> a -> _PackedString -> a
129 _foldrPS :: (Char -> a -> a) -> a -> _PackedString -> a
130 _takePS :: Int -> _PackedString -> _PackedString
131 _dropPS :: Int -> _PackedString -> _PackedString
132 _splitAtPS :: Int -> _PackedString -> (_PackedString, _PackedString)
133 _takeWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
134 _dropWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
135 _spanPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
136 _breakPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
137 _linesPS :: _PackedString -> [_PackedString]
138 _wordsPS :: _PackedString -> [_PackedString]
139 _reversePS :: _PackedString -> _PackedString
140 _concatPS :: [_PackedString] -> _PackedString
142 _substrPS :: _PackedString -> Int -> Int -> _PackedString
143 -- pluck out a piece of a _PS
144 -- start and end chars you want; both 0-origin-specified
145 --??? _hashPS :: _PackedString -> Int -> Int
146 -- use the _PS to produce a hash value between 0 & m (inclusive)
149 %************************************************************************
151 \subsection{Constructor functions}
153 %************************************************************************
155 Easy ones first. @_packString@ requires getting some heap-bytes and
156 scribbling stuff into them.
159 _packCString (A# a#) -- the easy one; we just believe the caller
162 len = case (strlen# a#) of { I# x -> x }
165 _consPS c cs = _packString (c : (_unpackPS cs)) -- ToDo:better
168 = case (_packString str) of
169 _PS bytes _ _ -> bytes
172 = _psToByteArray (_packString str)
175 = _packStringST str `thenStrictlyST` \ (_PS bytes n has_null) ->
176 --later? ASSERT(not has_null)
177 returnStrictlyST (_ByteArray (0, I# (n -# 1#)) bytes)
179 _packString str = _runST (_packStringST str)
182 = let len = length str in
185 pack_me :: Int -> [Char] -> _ST s _PackedString
187 pack_me len@(I# length#) str
188 = -- allocate an array that will hold the string
189 -- (not forgetting the NUL byte at the end)
190 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
192 -- fill in packed string from "str"
193 fill_in ch_array 0# str `seqStrictlyST`
196 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
198 let has_null = byteArrayHasNUL# frozen# length# in
200 returnStrictlyST (_PS frozen# length# has_null)
202 fill_in :: _MutableByteArray s Int -> Int# -> [Char] -> _ST s ()
204 fill_in arr_in# idx []
205 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
208 fill_in arr_in# idx (C# c : cs)
209 = write_ps_array arr_in# idx c `seqStrictlyST`
210 fill_in arr_in# (idx +# 1#) cs
212 _packCBytes len addr = _runST (_packCBytesST len addr)
214 _packCBytesST len@(I# length#) (A# addr)
215 = -- allocate an array that will hold the string
216 -- (not forgetting the NUL byte at the end)
217 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
219 -- fill in packed string from "addr"
220 fill_in ch_array 0# `seqStrictlyST`
223 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
225 let has_null = byteArrayHasNUL# frozen# length# in
227 returnStrictlyST (_PS frozen# length# has_null)
229 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
233 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
236 = case (indexCharOffAddr# addr idx) of { ch ->
237 write_ps_array arr_in# idx ch `seqStrictlyST`
238 fill_in arr_in# (idx +# 1#) }
240 _byteArrayToPS (_ByteArray ixs@(_, ix_end) frozen#)
245 else ((index ixs ix_end) + 1)
248 _PS frozen# n# (byteArrayHasNUL# frozen# n#)
250 _unsafeByteArrayToPS (_ByteArray _ frozen#) (I# n#)
251 = _PS frozen# n# (byteArrayHasNUL# frozen# n#)
253 _psToByteArray (_PS bytes n has_null)
254 = _ByteArray (0, I# (n -# 1#)) bytes
256 _psToByteArray (_CPS addr len#)
259 byte_array_form = _packCBytes len (A# addr)
261 case byte_array_form of { _PS bytes _ _ ->
262 _ByteArray (0, len - 1) bytes }
265 %************************************************************************
267 \subsection{Destructor functions (taking @_PackedStrings@ apart)}
269 %************************************************************************
272 unpackPS# addr -- calls injected by compiler
273 = _unpackPS (_CPS addr len)
275 len = case (strlen# addr) of { I# x -> x }
277 -- OK, but this code gets *hammered*:
279 -- = [ _indexPS ps n | n <- [ 0::Int .. _lengthPS ps - 1 ] ]
281 _unpackPS (_PS bytes len has_null)
286 | otherwise = C# ch : unpack (nh +# 1#)
288 ch = indexCharArray# bytes nh
290 _unpackPS (_CPS addr len)
294 | ch `eqChar#` '\0'# = []
295 | otherwise = C# ch : unpack (nh +# 1#)
297 ch = indexCharOffAddr# addr nh
301 _putPS file ps@(_PS bytes len has_null)
306 byte_array = _ByteArray (0, I# (len -# 1#)) bytes
308 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
309 `thenPrimIO` \ (I# written) ->
310 if written ==# len then
313 error "_putPS: fwrite failed!\n"
315 _putPS file (_CPS addr len)
319 = _ccall_ fputs (A# addr) file `thenPrimIO` \ (I# _){-force type-} ->
323 %************************************************************************
325 \subsection{List-mimicking functions for @_PackedStrings@}
327 %************************************************************************
329 First, the basic functions that do look into the representation;
330 @indexPS@ is the most important one.
332 _lengthPS ps = I# (lengthPS# ps)
334 {-# INLINE lengthPS# #-}
336 lengthPS# (_PS _ i _) = i
337 lengthPS# (_CPS _ i) = i
339 {-# INLINE strlen# #-}
341 strlen# :: Addr# -> Int
343 = unsafePerformPrimIO (
344 _ccall_ strlen (A# a) `thenPrimIO` \ len@(I# _) ->
348 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
350 byteArrayHasNUL# bs len
351 = unsafePerformPrimIO (
352 _ccall_ byteArrayHasNUL__ ba (I# len) `thenPrimIO` \ (I# res) ->
354 if res ==# 0# then False else True
357 ba = _ByteArray (0, I# (len -# 1#)) bs
359 -----------------------
360 _indexPS ps (I# n) = C# (indexPS# ps n)
362 {-# INLINE indexPS# #-}
364 indexPS# (_PS bs i _) n
365 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
368 indexPS# (_CPS a _) n
369 = indexCharOffAddr# a n
372 Now, the rest of the functions can be defined without digging
373 around in the representation.
376 | _nullPS ps = error "_headPS: head []"
377 | otherwise = C# (indexPS# ps 0#)
380 | len <=# 0# = error "_tailPS: tail []"
381 | len ==# 1# = _nilPS
382 | otherwise = substrPS# ps 1# (len -# 1#)
386 _nullPS (_PS _ i _) = i ==# 0#
387 _nullPS (_CPS _ i) = i ==# 0#
389 -- ToDo: some non-lousy implementations...
394 | otherwise = _packString (_unpackPS xs ++ _unpackPS ys)
396 _mapPS f xs = _packString (map f (_unpackPS xs))
398 _filterPS p ps = _packString (filter p (_unpackPS ps))
399 _foldlPS f b ps = foldl f b (_unpackPS ps)
400 _foldrPS f b ps = foldr f b (_unpackPS ps)
402 _takePS (I# n) ps = substrPS# ps 0# (n -# 1#)
403 _dropPS (I# n) ps = substrPS# ps n (lengthPS# ps -# 1#)
404 _splitAtPS n ps = (_takePS n ps, _dropPS n ps)
408 break_pt = char_pos_that_dissatisfies
414 substrPS# ps 0# (break_pt -# 1#)
419 break_pt = char_pos_that_dissatisfies
425 substrPS# ps break_pt (len -# 1#)
427 char_pos_that_dissatisfies :: (Char# -> Bool) -> _PackedString -> Int# -> Int# -> Int#
429 char_pos_that_dissatisfies p ps len pos
430 | pos >=# len = pos -- end
431 | p (indexPS# ps pos) = -- predicate satisfied; keep going
432 char_pos_that_dissatisfies p ps len (pos +# 1#)
433 | otherwise = pos -- predicate not satisfied
435 char_pos_that_dissatisfies p ps len pos -- dead code: HACK to avoid badly-typed error msg
438 -- ToDo: could certainly go quicker
439 _spanPS p ps = (_takeWhilePS p ps, _dropWhilePS p ps)
440 _breakPS p ps = _spanPS (not . p) ps
442 _linesPS ps = map _packString (lines (_unpackPS ps))
443 _wordsPS ps = map _packString (words (_unpackPS ps))
445 _reversePS ps = _packString (reverse (_unpackPS ps))
447 _concatPS [] = _nilPS
450 tot_len# = case (foldr ((+) . _lengthPS) 0 pss) of { I# x -> x }
451 tot_len = I# tot_len#
454 new_ps_array (tot_len# +# 1#) `thenStrictlyST` \ arr# -> -- incl NUL byte!
455 packum arr# pss 0# `seqStrictlyST`
456 freeze_ps_array arr# `thenStrictlyST` \ (_ByteArray _ frozen#) ->
458 let has_null = byteArrayHasNUL# frozen# tot_len# in
460 returnStrictlyST (_PS frozen# tot_len# has_null)
463 packum :: _MutableByteArray s Int -> [_PackedString] -> Int# -> _ST s ()
466 = write_ps_array arr pos (chr# 0#) `seqStrictlyST`
468 packum arr (ps : pss) pos
469 = fill arr pos ps 0# (lengthPS# ps) `thenStrictlyST` \ (I# next_pos) ->
470 packum arr pss next_pos
472 fill :: _MutableByteArray s Int -> Int# -> _PackedString -> Int# -> Int# -> _ST s Int
474 fill arr arr_i ps ps_i ps_len
476 = returnStrictlyST (I# (arr_i +# ps_len))
478 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) `seqStrictlyST`
479 fill arr arr_i ps (ps_i +# 1#) ps_len
482 %************************************************************************
484 \subsection{Instances for @_PackedStrings@: @Eq@, @Ord@, @Text@}
486 %************************************************************************
490 instance Eq _PackedString where
491 a == b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> False }
492 a /= b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> True }
494 instance Ord _PackedString where
495 a <= b = case _tagCmpPS a b of { _LT -> True; _EQ -> True; _GT -> False }
496 a < b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> False }
497 a >= b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> True }
498 a > b = case _tagCmpPS a b of { _LT -> False; _EQ -> False; _GT -> True }
499 _tagCmp a b = _tagCmpPS a b
502 We try hard to make this go fast:
504 _tagCmpPS :: _PackedString -> _PackedString -> _CMP_TAG
506 _tagCmpPS (_PS bs1 len1 has_null1) (_PS bs2 len2 has_null2)
507 | not has_null1 && not has_null2
508 = unsafePerformPrimIO (
509 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
511 if res <# 0# then _LT
512 else if res ==# 0# then _EQ
516 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
517 ba2 = _ByteArray (0, I# (len2 -# 1#)) bs2
519 _tagCmpPS (_PS bs1 len1 has_null1) (_CPS bs2 len2)
521 = unsafePerformPrimIO (
522 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
524 if res <# 0# then _LT
525 else if res ==# 0# then _EQ
529 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
532 _tagCmpPS (_CPS bs1 len1) (_CPS bs2 len2)
533 = unsafePerformPrimIO (
534 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
536 if res <# 0# then _LT
537 else if res ==# 0# then _EQ
544 _tagCmpPS a@(_CPS _ _) b@(_PS _ _ has_null2)
546 = -- try them the other way 'round
547 case (_tagCmpPS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
549 _tagCmpPS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
552 end1 = lengthPS# ps1 -# 1#
553 end2 = lengthPS# ps2 -# 1#
556 = if char# ># end1 then
557 if char# ># end2 then -- both strings ran out at once
559 else -- ps1 ran out before ps2
561 else if char# ># end2 then
562 _GT -- ps2 ran out before ps1
565 ch1 = indexPS# ps1 char#
566 ch2 = indexPS# ps2 char#
568 if ch1 `eqChar#` ch2 then
569 looking_at (char# +# 1#)
570 else if ch1 `ltChar#` ch2 then _LT
573 instance Text _PackedString where
574 readsPrec p = error "readsPrec: _PackedString: ToDo"
575 showsPrec p ps r = showsPrec p (_unpackPS ps) r
578 %************************************************************************
580 \subsection{Uniquely PackedString functions}
582 %************************************************************************
584 For @_substrPS@, see the next section.
586 @_hashPS@ is just what we happen to need in GHC...
589 _hashPS ps (I# hASH_TBL_SIZE#)
590 = I# (h `remInt#` hASH_TBL_SIZE#)
594 h | len <=# 0# = 0# -- probably should just be an "error"
595 | len ==# 1# = ord# c1
596 | len ==# 2# = ord# c2
597 | len ==# 3# = ord# c2 +# ord# c3
598 | len ==# 4# = ord# c2 +# ord# c3 +# ord# c4
599 | len ==# 5# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5
600 | len >=# 6# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5 +# ord# c6
601 | otherwise = 999# -- will never happen
612 %************************************************************************
614 \subsection{Local utility functions}
616 %************************************************************************
618 The definition of @_substrPS@ is essentially:
619 @take (end - begin + 1) (drop begin str)@.
621 _substrPS ps (I# begin) (I# end) = substrPS# ps begin end
625 = error "_substrPS: bounds out of range"
627 | s >=# len || result_len# <=# 0#
632 new_ps_array (result_len# +# 1#) `thenStrictlyST` \ ch_arr -> -- incl NUL byte!
633 fill_in ch_arr 0# `seqStrictlyST`
634 freeze_ps_array ch_arr `thenStrictlyST` \ (_ByteArray _ frozen#) ->
636 let has_null = byteArrayHasNUL# frozen# result_len# in
638 returnStrictlyST (_PS frozen# result_len# has_null)
643 result_len# = (if e <# len then (e +# 1#) else len) -# s
644 result_len = I# result_len#
646 -----------------------
647 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
650 | idx ==# result_len#
651 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
655 ch = indexPS# ps (s +# idx)
657 write_ps_array arr_in# idx ch `seqStrictlyST`
658 fill_in arr_in# (idx +# 1#)
661 (Very :-) ``Specialised'' versions of some CharArray things...
663 new_ps_array :: Int# -> _ST s (_MutableByteArray s Int)
664 write_ps_array :: _MutableByteArray s Int -> Int# -> Char# -> _ST s ()
665 freeze_ps_array :: _MutableByteArray s Int -> _ST s (_ByteArray Int)
667 new_ps_array size (S# s)
668 = case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
669 (_MutableByteArray bot barr#, S# s2#)}
671 bot = error "new_ps_array"
673 write_ps_array (_MutableByteArray _ barr#) n ch (S# s#)
674 = case writeCharArray# barr# n ch s# of { s2# ->
677 -- same as unsafeFreezeByteArray
678 freeze_ps_array (_MutableByteArray ixs arr#) (S# s#)
679 = case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
680 (_ByteArray ixs frozen#, S# s2#) }