2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1994
4 \section[PrelPS]{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
11 module PreludePS{-yes, a Prelude module!-} (
58 -- to make interface self-sufficient
59 _PackedString, -- abstract!
64 import Stdio ( _FILE )
65 import TyArray ( _ByteArray(..) )
72 import Prel ( otherwise, (&&), (||), chr, ($), not, (.), isSpace, flip )
73 import List ( length, (++), map, filter, foldl, foldr,
74 lines, words, reverse, null, foldr1,
77 import TyArray ( Array(..) )
82 %************************************************************************
84 \subsection{@_PackedString@ type declaration and interface (signatures)}
86 %************************************************************************
91 = _PS ByteArray# -- the bytes
92 Int# -- length (*not* including NUL at the end)
93 Bool -- True <=> contains a NUL
94 | _CPS Addr# -- pointer to the (null-terminated) bytes in C land
95 Int# -- length, as per strlen
96 -- definitely doesn't contain a NUL
98 _packString :: [Char] -> _PackedString
99 _packStringST :: [Char] -> _ST s _PackedString
100 _packCString :: _Addr -> _PackedString
101 _packCBytes :: Int -> _Addr -> _PackedString
102 _packCBytesST :: Int -> _Addr -> _ST s _PackedString
103 _packStringForC :: [Char] -> ByteArray# -- calls injected by compiler
104 _packBytesForC :: [Char] -> _ByteArray Int
105 _packBytesForCST :: [Char] -> _ST s (_ByteArray Int)
106 _nilPS :: _PackedString
107 _consPS :: Char -> _PackedString -> _PackedString
108 _byteArrayToPS :: _ByteArray Int -> _PackedString
109 _psToByteArray :: _PackedString -> _ByteArray Int
111 --OLD: packString# :: [Char] -> ByteArray#
112 --OLD: packToCString :: [Char] -> _ByteArray Int -- hmmm... weird name
114 _unpackPS :: _PackedString -> [Char]
115 --???toCString :: _PackedString -> ByteArray#
116 _putPS :: _FILE -> _PackedString -> PrimIO () -- ToDo: more sensible type
120 _headPS :: _PackedString -> Char
121 _tailPS :: _PackedString -> _PackedString
122 _nullPS :: _PackedString -> Bool
123 _appendPS :: _PackedString -> _PackedString -> _PackedString
124 _lengthPS :: _PackedString -> Int
125 _indexPS :: _PackedString -> Int -> Char
126 -- 0-origin indexing into the string
127 _mapPS :: (Char -> Char) -> _PackedString -> _PackedString {-or String?-}
128 _filterPS :: (Char -> Bool) -> _PackedString -> _PackedString {-or String?-}
129 _foldlPS :: (a -> Char -> a) -> a -> _PackedString -> a
130 _foldrPS :: (Char -> a -> a) -> a -> _PackedString -> a
131 _takePS :: Int -> _PackedString -> _PackedString
132 _dropPS :: Int -> _PackedString -> _PackedString
133 _splitAtPS :: Int -> _PackedString -> (_PackedString, _PackedString)
134 _takeWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
135 _dropWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
136 _spanPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
137 _breakPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
138 _linesPS :: _PackedString -> [_PackedString]
139 _wordsPS :: _PackedString -> [_PackedString]
140 _reversePS :: _PackedString -> _PackedString
141 _concatPS :: [_PackedString] -> _PackedString
143 _substrPS :: _PackedString -> Int -> Int -> _PackedString
144 -- pluck out a piece of a _PS
145 -- start and end chars you want; both 0-origin-specified
146 --??? _hashPS :: _PackedString -> Int -> Int
147 -- use the _PS to produce a hash value between 0 & m (inclusive)
150 %************************************************************************
152 \subsection{Constructor functions}
154 %************************************************************************
156 Easy ones first. @_packString@ requires getting some heap-bytes and
157 scribbling stuff into them.
160 _packCString (A# a#) -- the easy one; we just believe the caller
163 len = case (strlen# a#) of { I# x -> x }
166 _consPS c cs = _packString (c : (_unpackPS cs)) -- ToDo:better
169 = case (_packString str) of
170 _PS bytes _ _ -> bytes
173 = _psToByteArray (_packString str)
176 = _packStringST str `thenStrictlyST` \ (_PS bytes n has_null) ->
177 --later? ASSERT(not has_null)
178 returnStrictlyST (_ByteArray (0, I# (n -# 1#)) bytes)
180 _packString str = _runST (_packStringST str)
183 = let len = length str in
186 pack_me :: Int -> [Char] -> _ST s _PackedString
188 pack_me len@(I# length#) str
189 = -- allocate an array that will hold the string
190 -- (not forgetting the NUL byte at the end)
191 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
193 -- fill in packed string from "str"
194 fill_in ch_array 0# str `seqStrictlyST`
197 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
199 let has_null = byteArrayHasNUL# frozen# length# in
201 returnStrictlyST (_PS frozen# length# has_null)
203 fill_in :: _MutableByteArray s Int -> Int# -> [Char] -> _ST s ()
205 fill_in arr_in# idx []
206 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
209 fill_in arr_in# idx (C# c : cs)
210 = write_ps_array arr_in# idx c `seqStrictlyST`
211 fill_in arr_in# (idx +# 1#) cs
213 _packCBytes len addr = _runST (_packCBytesST len addr)
215 _packCBytesST len@(I# length#) (A# addr)
216 = -- allocate an array that will hold the string
217 -- (not forgetting the NUL byte at the end)
218 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
220 -- fill in packed string from "addr"
221 fill_in ch_array 0# `seqStrictlyST`
224 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
226 let has_null = byteArrayHasNUL# frozen# length# in
228 returnStrictlyST (_PS frozen# length# has_null)
230 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
234 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
237 = case (indexCharOffAddr# addr idx) of { ch ->
238 write_ps_array arr_in# idx ch `seqStrictlyST`
239 fill_in arr_in# (idx +# 1#) }
241 _byteArrayToPS (_ByteArray ixs@(_, ix_end) frozen#)
246 else ((index ixs ix_end) + 1)
249 _PS frozen# n# (byteArrayHasNUL# frozen# n#)
251 _unsafeByteArrayToPS (_ByteArray _ frozen#) (I# n#)
252 = _PS frozen# n# (byteArrayHasNUL# frozen# n#)
254 _psToByteArray (_PS bytes n has_null)
255 = _ByteArray (0, I# (n -# 1#)) bytes
257 _psToByteArray (_CPS addr len#)
260 byte_array_form = _packCBytes len (A# addr)
262 case byte_array_form of { _PS bytes _ _ ->
263 _ByteArray (0, len - 1) bytes }
266 %************************************************************************
268 \subsection{Destructor functions (taking @_PackedStrings@ apart)}
270 %************************************************************************
273 {- OLD: but good? WDP 96/01
274 unpackPS# addr -- calls injected by compiler
275 = _unpackPS (_CPS addr len)
277 len = case (strlen# addr) of { I# x -> x }
280 -- OK, but this code gets *hammered*:
282 -- = [ _indexPS ps n | n <- [ 0::Int .. _lengthPS ps - 1 ] ]
284 _unpackPS (_PS bytes len has_null)
289 | otherwise = C# ch : unpack (nh +# 1#)
291 ch = indexCharArray# bytes nh
293 _unpackPS (_CPS addr len)
297 | ch `eqChar#` '\0'# = []
298 | otherwise = C# ch : unpack (nh +# 1#)
300 ch = indexCharOffAddr# addr nh
304 _putPS file ps@(_PS bytes len has_null)
309 byte_array = _ByteArray (0, I# (len -# 1#)) bytes
311 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
312 `thenPrimIO` \ (I# written) ->
313 if written ==# len then
316 error "_putPS: fwrite failed!\n"
318 _putPS file (_CPS addr len)
322 = _ccall_ fputs (A# addr) file `thenPrimIO` \ (I# _){-force type-} ->
326 The dual to @_putPS@, note that the size of the chunk specified
327 is the upper bound of the size of the chunk returned.
330 _getPS :: _FILE -> Int -> PrimIO _PackedString
331 _getPS file len@(I# len#)
332 | len# <=# 0# = returnPrimIO _nilPS -- I'm being kind here.
334 -- Allocate an array for system call to store its bytes into.
335 new_ps_array len# `thenPrimIO` \ ch_arr ->
336 freeze_ps_array ch_arr `thenPrimIO` \ (_ByteArray _ frozen#) ->
338 byte_array = _ByteArray (0, I# len#) frozen#
340 _ccall_ fread byte_array (1::Int) len file `thenPrimIO` \ (I# read#) ->
341 if read# ==# 0# then -- EOF or other error
342 error "_getPS: EOF reached or other error"
345 The system call may not return the number of
346 bytes requested. Instead of failing with an error
347 if the number of bytes read is less than requested,
348 a packed string containing the bytes we did manage
349 to snarf is returned.
352 has_null = byteArrayHasNUL# frozen# read#
354 returnPrimIO (_PS frozen# read# has_null)
358 %************************************************************************
360 \subsection{List-mimicking functions for @_PackedStrings@}
362 %************************************************************************
364 First, the basic functions that do look into the representation;
365 @indexPS@ is the most important one.
367 _lengthPS ps = I# (lengthPS# ps)
369 {-# INLINE lengthPS# #-}
371 lengthPS# (_PS _ i _) = i
372 lengthPS# (_CPS _ i) = i
374 {-# INLINE strlen# #-}
376 strlen# :: Addr# -> Int
378 = unsafePerformPrimIO (
379 _ccall_ strlen (A# a) `thenPrimIO` \ len@(I# _) ->
383 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
385 byteArrayHasNUL# bs len
386 = unsafePerformPrimIO (
387 _ccall_ byteArrayHasNUL__ ba (I# len) `thenPrimIO` \ (I# res) ->
389 if res ==# 0# then False else True
392 ba = _ByteArray (0, I# (len -# 1#)) bs
394 -----------------------
395 _indexPS ps (I# n) = C# (indexPS# ps n)
397 {-# INLINE indexPS# #-}
399 indexPS# (_PS bs i _) n
400 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
403 indexPS# (_CPS a _) n
404 = indexCharOffAddr# a n
407 Now, the rest of the functions can be defined without digging
408 around in the representation.
411 | _nullPS ps = error "_headPS: head []"
412 | otherwise = C# (indexPS# ps 0#)
415 | len <=# 0# = error "_tailPS: tail []"
416 | len ==# 1# = _nilPS
417 | otherwise = substrPS# ps 1# (len -# 1#)
421 _nullPS (_PS _ i _) = i ==# 0#
422 _nullPS (_CPS _ i) = i ==# 0#
424 -- ToDo: some non-lousy implementations...
429 | otherwise = _packString (_unpackPS xs ++ _unpackPS ys)
431 _mapPS f xs = _packString (map f (_unpackPS xs))
433 _filterPS p ps = _packString (filter p (_unpackPS ps))
434 _foldlPS f b ps = foldl f b (_unpackPS ps)
435 _foldrPS f b ps = foldr f b (_unpackPS ps)
437 _takePS (I# n) ps = substrPS# ps 0# (n -# 1#)
438 _dropPS (I# n) ps = substrPS# ps n (lengthPS# ps -# 1#)
439 _splitAtPS n ps = (_takePS n ps, _dropPS n ps)
443 break_pt = char_pos_that_dissatisfies
449 substrPS# ps 0# (break_pt -# 1#)
454 break_pt = char_pos_that_dissatisfies
460 substrPS# ps break_pt (len -# 1#)
462 char_pos_that_dissatisfies :: (Char# -> Bool) -> _PackedString -> Int# -> Int# -> Int#
464 char_pos_that_dissatisfies p ps len pos
465 | pos >=# len = pos -- end
466 | p (indexPS# ps pos) = -- predicate satisfied; keep going
467 char_pos_that_dissatisfies p ps len (pos +# 1#)
468 | otherwise = pos -- predicate not satisfied
470 char_pos_that_dissatisfies p ps len pos -- dead code: HACK to avoid badly-typed error msg
473 -- ToDo: could certainly go quicker
474 _spanPS p ps = (_takeWhilePS p ps, _dropWhilePS p ps)
475 _breakPS p ps = _spanPS (not . p) ps
477 _linesPS ps = map _packString (lines (_unpackPS ps))
478 _wordsPS ps = map _packString (words (_unpackPS ps))
480 _reversePS ps = _packString (reverse (_unpackPS ps))
482 _concatPS [] = _nilPS
485 tot_len# = case (foldr ((+) . _lengthPS) 0 pss) of { I# x -> x }
486 tot_len = I# tot_len#
489 new_ps_array (tot_len# +# 1#) `thenStrictlyST` \ arr# -> -- incl NUL byte!
490 packum arr# pss 0# `seqStrictlyST`
491 freeze_ps_array arr# `thenStrictlyST` \ (_ByteArray _ frozen#) ->
493 let has_null = byteArrayHasNUL# frozen# tot_len# in
495 returnStrictlyST (_PS frozen# tot_len# has_null)
498 packum :: _MutableByteArray s Int -> [_PackedString] -> Int# -> _ST s ()
501 = write_ps_array arr pos (chr# 0#) `seqStrictlyST`
503 packum arr (ps : pss) pos
504 = fill arr pos ps 0# (lengthPS# ps) `thenStrictlyST` \ (I# next_pos) ->
505 packum arr pss next_pos
507 fill :: _MutableByteArray s Int -> Int# -> _PackedString -> Int# -> Int# -> _ST s Int
509 fill arr arr_i ps ps_i ps_len
511 = returnStrictlyST (I# (arr_i +# ps_len))
513 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) `seqStrictlyST`
514 fill arr arr_i ps (ps_i +# 1#) ps_len
517 %************************************************************************
519 \subsection{Instances for @_PackedStrings@: @Eq@, @Ord@, @Text@}
521 %************************************************************************
525 instance Eq _PackedString where
526 a == b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> False }
527 a /= b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> True }
529 instance Ord _PackedString where
530 a <= b = case _tagCmpPS a b of { _LT -> True; _EQ -> True; _GT -> False }
531 a < b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> False }
532 a >= b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> True }
533 a > b = case _tagCmpPS a b of { _LT -> False; _EQ -> False; _GT -> True }
538 _tagCmp a b = _tagCmpPS a b
541 We try hard to make this go fast:
543 _tagCmpPS :: _PackedString -> _PackedString -> _CMP_TAG
545 _tagCmpPS (_PS bs1 len1 has_null1) (_PS bs2 len2 has_null2)
546 | not has_null1 && not has_null2
547 = unsafePerformPrimIO (
548 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
550 if res <# 0# then _LT
551 else if res ==# 0# then _EQ
555 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
556 ba2 = _ByteArray (0, I# (len2 -# 1#)) bs2
558 _tagCmpPS (_PS bs1 len1 has_null1) (_CPS bs2 len2)
560 = unsafePerformPrimIO (
561 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
563 if res <# 0# then _LT
564 else if res ==# 0# then _EQ
568 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
571 _tagCmpPS (_CPS bs1 len1) (_CPS bs2 len2)
572 = unsafePerformPrimIO (
573 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
575 if res <# 0# then _LT
576 else if res ==# 0# then _EQ
583 _tagCmpPS a@(_CPS _ _) b@(_PS _ _ has_null2)
585 = -- try them the other way 'round
586 case (_tagCmpPS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
588 _tagCmpPS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
591 end1 = lengthPS# ps1 -# 1#
592 end2 = lengthPS# ps2 -# 1#
595 = if char# ># end1 then
596 if char# ># end2 then -- both strings ran out at once
598 else -- ps1 ran out before ps2
600 else if char# ># end2 then
601 _GT -- ps2 ran out before ps1
604 ch1 = indexPS# ps1 char#
605 ch2 = indexPS# ps2 char#
607 if ch1 `eqChar#` ch2 then
608 looking_at (char# +# 1#)
609 else if ch1 `ltChar#` ch2 then _LT
612 instance Text _PackedString where
613 readsPrec p = error "readsPrec: _PackedString: ToDo"
614 showsPrec p ps r = showsPrec p (_unpackPS ps) r
615 readList = _readList (readsPrec 0)
616 showList = _showList (showsPrec 0)
619 %************************************************************************
621 \subsection{Uniquely PackedString functions}
623 %************************************************************************
625 For @_substrPS@, see the next section.
627 @_hashPS@ is just what we happen to need in GHC...
630 _hashPS ps (I# hASH_TBL_SIZE#)
631 = I# (h `remInt#` hASH_TBL_SIZE#)
635 h | len <=# 0# = 0# -- probably should just be an "error"
636 | len ==# 1# = ord# c1
637 | len ==# 2# = ord# c2
638 | len ==# 3# = ord# c2 +# ord# c3
639 | len ==# 4# = ord# c2 +# ord# c3 +# ord# c4
640 | len ==# 5# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5
641 | len >=# 6# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5 +# ord# c6
642 | otherwise = 999# -- will never happen
653 %************************************************************************
655 \subsection{Local utility functions}
657 %************************************************************************
659 The definition of @_substrPS@ is essentially:
660 @take (end - begin + 1) (drop begin str)@.
662 _substrPS ps (I# begin) (I# end) = substrPS# ps begin end
666 = error "_substrPS: bounds out of range"
668 | s >=# len || result_len# <=# 0#
673 new_ps_array (result_len# +# 1#) `thenStrictlyST` \ ch_arr -> -- incl NUL byte!
674 fill_in ch_arr 0# `seqStrictlyST`
675 freeze_ps_array ch_arr `thenStrictlyST` \ (_ByteArray _ frozen#) ->
677 let has_null = byteArrayHasNUL# frozen# result_len# in
679 returnStrictlyST (_PS frozen# result_len# has_null)
684 result_len# = (if e <# len then (e +# 1#) else len) -# s
685 result_len = I# result_len#
687 -----------------------
688 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
691 | idx ==# result_len#
692 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
696 ch = indexPS# ps (s +# idx)
698 write_ps_array arr_in# idx ch `seqStrictlyST`
699 fill_in arr_in# (idx +# 1#)
702 (Very :-) ``Specialised'' versions of some CharArray things...
704 new_ps_array :: Int# -> _ST s (_MutableByteArray s Int)
705 write_ps_array :: _MutableByteArray s Int -> Int# -> Char# -> _ST s ()
706 freeze_ps_array :: _MutableByteArray s Int -> _ST s (_ByteArray Int)
708 new_ps_array size (S# s)
709 = case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
710 (_MutableByteArray bot barr#, S# s2#)}
712 bot = error "new_ps_array"
714 write_ps_array (_MutableByteArray _ barr#) n ch (S# s#)
715 = case writeCharArray# barr# n ch s# of { s2# ->
718 -- same as unsafeFreezeByteArray
719 freeze_ps_array (_MutableByteArray ixs arr#) (S# s#)
720 = case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
721 (_ByteArray ixs frozen#, S# s2#) }