2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1994
4 \section[PrelPS]{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
11 module PreludePS{-yes, a Prelude module!-} (
29 unpackPS#, unpackPS2#,
59 -- to make interface self-sufficient
60 _PackedString, -- abstract!
65 import Stdio ( _FILE )
66 import TyArray ( _ByteArray(..) )
73 import Prel ( otherwise, (&&), (||), chr, ($), not, (.), isSpace, flip )
74 import List ( length, (++), map, filter, foldl, foldr,
75 lines, words, reverse, null, foldr1
77 import TyArray ( Array(..) )
82 %************************************************************************
84 \subsection{@_PackedString@ type declaration and interface (signatures)}
86 %************************************************************************
91 = _PS ByteArray# -- the bytes
92 Int# -- length (*not* including NUL at the end)
93 Bool -- True <=> contains a NUL
94 | _CPS Addr# -- pointer to the (null-terminated) bytes in C land
95 Int# -- length, as per strlen
96 -- definitely doesn't contain a NUL
98 _packString :: [Char] -> _PackedString
99 _packStringST :: [Char] -> _ST s _PackedString
100 _packCString :: _Addr -> _PackedString
101 _packCBytes :: Int -> _Addr -> _PackedString
102 _packCBytesST :: Int -> _Addr -> _ST s _PackedString
103 _packStringForC :: [Char] -> ByteArray# -- calls injected by compiler
104 _packBytesForC :: [Char] -> _ByteArray Int
105 _packBytesForCST :: [Char] -> _ST s (_ByteArray Int)
106 _nilPS :: _PackedString
107 _consPS :: Char -> _PackedString -> _PackedString
108 _byteArrayToPS :: _ByteArray Int -> _PackedString
109 _psToByteArray :: _PackedString -> _ByteArray Int
111 --OLD: packString# :: [Char] -> ByteArray#
112 --OLD: packToCString :: [Char] -> _ByteArray Int -- hmmm... weird name
114 _unpackPS :: _PackedString -> [Char]
115 unpackPS# :: Addr# -> [Char] -- calls injected by compiler
116 unpackPS2# :: Addr# -> Int# -> [Char] -- calls injected by compiler
117 --???toCString :: _PackedString -> ByteArray#
118 _putPS :: _FILE -> _PackedString -> PrimIO () -- ToDo: more sensible type
122 _headPS :: _PackedString -> Char
123 _tailPS :: _PackedString -> _PackedString
124 _nullPS :: _PackedString -> Bool
125 _appendPS :: _PackedString -> _PackedString -> _PackedString
126 _lengthPS :: _PackedString -> Int
127 _indexPS :: _PackedString -> Int -> Char
128 -- 0-origin indexing into the string
129 _mapPS :: (Char -> Char) -> _PackedString -> _PackedString {-or String?-}
130 _filterPS :: (Char -> Bool) -> _PackedString -> _PackedString {-or String?-}
131 _foldlPS :: (a -> Char -> a) -> a -> _PackedString -> a
132 _foldrPS :: (Char -> a -> a) -> a -> _PackedString -> a
133 _takePS :: Int -> _PackedString -> _PackedString
134 _dropPS :: Int -> _PackedString -> _PackedString
135 _splitAtPS :: Int -> _PackedString -> (_PackedString, _PackedString)
136 _takeWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
137 _dropWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
138 _spanPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
139 _breakPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
140 _linesPS :: _PackedString -> [_PackedString]
141 _wordsPS :: _PackedString -> [_PackedString]
142 _reversePS :: _PackedString -> _PackedString
143 _concatPS :: [_PackedString] -> _PackedString
145 _substrPS :: _PackedString -> Int -> Int -> _PackedString
146 -- pluck out a piece of a _PS
147 -- start and end chars you want; both 0-origin-specified
148 --??? _hashPS :: _PackedString -> Int -> Int
149 -- use the _PS to produce a hash value between 0 & m (inclusive)
152 %************************************************************************
154 \subsection{Constructor functions}
156 %************************************************************************
158 Easy ones first. @_packString@ requires getting some heap-bytes and
159 scribbling stuff into them.
162 _packCString (A# a#) -- the easy one; we just believe the caller
165 len = case (strlen# a#) of { I# x -> x }
168 _consPS c cs = _packString (c : (_unpackPS cs)) -- ToDo:better
171 = case (_packString str) of
172 _PS bytes _ _ -> bytes
175 = _psToByteArray (_packString str)
178 = _packStringST str `thenStrictlyST` \ (_PS bytes n has_null) ->
179 --later? ASSERT(not has_null)
180 returnStrictlyST (_ByteArray (0, I# (n -# 1#)) bytes)
182 _packString str = _runST (_packStringST str)
185 = let len = length str in
188 pack_me :: Int -> [Char] -> _ST s _PackedString
190 pack_me len@(I# length#) str
191 = -- allocate an array that will hold the string
192 -- (not forgetting the NUL byte at the end)
193 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
195 -- fill in packed string from "str"
196 fill_in ch_array 0# str `seqStrictlyST`
199 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
201 let has_null = byteArrayHasNUL# frozen# length# in
203 returnStrictlyST (_PS frozen# length# has_null)
205 fill_in :: _MutableByteArray s Int -> Int# -> [Char] -> _ST s ()
207 fill_in arr_in# idx []
208 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
211 fill_in arr_in# idx (C# c : cs)
212 = write_ps_array arr_in# idx c `seqStrictlyST`
213 fill_in arr_in# (idx +# 1#) cs
215 _packCBytes len addr = _runST (_packCBytesST len addr)
217 _packCBytesST len@(I# length#) (A# addr)
218 = -- allocate an array that will hold the string
219 -- (not forgetting the NUL byte at the end)
220 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
222 -- fill in packed string from "addr"
223 fill_in ch_array 0# `seqStrictlyST`
226 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
228 let has_null = byteArrayHasNUL# frozen# length# in
230 returnStrictlyST (_PS frozen# length# has_null)
232 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
236 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
239 = case (indexCharOffAddr# addr idx) of { ch ->
240 write_ps_array arr_in# idx ch `seqStrictlyST`
241 fill_in arr_in# (idx +# 1#) }
243 _byteArrayToPS (_ByteArray ixs@(_, ix_end) frozen#)
248 else ((index ixs ix_end) + 1)
251 _PS frozen# n# (byteArrayHasNUL# frozen# n#)
253 _unsafeByteArrayToPS (_ByteArray _ frozen#) (I# n#)
254 = _PS frozen# n# (byteArrayHasNUL# frozen# n#)
256 _psToByteArray (_PS bytes n has_null)
257 = _ByteArray (0, I# (n -# 1#)) bytes
259 _psToByteArray (_CPS addr len#)
262 byte_array_form = _packCBytes len (A# addr)
264 case byte_array_form of { _PS bytes _ _ ->
265 _ByteArray (0, len - 1) bytes }
268 %************************************************************************
270 \subsection{Destructor functions (taking @_PackedStrings@ apart)}
272 %************************************************************************
275 unpackPS# addr -- calls injected by compiler
276 = _unpackPS (_CPS addr len)
278 len = case (strlen# addr) of { I# x -> x }
280 unpackPS2# addr len -- calls injected by compiler
281 -- this one is for literal strings with NULs in them; rare.
282 = _unpackPS (_packCBytes (I# len) (A# addr))
284 -- OK, but this code gets *hammered*:
286 -- = [ _indexPS ps n | n <- [ 0::Int .. _lengthPS ps - 1 ] ]
288 _unpackPS (_PS bytes len has_null)
293 | otherwise = C# ch : unpack (nh +# 1#)
295 ch = indexCharArray# bytes nh
297 _unpackPS (_CPS addr len)
301 | ch `eqChar#` '\0'# = []
302 | otherwise = C# ch : unpack (nh +# 1#)
304 ch = indexCharOffAddr# addr nh
308 _putPS file ps@(_PS bytes len has_null)
313 byte_array = _ByteArray (0, I# (len -# 1#)) bytes
315 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
316 `thenPrimIO` \ (I# written) ->
317 if written ==# len then
320 error "_putPS: fwrite failed!\n"
322 _putPS file (_CPS addr len)
326 = _ccall_ fputs (A# addr) file `thenPrimIO` \ (I# _){-force type-} ->
330 The dual to @_putPS@, note that the size of the chunk specified
331 is the upper bound of the size of the chunk returned.
334 _getPS :: _FILE -> Int -> PrimIO _PackedString
335 _getPS file len@(I# len#)
336 | len# <=# 0# = returnPrimIO _nilPS -- I'm being kind here.
338 -- Allocate an array for system call to store its bytes into.
339 new_ps_array len# `thenPrimIO` \ ch_arr ->
340 freeze_ps_array ch_arr `thenPrimIO` \ (_ByteArray _ frozen#) ->
342 byte_array = _ByteArray (0, I# len#) frozen#
344 _ccall_ fread byte_array (1::Int) len file `thenPrimIO` \ (I# read#) ->
345 if read# ==# 0# then -- EOF or other error
346 error "_getPS: EOF reached or other error"
349 The system call may not return the number of
350 bytes requested. Instead of failing with an error
351 if the number of bytes read is less than requested,
352 a packed string containing the bytes we did manage
353 to snarf is returned.
356 has_null = byteArrayHasNUL# frozen# read#
358 returnPrimIO (_PS frozen# read# has_null)
362 %************************************************************************
364 \subsection{List-mimicking functions for @_PackedStrings@}
366 %************************************************************************
368 First, the basic functions that do look into the representation;
369 @indexPS@ is the most important one.
371 _lengthPS ps = I# (lengthPS# ps)
373 {-# INLINE lengthPS# #-}
375 lengthPS# (_PS _ i _) = i
376 lengthPS# (_CPS _ i) = i
378 {-# INLINE strlen# #-}
380 strlen# :: Addr# -> Int
382 = unsafePerformPrimIO (
383 _ccall_ strlen (A# a) `thenPrimIO` \ len@(I# _) ->
387 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
389 byteArrayHasNUL# bs len
390 = unsafePerformPrimIO (
391 _ccall_ byteArrayHasNUL__ ba (I# len) `thenPrimIO` \ (I# res) ->
393 if res ==# 0# then False else True
396 ba = _ByteArray (0, I# (len -# 1#)) bs
398 -----------------------
399 _indexPS ps (I# n) = C# (indexPS# ps n)
401 {-# INLINE indexPS# #-}
403 indexPS# (_PS bs i _) n
404 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
407 indexPS# (_CPS a _) n
408 = indexCharOffAddr# a n
411 Now, the rest of the functions can be defined without digging
412 around in the representation.
415 | _nullPS ps = error "_headPS: head []"
416 | otherwise = C# (indexPS# ps 0#)
419 | len <=# 0# = error "_tailPS: tail []"
420 | len ==# 1# = _nilPS
421 | otherwise = substrPS# ps 1# (len -# 1#)
425 _nullPS (_PS _ i _) = i ==# 0#
426 _nullPS (_CPS _ i) = i ==# 0#
428 -- ToDo: some non-lousy implementations...
433 | otherwise = _packString (_unpackPS xs ++ _unpackPS ys)
435 _mapPS f xs = _packString (map f (_unpackPS xs))
437 _filterPS p ps = _packString (filter p (_unpackPS ps))
438 _foldlPS f b ps = foldl f b (_unpackPS ps)
439 _foldrPS f b ps = foldr f b (_unpackPS ps)
441 _takePS (I# n) ps = substrPS# ps 0# (n -# 1#)
442 _dropPS (I# n) ps = substrPS# ps n (lengthPS# ps -# 1#)
443 _splitAtPS n ps = (_takePS n ps, _dropPS n ps)
447 break_pt = char_pos_that_dissatisfies
453 substrPS# ps 0# (break_pt -# 1#)
458 break_pt = char_pos_that_dissatisfies
464 substrPS# ps break_pt (len -# 1#)
466 char_pos_that_dissatisfies :: (Char# -> Bool) -> _PackedString -> Int# -> Int# -> Int#
468 char_pos_that_dissatisfies p ps len pos
469 | pos >=# len = pos -- end
470 | p (indexPS# ps pos) = -- predicate satisfied; keep going
471 char_pos_that_dissatisfies p ps len (pos +# 1#)
472 | otherwise = pos -- predicate not satisfied
474 char_pos_that_dissatisfies p ps len pos -- dead code: HACK to avoid badly-typed error msg
477 -- ToDo: could certainly go quicker
478 _spanPS p ps = (_takeWhilePS p ps, _dropWhilePS p ps)
479 _breakPS p ps = _spanPS (not . p) ps
481 _linesPS ps = map _packString (lines (_unpackPS ps))
482 _wordsPS ps = map _packString (words (_unpackPS ps))
484 _reversePS ps = _packString (reverse (_unpackPS ps))
486 _concatPS [] = _nilPS
489 tot_len# = case (foldr ((+) . _lengthPS) 0 pss) of { I# x -> x }
490 tot_len = I# tot_len#
493 new_ps_array (tot_len# +# 1#) `thenStrictlyST` \ arr# -> -- incl NUL byte!
494 packum arr# pss 0# `seqStrictlyST`
495 freeze_ps_array arr# `thenStrictlyST` \ (_ByteArray _ frozen#) ->
497 let has_null = byteArrayHasNUL# frozen# tot_len# in
499 returnStrictlyST (_PS frozen# tot_len# has_null)
502 packum :: _MutableByteArray s Int -> [_PackedString] -> Int# -> _ST s ()
505 = write_ps_array arr pos (chr# 0#) `seqStrictlyST`
507 packum arr (ps : pss) pos
508 = fill arr pos ps 0# (lengthPS# ps) `thenStrictlyST` \ (I# next_pos) ->
509 packum arr pss next_pos
511 fill :: _MutableByteArray s Int -> Int# -> _PackedString -> Int# -> Int# -> _ST s Int
513 fill arr arr_i ps ps_i ps_len
515 = returnStrictlyST (I# (arr_i +# ps_len))
517 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) `seqStrictlyST`
518 fill arr arr_i ps (ps_i +# 1#) ps_len
521 %************************************************************************
523 \subsection{Instances for @_PackedStrings@: @Eq@, @Ord@, @Text@}
525 %************************************************************************
529 instance Eq _PackedString where
530 a == b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> False }
531 a /= b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> True }
533 instance Ord _PackedString where
534 a <= b = case _tagCmpPS a b of { _LT -> True; _EQ -> True; _GT -> False }
535 a < b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> False }
536 a >= b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> True }
537 a > b = case _tagCmpPS a b of { _LT -> False; _EQ -> False; _GT -> True }
542 _tagCmp a b = _tagCmpPS a b
545 We try hard to make this go fast:
547 _tagCmpPS :: _PackedString -> _PackedString -> _CMP_TAG
549 _tagCmpPS (_PS bs1 len1 has_null1) (_PS bs2 len2 has_null2)
550 | not has_null1 && not has_null2
551 = unsafePerformPrimIO (
552 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
554 if res <# 0# then _LT
555 else if res ==# 0# then _EQ
559 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
560 ba2 = _ByteArray (0, I# (len2 -# 1#)) bs2
562 _tagCmpPS (_PS bs1 len1 has_null1) (_CPS bs2 len2)
564 = unsafePerformPrimIO (
565 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
567 if res <# 0# then _LT
568 else if res ==# 0# then _EQ
572 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
575 _tagCmpPS (_CPS bs1 len1) (_CPS bs2 len2)
576 = unsafePerformPrimIO (
577 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
579 if res <# 0# then _LT
580 else if res ==# 0# then _EQ
587 _tagCmpPS a@(_CPS _ _) b@(_PS _ _ has_null2)
589 = -- try them the other way 'round
590 case (_tagCmpPS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
592 _tagCmpPS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
595 end1 = lengthPS# ps1 -# 1#
596 end2 = lengthPS# ps2 -# 1#
599 = if char# ># end1 then
600 if char# ># end2 then -- both strings ran out at once
602 else -- ps1 ran out before ps2
604 else if char# ># end2 then
605 _GT -- ps2 ran out before ps1
608 ch1 = indexPS# ps1 char#
609 ch2 = indexPS# ps2 char#
611 if ch1 `eqChar#` ch2 then
612 looking_at (char# +# 1#)
613 else if ch1 `ltChar#` ch2 then _LT
616 instance Text _PackedString where
617 readsPrec p = error "readsPrec: _PackedString: ToDo"
618 showsPrec p ps r = showsPrec p (_unpackPS ps) r
619 readList = _readList (readsPrec 0)
620 showList = _showList (showsPrec 0)
623 %************************************************************************
625 \subsection{Uniquely PackedString functions}
627 %************************************************************************
629 For @_substrPS@, see the next section.
631 @_hashPS@ is just what we happen to need in GHC...
634 _hashPS ps (I# hASH_TBL_SIZE#)
635 = I# (h `remInt#` hASH_TBL_SIZE#)
639 h | len <=# 0# = 0# -- probably should just be an "error"
640 | len ==# 1# = ord# c1
641 | len ==# 2# = ord# c2
642 | len ==# 3# = ord# c2 +# ord# c3
643 | len ==# 4# = ord# c2 +# ord# c3 +# ord# c4
644 | len ==# 5# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5
645 | len >=# 6# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5 +# ord# c6
646 | otherwise = 999# -- will never happen
657 %************************************************************************
659 \subsection{Local utility functions}
661 %************************************************************************
663 The definition of @_substrPS@ is essentially:
664 @take (end - begin + 1) (drop begin str)@.
666 _substrPS ps (I# begin) (I# end) = substrPS# ps begin end
670 = error "_substrPS: bounds out of range"
672 | s >=# len || result_len# <=# 0#
677 new_ps_array (result_len# +# 1#) `thenStrictlyST` \ ch_arr -> -- incl NUL byte!
678 fill_in ch_arr 0# `seqStrictlyST`
679 freeze_ps_array ch_arr `thenStrictlyST` \ (_ByteArray _ frozen#) ->
681 let has_null = byteArrayHasNUL# frozen# result_len# in
683 returnStrictlyST (_PS frozen# result_len# has_null)
688 result_len# = (if e <# len then (e +# 1#) else len) -# s
689 result_len = I# result_len#
691 -----------------------
692 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
695 | idx ==# result_len#
696 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
700 ch = indexPS# ps (s +# idx)
702 write_ps_array arr_in# idx ch `seqStrictlyST`
703 fill_in arr_in# (idx +# 1#)
706 (Very :-) ``Specialised'' versions of some CharArray things...
708 new_ps_array :: Int# -> _ST s (_MutableByteArray s Int)
709 write_ps_array :: _MutableByteArray s Int -> Int# -> Char# -> _ST s ()
710 freeze_ps_array :: _MutableByteArray s Int -> _ST s (_ByteArray Int)
712 new_ps_array size (S# s)
713 = case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
714 (_MutableByteArray bot barr#, S# s2#)}
716 bot = error "new_ps_array"
718 write_ps_array (_MutableByteArray _ barr#) n ch (S# s#)
719 = case writeCharArray# barr# n ch s# of { s2# ->
722 -- same as unsafeFreezeByteArray
723 freeze_ps_array (_MutableByteArray ixs arr#) (S# s#)
724 = case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
725 (_ByteArray ixs frozen#, S# s2#) }