2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1996
4 \section{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
12 PackedString, -- abstract
14 -- Creating the beasts
15 packString, -- :: [Char] -> PackedString
16 packStringST, -- :: [Char] -> ST s PackedString
17 packCBytesST, -- :: Int -> Addr -> ST s PackedString
19 byteArrayToPS, -- :: ByteArray Int -> PackedString
20 unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString
22 psToByteArray, -- :: PackedString -> ByteArray Int
23 psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int)
24 psToCString, -- :: PackedString -> Addr
25 isCString, -- :: PackedString -> Bool
27 unpackPS, -- :: PackedString -> [Char]
29 hPutPS, -- :: Handle -> PackedString -> IO ()
30 putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type
31 getPS, -- :: FILE -> Int -> PrimIO PackedString
33 nilPS, -- :: PackedString
34 consPS, -- :: Char -> PackedString -> PackedString
35 headPS, -- :: PackedString -> Char
36 tailPS, -- :: PackedString -> PackedString
37 nullPS, -- :: PackedString -> Bool
38 appendPS, -- :: PackedString -> PackedString -> PackedString
39 lengthPS, -- :: PackedString -> Int
40 {- 0-origin indexing into the string -}
41 indexPS, -- :: PackedString -> Int -> Char
42 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
43 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
44 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
45 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
46 takePS, -- :: Int -> PackedString -> PackedString
47 dropPS, -- :: Int -> PackedString -> PackedString
48 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
49 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
50 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
51 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
52 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
53 linesPS, -- :: PackedString -> [PackedString]
55 wordsPS, -- :: PackedString -> [PackedString]
56 reversePS, -- :: PackedString -> PackedString
57 splitPS, -- :: Char -> PackedString -> [PackedString]
58 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
59 joinPS, -- :: PackedString -> [PackedString] -> PackedString
60 concatPS, -- :: [PackedString] -> PackedString
61 elemPS, -- :: Char -> PackedString -> Bool
64 Pluck out a piece of a PS start and end
65 chars you want; both 0-origin-specified
67 substrPS, -- :: PackedString -> Int -> Int -> PackedString
71 -- Converting to C strings
73 unpackCString#, unpackCString2#, unpackAppendCString#, unpackFoldrCString#,
78 import PrelBase ( showList__ ) -- ToDo: better
81 import PrelArr ( StateAndMutableByteArray#(..) , StateAndByteArray#(..) )
84 import IOExts ( unsafePerformIO )
91 %************************************************************************
93 \subsection{@PackedString@ type declaration}
95 %************************************************************************
99 = PS ByteArray# -- the bytes
100 Int# -- length (*not* including NUL at the end)
101 Bool -- True <=> contains a NUL
102 | CPS Addr# -- pointer to the (null-terminated) bytes in C land
103 Int# -- length, as per strlen
104 -- definitely doesn't contain a NUL
106 instance Eq PackedString where
107 x == y = compare x y == EQ
108 x /= y = compare x y /= EQ
110 instance Ord PackedString where
112 x <= y = compare x y /= GT
113 x < y = compare x y == LT
114 x >= y = compare x y /= LT
115 x > y = compare x y == GT
116 max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x }
117 min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y }
119 --instance Read PackedString: ToDo
121 instance Show PackedString where
122 showsPrec p ps r = showsPrec p (unpackPS ps) r
123 showList = showList__ (showsPrec 0)
127 %************************************************************************
129 \subsection{@PackedString@ instances}
131 %************************************************************************
133 We try hard to make this go fast:
135 comparePS :: PackedString -> PackedString -> Ordering
137 comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2)
138 | not has_null1 && not has_null2
140 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
143 else if res ==# 0# then EQ
147 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
148 ba2 = ByteArray (0, I# (len2 -# 1#)) bs2
150 comparePS (PS bs1 len1 has_null1) (CPS bs2 len2)
153 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
156 else if res ==# 0# then EQ
160 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
163 comparePS (CPS bs1 len1) (CPS bs2 len2)
165 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
168 else if res ==# 0# then EQ
175 comparePS a@(CPS _ _) b@(PS _ _ has_null2)
177 = -- try them the other way 'round
178 case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
180 comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
183 end1 = lengthPS# ps1 -# 1#
184 end2 = lengthPS# ps2 -# 1#
187 = if char# ># end1 then
188 if char# ># end2 then -- both strings ran out at once
190 else -- ps1 ran out before ps2
192 else if char# ># end2 then
193 GT -- ps2 ran out before ps1
196 ch1 = indexPS# ps1 char#
197 ch2 = indexPS# ps2 char#
199 if ch1 `eqChar#` ch2 then
200 looking_at (char# +# 1#)
201 else if ch1 `ltChar#` ch2 then LT
206 %************************************************************************
208 \subsection{Constructor functions}
210 %************************************************************************
212 Easy ones first. @packString@ requires getting some heap-bytes and
213 scribbling stuff into them.
216 nilPS :: PackedString
219 consPS :: Char -> PackedString -> PackedString
220 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
222 packString :: [Char] -> PackedString
223 packString str = runST (packStringST str)
225 packStringST :: [Char] -> ST s PackedString
227 let len = length str in
230 packNCharsST :: Int -> [Char] -> ST s PackedString
231 packNCharsST len@(I# length#) str =
233 allocate an array that will hold the string
234 (not forgetting the NUL byte at the end)
236 new_ps_array (length# +# 1#) >>= \ ch_array ->
237 -- fill in packed string from "str"
238 fill_in ch_array 0# str >>
240 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
241 let has_null = byteArrayHasNUL# frozen# length# in
242 return (PS frozen# length# has_null)
244 fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s ()
245 fill_in arr_in# idx [] =
246 write_ps_array arr_in# idx (chr# 0#) >>
249 fill_in arr_in# idx (C# c : cs) =
250 write_ps_array arr_in# idx c >>
251 fill_in arr_in# (idx +# 1#) cs
253 byteArrayToPS :: ByteArray Int -> PackedString
254 byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) =
260 else ((index ixs ix_end) + 1)
263 PS frozen# n# (byteArrayHasNUL# frozen# n#)
265 unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString
266 unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#)
267 = PS frozen# n# (byteArrayHasNUL# frozen# n#)
269 psToByteArray :: PackedString -> ByteArray Int
270 psToByteArray (PS bytes n has_null)
271 = ByteArray (0, I# (n -# 1#)) bytes
273 psToByteArray (CPS addr len#)
276 byte_array_form = packCBytes len (A# addr)
278 case byte_array_form of { PS bytes _ _ ->
279 ByteArray (0, len - 1) bytes }
281 -- isCString is useful when passing PackedStrings to the
282 -- outside world, and need to figure out whether you can
283 -- pass it as an Addr or ByteArray.
285 isCString :: PackedString -> Bool
286 isCString (CPS _ _ ) = True
289 psToCString :: PackedString -> Addr
290 psToCString (CPS addr _) = (A# addr)
291 psToCString (PS bytes n# has_null) =
293 stuff <- _ccall_ malloc ((I# n#) * (``sizeof(char)''))
296 | n# ==# 0# = return ()
298 let ch# = indexCharArray# bytes i#
299 writeCharOffAddr stuff (I# i#) (C# ch#)
300 fill_in (n# -# 1#) (i# +# 1#)
306 %************************************************************************
308 \subsection{Destructor functions (taking @PackedStrings@ apart)}
310 %************************************************************************
313 -- OK, but this code gets *hammered*:
315 -- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ]
317 unpackPS :: PackedString -> [Char]
318 unpackPS (PS bytes len has_null)
323 | otherwise = C# ch : unpack (nh +# 1#)
325 ch = indexCharArray# bytes nh
327 unpackPS (CPS addr len)
331 | ch `eqChar#` '\0'# = []
332 | otherwise = C# ch : unpack (nh +# 1#)
334 ch = indexCharOffAddr# addr nh
337 Output a packed string via a handle:
341 hPutPS :: Handle -> PackedString -> IO ()
352 _readHandle handle >>= \ htype ->
354 _ErrorHandle ioError ->
355 _writeHandle handle htype >>
358 _writeHandle handle htype >>
359 failWith (IllegalOperation "handle is closed")
360 _SemiClosedHandle _ _ ->
361 _writeHandle handle htype >>
362 failWith (IllegalOperation "handle is closed")
364 _writeHandle handle htype >>
365 failWith (IllegalOperation "handle is not open for writing")
367 _getBufferMode other >>= \ other ->
368 (case _bufferMode other of
369 Just LineBuffering ->
370 writeLines (_filePtr other)
371 Just (BlockBuffering (Just size)) ->
372 writeBlocks (_filePtr other) size
373 Just (BlockBuffering Nothing) ->
374 writeBlocks (_filePtr other) ``BUFSIZ''
375 _ -> -- Nothing is treated pessimistically as NoBuffering
376 writeChars (_filePtr other) 0#
378 _writeHandle handle (_markHandle other) >>
382 _constructError "hPutStr" >>= \ ioError ->
388 writeLines :: Addr -> IO Bool
389 writeLines = writeChunks ``BUFSIZ'' True
391 writeBlocks :: Addr -> Int -> IO Bool
392 writeBlocks fp size = writeChunks size False fp
395 The breaking up of output into lines along \n boundaries
396 works fine as long as there are newlines to split by.
397 Avoid the splitting up into lines altogether (doesn't work
398 for overly long lines like the stuff that showsPrec instances
399 normally return). Instead, we split them up into fixed size
400 chunks before blasting them off to the Real World.
402 Hacked to avoid multiple passes over the strings - unsightly, but
403 a whole lot quicker. -- SOF 3/96
406 writeChunks :: Int -> Bool -> Addr -> IO Bool
407 writeChunks (I# bufLen) chopOnNewLine fp =
408 newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) ->
410 shoveString :: Int# -> Int# -> IO Bool
412 | i ==# pslen = -- end of string
416 _ccall_ writeFile arr fp (I# n) >>= \rc ->
420 case writeCharArray# arr# n (indexPS# ps i) s# of
422 {- Flushing lines - should we bother? -}
423 (if n ==# bufLen then
424 _ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc ->
426 shoveString 0# (i +# 1#)
430 shoveString (n +# 1#) (i +# 1#)) (S# s1#))
434 writeChars :: Addr -> Int# -> IO Bool
436 | i ==# pslen = return True
438 _ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc ->
440 writeChars fp (i +# 1#)
444 ---------------------------------------------
446 putPS :: _FILE -> PackedString -> IO ()
447 putPS file ps@(PS bytes len has_null)
452 byte_array = ByteArray (0, I# (len -# 1#)) bytes
454 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
455 >>= \ (I# written) ->
456 if written ==# len then
459 error "putPS: fwrite failed!\n"
461 putPS file (CPS addr len)
465 = _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} ->
469 The dual to @_putPS@, note that the size of the chunk specified
470 is the upper bound of the size of the chunk returned.
473 getPS :: _FILE -> Int -> IO PackedString
474 getPS file len@(I# len#)
475 | len# <=# 0# = return nilPS -- I'm being kind here.
477 -- Allocate an array for system call to store its bytes into.
478 new_ps_array len# >>= \ ch_arr ->
479 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
481 byte_array = ByteArray (0, I# len#) frozen#
483 _ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) ->
484 if read# ==# 0# then -- EOF or other error
485 error "getPS: EOF reached or other error"
488 The system call may not return the number of
489 bytes requested. Instead of failing with an error
490 if the number of bytes read is less than requested,
491 a packed string containing the bytes we did manage
492 to snarf is returned.
495 has_null = byteArrayHasNUL# frozen# read#
497 return (PS frozen# read# has_null)
501 %************************************************************************
503 \subsection{List-mimicking functions for @PackedStrings@}
505 %************************************************************************
507 First, the basic functions that do look into the representation;
508 @indexPS@ is the most important one.
511 lengthPS :: PackedString -> Int
512 lengthPS ps = I# (lengthPS# ps)
514 {-# INLINE lengthPS# #-}
516 lengthPS# (PS _ i _) = i
517 lengthPS# (CPS _ i) = i
519 {-# INLINE strlen# #-}
521 strlen# :: Addr# -> Int
524 _ccall_ strlen (A# a) >>= \ len@(I# _) ->
528 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
529 byteArrayHasNUL# bs len
531 _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) ->
533 if res ==# 0# then False else True
536 ba = ByteArray (0, I# (len -# 1#)) bs
538 -----------------------
540 indexPS :: PackedString -> Int -> Char
541 indexPS ps (I# n) = C# (indexPS# ps n)
543 {-# INLINE indexPS# #-}
545 indexPS# (PS bs i _) n
546 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
550 = indexCharOffAddr# a n
553 Now, the rest of the functions can be defined without digging
554 around in the representation.
557 headPS :: PackedString -> Char
559 | nullPS ps = error "headPS: head []"
560 | otherwise = C# (indexPS# ps 0#)
562 tailPS :: PackedString -> PackedString
564 | len <=# 0# = error "tailPS: tail []"
566 | otherwise = substrPS# ps 1# (len -# 1#)
570 nullPS :: PackedString -> Bool
571 nullPS (PS _ i _) = i ==# 0#
572 nullPS (CPS _ i) = i ==# 0#
574 {- (ToDo: some non-lousy implementations...)
576 Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys)
579 appendPS :: PackedString -> PackedString -> PackedString
583 | otherwise = concatPS [xs,ys]
585 {- OLD: mapPS f xs = packString (map f (unpackPS xs)) -}
587 mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-}
593 new_ps_array (length +# 1#) >>= \ ps_arr ->
594 whizz ps_arr length 0# >>
595 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
596 let has_null = byteArrayHasNUL# frozen# length in
597 return (PS frozen# length has_null))
599 length = lengthPS# xs
601 whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
604 = write_ps_array arr# i (chr# 0#) >>
610 write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >>
611 whizz arr# (n -# 1#) (i +# 1#)
613 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
619 Filtering proceeds as follows:
621 * traverse the list, applying the pred. to each element,
622 remembering the positions where it was satisfied.
624 Encode these positions using a run-length encoding of the gaps
625 between the matching positions.
627 * Allocate a MutableByteArray in the heap big enough to hold
628 all the matched entries, and copy the elements that matched over.
630 A better solution that merges the scan© passes into one,
631 would be to copy the filtered elements over into a growable
632 buffer. No such operation currently supported over
633 MutableByteArrays (could of course use malloc&realloc)
634 But, this solution may in the case of repeated realloc's
635 be worse than the current solution.
639 (rle,len_filtered) = filter_ps (len# -# 1#) 0# 0# []
640 len_filtered# = case len_filtered of { I# x# -> x#}
642 if len# ==# len_filtered# then
643 {- not much filtering as everything passed through. -}
645 else if len_filtered# ==# 0# then
648 new_ps_array (len_filtered# +# 1#) >>= \ ps_arr ->
649 copy_arr ps_arr rle 0# 0# >>
650 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
651 let has_null = byteArrayHasNUL# frozen# len_filtered# in
652 return (PS frozen# len_filtered# has_null))
656 matchOffset :: Int# -> [Char] -> (Int,[Char])
657 matchOffset off [] = (I# off,[])
658 matchOffset off (C# c:cs) =
663 if x==# 0# then -- escape code, add 255#
668 copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s ()
669 copy_arr arr# [_] _ _ = return ()
670 copy_arr arr# ls n i =
672 (x,ls') = matchOffset 0# ls
673 n' = n +# (case x of { (I# x#) -> x#}) -# 1#
676 write_ps_array arr# i ch >>
677 copy_arr arr# ls' (n' +# 1#) (i +# 1#)
679 esc :: Int# -> Int# -> [Char] -> [Char]
680 esc v 0# ls = (C# (chr# v)):ls
681 esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls)
683 filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int)
684 filter_ps n hits run acc
687 escs = run `quotInt#` 255#
688 v = run `remInt#` 255#
690 (esc (v +# 1#) escs acc, I# hits)
698 escs = run `quotInt#` 255#
699 v = run `remInt#` 255#
700 acc' = esc (v +# 1#) escs acc
702 filter_ps n' (hits +# 1#) 0# acc'
704 filter_ps n' hits (run +# 1#) acc
707 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
716 --whizzLR :: a -> Int# -> a
719 | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#)
722 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
731 --whizzRL :: a -> Int# -> a
734 | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#)
736 takePS :: Int -> PackedString -> PackedString
739 | otherwise = substrPS# ps 0# (n -# 1#)
741 dropPS :: Int -> PackedString -> PackedString
744 | otherwise = substrPS# ps n (lengthPS# ps -# 1#)
748 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
749 splitAtPS n ps = (takePS n ps, dropPS n ps)
751 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
754 break_pt = char_pos_that_dissatisfies
760 if break_pt ==# 0# then
763 substrPS# ps 0# (break_pt -# 1#)
765 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
769 break_pt = char_pos_that_dissatisfies
775 if len ==# break_pt then
778 substrPS# ps break_pt (len -# 1#)
780 elemPS :: Char -> PackedString -> Bool
784 break_pt = first_char_pos_that_satisfies
792 char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
794 char_pos_that_dissatisfies p ps len pos
795 | pos >=# len = pos -- end
796 | p (indexPS# ps pos) = -- predicate satisfied; keep going
797 char_pos_that_dissatisfies p ps len (pos +# 1#)
798 | otherwise = pos -- predicate not satisfied
800 first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
801 first_char_pos_that_satisfies p ps len pos
802 | pos >=# len = pos -- end
803 | p (indexPS# ps pos) = pos -- got it!
804 | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#)
806 -- ToDo: could certainly go quicker
807 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
808 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
810 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
811 breakPS p ps = spanPS (not . p) ps
813 linesPS :: PackedString -> [PackedString]
814 linesPS ps = splitPS '\n' ps
816 wordsPS :: PackedString -> [PackedString]
817 wordsPS ps = splitWithPS isSpace ps
819 reversePS :: PackedString -> PackedString
821 if nullPS ps then -- don't create stuff unnecessarily.
825 new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte!
826 fill_in arr# (length -# 1#) 0# >>
827 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
828 let has_null = byteArrayHasNUL# frozen# length in
829 return (PS frozen# length has_null))
831 length = lengthPS# ps
833 fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
834 fill_in arr_in# n i =
838 write_ps_array arr_in# i ch >>
840 write_ps_array arr_in# (i +# 1#) (chr# 0#) >>
843 fill_in arr_in# (n -# 1#) (i +# 1#)
845 concatPS :: [PackedString] -> PackedString
849 tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x }
850 tot_len = I# tot_len#
853 new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte!
854 packum arr# pss 0# >>
855 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
857 let has_null = byteArrayHasNUL# frozen# tot_len# in
859 return (PS frozen# tot_len# has_null)
862 packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s ()
865 = write_ps_array arr pos (chr# 0#) >>
867 packum arr (ps : pss) pos
868 = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) ->
869 packum arr pss next_pos
871 fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int
873 fill arr arr_i ps ps_i ps_len
875 = return (I# (arr_i +# ps_len))
877 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >>
878 fill arr arr_i ps (ps_i +# 1#) ps_len
880 ------------------------------------------------------------
881 joinPS :: PackedString -> [PackedString] -> PackedString
882 joinPS filler pss = concatPS (splice pss)
886 splice (x:y:xs) = x:filler:splice (y:xs)
888 -- ToDo: the obvious generalisation
890 Some properties that hold:
893 where False = any (map (x `elemPS`) ls')
894 False = any (map (nullPS) ls')
896 * all x's have been chopped out.
897 * no empty PackedStrings in returned list. A conseq.
902 * joinPS (packString [x]) (_splitPS x ls) = ls
906 splitPS :: Char -> PackedString -> [PackedString]
907 splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch)
909 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
910 splitWithPS pred ps =
920 first_char_pos_that_satisfies
926 if break_pt ==# n then -- immediate match, no substring to cut out.
927 splitify (break_pt +# 1#)
929 substrPS# ps n (break_pt -# 1#): -- leave out the matching character
930 splitify (break_pt +# 1#)
933 %************************************************************************
935 \subsection{Local utility functions}
937 %************************************************************************
939 The definition of @_substrPS@ is essentially:
940 @take (end - begin + 1) (drop begin str)@.
943 substrPS :: PackedString -> Int -> Int -> PackedString
944 substrPS ps (I# begin) (I# end) = substrPS# ps begin end
948 = error "substrPS: bounds out of range"
950 | s >=# len || result_len# <=# 0#
955 new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte!
957 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
959 let has_null = byteArrayHasNUL# frozen# result_len# in
961 return (PS frozen# result_len# has_null)
966 result_len# = (if e <# len then (e +# 1#) else len) -# s
967 result_len = I# result_len#
969 -----------------------
970 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
973 | idx ==# result_len#
974 = write_ps_array arr_in# idx (chr# 0#) >>
978 ch = indexPS# ps (s +# idx)
980 write_ps_array arr_in# idx ch >>
981 fill_in arr_in# (idx +# 1#)
984 (Very :-) ``Specialised'' versions of some CharArray things...
987 new_ps_array :: Int# -> ST s (MutableByteArray s Int)
988 write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s ()
989 freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int)
991 new_ps_array size = ST $ \ s# ->
992 case newCharArray# size s# of { StateAndMutableByteArray# s2# barr# ->
993 STret s2# (MutableByteArray bot barr#)}
995 bot = error "new_ps_array"
997 write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ s# ->
998 case writeCharArray# barr# n ch s# of { s2# ->
1001 -- same as unsafeFreezeByteArray
1002 freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ s# ->
1003 case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
1004 STret s2# (ByteArray ixs frozen#) }
1008 %*********************************************************
1010 \subsection{Packing and unpacking C strings}
1012 %*********************************************************
1015 unpackCString :: Addr -> [Char]
1017 -- Calls to the next four are injected by the compiler itself,
1018 -- to deal with literal strings
1019 packCString# :: [Char] -> ByteArray#
1020 unpackCString# :: Addr# -> [Char]
1021 unpackCString2# :: Addr# -> Int# -> [Char]
1022 unpackAppendCString# :: Addr# -> [Char] -> [Char]
1023 unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
1025 packCString# str = case (packString str) of { PS bytes _ _ -> bytes }
1027 unpackCString a@(A# addr) =
1028 if a == ``NULL'' then
1037 | ch `eqChar#` '\0'# = []
1038 | otherwise = C# ch : unpack (nh +# 1#)
1040 ch = indexCharOffAddr# addr nh
1042 unpackCString2# addr len
1043 -- This one is called by the compiler to unpack literal strings with NULs in them; rare.
1044 = unpackPS (packCBytes (I# len) (A# addr))
1046 unpackAppendCString# addr rest
1050 | ch `eqChar#` '\0'# = rest
1051 | otherwise = C# ch : unpack (nh +# 1#)
1053 ch = indexCharOffAddr# addr nh
1055 unpackFoldrCString# addr f z
1059 | ch `eqChar#` '\0'# = z
1060 | otherwise = C# ch `f` unpack (nh +# 1#)
1062 ch = indexCharOffAddr# addr nh
1065 cStringToPS :: Addr -> PackedString
1066 cStringToPS (A# a#) = -- the easy one; we just believe the caller
1069 len = case (strlen# a#) of { I# x -> x }
1071 packBytesForC :: [Char] -> ByteArray Int
1072 packBytesForC str = psToByteArray (packString str)
1074 psToByteArrayST :: [Char] -> ST s (ByteArray Int)
1075 psToByteArrayST str =
1076 packStringST str >>= \ (PS bytes n has_null) ->
1077 --later? ASSERT(not has_null)
1078 return (ByteArray (0, I# (n -# 1#)) bytes)
1080 packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int)
1081 packNBytesForCST len str =
1082 packNCharsST len str >>= \ (PS bytes n has_null) ->
1083 return (ByteArray (0, I# (n -# 1#)) bytes)
1085 packCBytes :: Int -> Addr -> PackedString
1086 packCBytes len addr = runST (packCBytesST len addr)
1088 packCBytesST :: Int -> Addr -> ST s PackedString
1089 packCBytesST len@(I# length#) (A# addr) =
1091 allocate an array that will hold the string
1092 (not forgetting the NUL byte at the end)
1094 new_ps_array (length# +# 1#) >>= \ ch_array ->
1095 -- fill in packed string from "addr"
1096 fill_in ch_array 0# >>
1097 -- freeze the puppy:
1098 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
1099 let has_null = byteArrayHasNUL# frozen# length# in
1100 return (PS frozen# length# has_null)
1102 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
1106 = write_ps_array arr_in# idx (chr# 0#) >>
1109 = case (indexCharOffAddr# addr idx) of { ch ->
1110 write_ps_array arr_in# idx ch >>
1111 fill_in arr_in# (idx +# 1#) }