2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1996
4 \section{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
11 {-# OPTIONS -fno-implicit-prelude '-#include "cbits/stgio.h"' #-}
14 PackedString, -- abstract
16 -- Creating the beasts
17 packString, -- :: [Char] -> PackedString
18 packStringST, -- :: [Char] -> ST s PackedString
19 packCBytesST, -- :: Int -> Addr -> ST s PackedString
21 byteArrayToPS, -- :: ByteArray Int -> PackedString
22 unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString
24 psToByteArray, -- :: PackedString -> ByteArray Int
25 psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int)
27 unpackPS, -- :: PackedString -> [Char]
29 hPutPS, -- :: Handle -> PackedString -> IO ()
30 putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type
31 getPS, -- :: FILE -> Int -> PrimIO PackedString
33 nilPS, -- :: PackedString
34 consPS, -- :: Char -> PackedString -> PackedString
35 headPS, -- :: PackedString -> Char
36 tailPS, -- :: PackedString -> PackedString
37 nullPS, -- :: PackedString -> Bool
38 appendPS, -- :: PackedString -> PackedString -> PackedString
39 lengthPS, -- :: PackedString -> Int
40 {- 0-origin indexing into the string -}
41 indexPS, -- :: PackedString -> Int -> Char
42 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
43 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
44 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
45 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
46 takePS, -- :: Int -> PackedString -> PackedString
47 dropPS, -- :: Int -> PackedString -> PackedString
48 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
49 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
50 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
51 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
52 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
53 linesPS, -- :: PackedString -> [PackedString]
55 wordsPS, -- :: PackedString -> [PackedString]
56 reversePS, -- :: PackedString -> PackedString
57 splitPS, -- :: Char -> PackedString -> [PackedString]
58 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
59 joinPS, -- :: PackedString -> [PackedString] -> PackedString
60 concatPS, -- :: [PackedString] -> PackedString
61 elemPS, -- :: Char -> PackedString -> Bool
64 Pluck out a piece of a PS start and end
65 chars you want; both 0-origin-specified
67 substrPS, -- :: PackedString -> Int -> Int -> PackedString
71 -- Converting to C strings
73 unpackCString#, unpackCString2#, unpackAppendCString#, unpackFoldrCString#,
77 import {-# SOURCE #-} IOBase ( error )
81 import UnsafeST ( unsafePerformPrimIO )
84 import Foreign ( Addr(..) )
89 %************************************************************************
91 \subsection{@PackedString@ type declaration}
93 %************************************************************************
97 = PS ByteArray# -- the bytes
98 Int# -- length (*not* including NUL at the end)
99 Bool -- True <=> contains a NUL
100 | CPS Addr# -- pointer to the (null-terminated) bytes in C land
101 Int# -- length, as per strlen
102 -- definitely doesn't contain a NUL
104 instance Eq PackedString where
105 x == y = compare x y == EQ
106 x /= y = compare x y /= EQ
108 instance Ord PackedString where
110 x <= y = compare x y /= GT
111 x < y = compare x y == LT
112 x >= y = compare x y /= LT
113 x > y = compare x y == GT
114 max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x }
115 min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y }
117 --instance Read PackedString: ToDo
119 instance Show PackedString where
120 showsPrec p ps r = showsPrec p (unpackPS ps) r
121 showList = showList__ (showsPrec 0)
125 %************************************************************************
127 \subsection{@PackedString@ instances}
129 %************************************************************************
131 We try hard to make this go fast:
133 comparePS :: PackedString -> PackedString -> Ordering
135 comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2)
136 | not has_null1 && not has_null2
137 = unsafePerformPrimIO (
138 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
141 else if res ==# 0# then EQ
145 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
146 ba2 = ByteArray (0, I# (len2 -# 1#)) bs2
148 comparePS (PS bs1 len1 has_null1) (CPS bs2 len2)
150 = unsafePerformPrimIO (
151 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
154 else if res ==# 0# then EQ
158 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
161 comparePS (CPS bs1 len1) (CPS bs2 len2)
162 = unsafePerformPrimIO (
163 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
166 else if res ==# 0# then EQ
173 comparePS a@(CPS _ _) b@(PS _ _ has_null2)
175 = -- try them the other way 'round
176 case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
178 comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
181 end1 = lengthPS# ps1 -# 1#
182 end2 = lengthPS# ps2 -# 1#
185 = if char# ># end1 then
186 if char# ># end2 then -- both strings ran out at once
188 else -- ps1 ran out before ps2
190 else if char# ># end2 then
191 GT -- ps2 ran out before ps1
194 ch1 = indexPS# ps1 char#
195 ch2 = indexPS# ps2 char#
197 if ch1 `eqChar#` ch2 then
198 looking_at (char# +# 1#)
199 else if ch1 `ltChar#` ch2 then LT
204 %************************************************************************
206 \subsection{Constructor functions}
208 %************************************************************************
210 Easy ones first. @packString@ requires getting some heap-bytes and
211 scribbling stuff into them.
214 nilPS :: PackedString
217 consPS :: Char -> PackedString -> PackedString
218 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
220 packString :: [Char] -> PackedString
221 packString str = runST (packStringST str)
223 packStringST :: [Char] -> ST s PackedString
225 let len = length str in
228 packNCharsST :: Int -> [Char] -> ST s PackedString
229 packNCharsST len@(I# length#) str =
231 allocate an array that will hold the string
232 (not forgetting the NUL byte at the end)
234 new_ps_array (length# +# 1#) >>= \ ch_array ->
235 -- fill in packed string from "str"
236 fill_in ch_array 0# str >>
238 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
239 let has_null = byteArrayHasNUL# frozen# length# in
240 return (PS frozen# length# has_null)
242 fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s ()
243 fill_in arr_in# idx [] =
244 write_ps_array arr_in# idx (chr# 0#) >>
247 fill_in arr_in# idx (C# c : cs) =
248 write_ps_array arr_in# idx c >>
249 fill_in arr_in# (idx +# 1#) cs
251 byteArrayToPS :: ByteArray Int -> PackedString
252 byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) =
258 else ((index ixs ix_end) + 1)
261 PS frozen# n# (byteArrayHasNUL# frozen# n#)
263 unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString
264 unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#)
265 = PS frozen# n# (byteArrayHasNUL# frozen# n#)
267 psToByteArray :: PackedString -> ByteArray Int
268 psToByteArray (PS bytes n has_null)
269 = ByteArray (0, I# (n -# 1#)) bytes
271 psToByteArray (CPS addr len#)
274 byte_array_form = packCBytes len (A# addr)
276 case byte_array_form of { PS bytes _ _ ->
277 ByteArray (0, len - 1) bytes }
280 %************************************************************************
282 \subsection{Destructor functions (taking @PackedStrings@ apart)}
284 %************************************************************************
287 -- OK, but this code gets *hammered*:
289 -- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ]
291 unpackPS :: PackedString -> [Char]
292 unpackPS (PS bytes len has_null)
297 | otherwise = C# ch : unpack (nh +# 1#)
299 ch = indexCharArray# bytes nh
301 unpackPS (CPS addr len)
305 | ch `eqChar#` '\0'# = []
306 | otherwise = C# ch : unpack (nh +# 1#)
308 ch = indexCharOffAddr# addr nh
311 Output a packed string via a handle:
315 hPutPS :: Handle -> PackedString -> IO ()
326 _readHandle handle >>= \ htype ->
328 _ErrorHandle ioError ->
329 _writeHandle handle htype >>
332 _writeHandle handle htype >>
333 failWith (IllegalOperation "handle is closed")
334 _SemiClosedHandle _ _ ->
335 _writeHandle handle htype >>
336 failWith (IllegalOperation "handle is closed")
338 _writeHandle handle htype >>
339 failWith (IllegalOperation "handle is not open for writing")
341 _getBufferMode other >>= \ other ->
342 (case _bufferMode other of
343 Just LineBuffering ->
344 writeLines (_filePtr other)
345 Just (BlockBuffering (Just size)) ->
346 writeBlocks (_filePtr other) size
347 Just (BlockBuffering Nothing) ->
348 writeBlocks (_filePtr other) ``BUFSIZ''
349 _ -> -- Nothing is treated pessimistically as NoBuffering
350 writeChars (_filePtr other) 0#
352 _writeHandle handle (_markHandle other) >>
356 _constructError "hPutStr" >>= \ ioError ->
362 writeLines :: Addr -> PrimIO Bool
363 writeLines = writeChunks ``BUFSIZ'' True
365 writeBlocks :: Addr -> Int -> PrimIO Bool
366 writeBlocks fp size = writeChunks size False fp
369 The breaking up of output into lines along \n boundaries
370 works fine as long as there are newlines to split by.
371 Avoid the splitting up into lines altogether (doesn't work
372 for overly long lines like the stuff that showsPrec instances
373 normally return). Instead, we split them up into fixed size
374 chunks before blasting them off to the Real World.
376 Hacked to avoid multiple passes over the strings - unsightly, but
377 a whole lot quicker. -- SOF 3/96
380 writeChunks :: Int -> Bool -> Addr -> PrimIO Bool
381 writeChunks (I# bufLen) chopOnNewLine fp =
382 newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) ->
384 shoveString :: Int# -> Int# -> PrimIO Bool
386 | i ==# pslen = -- end of string
390 _ccall_ writeFile arr fp (I# n) >>= \rc ->
394 case writeCharArray# arr# n (indexPS# ps i) s# of
396 {- Flushing lines - should we bother? -}
397 (if n ==# bufLen then
398 _ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc ->
400 shoveString 0# (i +# 1#)
404 shoveString (n +# 1#) (i +# 1#)) (S# s1#))
408 writeChars :: Addr -> Int# -> PrimIO Bool
410 | i ==# pslen = return True
412 _ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc ->
414 writeChars fp (i +# 1#)
418 ---------------------------------------------
420 putPS :: _FILE -> PackedString -> PrimIO ()
421 putPS file ps@(PS bytes len has_null)
426 byte_array = ByteArray (0, I# (len -# 1#)) bytes
428 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
429 >>= \ (I# written) ->
430 if written ==# len then
433 error "putPS: fwrite failed!\n"
435 putPS file (CPS addr len)
439 = _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} ->
443 The dual to @_putPS@, note that the size of the chunk specified
444 is the upper bound of the size of the chunk returned.
447 getPS :: _FILE -> Int -> PrimIO PackedString
448 getPS file len@(I# len#)
449 | len# <=# 0# = return nilPS -- I'm being kind here.
451 -- Allocate an array for system call to store its bytes into.
452 new_ps_array len# >>= \ ch_arr ->
453 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
455 byte_array = ByteArray (0, I# len#) frozen#
457 _ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) ->
458 if read# ==# 0# then -- EOF or other error
459 error "getPS: EOF reached or other error"
462 The system call may not return the number of
463 bytes requested. Instead of failing with an error
464 if the number of bytes read is less than requested,
465 a packed string containing the bytes we did manage
466 to snarf is returned.
469 has_null = byteArrayHasNUL# frozen# read#
471 return (PS frozen# read# has_null)
475 %************************************************************************
477 \subsection{List-mimicking functions for @PackedStrings@}
479 %************************************************************************
481 First, the basic functions that do look into the representation;
482 @indexPS@ is the most important one.
485 lengthPS :: PackedString -> Int
486 lengthPS ps = I# (lengthPS# ps)
488 {-# INLINE lengthPS# #-}
490 lengthPS# (PS _ i _) = i
491 lengthPS# (CPS _ i) = i
493 {-# INLINE strlen# #-}
495 strlen# :: Addr# -> Int
497 = unsafePerformPrimIO (
498 _ccall_ strlen (A# a) >>= \ len@(I# _) ->
502 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
503 byteArrayHasNUL# bs len
504 = unsafePerformPrimIO (
505 _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) ->
507 if res ==# 0# then False else True
510 ba = ByteArray (0, I# (len -# 1#)) bs
512 -----------------------
514 indexPS :: PackedString -> Int -> Char
515 indexPS ps (I# n) = C# (indexPS# ps n)
517 {-# INLINE indexPS# #-}
519 indexPS# (PS bs i _) n
520 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
524 = indexCharOffAddr# a n
527 Now, the rest of the functions can be defined without digging
528 around in the representation.
531 headPS :: PackedString -> Char
533 | nullPS ps = error "headPS: head []"
534 | otherwise = C# (indexPS# ps 0#)
536 tailPS :: PackedString -> PackedString
538 | len <=# 0# = error "tailPS: tail []"
540 | otherwise = substrPS# ps 1# (len -# 1#)
544 nullPS :: PackedString -> Bool
545 nullPS (PS _ i _) = i ==# 0#
546 nullPS (CPS _ i) = i ==# 0#
548 {- (ToDo: some non-lousy implementations...)
550 Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys)
553 appendPS :: PackedString -> PackedString -> PackedString
557 | otherwise = concatPS [xs,ys]
559 {- OLD: mapPS f xs = packString (map f (unpackPS xs)) -}
561 mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-}
567 new_ps_array (length +# 1#) >>= \ ps_arr ->
568 whizz ps_arr length 0# >>
569 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
570 let has_null = byteArrayHasNUL# frozen# length in
571 return (PS frozen# length has_null))
573 length = lengthPS# xs
575 whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
578 = write_ps_array arr# i (chr# 0#) >>
584 write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >>
585 whizz arr# (n -# 1#) (i +# 1#)
587 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
593 Filtering proceeds as follows:
595 * traverse the list, applying the pred. to each element,
596 remembering the positions where it was satisfied.
598 Encode these positions using a run-length encoding of the gaps
599 between the matching positions.
601 * Allocate a MutableByteArray in the heap big enough to hold
602 all the matched entries, and copy the elements that matched over.
604 A better solution that merges the scan© passes into one,
605 would be to copy the filtered elements over into a growable
606 buffer. No such operation currently supported over
607 MutableByteArrays (could of course use malloc&realloc)
608 But, this solution may in the case of repeated realloc's
609 be worse than the current solution.
613 (rle,len_filtered) = filter_ps len# 0# 0# []
614 len_filtered# = case len_filtered of { I# x# -> x#}
616 if len# ==# len_filtered# then
617 {- not much filtering as everything passed through. -}
619 else if len_filtered# ==# 0# then
622 new_ps_array (len_filtered# +# 1#) >>= \ ps_arr ->
623 copy_arr ps_arr rle 0# 0# >>
624 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
625 let has_null = byteArrayHasNUL# frozen# len_filtered# in
626 return (PS frozen# len_filtered# has_null))
630 matchOffset :: Int# -> [Char] -> (Int,[Char])
631 matchOffset off [] = (I# off,[])
632 matchOffset off (C# c:cs) =
637 if x==# 0# then -- escape code, add 255#
642 copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s ()
643 copy_arr arr# [_] _ _ = return ()
644 copy_arr arr# ls n i =
646 (x,ls') = matchOffset 0# ls
647 n' = n +# (case x of { (I# x#) -> x#}) -# 1#
650 write_ps_array arr# i ch >>
651 copy_arr arr# ls' (n' +# 1#) (i +# 1#)
653 esc :: Int# -> Int# -> [Char] -> [Char]
654 esc v 0# ls = (C# (chr# v)):ls
655 esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls)
657 filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int)
658 filter_ps n hits run acc
661 escs = run `quotInt#` 255#
662 v = run `remInt#` 255#
664 (esc (v +# 1#) escs acc, I# hits)
672 escs = run `quotInt#` 255#
673 v = run `remInt#` 255#
674 acc' = esc (v +# 1#) escs acc
676 filter_ps n' (hits +# 1#) 0# acc'
678 filter_ps n' hits (run +# 1#) acc
681 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
690 --whizzLR :: a -> Int# -> a
693 | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#)
696 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
705 --whizzRL :: a -> Int# -> a
708 | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#)
710 takePS :: Int -> PackedString -> PackedString
713 | otherwise = substrPS# ps 0# (n -# 1#)
715 dropPS :: Int -> PackedString -> PackedString
718 | otherwise = substrPS# ps n (lengthPS# ps -# 1#)
722 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
723 splitAtPS n ps = (takePS n ps, dropPS n ps)
725 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
728 break_pt = char_pos_that_dissatisfies
734 if break_pt ==# 0# then
737 substrPS# ps 0# (break_pt -# 1#)
739 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
743 break_pt = char_pos_that_dissatisfies
749 if len ==# break_pt then
752 substrPS# ps break_pt (len -# 1#)
754 elemPS :: Char -> PackedString -> Bool
758 break_pt = first_char_pos_that_satisfies
766 char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
768 char_pos_that_dissatisfies p ps len pos
769 | pos >=# len = pos -- end
770 | p (indexPS# ps pos) = -- predicate satisfied; keep going
771 char_pos_that_dissatisfies p ps len (pos +# 1#)
772 | otherwise = pos -- predicate not satisfied
774 first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
775 first_char_pos_that_satisfies p ps len pos
776 | pos >=# len = pos -- end
777 | p (indexPS# ps pos) = pos -- got it!
778 | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#)
780 -- ToDo: could certainly go quicker
781 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
782 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
784 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
785 breakPS p ps = spanPS (not . p) ps
787 linesPS :: PackedString -> [PackedString]
788 linesPS ps = splitPS '\n' ps
790 wordsPS :: PackedString -> [PackedString]
791 wordsPS ps = splitWithPS isSpace ps
793 reversePS :: PackedString -> PackedString
795 if nullPS ps then -- don't create stuff unnecessarily.
799 new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte!
800 fill_in arr# (length -# 1#) 0# >>
801 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
802 let has_null = byteArrayHasNUL# frozen# length in
803 return (PS frozen# length has_null))
805 length = lengthPS# ps
807 fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
808 fill_in arr_in# n i =
812 write_ps_array arr_in# i ch >>
814 write_ps_array arr_in# (i +# 1#) (chr# 0#) >>
817 fill_in arr_in# (n -# 1#) (i +# 1#)
819 concatPS :: [PackedString] -> PackedString
823 tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x }
824 tot_len = I# tot_len#
827 new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte!
828 packum arr# pss 0# >>
829 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
831 let has_null = byteArrayHasNUL# frozen# tot_len# in
833 return (PS frozen# tot_len# has_null)
836 packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s ()
839 = write_ps_array arr pos (chr# 0#) >>
841 packum arr (ps : pss) pos
842 = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) ->
843 packum arr pss next_pos
845 fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int
847 fill arr arr_i ps ps_i ps_len
849 = return (I# (arr_i +# ps_len))
851 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >>
852 fill arr arr_i ps (ps_i +# 1#) ps_len
854 ------------------------------------------------------------
855 joinPS :: PackedString -> [PackedString] -> PackedString
856 joinPS filler pss = concatPS (splice pss)
860 splice (x:y:xs) = x:filler:splice (y:xs)
862 -- ToDo: the obvious generalisation
864 Some properties that hold:
867 where False = any (map (x `elemPS`) ls')
868 False = any (map (nullPS) ls')
870 * all x's have been chopped out.
871 * no empty PackedStrings in returned list. A conseq.
876 * joinPS (packString [x]) (_splitPS x ls) = ls
880 splitPS :: Char -> PackedString -> [PackedString]
881 splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch)
883 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
884 splitWithPS pred ps =
894 first_char_pos_that_satisfies
900 if break_pt ==# n then -- immediate match, no substring to cut out.
901 splitify (break_pt +# 1#)
903 substrPS# ps n (break_pt -# 1#): -- leave out the matching character
904 splitify (break_pt +# 1#)
907 %************************************************************************
909 \subsection{Local utility functions}
911 %************************************************************************
913 The definition of @_substrPS@ is essentially:
914 @take (end - begin + 1) (drop begin str)@.
917 substrPS :: PackedString -> Int -> Int -> PackedString
918 substrPS ps (I# begin) (I# end) = substrPS# ps begin end
922 = error "substrPS: bounds out of range"
924 | s >=# len || result_len# <=# 0#
929 new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte!
931 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
933 let has_null = byteArrayHasNUL# frozen# result_len# in
935 return (PS frozen# result_len# has_null)
940 result_len# = (if e <# len then (e +# 1#) else len) -# s
941 result_len = I# result_len#
943 -----------------------
944 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
947 | idx ==# result_len#
948 = write_ps_array arr_in# idx (chr# 0#) >>
952 ch = indexPS# ps (s +# idx)
954 write_ps_array arr_in# idx ch >>
955 fill_in arr_in# (idx +# 1#)
958 (Very :-) ``Specialised'' versions of some CharArray things...
961 new_ps_array :: Int# -> ST s (MutableByteArray s Int)
962 write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s ()
963 freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int)
965 new_ps_array size = ST $ \ (S# s) ->
966 case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
967 (MutableByteArray bot barr#, S# s2#)}
969 bot = error "new_ps_array"
971 write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ (S# s#) ->
972 case writeCharArray# barr# n ch s# of { s2# ->
975 -- same as unsafeFreezeByteArray
976 freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ (S# s#) ->
977 case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
978 (ByteArray ixs frozen#, S# s2#) }
982 %*********************************************************
984 \subsection{Packing and unpacking C strings}
986 %*********************************************************
989 unpackCString :: Addr -> [Char]
991 -- Calls to the next four are injected by the compiler itself,
992 -- to deal with literal strings
993 packCString# :: [Char] -> ByteArray#
994 unpackCString# :: Addr# -> [Char]
995 unpackCString2# :: Addr# -> Int# -> [Char]
996 unpackAppendCString# :: Addr# -> [Char] -> [Char]
997 unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
999 packCString# str = case (packString str) of { PS bytes _ _ -> bytes }
1001 unpackCString (A# addr) = unpackCString# addr
1007 | ch `eqChar#` '\0'# = []
1008 | otherwise = C# ch : unpack (nh +# 1#)
1010 ch = indexCharOffAddr# addr nh
1012 unpackCString2# addr len
1013 -- This one is called by the compiler to unpack literal strings with NULs in them; rare.
1014 = unpackPS (packCBytes (I# len) (A# addr))
1016 unpackAppendCString# addr rest
1020 | ch `eqChar#` '\0'# = rest
1021 | otherwise = C# ch : unpack (nh +# 1#)
1023 ch = indexCharOffAddr# addr nh
1025 unpackFoldrCString# addr f z
1029 | ch `eqChar#` '\0'# = z
1030 | otherwise = C# ch `f` unpack (nh +# 1#)
1032 ch = indexCharOffAddr# addr nh
1035 cStringToPS :: Addr -> PackedString
1036 cStringToPS (A# a#) = -- the easy one; we just believe the caller
1039 len = case (strlen# a#) of { I# x -> x }
1041 packBytesForC :: [Char] -> ByteArray Int
1042 packBytesForC str = psToByteArray (packString str)
1044 psToByteArrayST :: [Char] -> ST s (ByteArray Int)
1045 psToByteArrayST str =
1046 packStringST str >>= \ (PS bytes n has_null) ->
1047 --later? ASSERT(not has_null)
1048 return (ByteArray (0, I# (n -# 1#)) bytes)
1050 packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int)
1051 packNBytesForCST len str =
1052 packNCharsST len str >>= \ (PS bytes n has_null) ->
1053 return (ByteArray (0, I# (n -# 1#)) bytes)
1055 packCBytes :: Int -> Addr -> PackedString
1056 packCBytes len addr = runST (packCBytesST len addr)
1058 packCBytesST :: Int -> Addr -> ST s PackedString
1059 packCBytesST len@(I# length#) (A# addr) =
1061 allocate an array that will hold the string
1062 (not forgetting the NUL byte at the end)
1064 new_ps_array (length# +# 1#) >>= \ ch_array ->
1065 -- fill in packed string from "addr"
1066 fill_in ch_array 0# >>
1067 -- freeze the puppy:
1068 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
1069 let has_null = byteArrayHasNUL# frozen# length# in
1070 return (PS frozen# length# has_null)
1072 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
1076 = write_ps_array arr_in# idx (chr# 0#) >>
1079 = case (indexCharOffAddr# addr idx) of { ch ->
1080 write_ps_array arr_in# idx ch >>
1081 fill_in arr_in# (idx +# 1#) }