2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1996
4 \section{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
11 {-# OPTIONS -fno-implicit-prelude #-}
14 PackedString, -- abstract
16 -- Creating the beasts
17 packString, -- :: [Char] -> PackedString
18 packStringST, -- :: [Char] -> ST s PackedString
20 byteArrayToPS, -- :: ByteArray Int -> PackedString
21 unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString
23 psToByteArray, -- :: PackedString -> ByteArray Int
24 psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int)
26 unpackPS, -- :: PackedString -> [Char]
28 hPutPS, -- :: Handle -> PackedString -> IO ()
29 putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type
30 getPS, -- :: FILE -> Int -> PrimIO PackedString
32 nilPS, -- :: PackedString
33 consPS, -- :: Char -> PackedString -> PackedString
34 headPS, -- :: PackedString -> Char
35 tailPS, -- :: PackedString -> PackedString
36 nullPS, -- :: PackedString -> Bool
37 appendPS, -- :: PackedString -> PackedString -> PackedString
38 lengthPS, -- :: PackedString -> Int
39 {- 0-origin indexing into the string -}
40 indexPS, -- :: PackedString -> Int -> Char
41 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
42 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
43 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
44 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
45 takePS, -- :: Int -> PackedString -> PackedString
46 dropPS, -- :: Int -> PackedString -> PackedString
47 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
48 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
49 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
50 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
51 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
52 linesPS, -- :: PackedString -> [PackedString]
54 wordsPS, -- :: PackedString -> [PackedString]
55 reversePS, -- :: PackedString -> PackedString
56 splitPS, -- :: Char -> PackedString -> [PackedString]
57 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
58 joinPS, -- :: PackedString -> [PackedString] -> PackedString
59 concatPS, -- :: [PackedString] -> PackedString
60 elemPS, -- :: Char -> PackedString -> Bool
63 Pluck out a piece of a PS start and end
64 chars you want; both 0-origin-specified
66 substrPS, -- :: PackedString -> Int -> Int -> PackedString
70 -- Converting to C strings
72 unpackCString#, unpackCString2#, unpackAppendCString#, unpackFoldrCString#,
73 packCBytesST, unpackCString
76 import {-# SOURCE #-} IOBase ( error )
86 %************************************************************************
88 \subsection{@PackedString@ type declaration}
90 %************************************************************************
94 = PS ByteArray# -- the bytes
95 Int# -- length (*not* including NUL at the end)
96 Bool -- True <=> contains a NUL
97 | CPS Addr# -- pointer to the (null-terminated) bytes in C land
98 Int# -- length, as per strlen
99 -- definitely doesn't contain a NUL
101 instance Eq PackedString where
102 x == y = compare x y == EQ
103 x /= y = compare x y /= EQ
105 instance Ord PackedString where
107 x <= y = compare x y /= GT
108 x < y = compare x y == LT
109 x >= y = compare x y /= LT
110 x > y = compare x y == GT
111 max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x }
112 min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y }
114 --instance Read PackedString: ToDo
116 instance Show PackedString where
117 showsPrec p ps r = showsPrec p (unpackPS ps) r
118 showList = showList__ (showsPrec 0)
122 %************************************************************************
124 \subsection{@PackedString@ instances}
126 %************************************************************************
128 We try hard to make this go fast:
130 comparePS :: PackedString -> PackedString -> Ordering
132 comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2)
133 | not has_null1 && not has_null2
134 = unsafePerformPrimIO (
135 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
138 else if res ==# 0# then EQ
142 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
143 ba2 = ByteArray (0, I# (len2 -# 1#)) bs2
145 comparePS (PS bs1 len1 has_null1) (CPS bs2 len2)
147 = unsafePerformPrimIO (
148 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
151 else if res ==# 0# then EQ
155 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
158 comparePS (CPS bs1 len1) (CPS bs2 len2)
159 = unsafePerformPrimIO (
160 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
163 else if res ==# 0# then EQ
170 comparePS a@(CPS _ _) b@(PS _ _ has_null2)
172 = -- try them the other way 'round
173 case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
175 comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
178 end1 = lengthPS# ps1 -# 1#
179 end2 = lengthPS# ps2 -# 1#
182 = if char# ># end1 then
183 if char# ># end2 then -- both strings ran out at once
185 else -- ps1 ran out before ps2
187 else if char# ># end2 then
188 GT -- ps2 ran out before ps1
191 ch1 = indexPS# ps1 char#
192 ch2 = indexPS# ps2 char#
194 if ch1 `eqChar#` ch2 then
195 looking_at (char# +# 1#)
196 else if ch1 `ltChar#` ch2 then LT
201 %************************************************************************
203 \subsection{Constructor functions}
205 %************************************************************************
207 Easy ones first. @packString@ requires getting some heap-bytes and
208 scribbling stuff into them.
211 nilPS :: PackedString
214 consPS :: Char -> PackedString -> PackedString
215 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
217 packString :: [Char] -> PackedString
218 packString str = runST (packStringST str)
220 packStringST :: [Char] -> ST s PackedString
222 let len = length str in
225 packNCharsST :: Int -> [Char] -> ST s PackedString
226 packNCharsST len@(I# length#) str =
228 allocate an array that will hold the string
229 (not forgetting the NUL byte at the end)
231 new_ps_array (length# +# 1#) >>= \ ch_array ->
232 -- fill in packed string from "str"
233 fill_in ch_array 0# str >>
235 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
236 let has_null = byteArrayHasNUL# frozen# length# in
237 return (PS frozen# length# has_null)
239 fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s ()
240 fill_in arr_in# idx [] =
241 write_ps_array arr_in# idx (chr# 0#) >>
244 fill_in arr_in# idx (C# c : cs) =
245 write_ps_array arr_in# idx c >>
246 fill_in arr_in# (idx +# 1#) cs
248 byteArrayToPS :: ByteArray Int -> PackedString
249 byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) =
255 else ((index ixs ix_end) + 1)
258 PS frozen# n# (byteArrayHasNUL# frozen# n#)
260 unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString
261 unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#)
262 = PS frozen# n# (byteArrayHasNUL# frozen# n#)
264 psToByteArray :: PackedString -> ByteArray Int
265 psToByteArray (PS bytes n has_null)
266 = ByteArray (0, I# (n -# 1#)) bytes
268 psToByteArray (CPS addr len#)
271 byte_array_form = packCBytes len (A# addr)
273 case byte_array_form of { PS bytes _ _ ->
274 ByteArray (0, len - 1) bytes }
277 %************************************************************************
279 \subsection{Destructor functions (taking @PackedStrings@ apart)}
281 %************************************************************************
284 -- OK, but this code gets *hammered*:
286 -- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ]
288 unpackPS :: PackedString -> [Char]
289 unpackPS (PS bytes len has_null)
294 | otherwise = C# ch : unpack (nh +# 1#)
296 ch = indexCharArray# bytes nh
298 unpackPS (CPS addr len)
302 | ch `eqChar#` '\0'# = []
303 | otherwise = C# ch : unpack (nh +# 1#)
305 ch = indexCharOffAddr# addr nh
308 Output a packed string via a handle:
312 hPutPS :: Handle -> PackedString -> IO ()
323 _readHandle handle >>= \ htype ->
325 _ErrorHandle ioError ->
326 _writeHandle handle htype >>
329 _writeHandle handle htype >>
330 failWith (IllegalOperation "handle is closed")
331 _SemiClosedHandle _ _ ->
332 _writeHandle handle htype >>
333 failWith (IllegalOperation "handle is closed")
335 _writeHandle handle htype >>
336 failWith (IllegalOperation "handle is not open for writing")
338 _getBufferMode other >>= \ other ->
339 (case _bufferMode other of
340 Just LineBuffering ->
341 writeLines (_filePtr other)
342 Just (BlockBuffering (Just size)) ->
343 writeBlocks (_filePtr other) size
344 Just (BlockBuffering Nothing) ->
345 writeBlocks (_filePtr other) ``BUFSIZ''
346 _ -> -- Nothing is treated pessimistically as NoBuffering
347 writeChars (_filePtr other) 0#
349 _writeHandle handle (_markHandle other) >>
353 _constructError "hPutStr" >>= \ ioError ->
359 writeLines :: Addr -> PrimIO Bool
360 writeLines = writeChunks ``BUFSIZ'' True
362 writeBlocks :: Addr -> Int -> PrimIO Bool
363 writeBlocks fp size = writeChunks size False fp
366 The breaking up of output into lines along \n boundaries
367 works fine as long as there are newlines to split by.
368 Avoid the splitting up into lines altogether (doesn't work
369 for overly long lines like the stuff that showsPrec instances
370 normally return). Instead, we split them up into fixed size
371 chunks before blasting them off to the Real World.
373 Hacked to avoid multiple passes over the strings - unsightly, but
374 a whole lot quicker. -- SOF 3/96
377 writeChunks :: Int -> Bool -> Addr -> PrimIO Bool
378 writeChunks (I# bufLen) chopOnNewLine fp =
379 newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) ->
381 shoveString :: Int# -> Int# -> PrimIO Bool
383 | i ==# pslen = -- end of string
387 _ccall_ writeFile arr fp (I# n) >>= \rc ->
391 case writeCharArray# arr# n (indexPS# ps i) s# of
393 {- Flushing lines - should we bother? -}
394 (if n ==# bufLen then
395 _ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc ->
397 shoveString 0# (i +# 1#)
401 shoveString (n +# 1#) (i +# 1#)) (S# s1#))
405 writeChars :: Addr -> Int# -> PrimIO Bool
407 | i ==# pslen = return True
409 _ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc ->
411 writeChars fp (i +# 1#)
415 ---------------------------------------------
417 putPS :: _FILE -> PackedString -> PrimIO ()
418 putPS file ps@(PS bytes len has_null)
423 byte_array = ByteArray (0, I# (len -# 1#)) bytes
425 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
426 >>= \ (I# written) ->
427 if written ==# len then
430 error "putPS: fwrite failed!\n"
432 putPS file (CPS addr len)
436 = _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} ->
440 The dual to @_putPS@, note that the size of the chunk specified
441 is the upper bound of the size of the chunk returned.
444 getPS :: _FILE -> Int -> PrimIO PackedString
445 getPS file len@(I# len#)
446 | len# <=# 0# = return nilPS -- I'm being kind here.
448 -- Allocate an array for system call to store its bytes into.
449 new_ps_array len# >>= \ ch_arr ->
450 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
452 byte_array = ByteArray (0, I# len#) frozen#
454 _ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) ->
455 if read# ==# 0# then -- EOF or other error
456 error "getPS: EOF reached or other error"
459 The system call may not return the number of
460 bytes requested. Instead of failing with an error
461 if the number of bytes read is less than requested,
462 a packed string containing the bytes we did manage
463 to snarf is returned.
466 has_null = byteArrayHasNUL# frozen# read#
468 return (PS frozen# read# has_null)
472 %************************************************************************
474 \subsection{List-mimicking functions for @PackedStrings@}
476 %************************************************************************
478 First, the basic functions that do look into the representation;
479 @indexPS@ is the most important one.
482 lengthPS :: PackedString -> Int
483 lengthPS ps = I# (lengthPS# ps)
485 {-# INLINE lengthPS# #-}
487 lengthPS# (PS _ i _) = i
488 lengthPS# (CPS _ i) = i
490 {-# INLINE strlen# #-}
492 strlen# :: Addr# -> Int
494 = unsafePerformPrimIO (
495 _ccall_ strlen (A# a) >>= \ len@(I# _) ->
499 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
500 byteArrayHasNUL# bs len
501 = unsafePerformPrimIO (
502 _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) ->
504 if res ==# 0# then False else True
507 ba = ByteArray (0, I# (len -# 1#)) bs
509 -----------------------
511 indexPS :: PackedString -> Int -> Char
512 indexPS ps (I# n) = C# (indexPS# ps n)
514 {-# INLINE indexPS# #-}
516 indexPS# (PS bs i _) n
517 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
521 = indexCharOffAddr# a n
524 Now, the rest of the functions can be defined without digging
525 around in the representation.
528 headPS :: PackedString -> Char
530 | nullPS ps = error "headPS: head []"
531 | otherwise = C# (indexPS# ps 0#)
533 tailPS :: PackedString -> PackedString
535 | len <=# 0# = error "tailPS: tail []"
537 | otherwise = substrPS# ps 1# (len -# 1#)
541 nullPS :: PackedString -> Bool
542 nullPS (PS _ i _) = i ==# 0#
543 nullPS (CPS _ i) = i ==# 0#
545 {- (ToDo: some non-lousy implementations...)
547 Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys)
550 appendPS :: PackedString -> PackedString -> PackedString
554 | otherwise = concatPS [xs,ys]
556 {- OLD: mapPS f xs = packString (map f (unpackPS xs)) -}
558 mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-}
564 new_ps_array (length +# 1#) >>= \ ps_arr ->
565 whizz ps_arr length 0# >>
566 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
567 let has_null = byteArrayHasNUL# frozen# length in
568 return (PS frozen# length has_null))
570 length = lengthPS# xs
572 whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
575 = write_ps_array arr# i (chr# 0#) >>
581 write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >>
582 whizz arr# (n -# 1#) (i +# 1#)
584 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
590 Filtering proceeds as follows:
592 * traverse the list, applying the pred. to each element,
593 remembering the positions where it was satisfied.
595 Encode these positions using a run-length encoding of the gaps
596 between the matching positions.
598 * Allocate a MutableByteArray in the heap big enough to hold
599 all the matched entries, and copy the elements that matched over.
601 A better solution that merges the scan© passes into one,
602 would be to copy the filtered elements over into a growable
603 buffer. No such operation currently supported over
604 MutableByteArrays (could of course use malloc&realloc)
605 But, this solution may in the case of repeated realloc's
606 be worse than the current solution.
610 (rle,len_filtered) = filter_ps len# 0# 0# []
611 len_filtered# = case len_filtered of { I# x# -> x#}
613 if len# ==# len_filtered# then
614 {- not much filtering as everything passed through. -}
616 else if len_filtered# ==# 0# then
619 new_ps_array (len_filtered# +# 1#) >>= \ ps_arr ->
620 copy_arr ps_arr rle 0# 0# >>
621 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
622 let has_null = byteArrayHasNUL# frozen# len_filtered# in
623 return (PS frozen# len_filtered# has_null))
627 matchOffset :: Int# -> [Char] -> (Int,[Char])
628 matchOffset off [] = (I# off,[])
629 matchOffset off (C# c:cs) =
634 if x==# 0# then -- escape code, add 255#
639 copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s ()
640 copy_arr arr# [_] _ _ = return ()
641 copy_arr arr# ls n i =
643 (x,ls') = matchOffset 0# ls
644 n' = n +# (case x of { (I# x#) -> x#}) -# 1#
647 write_ps_array arr# i ch >>
648 copy_arr arr# ls' (n' +# 1#) (i +# 1#)
650 esc :: Int# -> Int# -> [Char] -> [Char]
651 esc v 0# ls = (C# (chr# v)):ls
652 esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls)
654 filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int)
655 filter_ps n hits run acc
658 escs = run `quotInt#` 255#
659 v = run `remInt#` 255#
661 (esc (v +# 1#) escs acc, I# hits)
669 escs = run `quotInt#` 255#
670 v = run `remInt#` 255#
671 acc' = esc (v +# 1#) escs acc
673 filter_ps n' (hits +# 1#) 0# acc'
675 filter_ps n' hits (run +# 1#) acc
678 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
687 --whizzLR :: a -> Int# -> a
690 | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#)
693 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
702 --whizzRL :: a -> Int# -> a
705 | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#)
707 takePS :: Int -> PackedString -> PackedString
710 | otherwise = substrPS# ps 0# (n -# 1#)
712 dropPS :: Int -> PackedString -> PackedString
715 | otherwise = substrPS# ps n (lengthPS# ps -# 1#)
719 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
720 splitAtPS n ps = (takePS n ps, dropPS n ps)
722 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
725 break_pt = char_pos_that_dissatisfies
731 if break_pt ==# 0# then
734 substrPS# ps 0# (break_pt -# 1#)
736 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
740 break_pt = char_pos_that_dissatisfies
746 if len ==# break_pt then
749 substrPS# ps break_pt (len -# 1#)
751 elemPS :: Char -> PackedString -> Bool
755 break_pt = first_char_pos_that_satisfies
763 char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
765 char_pos_that_dissatisfies p ps len pos
766 | pos >=# len = pos -- end
767 | p (indexPS# ps pos) = -- predicate satisfied; keep going
768 char_pos_that_dissatisfies p ps len (pos +# 1#)
769 | otherwise = pos -- predicate not satisfied
771 first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
772 first_char_pos_that_satisfies p ps len pos
773 | pos >=# len = pos -- end
774 | p (indexPS# ps pos) = pos -- got it!
775 | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#)
777 -- ToDo: could certainly go quicker
778 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
779 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
781 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
782 breakPS p ps = spanPS (not . p) ps
784 linesPS :: PackedString -> [PackedString]
785 linesPS ps = splitPS '\n' ps
787 wordsPS :: PackedString -> [PackedString]
788 wordsPS ps = splitWithPS isSpace ps
790 reversePS :: PackedString -> PackedString
792 if nullPS ps then -- don't create stuff unnecessarily.
796 new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte!
797 fill_in arr# (length -# 1#) 0# >>
798 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
799 let has_null = byteArrayHasNUL# frozen# length in
800 return (PS frozen# length has_null))
802 length = lengthPS# ps
804 fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
805 fill_in arr_in# n i =
809 write_ps_array arr_in# i ch >>
811 write_ps_array arr_in# (i +# 1#) (chr# 0#) >>
814 fill_in arr_in# (n -# 1#) (i +# 1#)
816 concatPS :: [PackedString] -> PackedString
820 tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x }
821 tot_len = I# tot_len#
824 new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte!
825 packum arr# pss 0# >>
826 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
828 let has_null = byteArrayHasNUL# frozen# tot_len# in
830 return (PS frozen# tot_len# has_null)
833 packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s ()
836 = write_ps_array arr pos (chr# 0#) >>
838 packum arr (ps : pss) pos
839 = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) ->
840 packum arr pss next_pos
842 fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int
844 fill arr arr_i ps ps_i ps_len
846 = return (I# (arr_i +# ps_len))
848 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >>
849 fill arr arr_i ps (ps_i +# 1#) ps_len
851 ------------------------------------------------------------
852 joinPS :: PackedString -> [PackedString] -> PackedString
853 joinPS filler pss = concatPS (splice pss)
857 splice (x:y:xs) = x:filler:splice (y:xs)
859 -- ToDo: the obvious generalisation
861 Some properties that hold:
864 where False = any (map (x `elemPS`) ls')
865 False = any (map (nullPS) ls')
867 * all x's have been chopped out.
868 * no empty PackedStrings in returned list. A conseq.
873 * joinPS (packString [x]) (_splitPS x ls) = ls
877 splitPS :: Char -> PackedString -> [PackedString]
878 splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch)
880 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
881 splitWithPS pred ps =
891 first_char_pos_that_satisfies
897 if break_pt ==# n then -- immediate match, no substring to cut out.
898 splitify (break_pt +# 1#)
900 substrPS# ps n (break_pt -# 1#): -- leave out the matching character
901 splitify (break_pt +# 1#)
904 %************************************************************************
906 \subsection{Local utility functions}
908 %************************************************************************
910 The definition of @_substrPS@ is essentially:
911 @take (end - begin + 1) (drop begin str)@.
914 substrPS :: PackedString -> Int -> Int -> PackedString
915 substrPS ps (I# begin) (I# end) = substrPS# ps begin end
919 = error "substrPS: bounds out of range"
921 | s >=# len || result_len# <=# 0#
926 new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte!
928 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
930 let has_null = byteArrayHasNUL# frozen# result_len# in
932 return (PS frozen# result_len# has_null)
937 result_len# = (if e <# len then (e +# 1#) else len) -# s
938 result_len = I# result_len#
940 -----------------------
941 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
944 | idx ==# result_len#
945 = write_ps_array arr_in# idx (chr# 0#) >>
949 ch = indexPS# ps (s +# idx)
951 write_ps_array arr_in# idx ch >>
952 fill_in arr_in# (idx +# 1#)
955 (Very :-) ``Specialised'' versions of some CharArray things...
958 new_ps_array :: Int# -> ST s (MutableByteArray s Int)
959 write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s ()
960 freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int)
962 new_ps_array size = ST $ \ (S# s) ->
963 case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
964 (MutableByteArray bot barr#, S# s2#)}
966 bot = error "new_ps_array"
968 write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ (S# s#) ->
969 case writeCharArray# barr# n ch s# of { s2# ->
972 -- same as unsafeFreezeByteArray
973 freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ (S# s#) ->
974 case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
975 (ByteArray ixs frozen#, S# s2#) }
979 %*********************************************************
981 \subsection{Packing and unpacking C strings}
983 %*********************************************************
986 unpackCString :: Addr -> [Char]
988 -- Calls to the next four are injected by the compiler itself,
989 -- to deal with literal strings
990 packCString# :: [Char] -> ByteArray#
991 unpackCString# :: Addr# -> [Char]
992 unpackCString2# :: Addr# -> Int# -> [Char]
993 unpackAppendCString# :: Addr# -> [Char] -> [Char]
994 unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
996 packCString# str = case (packString str) of { PS bytes _ _ -> bytes }
998 unpackCString (A# addr) = unpackCString# addr
1004 | ch `eqChar#` '\0'# = []
1005 | otherwise = C# ch : unpack (nh +# 1#)
1007 ch = indexCharOffAddr# addr nh
1009 unpackCString2# addr len
1010 -- This one is called by the compiler to unpack literal strings with NULs in them; rare.
1011 = unpackPS (packCBytes (I# len) (A# addr))
1013 unpackAppendCString# addr rest
1017 | ch `eqChar#` '\0'# = rest
1018 | otherwise = C# ch : unpack (nh +# 1#)
1020 ch = indexCharOffAddr# addr nh
1022 unpackFoldrCString# addr f z
1026 | ch `eqChar#` '\0'# = z
1027 | otherwise = C# ch `f` unpack (nh +# 1#)
1029 ch = indexCharOffAddr# addr nh
1032 cStringToPS :: Addr -> PackedString
1033 cStringToPS (A# a#) = -- the easy one; we just believe the caller
1036 len = case (strlen# a#) of { I# x -> x }
1038 packBytesForC :: [Char] -> ByteArray Int
1039 packBytesForC str = psToByteArray (packString str)
1041 psToByteArrayST :: [Char] -> ST s (ByteArray Int)
1042 psToByteArrayST str =
1043 packStringST str >>= \ (PS bytes n has_null) ->
1044 --later? ASSERT(not has_null)
1045 return (ByteArray (0, I# (n -# 1#)) bytes)
1047 packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int)
1048 packNBytesForCST len str =
1049 packNCharsST len str >>= \ (PS bytes n has_null) ->
1050 return (ByteArray (0, I# (n -# 1#)) bytes)
1052 packCBytes :: Int -> Addr -> PackedString
1053 packCBytes len addr = runST (packCBytesST len addr)
1055 packCBytesST :: Int -> Addr -> ST s PackedString
1056 packCBytesST len@(I# length#) (A# addr) =
1058 allocate an array that will hold the string
1059 (not forgetting the NUL byte at the end)
1061 new_ps_array (length# +# 1#) >>= \ ch_array ->
1062 -- fill in packed string from "addr"
1063 fill_in ch_array 0# >>
1064 -- freeze the puppy:
1065 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
1066 let has_null = byteArrayHasNUL# frozen# length# in
1067 return (PS frozen# length# has_null)
1069 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
1073 = write_ps_array arr_in# idx (chr# 0#) >>
1076 = case (indexCharOffAddr# addr idx) of { ch ->
1077 write_ps_array arr_in# idx ch >>
1078 fill_in arr_in# (idx +# 1#) }