2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1996
4 \section{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
12 PackedString, -- abstract
14 -- Creating the beasts
15 packString, -- :: [Char] -> PackedString
16 packStringST, -- :: [Char] -> ST s PackedString
17 packCBytesST, -- :: Int -> Addr -> ST s PackedString
19 byteArrayToPS, -- :: ByteArray Int -> PackedString
20 cByteArrayToPS, -- :: ByteArray Int -> PackedString
21 unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString
23 psToByteArray, -- :: PackedString -> ByteArray Int
24 psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int)
25 psToCString, -- :: PackedString -> Addr
26 isCString, -- :: PackedString -> Bool
28 unpackPS, -- :: PackedString -> [Char]
29 unpackNBytesPS, -- :: PackedString -> Int -> [Char]
30 unpackPSIO, -- :: PackedString -> IO [Char]
33 hPutPS, -- :: Handle -> PackedString -> IO ()
34 putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type
35 getPS, -- :: FILE -> Int -> PrimIO PackedString
37 nilPS, -- :: PackedString
38 consPS, -- :: Char -> PackedString -> PackedString
39 headPS, -- :: PackedString -> Char
40 tailPS, -- :: PackedString -> PackedString
41 nullPS, -- :: PackedString -> Bool
42 appendPS, -- :: PackedString -> PackedString -> PackedString
43 lengthPS, -- :: PackedString -> Int
44 {- 0-origin indexing into the string -}
45 indexPS, -- :: PackedString -> Int -> Char
46 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
47 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
48 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
49 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
50 takePS, -- :: Int -> PackedString -> PackedString
51 dropPS, -- :: Int -> PackedString -> PackedString
52 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
53 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
54 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
55 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
56 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
57 linesPS, -- :: PackedString -> [PackedString]
59 wordsPS, -- :: PackedString -> [PackedString]
60 reversePS, -- :: PackedString -> PackedString
61 splitPS, -- :: Char -> PackedString -> [PackedString]
62 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
63 joinPS, -- :: PackedString -> [PackedString] -> PackedString
64 concatPS, -- :: [PackedString] -> PackedString
65 elemPS, -- :: Char -> PackedString -> Bool
68 Pluck out a piece of a PS start and end
69 chars you want; both 0-origin-specified
71 substrPS, -- :: PackedString -> Int -> Int -> PackedString
75 -- Converting to C strings
88 import PrelBase ( showList__ ) -- ToDo: better
91 import PrelArr ( StateAndMutableByteArray#(..) , StateAndByteArray#(..) )
94 import IOExts ( unsafePerformIO )
101 %************************************************************************
103 \subsection{@PackedString@ type declaration}
105 %************************************************************************
109 = PS ByteArray# -- the bytes
110 Int# -- length (*not* including NUL at the end)
111 Bool -- True <=> contains a NUL
112 | CPS Addr# -- pointer to the (null-terminated) bytes in C land
113 Int# -- length, as per strlen
114 -- definitely doesn't contain a NUL
116 instance Eq PackedString where
117 x == y = compare x y == EQ
118 x /= y = compare x y /= EQ
120 instance Ord PackedString where
122 x <= y = compare x y /= GT
123 x < y = compare x y == LT
124 x >= y = compare x y /= LT
125 x > y = compare x y == GT
126 max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x }
127 min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y }
129 --instance Read PackedString: ToDo
131 instance Show PackedString where
132 showsPrec p ps r = showsPrec p (unpackPS ps) r
133 showList = showList__ (showsPrec 0)
137 %************************************************************************
139 \subsection{@PackedString@ instances}
141 %************************************************************************
143 We try hard to make this go fast:
145 comparePS :: PackedString -> PackedString -> Ordering
147 comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2)
148 | not has_null1 && not has_null2
150 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
153 else if res ==# 0# then EQ
157 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
158 ba2 = ByteArray (0, I# (len2 -# 1#)) bs2
160 comparePS (PS bs1 len1 has_null1) (CPS bs2 len2)
163 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
166 else if res ==# 0# then EQ
170 ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
173 comparePS (CPS bs1 len1) (CPS bs2 len2)
175 _ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
178 else if res ==# 0# then EQ
185 comparePS a@(CPS _ _) b@(PS _ _ has_null2)
187 = -- try them the other way 'round
188 case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
190 comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
193 end1 = lengthPS# ps1 -# 1#
194 end2 = lengthPS# ps2 -# 1#
197 = if char# ># end1 then
198 if char# ># end2 then -- both strings ran out at once
200 else -- ps1 ran out before ps2
202 else if char# ># end2 then
203 GT -- ps2 ran out before ps1
206 ch1 = indexPS# ps1 char#
207 ch2 = indexPS# ps2 char#
209 if ch1 `eqChar#` ch2 then
210 looking_at (char# +# 1#)
211 else if ch1 `ltChar#` ch2 then LT
216 %************************************************************************
218 \subsection{Constructor functions}
220 %************************************************************************
222 Easy ones first. @packString@ requires getting some heap-bytes and
223 scribbling stuff into them.
226 nilPS :: PackedString
229 consPS :: Char -> PackedString -> PackedString
230 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
232 packString :: [Char] -> PackedString
233 packString str = runST (packStringST str)
235 packStringST :: [Char] -> ST s PackedString
237 let len = length str in
240 packNCharsST :: Int -> [Char] -> ST s PackedString
241 packNCharsST len@(I# length#) str =
243 allocate an array that will hold the string
244 (not forgetting the NUL byte at the end)
246 new_ps_array (length# +# 1#) >>= \ ch_array ->
247 -- fill in packed string from "str"
248 fill_in ch_array 0# str >>
250 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
251 let has_null = byteArrayHasNUL# frozen# length# in
252 return (PS frozen# length# has_null)
254 fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s ()
255 fill_in arr_in# idx [] =
256 write_ps_array arr_in# idx (chr# 0#) >>
259 fill_in arr_in# idx (C# c : cs) =
260 write_ps_array arr_in# idx c >>
261 fill_in arr_in# (idx +# 1#) cs
263 byteArrayToPS :: ByteArray Int -> PackedString
264 byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) =
270 else ((index ixs ix_end) + 1)
273 PS frozen# n# (byteArrayHasNUL# frozen# n#)
275 -- byteArray is zero-terminated, make everything upto it
277 cByteArrayToPS :: ByteArray Int -> PackedString
278 cByteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) =
284 else ((index ixs ix_end) + 1)
290 | ch# `eqChar#` '\0'# = i# -- everything upto the sentinel
291 | otherwise = findNull (i# +# 1#)
293 ch# = indexCharArray# frozen# i#
295 PS frozen# len# False
297 unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString
298 unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#)
299 = PS frozen# n# (byteArrayHasNUL# frozen# n#)
301 psToByteArray :: PackedString -> ByteArray Int
302 psToByteArray (PS bytes n has_null)
303 = ByteArray (0, I# (n -# 1#)) bytes
305 psToByteArray (CPS addr len#)
308 byte_array_form = packCBytes len (A# addr)
310 case byte_array_form of { PS bytes _ _ ->
311 ByteArray (0, len - 1) bytes }
313 -- isCString is useful when passing PackedStrings to the
314 -- outside world, and need to figure out whether you can
315 -- pass it as an Addr or ByteArray.
317 isCString :: PackedString -> Bool
318 isCString (CPS _ _ ) = True
321 psToCString :: PackedString -> Addr
322 psToCString (CPS addr _) = (A# addr)
323 psToCString (PS bytes n# has_null) =
325 stuff <- _ccall_ malloc ((I# n#) * (``sizeof(char)''))
328 | n# ==# 0# = return ()
330 let ch# = indexCharArray# bytes i#
331 writeCharOffAddr stuff (I# i#) (C# ch#)
332 fill_in (n# -# 1#) (i# +# 1#)
338 %************************************************************************
340 \subsection{Destructor functions (taking @PackedStrings@ apart)}
342 %************************************************************************
345 -- OK, but this code gets *hammered*:
347 -- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ]
349 unpackPS :: PackedString -> [Char]
350 unpackPS (PS bytes len has_null)
355 | otherwise = C# ch : unpack (nh +# 1#)
357 ch = indexCharArray# bytes nh
359 unpackPS (CPS addr len)
363 | ch `eqChar#` '\0'# = []
364 | otherwise = C# ch : unpack (nh +# 1#)
366 ch = indexCharOffAddr# addr nh
368 unpackNBytesPS :: PackedString -> Int -> [Char]
369 unpackNBytesPS ps len@(I# l#)
370 | len < 0 = error ("PackedString.unpackNBytesPS: negative length "++ show len)
373 PS bytes len# has_null -> unpackPS (PS bytes (min# len# l#) has_null)
374 CPS a len# -> unpackPS (CPS a (min# len# l#))
380 unpackPSIO :: PackedString -> IO String
381 unpackPSIO ps@(PS bytes len has_null) = return (unpackPS ps)
382 unpackPSIO (CPS addr len)
386 ch <- readCharOffAddr (A# addr) (I# nh)
390 ls <- unpack (nh +# 1#)
395 Output a packed string via a handle:
399 hPutPS :: Handle -> PackedString -> IO ()
410 _readHandle handle >>= \ htype ->
412 _ErrorHandle ioError ->
413 _writeHandle handle htype >>
416 _writeHandle handle htype >>
417 failWith (IllegalOperation "handle is closed")
418 _SemiClosedHandle _ _ ->
419 _writeHandle handle htype >>
420 failWith (IllegalOperation "handle is closed")
422 _writeHandle handle htype >>
423 failWith (IllegalOperation "handle is not open for writing")
425 _getBufferMode other >>= \ other ->
426 (case _bufferMode other of
427 Just LineBuffering ->
428 writeLines (_filePtr other)
429 Just (BlockBuffering (Just size)) ->
430 writeBlocks (_filePtr other) size
431 Just (BlockBuffering Nothing) ->
432 writeBlocks (_filePtr other) ``BUFSIZ''
433 _ -> -- Nothing is treated pessimistically as NoBuffering
434 writeChars (_filePtr other) 0#
436 _writeHandle handle (_markHandle other) >>
440 _constructError "hPutStr" >>= \ ioError ->
446 writeLines :: Addr -> IO Bool
447 writeLines = writeChunks ``BUFSIZ'' True
449 writeBlocks :: Addr -> Int -> IO Bool
450 writeBlocks fp size = writeChunks size False fp
453 The breaking up of output into lines along \n boundaries
454 works fine as long as there are newlines to split by.
455 Avoid the splitting up into lines altogether (doesn't work
456 for overly long lines like the stuff that showsPrec instances
457 normally return). Instead, we split them up into fixed size
458 chunks before blasting them off to the Real World.
460 Hacked to avoid multiple passes over the strings - unsightly, but
461 a whole lot quicker. -- SOF 3/96
464 writeChunks :: Int -> Bool -> Addr -> IO Bool
465 writeChunks (I# bufLen) chopOnNewLine fp =
466 newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) ->
468 shoveString :: Int# -> Int# -> IO Bool
470 | i ==# pslen = -- end of string
474 _ccall_ writeFile arr fp (I# n) >>= \rc ->
478 case writeCharArray# arr# n (indexPS# ps i) s# of
480 {- Flushing lines - should we bother? -}
481 (if n ==# bufLen then
482 _ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc ->
484 shoveString 0# (i +# 1#)
488 shoveString (n +# 1#) (i +# 1#)) (S# s1#))
492 writeChars :: Addr -> Int# -> IO Bool
494 | i ==# pslen = return True
496 _ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc ->
498 writeChars fp (i +# 1#)
502 ---------------------------------------------
504 putPS :: _FILE -> PackedString -> IO ()
505 putPS file ps@(PS bytes len has_null)
510 byte_array = ByteArray (0, I# (len -# 1#)) bytes
512 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
513 >>= \ (I# written) ->
514 if written ==# len then
517 error "putPS: fwrite failed!\n"
519 putPS file (CPS addr len)
523 = _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} ->
527 The dual to @_putPS@, note that the size of the chunk specified
528 is the upper bound of the size of the chunk returned.
531 getPS :: _FILE -> Int -> IO PackedString
532 getPS file len@(I# len#)
533 | len# <=# 0# = return nilPS -- I'm being kind here.
535 -- Allocate an array for system call to store its bytes into.
536 new_ps_array len# >>= \ ch_arr ->
537 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
539 byte_array = ByteArray (0, I# len#) frozen#
541 _ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) ->
542 if read# ==# 0# then -- EOF or other error
543 error "getPS: EOF reached or other error"
546 The system call may not return the number of
547 bytes requested. Instead of failing with an error
548 if the number of bytes read is less than requested,
549 a packed string containing the bytes we did manage
550 to snarf is returned.
553 has_null = byteArrayHasNUL# frozen# read#
555 return (PS frozen# read# has_null)
559 %************************************************************************
561 \subsection{List-mimicking functions for @PackedStrings@}
563 %************************************************************************
565 First, the basic functions that do look into the representation;
566 @indexPS@ is the most important one.
569 lengthPS :: PackedString -> Int
570 lengthPS ps = I# (lengthPS# ps)
572 {-# INLINE lengthPS# #-}
574 lengthPS# (PS _ i _) = i
575 lengthPS# (CPS _ i) = i
577 {-# INLINE strlen# #-}
579 strlen# :: Addr# -> Int
582 _ccall_ strlen (A# a) >>= \ len@(I# _) ->
586 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
587 byteArrayHasNUL# bs len
589 _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) ->
591 if res ==# 0# then False else True
594 ba = ByteArray (0, I# (len -# 1#)) bs
596 -----------------------
598 indexPS :: PackedString -> Int -> Char
599 indexPS ps (I# n) = C# (indexPS# ps n)
601 {-# INLINE indexPS# #-}
603 indexPS# (PS bs i _) n
604 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
608 = indexCharOffAddr# a n
611 Now, the rest of the functions can be defined without digging
612 around in the representation.
615 headPS :: PackedString -> Char
617 | nullPS ps = error "headPS: head []"
618 | otherwise = C# (indexPS# ps 0#)
620 tailPS :: PackedString -> PackedString
622 | len <=# 0# = error "tailPS: tail []"
624 | otherwise = substrPS# ps 1# (len -# 1#)
628 nullPS :: PackedString -> Bool
629 nullPS (PS _ i _) = i ==# 0#
630 nullPS (CPS _ i) = i ==# 0#
632 {- (ToDo: some non-lousy implementations...)
634 Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys)
637 appendPS :: PackedString -> PackedString -> PackedString
641 | otherwise = concatPS [xs,ys]
643 {- OLD: mapPS f xs = packString (map f (unpackPS xs)) -}
645 mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-}
651 new_ps_array (length +# 1#) >>= \ ps_arr ->
652 whizz ps_arr length 0# >>
653 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
654 let has_null = byteArrayHasNUL# frozen# length in
655 return (PS frozen# length has_null))
657 length = lengthPS# xs
659 whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
662 = write_ps_array arr# i (chr# 0#) >>
668 write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >>
669 whizz arr# (n -# 1#) (i +# 1#)
671 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
677 Filtering proceeds as follows:
679 * traverse the list, applying the pred. to each element,
680 remembering the positions where it was satisfied.
682 Encode these positions using a run-length encoding of the gaps
683 between the matching positions.
685 * Allocate a MutableByteArray in the heap big enough to hold
686 all the matched entries, and copy the elements that matched over.
688 A better solution that merges the scan© passes into one,
689 would be to copy the filtered elements over into a growable
690 buffer. No such operation currently supported over
691 MutableByteArrays (could of course use malloc&realloc)
692 But, this solution may in the case of repeated realloc's
693 be worse than the current solution.
697 (rle,len_filtered) = filter_ps (len# -# 1#) 0# 0# []
698 len_filtered# = case len_filtered of { I# x# -> x#}
700 if len# ==# len_filtered# then
701 {- not much filtering as everything passed through. -}
703 else if len_filtered# ==# 0# then
706 new_ps_array (len_filtered# +# 1#) >>= \ ps_arr ->
707 copy_arr ps_arr rle 0# 0# >>
708 freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
709 let has_null = byteArrayHasNUL# frozen# len_filtered# in
710 return (PS frozen# len_filtered# has_null))
714 matchOffset :: Int# -> [Char] -> (Int,[Char])
715 matchOffset off [] = (I# off,[])
716 matchOffset off (C# c:cs) =
721 if x==# 0# then -- escape code, add 255#
726 copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s ()
727 copy_arr arr# [_] _ _ = return ()
728 copy_arr arr# ls n i =
730 (x,ls') = matchOffset 0# ls
731 n' = n +# (case x of { (I# x#) -> x#}) -# 1#
734 write_ps_array arr# i ch >>
735 copy_arr arr# ls' (n' +# 1#) (i +# 1#)
737 esc :: Int# -> Int# -> [Char] -> [Char]
738 esc v 0# ls = (C# (chr# v)):ls
739 esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls)
741 filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int)
742 filter_ps n hits run acc
745 escs = run `quotInt#` 255#
746 v = run `remInt#` 255#
748 (esc (v +# 1#) escs acc, I# hits)
756 escs = run `quotInt#` 255#
757 v = run `remInt#` 255#
758 acc' = esc (v +# 1#) escs acc
760 filter_ps n' (hits +# 1#) 0# acc'
762 filter_ps n' hits (run +# 1#) acc
765 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
774 --whizzLR :: a -> Int# -> a
777 | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#)
780 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
789 --whizzRL :: a -> Int# -> a
792 | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#)
794 takePS :: Int -> PackedString -> PackedString
797 | otherwise = substrPS# ps 0# (n -# 1#)
799 dropPS :: Int -> PackedString -> PackedString
802 | otherwise = substrPS# ps n (lengthPS# ps -# 1#)
806 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
807 splitAtPS n ps = (takePS n ps, dropPS n ps)
809 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
812 break_pt = char_pos_that_dissatisfies
818 if break_pt ==# 0# then
821 substrPS# ps 0# (break_pt -# 1#)
823 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
827 break_pt = char_pos_that_dissatisfies
833 if len ==# break_pt then
836 substrPS# ps break_pt (len -# 1#)
838 elemPS :: Char -> PackedString -> Bool
842 break_pt = first_char_pos_that_satisfies
850 char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
852 char_pos_that_dissatisfies p ps len pos
853 | pos >=# len = pos -- end
854 | p (indexPS# ps pos) = -- predicate satisfied; keep going
855 char_pos_that_dissatisfies p ps len (pos +# 1#)
856 | otherwise = pos -- predicate not satisfied
858 first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
859 first_char_pos_that_satisfies p ps len pos
860 | pos >=# len = pos -- end
861 | p (indexPS# ps pos) = pos -- got it!
862 | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#)
864 -- ToDo: could certainly go quicker
865 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
866 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
868 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
869 breakPS p ps = spanPS (not . p) ps
871 linesPS :: PackedString -> [PackedString]
872 linesPS ps = splitPS '\n' ps
874 wordsPS :: PackedString -> [PackedString]
875 wordsPS ps = splitWithPS isSpace ps
877 reversePS :: PackedString -> PackedString
879 if nullPS ps then -- don't create stuff unnecessarily.
883 new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte!
884 fill_in arr# (length -# 1#) 0# >>
885 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
886 let has_null = byteArrayHasNUL# frozen# length in
887 return (PS frozen# length has_null))
889 length = lengthPS# ps
891 fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
892 fill_in arr_in# n i =
896 write_ps_array arr_in# i ch >>
898 write_ps_array arr_in# (i +# 1#) (chr# 0#) >>
901 fill_in arr_in# (n -# 1#) (i +# 1#)
903 concatPS :: [PackedString] -> PackedString
907 tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x }
908 tot_len = I# tot_len#
911 new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte!
912 packum arr# pss 0# >>
913 freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
915 let has_null = byteArrayHasNUL# frozen# tot_len# in
917 return (PS frozen# tot_len# has_null)
920 packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s ()
923 = write_ps_array arr pos (chr# 0#) >>
925 packum arr (ps : pss) pos
926 = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) ->
927 packum arr pss next_pos
929 fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int
931 fill arr arr_i ps ps_i ps_len
933 = return (I# (arr_i +# ps_len))
935 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >>
936 fill arr arr_i ps (ps_i +# 1#) ps_len
938 ------------------------------------------------------------
939 joinPS :: PackedString -> [PackedString] -> PackedString
940 joinPS filler pss = concatPS (splice pss)
944 splice (x:y:xs) = x:filler:splice (y:xs)
946 -- ToDo: the obvious generalisation
948 Some properties that hold:
951 where False = any (map (x `elemPS`) ls')
952 False = any (map (nullPS) ls')
954 * all x's have been chopped out.
955 * no empty PackedStrings in returned list. A conseq.
960 * joinPS (packString [x]) (_splitPS x ls) = ls
964 splitPS :: Char -> PackedString -> [PackedString]
965 splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch)
967 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
968 splitWithPS pred ps =
978 first_char_pos_that_satisfies
984 if break_pt ==# n then -- immediate match, no substring to cut out.
985 splitify (break_pt +# 1#)
987 substrPS# ps n (break_pt -# 1#): -- leave out the matching character
988 splitify (break_pt +# 1#)
991 %************************************************************************
993 \subsection{Local utility functions}
995 %************************************************************************
997 The definition of @_substrPS@ is essentially:
998 @take (end - begin + 1) (drop begin str)@.
1001 substrPS :: PackedString -> Int -> Int -> PackedString
1002 substrPS ps (I# begin) (I# end) = substrPS# ps begin end
1006 = error "substrPS: bounds out of range"
1008 | s >=# len || result_len# <=# 0#
1013 new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte!
1014 fill_in ch_arr 0# >>
1015 freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
1017 let has_null = byteArrayHasNUL# frozen# result_len# in
1019 return (PS frozen# result_len# has_null)
1024 result_len# = (if e <# len then (e +# 1#) else len) -# s
1025 result_len = I# result_len#
1027 -----------------------
1028 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
1031 | idx ==# result_len#
1032 = write_ps_array arr_in# idx (chr# 0#) >>
1036 ch = indexPS# ps (s +# idx)
1038 write_ps_array arr_in# idx ch >>
1039 fill_in arr_in# (idx +# 1#)
1042 (Very :-) ``Specialised'' versions of some CharArray things...
1045 new_ps_array :: Int# -> ST s (MutableByteArray s Int)
1046 write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s ()
1047 freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int)
1049 new_ps_array size = ST $ \ s# ->
1050 case newCharArray# size s# of { StateAndMutableByteArray# s2# barr# ->
1051 STret s2# (MutableByteArray bot barr#)}
1053 bot = error "new_ps_array"
1055 write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ s# ->
1056 case writeCharArray# barr# n ch s# of { s2# ->
1059 -- same as unsafeFreezeByteArray
1060 freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ s# ->
1061 case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
1062 STret s2# (ByteArray ixs frozen#) }
1066 %*********************************************************
1068 \subsection{Packing and unpacking C strings}
1070 %*********************************************************
1073 unpackCString :: Addr -> [Char]
1075 -- Calls to the next four are injected by the compiler itself,
1076 -- to deal with literal strings
1077 packCString# :: [Char] -> ByteArray#
1078 unpackCString# :: Addr# -> [Char]
1079 unpackCString2# :: Addr# -> Int# -> [Char]
1080 unpackAppendCString# :: Addr# -> [Char] -> [Char]
1081 unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
1083 packCString# str = case (packString str) of { PS bytes _ _ -> bytes }
1085 unpackCString a@(A# addr) =
1086 if a == ``NULL'' then
1095 | ch `eqChar#` '\0'# = []
1096 | otherwise = C# ch : unpack (nh +# 1#)
1098 ch = indexCharOffAddr# addr nh
1100 unpackCStringIO :: Addr -> IO String
1101 unpackCStringIO addr
1102 | addr == ``NULL'' = return ""
1103 | otherwise = unpack 0#
1106 ch <- readCharOffAddr addr (I# nh)
1110 ls <- unpack (nh +# 1#)
1113 -- unpack 'len' chars
1114 unpackCStringLenIO :: Addr -> Int -> IO String
1115 unpackCStringLenIO addr l@(I# len#)
1116 | len# <# 0# = fail (userError ("PackedString.unpackCStringLenIO: negative length (" ++ show l ++ ")"))
1117 | otherwise = unpack len#
1119 unpack 0# = return []
1121 ch <- readCharOffAddr addr (I# nh)
1122 ls <- unpack (nh -# 1#)
1126 unpackCString2# addr len
1127 -- This one is called by the compiler to unpack literal strings with NULs in them; rare.
1128 = unpackPS (packCBytes (I# len) (A# addr))
1130 unpackAppendCString# addr rest
1134 | ch `eqChar#` '\0'# = rest
1135 | otherwise = C# ch : unpack (nh +# 1#)
1137 ch = indexCharOffAddr# addr nh
1139 unpackFoldrCString# addr f z
1143 | ch `eqChar#` '\0'# = z
1144 | otherwise = C# ch `f` unpack (nh +# 1#)
1146 ch = indexCharOffAddr# addr nh
1149 cStringToPS :: Addr -> PackedString
1150 cStringToPS (A# a#) = -- the easy one; we just believe the caller
1153 len = case (strlen# a#) of { I# x -> x }
1155 packBytesForC :: [Char] -> ByteArray Int
1156 packBytesForC str = psToByteArray (packString str)
1158 psToByteArrayST :: [Char] -> ST s (ByteArray Int)
1159 psToByteArrayST str =
1160 packStringST str >>= \ (PS bytes n has_null) ->
1161 --later? ASSERT(not has_null)
1162 return (ByteArray (0, I# (n -# 1#)) bytes)
1164 packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int)
1165 packNBytesForCST len str =
1166 packNCharsST len str >>= \ (PS bytes n has_null) ->
1167 return (ByteArray (0, I# (n -# 1#)) bytes)
1169 packCBytes :: Int -> Addr -> PackedString
1170 packCBytes len addr = runST (packCBytesST len addr)
1172 packCBytesST :: Int -> Addr -> ST s PackedString
1173 packCBytesST len@(I# length#) (A# addr) =
1175 allocate an array that will hold the string
1176 (not forgetting the NUL byte at the end)
1178 new_ps_array (length# +# 1#) >>= \ ch_array ->
1179 -- fill in packed string from "addr"
1180 fill_in ch_array 0# >>
1181 -- freeze the puppy:
1182 freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
1183 let has_null = byteArrayHasNUL# frozen# length# in
1184 return (PS frozen# length# has_null)
1186 fill_in :: MutableByteArray s Int -> Int# -> ST s ()
1190 = write_ps_array arr_in# idx (chr# 0#) >>
1193 = case (indexCharOffAddr# addr idx) of { ch ->
1194 write_ps_array arr_in# idx ch >>
1195 fill_in arr_in# (idx +# 1#) }