2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1994
4 \section[PrelPS]{Packed strings}
6 This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
8 Glorious hacking (all the hard work) by Bryan O'Sullivan.
11 module PreludePS{-yes, a Prelude module!-} (
29 unpackPS#, unpackPS2#,
59 -- to make interface self-sufficient
60 _PackedString, -- abstract!
65 import Stdio ( _FILE )
66 import TyArray ( _ByteArray(..) )
73 import Prel ( otherwise, (&&), (||), chr, ($), not, (.), isSpace, flip )
74 import List ( length, (++), map, filter, foldl, foldr,
75 lines, words, reverse, null, foldr1
77 import TyArray ( Array(..) )
81 %************************************************************************
83 \subsection{@_PackedString@ type declaration and interface (signatures)}
85 %************************************************************************
90 = _PS ByteArray# -- the bytes
91 Int# -- length (*not* including NUL at the end)
92 Bool -- True <=> contains a NUL
93 | _CPS Addr# -- pointer to the (null-terminated) bytes in C land
94 Int# -- length, as per strlen
95 -- definitely doesn't contain a NUL
97 _packString :: [Char] -> _PackedString
98 _packStringST :: [Char] -> _ST s _PackedString
99 _packCString :: _Addr -> _PackedString
100 _packCBytes :: Int -> _Addr -> _PackedString
101 _packCBytesST :: Int -> _Addr -> _ST s _PackedString
102 _packStringForC :: [Char] -> ByteArray# -- calls injected by compiler
103 _packBytesForC :: [Char] -> _ByteArray Int
104 _packBytesForCST :: [Char] -> _ST s (_ByteArray Int)
105 _nilPS :: _PackedString
106 _consPS :: Char -> _PackedString -> _PackedString
107 _byteArrayToPS :: _ByteArray Int -> _PackedString
108 _psToByteArray :: _PackedString -> _ByteArray Int
110 --OLD: packString# :: [Char] -> ByteArray#
111 --OLD: packToCString :: [Char] -> _ByteArray Int -- hmmm... weird name
113 _unpackPS :: _PackedString -> [Char]
114 unpackPS# :: Addr# -> [Char] -- calls injected by compiler
115 unpackPS2# :: Addr# -> Int# -> [Char] -- calls injected by compiler
116 --???toCString :: _PackedString -> ByteArray#
117 _putPS :: _FILE -> _PackedString -> PrimIO () -- ToDo: more sensible type
121 _headPS :: _PackedString -> Char
122 _tailPS :: _PackedString -> _PackedString
123 _nullPS :: _PackedString -> Bool
124 _appendPS :: _PackedString -> _PackedString -> _PackedString
125 _lengthPS :: _PackedString -> Int
126 _indexPS :: _PackedString -> Int -> Char
127 -- 0-origin indexing into the string
128 _mapPS :: (Char -> Char) -> _PackedString -> _PackedString {-or String?-}
129 _filterPS :: (Char -> Bool) -> _PackedString -> _PackedString {-or String?-}
130 _foldlPS :: (a -> Char -> a) -> a -> _PackedString -> a
131 _foldrPS :: (Char -> a -> a) -> a -> _PackedString -> a
132 _takePS :: Int -> _PackedString -> _PackedString
133 _dropPS :: Int -> _PackedString -> _PackedString
134 _splitAtPS :: Int -> _PackedString -> (_PackedString, _PackedString)
135 _takeWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
136 _dropWhilePS:: (Char -> Bool) -> _PackedString -> _PackedString
137 _spanPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
138 _breakPS :: (Char -> Bool) -> _PackedString -> (_PackedString, _PackedString)
139 _linesPS :: _PackedString -> [_PackedString]
140 _wordsPS :: _PackedString -> [_PackedString]
141 _reversePS :: _PackedString -> _PackedString
142 _concatPS :: [_PackedString] -> _PackedString
144 _substrPS :: _PackedString -> Int -> Int -> _PackedString
145 -- pluck out a piece of a _PS
146 -- start and end chars you want; both 0-origin-specified
147 --??? _hashPS :: _PackedString -> Int -> Int
148 -- use the _PS to produce a hash value between 0 & m (inclusive)
151 %************************************************************************
153 \subsection{Constructor functions}
155 %************************************************************************
157 Easy ones first. @_packString@ requires getting some heap-bytes and
158 scribbling stuff into them.
161 _packCString (A# a#) -- the easy one; we just believe the caller
164 len = case (strlen# a#) of { I# x -> x }
167 _consPS c cs = _packString (c : (_unpackPS cs)) -- ToDo:better
170 = case (_packString str) of
171 _PS bytes _ _ -> bytes
174 = _psToByteArray (_packString str)
177 = _packStringST str `thenStrictlyST` \ (_PS bytes n has_null) ->
178 --later? ASSERT(not has_null)
179 returnStrictlyST (_ByteArray (0, I# (n -# 1#)) bytes)
181 _packString str = _runST (_packStringST str)
184 = let len = length str in
187 pack_me :: Int -> [Char] -> _ST s _PackedString
189 pack_me len@(I# length#) str
190 = -- allocate an array that will hold the string
191 -- (not forgetting the NUL byte at the end)
192 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
194 -- fill in packed string from "str"
195 fill_in ch_array 0# str `seqStrictlyST`
198 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
200 let has_null = byteArrayHasNUL# frozen# length# in
202 returnStrictlyST (_PS frozen# length# has_null)
204 fill_in :: _MutableByteArray s Int -> Int# -> [Char] -> _ST s ()
206 fill_in arr_in# idx []
207 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
210 fill_in arr_in# idx (C# c : cs)
211 = write_ps_array arr_in# idx c `seqStrictlyST`
212 fill_in arr_in# (idx +# 1#) cs
214 _packCBytes len addr = _runST (_packCBytesST len addr)
216 _packCBytesST len@(I# length#) (A# addr)
217 = -- allocate an array that will hold the string
218 -- (not forgetting the NUL byte at the end)
219 new_ps_array (length# +# 1#) `thenStrictlyST` \ ch_array ->
221 -- fill in packed string from "addr"
222 fill_in ch_array 0# `seqStrictlyST`
225 freeze_ps_array ch_array `thenStrictlyST` \ (_ByteArray _ frozen#) ->
227 let has_null = byteArrayHasNUL# frozen# length# in
229 returnStrictlyST (_PS frozen# length# has_null)
231 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
235 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
238 = case (indexCharOffAddr# addr idx) of { ch ->
239 write_ps_array arr_in# idx ch `seqStrictlyST`
240 fill_in arr_in# (idx +# 1#) }
242 _byteArrayToPS (_ByteArray ixs@(_, ix_end) frozen#)
247 else ((index ixs ix_end) + 1)
250 _PS frozen# n# (byteArrayHasNUL# frozen# n#)
252 _unsafeByteArrayToPS (_ByteArray _ frozen#) (I# n#)
253 = _PS frozen# n# (byteArrayHasNUL# frozen# n#)
255 _psToByteArray (_PS bytes n has_null)
256 = _ByteArray (0, I# (n -# 1#)) bytes
258 _psToByteArray (_CPS addr len#)
261 byte_array_form = _packCBytes len (A# addr)
263 case byte_array_form of { _PS bytes _ _ ->
264 _ByteArray (0, len - 1) bytes }
267 %************************************************************************
269 \subsection{Destructor functions (taking @_PackedStrings@ apart)}
271 %************************************************************************
274 unpackPS# addr -- calls injected by compiler
275 = _unpackPS (_CPS addr len)
277 len = case (strlen# addr) of { I# x -> x }
279 unpackPS2# addr len -- calls injected by compiler
280 -- this one is for literal strings with NULs in them; rare.
281 = _unpackPS (_packCBytes (I# len) (A# addr))
283 -- OK, but this code gets *hammered*:
285 -- = [ _indexPS ps n | n <- [ 0::Int .. _lengthPS ps - 1 ] ]
287 _unpackPS (_PS bytes len has_null)
292 | otherwise = C# ch : unpack (nh +# 1#)
294 ch = indexCharArray# bytes nh
296 _unpackPS (_CPS addr len)
300 | ch `eqChar#` '\0'# = []
301 | otherwise = C# ch : unpack (nh +# 1#)
303 ch = indexCharOffAddr# addr nh
307 _putPS file ps@(_PS bytes len has_null)
312 byte_array = _ByteArray (0, I# (len -# 1#)) bytes
314 _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
315 `thenPrimIO` \ (I# written) ->
316 if written ==# len then
319 error "_putPS: fwrite failed!\n"
321 _putPS file (_CPS addr len)
325 = _ccall_ fputs (A# addr) file `thenPrimIO` \ (I# _){-force type-} ->
329 The dual to @_putPS@, note that the size of the chunk specified
330 is the upper bound of the size of the chunk returned.
333 _getPS :: _FILE -> Int -> PrimIO _PackedString
334 _getPS file len@(I# len#)
335 | len# <=# 0# = returnPrimIO _nilPS -- I'm being kind here.
337 -- Allocate an array for system call to store its bytes into.
338 new_ps_array len# `thenPrimIO` \ ch_arr ->
339 freeze_ps_array ch_arr `thenPrimIO` \ (_ByteArray _ frozen#) ->
341 byte_array = _ByteArray (0, I# len#) frozen#
343 _ccall_ fread byte_array (1::Int) len file `thenPrimIO` \ (I# read#) ->
344 if read# ==# 0# then -- EOF or other error
345 error "_getPS: EOF reached or other error"
348 The system call may not return the number of
349 bytes requested. Instead of failing with an error
350 if the number of bytes read is less than requested,
351 a packed string containing the bytes we did manage
352 to snarf is returned.
355 has_null = byteArrayHasNUL# frozen# read#
357 returnPrimIO (_PS frozen# read# has_null)
361 %************************************************************************
363 \subsection{List-mimicking functions for @_PackedStrings@}
365 %************************************************************************
367 First, the basic functions that do look into the representation;
368 @indexPS@ is the most important one.
370 _lengthPS ps = I# (lengthPS# ps)
372 {-# INLINE lengthPS# #-}
374 lengthPS# (_PS _ i _) = i
375 lengthPS# (_CPS _ i) = i
377 {-# INLINE strlen# #-}
379 strlen# :: Addr# -> Int
381 = unsafePerformPrimIO (
382 _ccall_ strlen (A# a) `thenPrimIO` \ len@(I# _) ->
386 byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
388 byteArrayHasNUL# bs len
389 = unsafePerformPrimIO (
390 _ccall_ byteArrayHasNUL__ ba (I# len) `thenPrimIO` \ (I# res) ->
392 if res ==# 0# then False else True
395 ba = _ByteArray (0, I# (len -# 1#)) bs
397 -----------------------
398 _indexPS ps (I# n) = C# (indexPS# ps n)
400 {-# INLINE indexPS# #-}
402 indexPS# (_PS bs i _) n
403 = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
406 indexPS# (_CPS a _) n
407 = indexCharOffAddr# a n
410 Now, the rest of the functions can be defined without digging
411 around in the representation.
414 | _nullPS ps = error "_headPS: head []"
415 | otherwise = C# (indexPS# ps 0#)
418 | len <=# 0# = error "_tailPS: tail []"
419 | len ==# 1# = _nilPS
420 | otherwise = substrPS# ps 1# (len -# 1#)
424 _nullPS (_PS _ i _) = i ==# 0#
425 _nullPS (_CPS _ i) = i ==# 0#
427 -- ToDo: some non-lousy implementations...
432 | otherwise = _packString (_unpackPS xs ++ _unpackPS ys)
434 _mapPS f xs = _packString (map f (_unpackPS xs))
436 _filterPS p ps = _packString (filter p (_unpackPS ps))
437 _foldlPS f b ps = foldl f b (_unpackPS ps)
438 _foldrPS f b ps = foldr f b (_unpackPS ps)
440 _takePS (I# n) ps = substrPS# ps 0# (n -# 1#)
441 _dropPS (I# n) ps = substrPS# ps n (lengthPS# ps -# 1#)
442 _splitAtPS n ps = (_takePS n ps, _dropPS n ps)
446 break_pt = char_pos_that_dissatisfies
452 substrPS# ps 0# (break_pt -# 1#)
457 break_pt = char_pos_that_dissatisfies
463 substrPS# ps break_pt (len -# 1#)
465 char_pos_that_dissatisfies :: (Char# -> Bool) -> _PackedString -> Int# -> Int# -> Int#
467 char_pos_that_dissatisfies p ps len pos
468 | pos >=# len = pos -- end
469 | p (indexPS# ps pos) = -- predicate satisfied; keep going
470 char_pos_that_dissatisfies p ps len (pos +# 1#)
471 | otherwise = pos -- predicate not satisfied
473 char_pos_that_dissatisfies p ps len pos -- dead code: HACK to avoid badly-typed error msg
476 -- ToDo: could certainly go quicker
477 _spanPS p ps = (_takeWhilePS p ps, _dropWhilePS p ps)
478 _breakPS p ps = _spanPS (not . p) ps
480 _linesPS ps = map _packString (lines (_unpackPS ps))
481 _wordsPS ps = map _packString (words (_unpackPS ps))
483 _reversePS ps = _packString (reverse (_unpackPS ps))
485 _concatPS [] = _nilPS
488 tot_len# = case (foldr ((+) . _lengthPS) 0 pss) of { I# x -> x }
489 tot_len = I# tot_len#
492 new_ps_array (tot_len# +# 1#) `thenStrictlyST` \ arr# -> -- incl NUL byte!
493 packum arr# pss 0# `seqStrictlyST`
494 freeze_ps_array arr# `thenStrictlyST` \ (_ByteArray _ frozen#) ->
496 let has_null = byteArrayHasNUL# frozen# tot_len# in
498 returnStrictlyST (_PS frozen# tot_len# has_null)
501 packum :: _MutableByteArray s Int -> [_PackedString] -> Int# -> _ST s ()
504 = write_ps_array arr pos (chr# 0#) `seqStrictlyST`
506 packum arr (ps : pss) pos
507 = fill arr pos ps 0# (lengthPS# ps) `thenStrictlyST` \ (I# next_pos) ->
508 packum arr pss next_pos
510 fill :: _MutableByteArray s Int -> Int# -> _PackedString -> Int# -> Int# -> _ST s Int
512 fill arr arr_i ps ps_i ps_len
514 = returnStrictlyST (I# (arr_i +# ps_len))
516 = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) `seqStrictlyST`
517 fill arr arr_i ps (ps_i +# 1#) ps_len
520 %************************************************************************
522 \subsection{Instances for @_PackedStrings@: @Eq@, @Ord@, @Text@}
524 %************************************************************************
528 instance Eq _PackedString where
529 a == b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> False }
530 a /= b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> True }
532 instance Ord _PackedString where
533 a <= b = case _tagCmpPS a b of { _LT -> True; _EQ -> True; _GT -> False }
534 a < b = case _tagCmpPS a b of { _LT -> True; _EQ -> False; _GT -> False }
535 a >= b = case _tagCmpPS a b of { _LT -> False; _EQ -> True; _GT -> True }
536 a > b = case _tagCmpPS a b of { _LT -> False; _EQ -> False; _GT -> True }
537 _tagCmp a b = _tagCmpPS a b
540 We try hard to make this go fast:
542 _tagCmpPS :: _PackedString -> _PackedString -> _CMP_TAG
544 _tagCmpPS (_PS bs1 len1 has_null1) (_PS bs2 len2 has_null2)
545 | not has_null1 && not has_null2
546 = unsafePerformPrimIO (
547 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
549 if res <# 0# then _LT
550 else if res ==# 0# then _EQ
554 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
555 ba2 = _ByteArray (0, I# (len2 -# 1#)) bs2
557 _tagCmpPS (_PS bs1 len1 has_null1) (_CPS bs2 len2)
559 = unsafePerformPrimIO (
560 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
562 if res <# 0# then _LT
563 else if res ==# 0# then _EQ
567 ba1 = _ByteArray (0, I# (len1 -# 1#)) bs1
570 _tagCmpPS (_CPS bs1 len1) (_CPS bs2 len2)
571 = unsafePerformPrimIO (
572 _ccall_ strcmp ba1 ba2 `thenPrimIO` \ (I# res) ->
574 if res <# 0# then _LT
575 else if res ==# 0# then _EQ
582 _tagCmpPS a@(_CPS _ _) b@(_PS _ _ has_null2)
584 = -- try them the other way 'round
585 case (_tagCmpPS b a) of { _LT -> _GT; _EQ -> _EQ; _GT -> _LT }
587 _tagCmpPS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
590 end1 = lengthPS# ps1 -# 1#
591 end2 = lengthPS# ps2 -# 1#
594 = if char# ># end1 then
595 if char# ># end2 then -- both strings ran out at once
597 else -- ps1 ran out before ps2
599 else if char# ># end2 then
600 _GT -- ps2 ran out before ps1
603 ch1 = indexPS# ps1 char#
604 ch2 = indexPS# ps2 char#
606 if ch1 `eqChar#` ch2 then
607 looking_at (char# +# 1#)
608 else if ch1 `ltChar#` ch2 then _LT
611 instance Text _PackedString where
612 readsPrec p = error "readsPrec: _PackedString: ToDo"
613 showsPrec p ps r = showsPrec p (_unpackPS ps) r
616 %************************************************************************
618 \subsection{Uniquely PackedString functions}
620 %************************************************************************
622 For @_substrPS@, see the next section.
624 @_hashPS@ is just what we happen to need in GHC...
627 _hashPS ps (I# hASH_TBL_SIZE#)
628 = I# (h `remInt#` hASH_TBL_SIZE#)
632 h | len <=# 0# = 0# -- probably should just be an "error"
633 | len ==# 1# = ord# c1
634 | len ==# 2# = ord# c2
635 | len ==# 3# = ord# c2 +# ord# c3
636 | len ==# 4# = ord# c2 +# ord# c3 +# ord# c4
637 | len ==# 5# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5
638 | len >=# 6# = ord# c2 +# ord# c3 +# ord# c4 +# ord# c5 +# ord# c6
639 | otherwise = 999# -- will never happen
650 %************************************************************************
652 \subsection{Local utility functions}
654 %************************************************************************
656 The definition of @_substrPS@ is essentially:
657 @take (end - begin + 1) (drop begin str)@.
659 _substrPS ps (I# begin) (I# end) = substrPS# ps begin end
663 = error "_substrPS: bounds out of range"
665 | s >=# len || result_len# <=# 0#
670 new_ps_array (result_len# +# 1#) `thenStrictlyST` \ ch_arr -> -- incl NUL byte!
671 fill_in ch_arr 0# `seqStrictlyST`
672 freeze_ps_array ch_arr `thenStrictlyST` \ (_ByteArray _ frozen#) ->
674 let has_null = byteArrayHasNUL# frozen# result_len# in
676 returnStrictlyST (_PS frozen# result_len# has_null)
681 result_len# = (if e <# len then (e +# 1#) else len) -# s
682 result_len = I# result_len#
684 -----------------------
685 fill_in :: _MutableByteArray s Int -> Int# -> _ST s ()
688 | idx ==# result_len#
689 = write_ps_array arr_in# idx (chr# 0#) `seqStrictlyST`
693 ch = indexPS# ps (s +# idx)
695 write_ps_array arr_in# idx ch `seqStrictlyST`
696 fill_in arr_in# (idx +# 1#)
699 (Very :-) ``Specialised'' versions of some CharArray things...
701 new_ps_array :: Int# -> _ST s (_MutableByteArray s Int)
702 write_ps_array :: _MutableByteArray s Int -> Int# -> Char# -> _ST s ()
703 freeze_ps_array :: _MutableByteArray s Int -> _ST s (_ByteArray Int)
705 new_ps_array size (S# s)
706 = case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
707 (_MutableByteArray bot barr#, S# s2#)}
709 bot = error "new_ps_array"
711 write_ps_array (_MutableByteArray _ barr#) n ch (S# s#)
712 = case writeCharArray# barr# n ch s# of { s2# ->
715 -- same as unsafeFreezeByteArray
716 freeze_ps_array (_MutableByteArray ixs arr#) (S# s#)
717 = case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
718 (_ByteArray ixs frozen#, S# s2#) }