From ac3aa323caebe0cbc431a7e8af1735100e2c7a98 Mon Sep 17 00:00:00 2001 From: sof Date: Mon, 25 Aug 1997 22:44:21 +0000 Subject: [PATCH] [project @ 1997-08-25 22:44:21 by sof] Moved to ghc system library --- ghc/lib/glaExts/PackedString.lhs | 1083 -------------------------------------- 1 file changed, 1083 deletions(-) delete mode 100644 ghc/lib/glaExts/PackedString.lhs diff --git a/ghc/lib/glaExts/PackedString.lhs b/ghc/lib/glaExts/PackedString.lhs deleted file mode 100644 index b04a71b..0000000 --- a/ghc/lib/glaExts/PackedString.lhs +++ /dev/null @@ -1,1083 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1993-1996 -% -\section{Packed strings} - -This sits on top of the sequencing/arrays world, notably @ByteArray#@s. - -Glorious hacking (all the hard work) by Bryan O'Sullivan. - -\begin{code} -{-# OPTIONS -fno-implicit-prelude '-#include "cbits/stgio.h"' #-} - -module PackedString ( - PackedString, -- abstract - - -- Creating the beasts - packString, -- :: [Char] -> PackedString - packStringST, -- :: [Char] -> ST s PackedString - packCBytesST, -- :: Int -> Addr -> ST s PackedString - - byteArrayToPS, -- :: ByteArray Int -> PackedString - unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString - - psToByteArray, -- :: PackedString -> ByteArray Int - psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int) - - unpackPS, -- :: PackedString -> [Char] -{-LATER: - hPutPS, -- :: Handle -> PackedString -> IO () - putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type - getPS, -- :: FILE -> Int -> PrimIO PackedString --} - nilPS, -- :: PackedString - consPS, -- :: Char -> PackedString -> PackedString - headPS, -- :: PackedString -> Char - tailPS, -- :: PackedString -> PackedString - nullPS, -- :: PackedString -> Bool - appendPS, -- :: PackedString -> PackedString -> PackedString - lengthPS, -- :: PackedString -> Int - {- 0-origin indexing into the string -} - indexPS, -- :: PackedString -> Int -> Char - mapPS, -- :: (Char -> Char) -> PackedString -> PackedString - filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString - foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a - foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a - takePS, -- :: Int -> PackedString -> PackedString - dropPS, -- :: Int -> PackedString -> PackedString - splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString) - takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString - dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString - spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) - breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) - linesPS, -- :: PackedString -> [PackedString] - - wordsPS, -- :: PackedString -> [PackedString] - reversePS, -- :: PackedString -> PackedString - splitPS, -- :: Char -> PackedString -> [PackedString] - splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString] - joinPS, -- :: PackedString -> [PackedString] -> PackedString - concatPS, -- :: [PackedString] -> PackedString - elemPS, -- :: Char -> PackedString -> Bool - - {- - Pluck out a piece of a PS start and end - chars you want; both 0-origin-specified - -} - substrPS, -- :: PackedString -> Int -> Int -> PackedString - - comparePS, - - -- Converting to C strings - packCString#, - unpackCString#, unpackCString2#, unpackAppendCString#, unpackFoldrCString#, - unpackCString - ) where - -import {-# SOURCE #-} IOBase ( error ) -import Ix -import PrelList -import STBase -import UnsafeST ( unsafePerformPrimIO ) -import ArrBase -import PrelBase -import Foreign ( Addr(..) ) -import GHC - -\end{code} - -%************************************************************************ -%* * -\subsection{@PackedString@ type declaration} -%* * -%************************************************************************ - -\begin{code} -data PackedString - = PS ByteArray# -- the bytes - Int# -- length (*not* including NUL at the end) - Bool -- True <=> contains a NUL - | CPS Addr# -- pointer to the (null-terminated) bytes in C land - Int# -- length, as per strlen - -- definitely doesn't contain a NUL - -instance Eq PackedString where - x == y = compare x y == EQ - x /= y = compare x y /= EQ - -instance Ord PackedString where - compare = comparePS - x <= y = compare x y /= GT - x < y = compare x y == LT - x >= y = compare x y /= LT - x > y = compare x y == GT - max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x } - min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y } - ---instance Read PackedString: ToDo - -instance Show PackedString where - showsPrec p ps r = showsPrec p (unpackPS ps) r - showList = showList__ (showsPrec 0) -\end{code} - - -%************************************************************************ -%* * -\subsection{@PackedString@ instances} -%* * -%************************************************************************ - -We try hard to make this go fast: -\begin{code} -comparePS :: PackedString -> PackedString -> Ordering - -comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2) - | not has_null1 && not has_null2 - = unsafePerformPrimIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray (0, I# (len1 -# 1#)) bs1 - ba2 = ByteArray (0, I# (len2 -# 1#)) bs2 - -comparePS (PS bs1 len1 has_null1) (CPS bs2 len2) - | not has_null1 - = unsafePerformPrimIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray (0, I# (len1 -# 1#)) bs1 - ba2 = A# bs2 - -comparePS (CPS bs1 len1) (CPS bs2 len2) - = unsafePerformPrimIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = A# bs1 - ba2 = A# bs2 - -comparePS a@(CPS _ _) b@(PS _ _ has_null2) - | not has_null2 - = -- try them the other way 'round - case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT } - -comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True) - = looking_at 0# - where - end1 = lengthPS# ps1 -# 1# - end2 = lengthPS# ps2 -# 1# - - looking_at char# - = if char# ># end1 then - if char# ># end2 then -- both strings ran out at once - EQ - else -- ps1 ran out before ps2 - LT - else if char# ># end2 then - GT -- ps2 ran out before ps1 - else - let - ch1 = indexPS# ps1 char# - ch2 = indexPS# ps2 char# - in - if ch1 `eqChar#` ch2 then - looking_at (char# +# 1#) - else if ch1 `ltChar#` ch2 then LT - else GT -\end{code} - - -%************************************************************************ -%* * -\subsection{Constructor functions} -%* * -%************************************************************************ - -Easy ones first. @packString@ requires getting some heap-bytes and -scribbling stuff into them. - -\begin{code} -nilPS :: PackedString -nilPS = CPS ""# 0# - -consPS :: Char -> PackedString -> PackedString -consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better - -packString :: [Char] -> PackedString -packString str = runST (packStringST str) - -packStringST :: [Char] -> ST s PackedString -packStringST str = - let len = length str in - packNCharsST len str - -packNCharsST :: Int -> [Char] -> ST s PackedString -packNCharsST len@(I# length#) str = - {- - allocate an array that will hold the string - (not forgetting the NUL byte at the end) - -} - new_ps_array (length# +# 1#) >>= \ ch_array -> - -- fill in packed string from "str" - fill_in ch_array 0# str >> - -- freeze the puppy: - freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length# in - return (PS frozen# length# has_null) - where - fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s () - fill_in arr_in# idx [] = - write_ps_array arr_in# idx (chr# 0#) >> - return () - - fill_in arr_in# idx (C# c : cs) = - write_ps_array arr_in# idx c >> - fill_in arr_in# (idx +# 1#) cs - -byteArrayToPS :: ByteArray Int -> PackedString -byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) = - let - n# = - case ( - if null (range ixs) - then 0 - else ((index ixs ix_end) + 1) - ) of { I# x -> x } - in - PS frozen# n# (byteArrayHasNUL# frozen# n#) - -unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString -unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#) - = PS frozen# n# (byteArrayHasNUL# frozen# n#) - -psToByteArray :: PackedString -> ByteArray Int -psToByteArray (PS bytes n has_null) - = ByteArray (0, I# (n -# 1#)) bytes - -psToByteArray (CPS addr len#) - = let - len = I# len# - byte_array_form = packCBytes len (A# addr) - in - case byte_array_form of { PS bytes _ _ -> - ByteArray (0, len - 1) bytes } -\end{code} - -%************************************************************************ -%* * -\subsection{Destructor functions (taking @PackedStrings@ apart)} -%* * -%************************************************************************ - -\begin{code} --- OK, but this code gets *hammered*: --- unpackPS ps --- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ] - -unpackPS :: PackedString -> [Char] -unpackPS (PS bytes len has_null) - = unpack 0# - where - unpack nh - | nh >=# len = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharArray# bytes nh - -unpackPS (CPS addr len) - = unpack 0# - where - unpack nh - | ch `eqChar#` '\0'# = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh -\end{code} - -Output a packed string via a handle: - -\begin{code} -{- LATER: -hPutPS :: Handle -> PackedString -> IO () -hPutPS handle ps = - let - len = - case ps of - PS _ len _ -> len - CPS _ len -> len - in - if len ==# 0# then - return () - else - _readHandle handle >>= \ htype -> - case htype of - _ErrorHandle ioError -> - _writeHandle handle htype >> - failWith ioError - _ClosedHandle -> - _writeHandle handle htype >> - failWith (IllegalOperation "handle is closed") - _SemiClosedHandle _ _ -> - _writeHandle handle htype >> - failWith (IllegalOperation "handle is closed") - _ReadHandle _ _ _ -> - _writeHandle handle htype >> - failWith (IllegalOperation "handle is not open for writing") - other -> - _getBufferMode other >>= \ other -> - (case _bufferMode other of - Just LineBuffering -> - writeLines (_filePtr other) - Just (BlockBuffering (Just size)) -> - writeBlocks (_filePtr other) size - Just (BlockBuffering Nothing) -> - writeBlocks (_filePtr other) ``BUFSIZ'' - _ -> -- Nothing is treated pessimistically as NoBuffering - writeChars (_filePtr other) 0# - ) >>= \ success -> - _writeHandle handle (_markHandle other) >> - if success then - return () - else - _constructError "hPutStr" >>= \ ioError -> - failWith ioError - - where - pslen = lengthPS# ps - - writeLines :: Addr -> PrimIO Bool - writeLines = writeChunks ``BUFSIZ'' True - - writeBlocks :: Addr -> Int -> PrimIO Bool - writeBlocks fp size = writeChunks size False fp - - {- - The breaking up of output into lines along \n boundaries - works fine as long as there are newlines to split by. - Avoid the splitting up into lines altogether (doesn't work - for overly long lines like the stuff that showsPrec instances - normally return). Instead, we split them up into fixed size - chunks before blasting them off to the Real World. - - Hacked to avoid multiple passes over the strings - unsightly, but - a whole lot quicker. -- SOF 3/96 - -} - - writeChunks :: Int -> Bool -> Addr -> PrimIO Bool - writeChunks (I# bufLen) chopOnNewLine fp = - newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) -> - let - shoveString :: Int# -> Int# -> PrimIO Bool - shoveString n i - | i ==# pslen = -- end of string - if n ==# 0# then - return True - else - _ccall_ writeFile arr fp (I# n) >>= \rc -> - return (rc==0) - | otherwise = - (\ (S# s#) -> - case writeCharArray# arr# n (indexPS# ps i) s# of - s1# -> - {- Flushing lines - should we bother? -} - (if n ==# bufLen then - _ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc -> - if rc == 0 then - shoveString 0# (i +# 1#) - else - return False - else - shoveString (n +# 1#) (i +# 1#)) (S# s1#)) - in - shoveString 0# 0# - - writeChars :: Addr -> Int# -> PrimIO Bool - writeChars fp i - | i ==# pslen = return True - | otherwise = - _ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc -> - if rc == 0 then - writeChars fp (i +# 1#) - else - return False - ---------------------------------------------- - -putPS :: _FILE -> PackedString -> PrimIO () -putPS file ps@(PS bytes len has_null) - | len ==# 0# - = return () - | otherwise - = let - byte_array = ByteArray (0, I# (len -# 1#)) bytes - in - _ccall_ fwrite byte_array (1::Int){-size-} (I# len) file - >>= \ (I# written) -> - if written ==# len then - return () - else - error "putPS: fwrite failed!\n" - -putPS file (CPS addr len) - | len ==# 0# - = return () - | otherwise - = _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} -> - return () -\end{code} - -The dual to @_putPS@, note that the size of the chunk specified -is the upper bound of the size of the chunk returned. - -\begin{code} -getPS :: _FILE -> Int -> PrimIO PackedString -getPS file len@(I# len#) - | len# <=# 0# = return nilPS -- I'm being kind here. - | otherwise = - -- Allocate an array for system call to store its bytes into. - new_ps_array len# >>= \ ch_arr -> - freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) -> - let - byte_array = ByteArray (0, I# len#) frozen# - in - _ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) -> - if read# ==# 0# then -- EOF or other error - error "getPS: EOF reached or other error" - else - {- - The system call may not return the number of - bytes requested. Instead of failing with an error - if the number of bytes read is less than requested, - a packed string containing the bytes we did manage - to snarf is returned. - -} - let - has_null = byteArrayHasNUL# frozen# read# - in - return (PS frozen# read# has_null) -END LATER -} -\end{code} - -%************************************************************************ -%* * -\subsection{List-mimicking functions for @PackedStrings@} -%* * -%************************************************************************ - -First, the basic functions that do look into the representation; -@indexPS@ is the most important one. - -\begin{code} -lengthPS :: PackedString -> Int -lengthPS ps = I# (lengthPS# ps) - -{-# INLINE lengthPS# #-} - -lengthPS# (PS _ i _) = i -lengthPS# (CPS _ i) = i - -{-# INLINE strlen# #-} - -strlen# :: Addr# -> Int -strlen# a - = unsafePerformPrimIO ( - _ccall_ strlen (A# a) >>= \ len@(I# _) -> - return len - ) - -byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool -byteArrayHasNUL# bs len - = unsafePerformPrimIO ( - _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) -> - return ( - if res ==# 0# then False else True - )) - where - ba = ByteArray (0, I# (len -# 1#)) bs - ------------------------ - -indexPS :: PackedString -> Int -> Char -indexPS ps (I# n) = C# (indexPS# ps n) - -{-# INLINE indexPS# #-} - -indexPS# (PS bs i _) n - = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10) - indexCharArray# bs n - -indexPS# (CPS a _) n - = indexCharOffAddr# a n -\end{code} - -Now, the rest of the functions can be defined without digging -around in the representation. - -\begin{code} -headPS :: PackedString -> Char -headPS ps - | nullPS ps = error "headPS: head []" - | otherwise = C# (indexPS# ps 0#) - -tailPS :: PackedString -> PackedString -tailPS ps - | len <=# 0# = error "tailPS: tail []" - | len ==# 1# = nilPS - | otherwise = substrPS# ps 1# (len -# 1#) - where - len = lengthPS# ps - -nullPS :: PackedString -> Bool -nullPS (PS _ i _) = i ==# 0# -nullPS (CPS _ i) = i ==# 0# - -{- (ToDo: some non-lousy implementations...) - - Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys) - --} -appendPS :: PackedString -> PackedString -> PackedString -appendPS xs ys - | nullPS xs = ys - | nullPS ys = xs - | otherwise = concatPS [xs,ys] - -{- OLD: mapPS f xs = packString (map f (unpackPS xs)) -} - -mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-} -mapPS f xs = - if nullPS xs then - xs - else - runST ( - new_ps_array (length +# 1#) >>= \ ps_arr -> - whizz ps_arr length 0# >> - freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length in - return (PS frozen# length has_null)) - where - length = lengthPS# xs - - whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s () - whizz arr# n i - | n ==# 0# - = write_ps_array arr# i (chr# 0#) >> - return () - | otherwise - = let - ch = indexPS# xs i - in - write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >> - whizz arr# (n -# 1#) (i +# 1#) - -filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-} -filterPS pred ps = - if nullPS ps then - ps - else - {- - Filtering proceeds as follows: - - * traverse the list, applying the pred. to each element, - remembering the positions where it was satisfied. - - Encode these positions using a run-length encoding of the gaps - between the matching positions. - - * Allocate a MutableByteArray in the heap big enough to hold - all the matched entries, and copy the elements that matched over. - - A better solution that merges the scan© passes into one, - would be to copy the filtered elements over into a growable - buffer. No such operation currently supported over - MutableByteArrays (could of course use malloc&realloc) - But, this solution may in the case of repeated realloc's - be worse than the current solution. - -} - runST ( - let - (rle,len_filtered) = filter_ps len# 0# 0# [] - len_filtered# = case len_filtered of { I# x# -> x#} - in - if len# ==# len_filtered# then - {- not much filtering as everything passed through. -} - return ps - else if len_filtered# ==# 0# then - return nilPS - else - new_ps_array (len_filtered# +# 1#) >>= \ ps_arr -> - copy_arr ps_arr rle 0# 0# >> - freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# len_filtered# in - return (PS frozen# len_filtered# has_null)) - where - len# = lengthPS# ps - - matchOffset :: Int# -> [Char] -> (Int,[Char]) - matchOffset off [] = (I# off,[]) - matchOffset off (C# c:cs) = - let - x = ord# c - off' = off +# x - in - if x==# 0# then -- escape code, add 255# - matchOffset off' cs - else - (I# off', cs) - - copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s () - copy_arr arr# [_] _ _ = return () - copy_arr arr# ls n i = - let - (x,ls') = matchOffset 0# ls - n' = n +# (case x of { (I# x#) -> x#}) -# 1# - ch = indexPS# ps n' - in - write_ps_array arr# i ch >> - copy_arr arr# ls' (n' +# 1#) (i +# 1#) - - esc :: Int# -> Int# -> [Char] -> [Char] - esc v 0# ls = (C# (chr# v)):ls - esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls) - - filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int) - filter_ps n hits run acc - | n <# 0# = - let - escs = run `quotInt#` 255# - v = run `remInt#` 255# - in - (esc (v +# 1#) escs acc, I# hits) - | otherwise - = let - ch = indexPS# ps n - n' = n -# 1# - in - if pred (C# ch) then - let - escs = run `quotInt#` 255# - v = run `remInt#` 255# - acc' = esc (v +# 1#) escs acc - in - filter_ps n' (hits +# 1#) 0# acc' - else - filter_ps n' hits (run +# 1#) acc - - -foldlPS :: (a -> Char -> a) -> a -> PackedString -> a -foldlPS f b ps - = if nullPS ps then - b - else - whizzLR b 0# - where - len = lengthPS# ps - - --whizzLR :: a -> Int# -> a - whizzLR b idx - | idx ==# len = b - | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#) - - -foldrPS :: (Char -> a -> a) -> a -> PackedString -> a -foldrPS f b ps - = if nullPS ps then - b - else - whizzRL b len - where - len = lengthPS# ps - - --whizzRL :: a -> Int# -> a - whizzRL b idx - | idx <# 0# = b - | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#) - -takePS :: Int -> PackedString -> PackedString -takePS (I# n) ps - | n ==# 0# = nilPS - | otherwise = substrPS# ps 0# (n -# 1#) - -dropPS :: Int -> PackedString -> PackedString -dropPS (I# n) ps - | n ==# len = ps - | otherwise = substrPS# ps n (lengthPS# ps -# 1#) - where - len = lengthPS# ps - -splitAtPS :: Int -> PackedString -> (PackedString, PackedString) -splitAtPS n ps = (takePS n ps, dropPS n ps) - -takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString -takeWhilePS pred ps - = let - break_pt = char_pos_that_dissatisfies - (\ c -> pred (C# c)) - ps - (lengthPS# ps) - 0# - in - if break_pt ==# 0# then - nilPS - else - substrPS# ps 0# (break_pt -# 1#) - -dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString -dropWhilePS pred ps - = let - len = lengthPS# ps - break_pt = char_pos_that_dissatisfies - (\ c -> pred (C# c)) - ps - len - 0# - in - if len ==# break_pt then - nilPS - else - substrPS# ps break_pt (len -# 1#) - -elemPS :: Char -> PackedString -> Bool -elemPS (C# ch) ps - = let - len = lengthPS# ps - break_pt = first_char_pos_that_satisfies - (`eqChar#` ch) - ps - len - 0# - in - break_pt <# len - -char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# - -char_pos_that_dissatisfies p ps len pos - | pos >=# len = pos -- end - | p (indexPS# ps pos) = -- predicate satisfied; keep going - char_pos_that_dissatisfies p ps len (pos +# 1#) - | otherwise = pos -- predicate not satisfied - -first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# -first_char_pos_that_satisfies p ps len pos - | pos >=# len = pos -- end - | p (indexPS# ps pos) = pos -- got it! - | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#) - --- ToDo: could certainly go quicker -spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) -spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps) - -breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) -breakPS p ps = spanPS (not . p) ps - -linesPS :: PackedString -> [PackedString] -linesPS ps = splitPS '\n' ps - -wordsPS :: PackedString -> [PackedString] -wordsPS ps = splitWithPS isSpace ps - -reversePS :: PackedString -> PackedString -reversePS ps = - if nullPS ps then -- don't create stuff unnecessarily. - ps - else - runST ( - new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte! - fill_in arr# (length -# 1#) 0# >> - freeze_ps_array arr# >>= \ (ByteArray _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length in - return (PS frozen# length has_null)) - where - length = lengthPS# ps - - fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s () - fill_in arr_in# n i = - let - ch = indexPS# ps n - in - write_ps_array arr_in# i ch >> - if n ==# 0# then - write_ps_array arr_in# (i +# 1#) (chr# 0#) >> - return () - else - fill_in arr_in# (n -# 1#) (i +# 1#) - -concatPS :: [PackedString] -> PackedString -concatPS [] = nilPS -concatPS pss - = let - tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x } - tot_len = I# tot_len# - in - runST ( - new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte! - packum arr# pss 0# >> - freeze_ps_array arr# >>= \ (ByteArray _ frozen#) -> - - let has_null = byteArrayHasNUL# frozen# tot_len# in - - return (PS frozen# tot_len# has_null) - ) - where - packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s () - - packum arr [] pos - = write_ps_array arr pos (chr# 0#) >> - return () - packum arr (ps : pss) pos - = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) -> - packum arr pss next_pos - - fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int - - fill arr arr_i ps ps_i ps_len - | ps_i ==# ps_len - = return (I# (arr_i +# ps_len)) - | otherwise - = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >> - fill arr arr_i ps (ps_i +# 1#) ps_len - ------------------------------------------------------------- -joinPS :: PackedString -> [PackedString] -> PackedString -joinPS filler pss = concatPS (splice pss) - where - splice [] = [] - splice [x] = [x] - splice (x:y:xs) = x:filler:splice (y:xs) - --- ToDo: the obvious generalisation -{- - Some properties that hold: - - * splitPS x ls = ls' - where False = any (map (x `elemPS`) ls') - False = any (map (nullPS) ls') - - * all x's have been chopped out. - * no empty PackedStrings in returned list. A conseq. - of this is: - splitPS x nilPS = [] - - - * joinPS (packString [x]) (_splitPS x ls) = ls - --} - -splitPS :: Char -> PackedString -> [PackedString] -splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch) - -splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] -splitWithPS pred ps = - splitify 0# - where - len = lengthPS# ps - - splitify n - | n >=# len = [] - | otherwise = - let - break_pt = - first_char_pos_that_satisfies - (\ c -> pred (C# c)) - ps - len - n - in - if break_pt ==# n then -- immediate match, no substring to cut out. - splitify (break_pt +# 1#) - else - substrPS# ps n (break_pt -# 1#): -- leave out the matching character - splitify (break_pt +# 1#) -\end{code} - -%************************************************************************ -%* * -\subsection{Local utility functions} -%* * -%************************************************************************ - -The definition of @_substrPS@ is essentially: -@take (end - begin + 1) (drop begin str)@. - -\begin{code} -substrPS :: PackedString -> Int -> Int -> PackedString -substrPS ps (I# begin) (I# end) = substrPS# ps begin end - -substrPS# ps s e - | s <# 0# || e <# s - = error "substrPS: bounds out of range" - - | s >=# len || result_len# <=# 0# - = nilPS - - | otherwise - = runST ( - new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte! - fill_in ch_arr 0# >> - freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) -> - - let has_null = byteArrayHasNUL# frozen# result_len# in - - return (PS frozen# result_len# has_null) - ) - where - len = lengthPS# ps - - result_len# = (if e <# len then (e +# 1#) else len) -# s - result_len = I# result_len# - - ----------------------- - fill_in :: MutableByteArray s Int -> Int# -> ST s () - - fill_in arr_in# idx - | idx ==# result_len# - = write_ps_array arr_in# idx (chr# 0#) >> - return () - | otherwise - = let - ch = indexPS# ps (s +# idx) - in - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) -\end{code} - -(Very :-) ``Specialised'' versions of some CharArray things... - -\begin{code} -new_ps_array :: Int# -> ST s (MutableByteArray s Int) -write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s () -freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int) - -new_ps_array size = ST $ \ (S# s) -> - case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# -> - (MutableByteArray bot barr#, S# s2#)} - where - bot = error "new_ps_array" - -write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ (S# s#) -> - case writeCharArray# barr# n ch s# of { s2# -> - ((), S# s2#)} - --- same as unsafeFreezeByteArray -freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ (S# s#) -> - case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# -> - (ByteArray ixs frozen#, S# s2#) } -\end{code} - - -%********************************************************* -%* * -\subsection{Packing and unpacking C strings} -%* * -%********************************************************* - -\begin{code} -unpackCString :: Addr -> [Char] - --- Calls to the next four are injected by the compiler itself, --- to deal with literal strings -packCString# :: [Char] -> ByteArray# -unpackCString# :: Addr# -> [Char] -unpackCString2# :: Addr# -> Int# -> [Char] -unpackAppendCString# :: Addr# -> [Char] -> [Char] -unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a - -packCString# str = case (packString str) of { PS bytes _ _ -> bytes } - -unpackCString (A# addr) = unpackCString# addr - -unpackCString# addr - = unpack 0# - where - unpack nh - | ch `eqChar#` '\0'# = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh - -unpackCString2# addr len - -- This one is called by the compiler to unpack literal strings with NULs in them; rare. - = unpackPS (packCBytes (I# len) (A# addr)) - -unpackAppendCString# addr rest - = unpack 0# - where - unpack nh - | ch `eqChar#` '\0'# = rest - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh - -unpackFoldrCString# addr f z - = unpack 0# - where - unpack nh - | ch `eqChar#` '\0'# = z - | otherwise = C# ch `f` unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh - - -cStringToPS :: Addr -> PackedString -cStringToPS (A# a#) = -- the easy one; we just believe the caller - CPS a# len - where - len = case (strlen# a#) of { I# x -> x } - -packBytesForC :: [Char] -> ByteArray Int -packBytesForC str = psToByteArray (packString str) - -psToByteArrayST :: [Char] -> ST s (ByteArray Int) -psToByteArrayST str = - packStringST str >>= \ (PS bytes n has_null) -> - --later? ASSERT(not has_null) - return (ByteArray (0, I# (n -# 1#)) bytes) - -packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int) -packNBytesForCST len str = - packNCharsST len str >>= \ (PS bytes n has_null) -> - return (ByteArray (0, I# (n -# 1#)) bytes) - -packCBytes :: Int -> Addr -> PackedString -packCBytes len addr = runST (packCBytesST len addr) - -packCBytesST :: Int -> Addr -> ST s PackedString -packCBytesST len@(I# length#) (A# addr) = - {- - allocate an array that will hold the string - (not forgetting the NUL byte at the end) - -} - new_ps_array (length# +# 1#) >>= \ ch_array -> - -- fill in packed string from "addr" - fill_in ch_array 0# >> - -- freeze the puppy: - freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length# in - return (PS frozen# length# has_null) - where - fill_in :: MutableByteArray s Int -> Int# -> ST s () - - fill_in arr_in# idx - | idx ==# length# - = write_ps_array arr_in# idx (chr# 0#) >> - return () - | otherwise - = case (indexCharOffAddr# addr idx) of { ch -> - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) } - -\end{code} -- 1.7.10.4