X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FPackedString.hs;h=d2998bdae76a13955cc0e846afd1bdb21ecb43be;hb=f7a485978f04e84b086f1974b88887cc72d832d0;hp=6fc1a8f2beab4bb1e770855ebf14e151150596bd;hpb=7f1f4e7a695c402ddd3a1dc2cc7114e649a78ebc;p=ghc-base.git diff --git a/Data/PackedString.hs b/Data/PackedString.hs index 6fc1a8f..d2998bd 100644 --- a/Data/PackedString.hs +++ b/Data/PackedString.hs @@ -1,18 +1,17 @@ -{-# OPTIONS -#include "PackedString.h" #-} ----------------------------------------------------------------------------- --- +-- | -- Module : Data.PackedString -- Copyright : (c) The University of Glasgow 2001 --- License : BSD-style (see the file libraries/core/LICENSE) +-- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : experimental --- Portability : non-portable --- --- $Id: PackedString.hs,v 1.1 2001/06/28 14:15:02 simonmar Exp $ +-- Portability : portable -- -- The PackedString type, and associated operations. --- GHC implementation by Bryan O'Sullivan. +-- +-- Original GHC implementation by Bryan O\'Sullivan, +-- rewritten to use UArray by Simon Marlow. -- ----------------------------------------------------------------------------- @@ -20,21 +19,8 @@ module Data.PackedString ( PackedString, -- abstract, instances: Eq, Ord, Show, Typeable -- Creating the beasts - packString, -- :: [Char] -> PackedString - packStringST, -- :: [Char] -> ST s PackedString - packCBytesST, -- :: Int -> Ptr a -> ST s PackedString - - byteArrayToPS, -- :: ByteArray Int -> PackedString - cByteArrayToPS, -- :: ByteArray Int -> PackedString - unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString - - psToByteArray, -- :: PackedString -> ByteArray Int - psToCString, -- :: PackedString -> Ptr a - isCString, -- :: PackedString -> Bool - - unpackPS, -- :: PackedString -> [Char] - unpackNBytesPS, -- :: PackedString -> Int -> [Char] - unpackPSIO, -- :: PackedString -> IO [Char] + packString, -- :: [Char] -> PackedString + unpackPS, -- :: PackedString -> [Char] hPutPS, -- :: Handle -> PackedString -> IO () hGetPS, -- :: Handle -> Int -> IO PackedString @@ -46,15 +32,19 @@ module Data.PackedString ( nullPS, -- :: PackedString -> Bool appendPS, -- :: PackedString -> PackedString -> PackedString lengthPS, -- :: PackedString -> Int - {- 0-origin indexing into the string -} indexPS, -- :: PackedString -> Int -> Char mapPS, -- :: (Char -> Char) -> PackedString -> PackedString filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString - foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a - foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a + reversePS, -- :: PackedString -> PackedString + concatPS, -- :: [PackedString] -> PackedString + elemPS, -- :: Char -> PackedString -> Bool + substrPS, -- :: PackedString -> Int -> Int -> PackedString takePS, -- :: Int -> PackedString -> PackedString dropPS, -- :: Int -> PackedString -> PackedString splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString) + + foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a + foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) @@ -62,428 +52,86 @@ module Data.PackedString ( linesPS, -- :: PackedString -> [PackedString] wordsPS, -- :: PackedString -> [PackedString] - reversePS, -- :: PackedString -> PackedString splitPS, -- :: Char -> PackedString -> [PackedString] splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString] - joinPS, -- :: PackedString -> [PackedString] -> PackedString - concatPS, -- :: [PackedString] -> PackedString - elemPS, -- :: Char -> PackedString -> Bool - {- - Pluck out a piece of a PS start and end - chars you want; both 0-origin-specified - -} - substrPS, -- :: PackedString -> Int -> Int -> PackedString - - comparePS -- :: PackedString -> PackedString -> Ordering +-- joinPS, -- :: PackedString -> [PackedString] -> PackedString ) where import Prelude -import Foreign -import Foreign.C - -import GHC.Prim -import GHC.Base -import GHC.ST -import GHC.ByteArr - -import GHC.Show ( showList__ ) -- ToDo: better -import GHC.Pack ( new_ps_array, freeze_ps_array, write_ps_array ) - -import Control.Monad.ST +import Data.Array.Unboxed +import Data.Array.IO +import Data.Dynamic +import Data.Char import System.IO -import System.IO.Unsafe ( unsafePerformIO ) -import GHC.IO ( hPutBufBA, hGetBufBA ) - -import Data.Ix -import Data.Char ( isSpace ) -import Data.Dynamic -- ----------------------------------------------------------------------------- -- PackedString type declaration -data PackedString - = PS ByteArray# -- the bytes - Int# -- length (*not* including NUL at the end) - Bool -- True <=> contains a NUL - | CPS Addr# -- pointer to the (null-terminated) bytes in C land - Int# -- length, as per strlen - -- definitely doesn't contain a NUL +newtype PackedString = PS (UArray Int Char) instance Eq PackedString where - x == y = compare x y == EQ - x /= y = compare x y /= EQ + (PS x) == (PS y) = x == y instance Ord PackedString where - compare = comparePS - x <= y = compare x y /= GT - x < y = compare x y == LT - x >= y = compare x y /= LT - x > y = compare x y == GT - max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x } - min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y } + compare (PS x) (PS y) = compare x y --instance Read PackedString: ToDo instance Show PackedString where showsPrec p ps r = showsPrec p (unpackPS ps) r - showList = showList__ (showsPrec 0) #include "Dynamic.h" INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString") -- ----------------------------------------------------------------------------- --- PackedString instances - --- We try hard to make this go fast: - -comparePS :: PackedString -> PackedString -> Ordering - -comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2) - | not has_null1 && not has_null2 - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray 0 (I# (len1 -# 1#)) bs1 - ba2 = ByteArray 0 (I# (len2 -# 1#)) bs2 - -comparePS (PS bs1 len1 has_null1) (CPS bs2 _) - | not has_null1 - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray 0 (I# (len1 -# 1#)) bs1 - ba2 = Ptr bs2 - -comparePS (CPS bs1 len1) (CPS bs2 _) - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = Ptr bs1 - ba2 = Ptr bs2 - -comparePS a@(CPS _ _) b@(PS _ _ has_null2) - | not has_null2 - = -- try them the other way 'round - case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT } - -comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True) - = looking_at 0# - where - end1 = lengthPS# ps1 -# 1# - end2 = lengthPS# ps2 -# 1# - - looking_at char# - = if char# ># end1 then - if char# ># end2 then -- both strings ran out at once - EQ - else -- ps1 ran out before ps2 - LT - else if char# ># end2 then - GT -- ps2 ran out before ps1 - else - let - ch1 = indexPS# ps1 char# - ch2 = indexPS# ps2 char# - in - if ch1 `eqChar#` ch2 then - looking_at (char# +# 1#) - else if ch1 `ltChar#` ch2 then LT - else GT - - --- ----------------------------------------------------------------------------- -- Constructor functions --- Easy ones first. @packString@ requires getting some heap-bytes and --- scribbling stuff into them. - nilPS :: PackedString -nilPS = CPS ""# 0# +nilPS = PS (array (0,-1) []) consPS :: Char -> PackedString -> PackedString consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better packString :: [Char] -> PackedString -packString str = runST (packStringST str) - -packStringST :: [Char] -> ST s PackedString -packStringST str = - let len = length str in - packNCharsST len str - -packNCharsST :: Int -> [Char] -> ST s PackedString -packNCharsST (I# length#) str = - {- - allocate an array that will hold the string - (not forgetting the NUL byte at the end) - -} - new_ps_array (length# +# 1#) >>= \ ch_array -> - -- fill in packed string from "str" - fill_in ch_array 0# str >> - -- freeze the puppy: - freeze_ps_array ch_array length# >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length# in - return (PS frozen# length# has_null) - where - fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s () - fill_in arr_in# idx [] = - write_ps_array arr_in# idx (chr# 0#) >> - return () - - fill_in arr_in# idx (C# c : cs) = - write_ps_array arr_in# idx c >> - fill_in arr_in# (idx +# 1#) cs - -byteArrayToPS :: ByteArray Int -> PackedString -byteArrayToPS (ByteArray l u frozen#) = - let - ixs = (l,u) - n# = - case ( - if null (range ixs) - then 0 - else ((index ixs u) + 1) - ) of { I# x -> x } - in - PS frozen# n# (byteArrayHasNUL# frozen# n#) - --- byteArray is zero-terminated, make everything upto it --- a packed string. -cByteArrayToPS :: ByteArray Int -> PackedString -cByteArrayToPS (ByteArray l u frozen#) = - let - ixs = (l,u) - n# = - case ( - if null (range ixs) - then 0 - else ((index ixs u) + 1) - ) of { I# x -> x } - len# = findNull 0# - - findNull i# - | i# ==# n# = n# - | ch# `eqChar#` '\0'# = i# -- everything upto the sentinel - | otherwise = findNull (i# +# 1#) - where - ch# = indexCharArray# frozen# i# - in - PS frozen# len# False - -unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString -unsafeByteArrayToPS (ByteArray _ _ frozen#) (I# n#) - = PS frozen# n# (byteArrayHasNUL# frozen# n#) - -psToByteArray :: PackedString -> ByteArray Int -psToByteArray (PS bytes n _) = ByteArray 0 (I# (n -# 1#)) bytes - -psToByteArray (CPS addr len#) - = let - len = I# len# - byte_array_form = packCBytes len (Ptr addr) - in - case byte_array_form of { PS bytes _ _ -> - ByteArray 0 (len - 1) bytes } - --- isCString is useful when passing PackedStrings to the --- outside world, and need to figure out whether you can --- pass it as an Addr or ByteArray. --- -isCString :: PackedString -> Bool -isCString (CPS _ _ ) = True -isCString _ = False - --- psToCString doesn't add a zero terminator! --- this doesn't appear to be very useful --SDM -psToCString :: PackedString -> Ptr a -psToCString (CPS addr _) = (Ptr addr) -psToCString (PS bytes l# _) = - unsafePerformIO $ do - stuff <- mallocBytes (I# (l# +# 1#)) - let - fill_in n# i# - | n# ==# 0# = return () - | otherwise = do - let ch# = indexCharArray# bytes i# - pokeByteOff stuff (I# i#) (castCharToCChar (C# ch#)) - fill_in (n# -# 1#) (i# +# 1#) - fill_in l# 0# - pokeByteOff stuff (I# l#) (C# '\0'#) - return stuff +packString str = packNChars (length str) str + +packNChars :: Int -> [Char] -> PackedString +packNChars len str = PS (array (0,len-1) (zip [0..] str)) -- ----------------------------------------------------------------------------- -- Destructor functions (taking PackedStrings apart) --- OK, but this code gets *hammered*: --- unpackPS ps --- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ] - unpackPS :: PackedString -> [Char] -unpackPS (PS bytes len _) = unpack 0# - where - unpack nh - | nh >=# len = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharArray# bytes nh - -unpackPS (CPS addr _) = unpack 0# - where - unpack nh - | ch `eqChar#` '\0'# = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh - -unpackNBytesPS :: PackedString -> Int -> [Char] -unpackNBytesPS ps len@(I# l#) - | len < 0 = error ("PackedString.unpackNBytesPS: negative length "++ show len) - | len == 0 = [] - | otherwise = - case ps of - PS bytes len# has_null -> unpackPS (PS bytes (min# len# l#) has_null) - CPS a len# -> unpackPS (CPS a (min# len# l#)) - where - min# x# y# - | x# <# y# = x# - | otherwise = y# - -unpackPSIO :: PackedString -> IO String -unpackPSIO ps@(PS bytes _ _) = return (unpackPS ps) -unpackPSIO (CPS addr _) = unpack 0# - where - unpack nh = do - ch <- peekByteOff (Ptr addr) (I# nh) - let c = castCCharToChar ch - if c == '\0' - then return [] - else do - ls <- unpack (nh +# 1#) - return (c : ls) - --- Output a packed string via a handle: - -hPutPS :: Handle -> PackedString -> IO () -hPutPS handle (CPS a# len#) = hPutBuf handle (Ptr a#) (I# len#) -hPutPS handle (PS ba# len# _) = do - let mba = MutableByteArray (bottom::Int) bottom (unsafeCoerce# ba#) - hPutBufBA handle mba (I# len#) - where - bottom = error "hPutPS" - --- The dual to @_putPS@, note that the size of the chunk specified --- is the upper bound of the size of the chunk returned. - -hGetPS :: Handle -> Int -> IO PackedString -hGetPS hdl len@(I# len#) - | len# <=# 0# = return nilPS -- I'm being kind here. - | otherwise = - -- Allocate an array for system call to store its bytes into. - stToIO (new_ps_array len# ) >>= \ ch_arr -> - stToIO (freeze_ps_array ch_arr len#) >>= \ (ByteArray _ _ frozen#) -> - hGetBufBA hdl ch_arr len >>= \ (I# read#) -> - if read# ==# 0# then -- EOF or other error - ioError (userError "hGetPS: EOF reached or other error") - else - {- - The system call may not return the number of - bytes requested. Instead of failing with an error - if the number of bytes read is less than requested, - a packed string containing the bytes we did manage - to snarf is returned. - -} - let - has_null = byteArrayHasNUL# frozen# read# - in - return (PS frozen# read# has_null) +unpackPS (PS ps) = elems ps -- ----------------------------------------------------------------------------- -- List-mimicking functions for PackedStrings --- First, the basic functions that do look into the representation; --- @indexPS@ is the most important one. - -lengthPS :: PackedString -> Int -lengthPS ps = I# (lengthPS# ps) - -{-# INLINE lengthPS# #-} - -lengthPS# :: PackedString -> Int# -lengthPS# (PS _ i _) = i -lengthPS# (CPS _ i) = i - -{-# INLINE strlen# #-} - -strlen# :: Addr# -> Int -strlen# a - = unsafePerformIO ( - _ccall_ strlen (Ptr a) >>= \ len@(I# _) -> - return len - ) - -byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool -byteArrayHasNUL# bs len - = unsafePerformIO ( - _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) -> - return ( - if res ==# 0# then False else True - )) - where - ba = ByteArray 0 (I# (len -# 1#)) bs - ------------------------ +lengthPS :: PackedString -> Int +lengthPS (PS ps) = rangeSize (bounds ps) indexPS :: PackedString -> Int -> Char -indexPS ps (I# n) = C# (indexPS# ps n) - -{-# INLINE indexPS# #-} - -indexPS# :: PackedString -> Int# -> Char# -indexPS# (PS bs i _) n - = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10) - indexCharArray# bs n - -indexPS# (CPS a _) n - = indexCharOffAddr# a n - --- Now, the rest of the functions can be defined without digging --- around in the representation. +indexPS (PS ps) i = ps ! i headPS :: PackedString -> Char headPS ps - | nullPS ps = error "headPS: head []" - | otherwise = C# (indexPS# ps 0#) + | nullPS ps = error "Data.PackedString.headPS: head []" + | otherwise = indexPS ps 0 tailPS :: PackedString -> PackedString tailPS ps - | len <=# 0# = error "tailPS: tail []" - | len ==# 1# = nilPS - | otherwise = substrPS# ps 1# (len -# 1#) + | len <= 0 = error "Data.PackedString.tailPS: tail []" + | len == 1 = nilPS + | otherwise = substrPS ps 1 (len - 1) where - len = lengthPS# ps + len = lengthPS ps nullPS :: PackedString -> Bool -nullPS (PS _ i _) = i ==# 0# -nullPS (CPS _ i) = i ==# 0# +nullPS (PS ps) = rangeSize (bounds ps) == 0 appendPS :: PackedString -> PackedString -> PackedString appendPS xs ys @@ -491,224 +139,36 @@ appendPS xs ys | nullPS ys = xs | otherwise = concatPS [xs,ys] -mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-} -mapPS f xs = - if nullPS xs then - xs - else - runST ( - new_ps_array (length +# 1#) >>= \ ps_arr -> - whizz ps_arr length 0# >> - freeze_ps_array ps_arr length >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length in - return (PS frozen# length has_null)) - where - length = lengthPS# xs - - whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s () - whizz arr# n i - | n ==# 0# - = write_ps_array arr# i (chr# 0#) >> - return () - | otherwise - = let - ch = indexPS# xs i - in - write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >> - whizz arr# (n -# 1#) (i +# 1#) +mapPS :: (Char -> Char) -> PackedString -> PackedString +mapPS f (PS ps) = PS (amap f ps) filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-} -filterPS pred ps = - if nullPS ps then - ps - else - {- - Filtering proceeds as follows: - - * traverse the list, applying the pred. to each element, - remembering the positions where it was satisfied. - - Encode these positions using a run-length encoding of the gaps - between the matching positions. - - * Allocate a MutableByteArray in the heap big enough to hold - all the matched entries, and copy the elements that matched over. - - A better solution that merges the scan© passes into one, - would be to copy the filtered elements over into a growable - buffer. No such operation currently supported over - MutableByteArrays (could of course use malloc&realloc) - But, this solution may in the case of repeated realloc's - be worse than the current solution. - -} - runST ( - let - (rle,len_filtered) = filter_ps (len# -# 1#) 0# 0# [] - len_filtered# = case len_filtered of { I# x# -> x#} - in - if len# ==# len_filtered# then - {- not much filtering as everything passed through. -} - return ps - else if len_filtered# ==# 0# then - return nilPS - else - new_ps_array (len_filtered# +# 1#) >>= \ ps_arr -> - copy_arr ps_arr rle 0# 0# >> - freeze_ps_array ps_arr len_filtered# >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# len_filtered# in - return (PS frozen# len_filtered# has_null)) - where - len# = lengthPS# ps - - matchOffset :: Int# -> [Char] -> (Int,[Char]) - matchOffset off [] = (I# off,[]) - matchOffset off (C# c:cs) = - let - x = ord# c - off' = off +# x - in - if x==# 0# then -- escape code, add 255# - matchOffset off' cs - else - (I# off', cs) - - copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s () - copy_arr _ [_] _ _ = return () - copy_arr arr# ls n i = - let - (x,ls') = matchOffset 0# ls - n' = n +# (case x of { (I# x#) -> x#}) -# 1# - ch = indexPS# ps n' - in - write_ps_array arr# i ch >> - copy_arr arr# ls' (n' +# 1#) (i +# 1#) - - esc :: Int# -> Int# -> [Char] -> [Char] - esc v 0# ls = (C# (chr# v)):ls - esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls) - - filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int) - filter_ps n hits run acc - | n <# 0# = - let - escs = run `quotInt#` 255# - v = run `remInt#` 255# - in - (esc (v +# 1#) escs acc, I# hits) - | otherwise - = let - ch = indexPS# ps n - n' = n -# 1# - in - if pred (C# ch) then - let - escs = run `quotInt#` 255# - v = run `remInt#` 255# - acc' = esc (v +# 1#) escs acc - in - filter_ps n' (hits +# 1#) 0# acc' - else - filter_ps n' hits (run +# 1#) acc - +filterPS pred ps = packString (filter pred (unpackPS ps)) foldlPS :: (a -> Char -> a) -> a -> PackedString -> a -foldlPS f b ps - = if nullPS ps then - b - else - whizzLR b 0# - where - len = lengthPS# ps - - --whizzLR :: a -> Int# -> a - whizzLR b idx - | idx ==# len = b - | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#) - +foldlPS f b ps = foldl f b (unpackPS ps) foldrPS :: (Char -> a -> a) -> a -> PackedString -> a -foldrPS f v ps - | nullPS ps = v - | otherwise = whizzRL v len - where - len = lengthPS# ps - - --whizzRL :: a -> Int# -> a - whizzRL b idx - | idx <# 0# = b - | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#) +foldrPS f v ps = foldr f v (unpackPS ps) takePS :: Int -> PackedString -> PackedString -takePS (I# n) ps - | n ==# 0# = nilPS - | otherwise = substrPS# ps 0# (n -# 1#) +takePS n ps = substrPS ps 0 (n-1) dropPS :: Int -> PackedString -> PackedString -dropPS (I# n) ps - | n ==# len = nilPS - | otherwise = substrPS# ps n (lengthPS# ps -# 1#) - where - len = lengthPS# ps +dropPS n ps = substrPS ps n (lengthPS ps - 1) splitAtPS :: Int -> PackedString -> (PackedString, PackedString) splitAtPS n ps = (takePS n ps, dropPS n ps) takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString -takeWhilePS pred ps - = let - break_pt = char_pos_that_dissatisfies - (\ c -> pred (C# c)) - ps - (lengthPS# ps) - 0# - in - if break_pt ==# 0# then - nilPS - else - substrPS# ps 0# (break_pt -# 1#) +takeWhilePS pred ps = packString (takeWhile pred (unpackPS ps)) dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString -dropWhilePS pred ps - = let - len = lengthPS# ps - break_pt = char_pos_that_dissatisfies - (\ c -> pred (C# c)) - ps - len - 0# - in - if len ==# break_pt then - nilPS - else - substrPS# ps break_pt (len -# 1#) +dropWhilePS pred ps = packString (dropWhile pred (unpackPS ps)) elemPS :: Char -> PackedString -> Bool -elemPS (C# ch) ps - = let - len = lengthPS# ps - break_pt = first_char_pos_that_satisfies - (`eqChar#` ch) - ps - len - 0# - in - break_pt <# len - -char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# - -char_pos_that_dissatisfies p ps len pos - | pos >=# len = pos -- end - | p (indexPS# ps pos) = -- predicate satisfied; keep going - char_pos_that_dissatisfies p ps len (pos +# 1#) - | otherwise = pos -- predicate not satisfied - -first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# -first_char_pos_that_satisfies p ps len pos - | pos >=# len = pos -- end - | p (indexPS# ps pos) = pos -- got it! - | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#) - --- ToDo: could certainly go quicker +elemPS c ps = c `elem` unpackPS ps + spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps) @@ -722,66 +182,13 @@ wordsPS :: PackedString -> [PackedString] wordsPS ps = splitWithPS isSpace ps reversePS :: PackedString -> PackedString -reversePS ps = - if nullPS ps then -- don't create stuff unnecessarily. - ps - else - runST ( - new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte! - fill_in arr# (length -# 1#) 0# >> - freeze_ps_array arr# length >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length in - return (PS frozen# length has_null)) - where - length = lengthPS# ps - - fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s () - fill_in arr_in# n i = - let - ch = indexPS# ps n - in - write_ps_array arr_in# i ch >> - if n ==# 0# then - write_ps_array arr_in# (i +# 1#) (chr# 0#) >> - return () - else - fill_in arr_in# (n -# 1#) (i +# 1#) - -concatPS :: [PackedString] -> PackedString -concatPS [] = nilPS -concatPS pss - = let - tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x } - in - runST ( - new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte! - packum arr# pss 0# >> - freeze_ps_array arr# tot_len# >>= \ (ByteArray _ _ frozen#) -> - - let has_null = byteArrayHasNUL# frozen# tot_len# in - - return (PS frozen# tot_len# has_null) - ) - where - packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s () - - packum arr [] pos - = write_ps_array arr pos (chr# 0#) >> - return () - packum arr (ps : pss) pos - = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) -> - packum arr pss next_pos - - fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int +reversePS ps = packString (reverse (unpackPS ps)) - fill arr arr_i ps ps_i ps_len - | ps_i ==# ps_len - = return (I# (arr_i +# ps_len)) - | otherwise - = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >> - fill arr arr_i ps (ps_i +# 1#) ps_len +concatPS :: [PackedString] -> PackedString +concatPS pss = packString (concat (map unpackPS pss)) ------------------------------------------------------------ +{- joinPS :: PackedString -> [PackedString] -> PackedString joinPS filler pss = concatPS (splice pss) where @@ -806,32 +213,33 @@ joinPS filler pss = concatPS (splice pss) * joinPS (packString [x]) (_splitPS x ls) = ls -} +-} splitPS :: Char -> PackedString -> [PackedString] -splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch) +splitPS c = splitWithPS (== c) splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] -splitWithPS pred ps = - splitify 0# +splitWithPS pred (PS ps) = + splitify 0 where - len = lengthPS# ps + len = lengthPS (PS ps) splitify n - | n >=# len = [] + | n >= len = [] | otherwise = let - break_pt = - first_char_pos_that_satisfies - (\ c -> pred (C# c)) - ps - len - n + break_pt = first_pos_that_satisfies pred ps len n in - if break_pt ==# n then -- immediate match, no substring to cut out. - splitify (break_pt +# 1#) + if break_pt == n then -- immediate match, no substring to cut out. + splitify (break_pt + 1) else - substrPS# ps n (break_pt -# 1#): -- leave out the matching character - splitify (break_pt +# 1#) + substrPS (PS ps) n (break_pt - 1) -- leave out the matching character + : splitify (break_pt + 1) + +first_pos_that_satisfies pred ps len n = + case [ m | m <- [n..len], pred (ps ! m) ] of + [] -> len + (m:_) -> m -- ----------------------------------------------------------------------------- -- Local utility functions @@ -840,75 +248,24 @@ splitWithPS pred ps = -- @take (end - begin + 1) (drop begin str)@. substrPS :: PackedString -> Int -> Int -> PackedString -substrPS ps (I# begin) (I# end) = substrPS# ps begin end - -substrPS# :: PackedString -> Int# -> Int# -> PackedString -substrPS# ps s e - | s <# 0# || s >=# len || result_len# <=# 0# - = nilPS - - | otherwise - = runST ( - new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte! - fill_in ch_arr 0# >> - freeze_ps_array ch_arr result_len# >>= \ (ByteArray _ _ frozen#) -> - - let has_null = byteArrayHasNUL# frozen# result_len# in - - return (PS frozen# result_len# has_null) - ) - where - len = lengthPS# ps - - result_len# = (if e <# len then (e +# 1#) else len) -# s +substrPS (PS ps) begin end = packString [ ps ! i | i <- [begin..end] ] - ----------------------- - fill_in :: MutableByteArray s Int -> Int# -> ST s () +-- ----------------------------------------------------------------------------- +-- hPutPS - fill_in arr_in# idx - | idx ==# result_len# - = write_ps_array arr_in# idx (chr# 0#) >> - return () - | otherwise - = let - ch = indexPS# ps (s +# idx) - in - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) +hPutPS :: Handle -> PackedString -> IO () +hPutPS h (PS ps) = do + let l = lengthPS (PS ps) + arr <- newArray_ (0, l-1) + sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ] + hPutArray h arr l -- ----------------------------------------------------------------------------- --- Packing and unpacking C strings +-- hGetPS -cStringToPS :: Ptr a -> PackedString -cStringToPS (Ptr a#) = -- the easy one; we just believe the caller - CPS a# len - where - len = case (strlen# a#) of { I# x -> x } - -packCBytes :: Int -> Ptr a -> PackedString -packCBytes len addr = runST (packCBytesST len addr) - -packCBytesST :: Int -> Ptr a -> ST s PackedString -packCBytesST (I# length#) (Ptr addr) = - {- - allocate an array that will hold the string - (not forgetting the NUL byte at the end) - -} - new_ps_array (length# +# 1#) >>= \ ch_array -> - -- fill in packed string from "addr" - fill_in ch_array 0# >> - -- freeze the puppy: - freeze_ps_array ch_array length# >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length# in - return (PS frozen# length# has_null) - where - fill_in :: MutableByteArray s Int -> Int# -> ST s () - - fill_in arr_in# idx - | idx ==# length# - = write_ps_array arr_in# idx (chr# 0#) >> - return () - | otherwise - = case (indexCharOffAddr# addr idx) of { ch -> - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) } +hGetPS :: Handle -> Int -> IO PackedString +hGetPS h i = do + arr <- newArray_ (0, i-1) + l <- hGetArray h arr i + chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1] + return (packString chars)