X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FPackedString.hs;h=46fb4baf706a8c5d9cf2ab5a8d6f03b83cc855c2;hb=b9b6e38a1ebb5f05b382609fe0776d91cdd1090b;hp=8feb2a85e3d952bce8ee3b3272dc4c99d56dc424;hpb=5545727d5a6a1fc6d5d00f32a92a8fdf0fb7ca77;p=haskell-directory.git diff --git a/Data/PackedString.hs b/Data/PackedString.hs index 8feb2a8..46fb4ba 100644 --- a/Data/PackedString.hs +++ b/Data/PackedString.hs @@ -1,44 +1,35 @@ -{-# OPTIONS -#include "PackedString.h" #-} ----------------------------------------------------------------------------- --- +-- | -- Module : Data.PackedString -- Copyright : (c) The University of Glasgow 2001 --- License : BSD-style (see the file libraries/core/LICENSE) +-- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : experimental -- Portability : portable -- --- $Id: PackedString.hs,v 1.2 2001/07/03 11:37:50 simonmar Exp $ --- --- The PackedString type, and associated operations. --- GHC implementation by Bryan O'Sullivan. +-- An efficient implementation of strings. -- ----------------------------------------------------------------------------- +-- Original GHC implementation by Bryan O\'Sullivan, +-- rewritten to use UArray by Simon Marlow. + module Data.PackedString ( + -- * The @PackedString@ type PackedString, -- abstract, instances: Eq, Ord, Show, Typeable - -- Creating the beasts - packString, -- :: [Char] -> PackedString - packStringST, -- :: [Char] -> ST s PackedString - packCBytesST, -- :: Int -> Ptr a -> ST s PackedString - - byteArrayToPS, -- :: ByteArray Int -> PackedString - cByteArrayToPS, -- :: ByteArray Int -> PackedString - unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString - - psToByteArray, -- :: PackedString -> ByteArray Int - psToCString, -- :: PackedString -> Ptr a - isCString, -- :: PackedString -> Bool - - unpackPS, -- :: PackedString -> [Char] - unpackNBytesPS, -- :: PackedString -> Int -> [Char] - unpackPSIO, -- :: PackedString -> IO [Char] + -- * Converting to and from @PackedString@s + packString, -- :: String -> PackedString + unpackPS, -- :: PackedString -> String +#ifndef __NHC__ + -- * I\/O with @PackedString@s hPutPS, -- :: Handle -> PackedString -> IO () hGetPS, -- :: Handle -> Int -> IO PackedString +#endif + -- * List-like manipulation functions nilPS, -- :: PackedString consPS, -- :: Char -> PackedString -> PackedString headPS, -- :: PackedString -> Char @@ -46,742 +37,218 @@ module Data.PackedString ( nullPS, -- :: PackedString -> Bool appendPS, -- :: PackedString -> PackedString -> PackedString lengthPS, -- :: PackedString -> Int - {- 0-origin indexing into the string -} indexPS, -- :: PackedString -> Int -> Char mapPS, -- :: (Char -> Char) -> PackedString -> PackedString filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString - foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a - foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a + reversePS, -- :: PackedString -> PackedString + concatPS, -- :: [PackedString] -> PackedString + elemPS, -- :: Char -> PackedString -> Bool + substrPS, -- :: PackedString -> Int -> Int -> PackedString takePS, -- :: Int -> PackedString -> PackedString dropPS, -- :: Int -> PackedString -> PackedString splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString) + + foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a + foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) linesPS, -- :: PackedString -> [PackedString] - + unlinesPS, -- :: [PackedString] -> PackedString wordsPS, -- :: PackedString -> [PackedString] - reversePS, -- :: PackedString -> PackedString + unwordsPS, -- :: [PackedString] -> PackedString splitPS, -- :: Char -> PackedString -> [PackedString] splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString] - joinPS, -- :: PackedString -> [PackedString] -> PackedString - concatPS, -- :: [PackedString] -> PackedString - elemPS, -- :: Char -> PackedString -> Bool - {- - Pluck out a piece of a PS start and end - chars you want; both 0-origin-specified - -} - substrPS, -- :: PackedString -> Int -> Int -> PackedString - - comparePS -- :: PackedString -> PackedString -> Ordering + joinPS, -- :: PackedString -> [PackedString] -> PackedString ) where import Prelude -import Foreign -import Foreign.C +#ifndef __NHC__ -import GHC.Prim -import GHC.Base -import GHC.ST -import GHC.ByteArr - -import GHC.Show ( showList__ ) -- ToDo: better -import GHC.Pack ( new_ps_array, freeze_ps_array, write_ps_array ) - -import Control.Monad.ST +import Data.Array.Unboxed +import Data.Array.IO +import Data.Typeable +import Data.Char import System.IO -import System.IO.Unsafe ( unsafePerformIO ) -import GHC.IO ( hPutBufBA, hGetBufBA ) - -import Data.Ix -import Data.Char ( isSpace ) -import Data.Dynamic -- ----------------------------------------------------------------------------- -- PackedString type declaration -data PackedString - = PS ByteArray# -- the bytes - Int# -- length (*not* including NUL at the end) - Bool -- True <=> contains a NUL - | CPS Addr# -- pointer to the (null-terminated) bytes in C land - Int# -- length, as per strlen - -- definitely doesn't contain a NUL +-- | A space-efficient representation of a 'String', which supports various +-- efficient operations. A 'PackedString' contains full Unicode 'Char's. +newtype PackedString = PS (UArray Int Char) + +-- ToDo: we could support "slices", i.e. include offset and length fields into +-- the string, so that operations like take/drop could be O(1). Perhaps making +-- a slice should be conditional on the ratio of the slice/string size to +-- limit memory leaks. instance Eq PackedString where - x == y = compare x y == EQ - x /= y = compare x y /= EQ + (PS x) == (PS y) = x == y instance Ord PackedString where - compare = comparePS - x <= y = compare x y /= GT - x < y = compare x y == LT - x >= y = compare x y /= LT - x > y = compare x y == GT - max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x } - min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y } + compare (PS x) (PS y) = compare x y --instance Read PackedString: ToDo instance Show PackedString where showsPrec p ps r = showsPrec p (unpackPS ps) r - showList = showList__ (showsPrec 0) -#include "Dynamic.h" +#include "Typeable.h" INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString") -- ----------------------------------------------------------------------------- --- PackedString instances - --- We try hard to make this go fast: - -comparePS :: PackedString -> PackedString -> Ordering - -comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2) - | not has_null1 && not has_null2 - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray 0 (I# (len1 -# 1#)) bs1 - ba2 = ByteArray 0 (I# (len2 -# 1#)) bs2 - -comparePS (PS bs1 len1 has_null1) (CPS bs2 _) - | not has_null1 - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = ByteArray 0 (I# (len1 -# 1#)) bs1 - ba2 = Ptr bs2 - -comparePS (CPS bs1 len1) (CPS bs2 _) - = unsafePerformIO ( - _ccall_ strcmp ba1 ba2 >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then EQ - else GT - )) - where - ba1 = Ptr bs1 - ba2 = Ptr bs2 - -comparePS a@(CPS _ _) b@(PS _ _ has_null2) - | not has_null2 - = -- try them the other way 'round - case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT } - -comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True) - = looking_at 0# - where - end1 = lengthPS# ps1 -# 1# - end2 = lengthPS# ps2 -# 1# - - looking_at char# - = if char# ># end1 then - if char# ># end2 then -- both strings ran out at once - EQ - else -- ps1 ran out before ps2 - LT - else if char# ># end2 then - GT -- ps2 ran out before ps1 - else - let - ch1 = indexPS# ps1 char# - ch2 = indexPS# ps2 char# - in - if ch1 `eqChar#` ch2 then - looking_at (char# +# 1#) - else if ch1 `ltChar#` ch2 then LT - else GT - - --- ----------------------------------------------------------------------------- -- Constructor functions --- Easy ones first. @packString@ requires getting some heap-bytes and --- scribbling stuff into them. - +-- | The 'nilPS' value is the empty string. nilPS :: PackedString -nilPS = CPS ""# 0# +nilPS = PS (array (0,-1) []) +-- | The 'consPS' function prepends the given character to the +-- given string. consPS :: Char -> PackedString -> PackedString consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better -packString :: [Char] -> PackedString -packString str = runST (packStringST str) - -packStringST :: [Char] -> ST s PackedString -packStringST str = - let len = length str in - packNCharsST len str - -packNCharsST :: Int -> [Char] -> ST s PackedString -packNCharsST (I# length#) str = - {- - allocate an array that will hold the string - (not forgetting the NUL byte at the end) - -} - new_ps_array (length# +# 1#) >>= \ ch_array -> - -- fill in packed string from "str" - fill_in ch_array 0# str >> - -- freeze the puppy: - freeze_ps_array ch_array length# >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length# in - return (PS frozen# length# has_null) - where - fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s () - fill_in arr_in# idx [] = - write_ps_array arr_in# idx (chr# 0#) >> - return () - - fill_in arr_in# idx (C# c : cs) = - write_ps_array arr_in# idx c >> - fill_in arr_in# (idx +# 1#) cs - -byteArrayToPS :: ByteArray Int -> PackedString -byteArrayToPS (ByteArray l u frozen#) = - let - ixs = (l,u) - n# = - case ( - if null (range ixs) - then 0 - else ((index ixs u) + 1) - ) of { I# x -> x } - in - PS frozen# n# (byteArrayHasNUL# frozen# n#) - --- byteArray is zero-terminated, make everything upto it --- a packed string. -cByteArrayToPS :: ByteArray Int -> PackedString -cByteArrayToPS (ByteArray l u frozen#) = - let - ixs = (l,u) - n# = - case ( - if null (range ixs) - then 0 - else ((index ixs u) + 1) - ) of { I# x -> x } - len# = findNull 0# - - findNull i# - | i# ==# n# = n# - | ch# `eqChar#` '\0'# = i# -- everything upto the sentinel - | otherwise = findNull (i# +# 1#) - where - ch# = indexCharArray# frozen# i# - in - PS frozen# len# False - -unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString -unsafeByteArrayToPS (ByteArray _ _ frozen#) (I# n#) - = PS frozen# n# (byteArrayHasNUL# frozen# n#) - -psToByteArray :: PackedString -> ByteArray Int -psToByteArray (PS bytes n _) = ByteArray 0 (I# (n -# 1#)) bytes - -psToByteArray (CPS addr len#) - = let - len = I# len# - byte_array_form = packCBytes len (Ptr addr) - in - case byte_array_form of { PS bytes _ _ -> - ByteArray 0 (len - 1) bytes } - --- isCString is useful when passing PackedStrings to the --- outside world, and need to figure out whether you can --- pass it as an Addr or ByteArray. --- -isCString :: PackedString -> Bool -isCString (CPS _ _ ) = True -isCString _ = False - --- psToCString doesn't add a zero terminator! --- this doesn't appear to be very useful --SDM -psToCString :: PackedString -> Ptr a -psToCString (CPS addr _) = (Ptr addr) -psToCString (PS bytes l# _) = - unsafePerformIO $ do - stuff <- mallocBytes (I# (l# +# 1#)) - let - fill_in n# i# - | n# ==# 0# = return () - | otherwise = do - let ch# = indexCharArray# bytes i# - pokeByteOff stuff (I# i#) (castCharToCChar (C# ch#)) - fill_in (n# -# 1#) (i# +# 1#) - fill_in l# 0# - pokeByteOff stuff (I# l#) (C# '\0'#) - return stuff +-- | Convert a 'String' into a 'PackedString' +packString :: String -> PackedString +packString str = packNChars (length str) str + +-- | The 'packNChars' function creates a 'PackedString' out of the +-- first @len@ elements of the given 'String'. +packNChars :: Int -> [Char] -> PackedString +packNChars len str = PS (listArray (0,len-1) str) -- ----------------------------------------------------------------------------- -- Destructor functions (taking PackedStrings apart) --- OK, but this code gets *hammered*: --- unpackPS ps --- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ] - -unpackPS :: PackedString -> [Char] -unpackPS (PS bytes len _) = unpack 0# - where - unpack nh - | nh >=# len = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharArray# bytes nh - -unpackPS (CPS addr _) = unpack 0# - where - unpack nh - | ch `eqChar#` '\0'# = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharOffAddr# addr nh - -unpackNBytesPS :: PackedString -> Int -> [Char] -unpackNBytesPS ps len@(I# l#) - | len < 0 = error ("PackedString.unpackNBytesPS: negative length "++ show len) - | len == 0 = [] - | otherwise = - case ps of - PS bytes len# has_null -> unpackPS (PS bytes (min# len# l#) has_null) - CPS a len# -> unpackPS (CPS a (min# len# l#)) - where - min# x# y# - | x# <# y# = x# - | otherwise = y# - -unpackPSIO :: PackedString -> IO String -unpackPSIO ps@(PS bytes _ _) = return (unpackPS ps) -unpackPSIO (CPS addr _) = unpack 0# - where - unpack nh = do - ch <- peekByteOff (Ptr addr) (I# nh) - let c = castCCharToChar ch - if c == '\0' - then return [] - else do - ls <- unpack (nh +# 1#) - return (c : ls) - --- Output a packed string via a handle: - -hPutPS :: Handle -> PackedString -> IO () -hPutPS handle (CPS a# len#) = hPutBuf handle (Ptr a#) (I# len#) -hPutPS handle (PS ba# len# _) = do - let mba = MutableByteArray (bottom::Int) bottom (unsafeCoerce# ba#) - hPutBufBA handle mba (I# len#) - where - bottom = error "hPutPS" - --- The dual to @_putPS@, note that the size of the chunk specified --- is the upper bound of the size of the chunk returned. - -hGetPS :: Handle -> Int -> IO PackedString -hGetPS hdl len@(I# len#) - | len# <=# 0# = return nilPS -- I'm being kind here. - | otherwise = - -- Allocate an array for system call to store its bytes into. - stToIO (new_ps_array len# ) >>= \ ch_arr -> - stToIO (freeze_ps_array ch_arr len#) >>= \ (ByteArray _ _ frozen#) -> - hGetBufBA hdl ch_arr len >>= \ (I# read#) -> - if read# ==# 0# then -- EOF or other error - ioError (userError "hGetPS: EOF reached or other error") - else - {- - The system call may not return the number of - bytes requested. Instead of failing with an error - if the number of bytes read is less than requested, - a packed string containing the bytes we did manage - to snarf is returned. - -} - let - has_null = byteArrayHasNUL# frozen# read# - in - return (PS frozen# read# has_null) +-- | Convert a 'PackedString' into a 'String' +unpackPS :: PackedString -> String +unpackPS (PS ps) = elems ps -- ----------------------------------------------------------------------------- -- List-mimicking functions for PackedStrings --- First, the basic functions that do look into the representation; --- @indexPS@ is the most important one. - -lengthPS :: PackedString -> Int -lengthPS ps = I# (lengthPS# ps) - -{-# INLINE lengthPS# #-} - -lengthPS# :: PackedString -> Int# -lengthPS# (PS _ i _) = i -lengthPS# (CPS _ i) = i - -{-# INLINE strlen# #-} - -strlen# :: Addr# -> Int -strlen# a - = unsafePerformIO ( - _ccall_ strlen (Ptr a) >>= \ len@(I# _) -> - return len - ) - -byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool -byteArrayHasNUL# bs len - = unsafePerformIO ( - _ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) -> - return ( - if res ==# 0# then False else True - )) - where - ba = ByteArray 0 (I# (len -# 1#)) bs - ------------------------ +-- | The 'lengthPS' function returns the length of the input list. Analogous to 'length'. +lengthPS :: PackedString -> Int +lengthPS (PS ps) = rangeSize (bounds ps) +-- | The 'indexPS' function returns the character in the string at the given position. indexPS :: PackedString -> Int -> Char -indexPS ps (I# n) = C# (indexPS# ps n) - -{-# INLINE indexPS# #-} - -indexPS# :: PackedString -> Int# -> Char# -indexPS# (PS bs i _) n - = --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10) - indexCharArray# bs n - -indexPS# (CPS a _) n - = indexCharOffAddr# a n - --- Now, the rest of the functions can be defined without digging --- around in the representation. +indexPS (PS ps) i = ps ! i +-- | The 'headPS' function returns the first element of a 'PackedString' or throws an +-- error if the string is empty. headPS :: PackedString -> Char headPS ps - | nullPS ps = error "headPS: head []" - | otherwise = C# (indexPS# ps 0#) + | nullPS ps = error "Data.PackedString.headPS: head []" + | otherwise = indexPS ps 0 +-- | The 'tailPS' function returns the tail of a 'PackedString' or throws an error +-- if the string is empty. tailPS :: PackedString -> PackedString tailPS ps - | len <=# 0# = error "tailPS: tail []" - | len ==# 1# = nilPS - | otherwise = substrPS# ps 1# (len -# 1#) + | len <= 0 = error "Data.PackedString.tailPS: tail []" + | len == 1 = nilPS + | otherwise = substrPS ps 1 (len - 1) where - len = lengthPS# ps + len = lengthPS ps +-- | The 'nullPS' function returns True iff the argument is null. nullPS :: PackedString -> Bool -nullPS (PS _ i _) = i ==# 0# -nullPS (CPS _ i) = i ==# 0# +nullPS (PS ps) = rangeSize (bounds ps) == 0 +-- | The 'appendPS' function appends the second string onto the first. appendPS :: PackedString -> PackedString -> PackedString appendPS xs ys | nullPS xs = ys | nullPS ys = xs | otherwise = concatPS [xs,ys] -mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-} -mapPS f xs = - if nullPS xs then - xs - else - runST ( - new_ps_array (length +# 1#) >>= \ ps_arr -> - whizz ps_arr length 0# >> - freeze_ps_array ps_arr length >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length in - return (PS frozen# length has_null)) - where - length = lengthPS# xs - - whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s () - whizz arr# n i - | n ==# 0# - = write_ps_array arr# i (chr# 0#) >> - return () - | otherwise - = let - ch = indexPS# xs i - in - write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >> - whizz arr# (n -# 1#) (i +# 1#) +-- | The 'mapPS' function applies a function to each character in the string. +mapPS :: (Char -> Char) -> PackedString -> PackedString +mapPS f (PS ps) = PS (amap f ps) +-- | The 'filterPS' function filters out the appropriate substring. filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-} -filterPS pred ps = - if nullPS ps then - ps - else - {- - Filtering proceeds as follows: - - * traverse the list, applying the pred. to each element, - remembering the positions where it was satisfied. - - Encode these positions using a run-length encoding of the gaps - between the matching positions. - - * Allocate a MutableByteArray in the heap big enough to hold - all the matched entries, and copy the elements that matched over. - - A better solution that merges the scan© passes into one, - would be to copy the filtered elements over into a growable - buffer. No such operation currently supported over - MutableByteArrays (could of course use malloc&realloc) - But, this solution may in the case of repeated realloc's - be worse than the current solution. - -} - runST ( - let - (rle,len_filtered) = filter_ps (len# -# 1#) 0# 0# [] - len_filtered# = case len_filtered of { I# x# -> x#} - in - if len# ==# len_filtered# then - {- not much filtering as everything passed through. -} - return ps - else if len_filtered# ==# 0# then - return nilPS - else - new_ps_array (len_filtered# +# 1#) >>= \ ps_arr -> - copy_arr ps_arr rle 0# 0# >> - freeze_ps_array ps_arr len_filtered# >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# len_filtered# in - return (PS frozen# len_filtered# has_null)) - where - len# = lengthPS# ps - - matchOffset :: Int# -> [Char] -> (Int,[Char]) - matchOffset off [] = (I# off,[]) - matchOffset off (C# c:cs) = - let - x = ord# c - off' = off +# x - in - if x==# 0# then -- escape code, add 255# - matchOffset off' cs - else - (I# off', cs) - - copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s () - copy_arr _ [_] _ _ = return () - copy_arr arr# ls n i = - let - (x,ls') = matchOffset 0# ls - n' = n +# (case x of { (I# x#) -> x#}) -# 1# - ch = indexPS# ps n' - in - write_ps_array arr# i ch >> - copy_arr arr# ls' (n' +# 1#) (i +# 1#) - - esc :: Int# -> Int# -> [Char] -> [Char] - esc v 0# ls = (C# (chr# v)):ls - esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls) - - filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int) - filter_ps n hits run acc - | n <# 0# = - let - escs = run `quotInt#` 255# - v = run `remInt#` 255# - in - (esc (v +# 1#) escs acc, I# hits) - | otherwise - = let - ch = indexPS# ps n - n' = n -# 1# - in - if pred (C# ch) then - let - escs = run `quotInt#` 255# - v = run `remInt#` 255# - acc' = esc (v +# 1#) escs acc - in - filter_ps n' (hits +# 1#) 0# acc' - else - filter_ps n' hits (run +# 1#) acc - +filterPS pred ps = packString (filter pred (unpackPS ps)) +-- | The 'foldlPS' function behaves like 'foldl' on 'PackedString's. foldlPS :: (a -> Char -> a) -> a -> PackedString -> a -foldlPS f b ps - = if nullPS ps then - b - else - whizzLR b 0# - where - len = lengthPS# ps - - --whizzLR :: a -> Int# -> a - whizzLR b idx - | idx ==# len = b - | otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#) - +foldlPS f b ps = foldl f b (unpackPS ps) +-- | The 'foldrPS' function behaves like 'foldr' on 'PackedString's. foldrPS :: (Char -> a -> a) -> a -> PackedString -> a -foldrPS f v ps - | nullPS ps = v - | otherwise = whizzRL v len - where - len = lengthPS# ps - - --whizzRL :: a -> Int# -> a - whizzRL b idx - | idx <# 0# = b - | otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#) +foldrPS f v ps = foldr f v (unpackPS ps) +-- | The 'takePS' function takes the first @n@ characters of a 'PackedString'. takePS :: Int -> PackedString -> PackedString -takePS (I# n) ps - | n ==# 0# = nilPS - | otherwise = substrPS# ps 0# (n -# 1#) +takePS n ps = substrPS ps 0 (n-1) +-- | The 'dropPS' function drops the first @n@ characters of a 'PackedString'. dropPS :: Int -> PackedString -> PackedString -dropPS (I# n) ps - | n ==# len = nilPS - | otherwise = substrPS# ps n (lengthPS# ps -# 1#) - where - len = lengthPS# ps +dropPS n ps = substrPS ps n (lengthPS ps - 1) +-- | The 'splitWithPS' function splits a 'PackedString' at a given index. splitAtPS :: Int -> PackedString -> (PackedString, PackedString) splitAtPS n ps = (takePS n ps, dropPS n ps) +-- | The 'takeWhilePS' function is analogous to the 'takeWhile' function. takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString -takeWhilePS pred ps - = let - break_pt = char_pos_that_dissatisfies - (\ c -> pred (C# c)) - ps - (lengthPS# ps) - 0# - in - if break_pt ==# 0# then - nilPS - else - substrPS# ps 0# (break_pt -# 1#) +takeWhilePS pred ps = packString (takeWhile pred (unpackPS ps)) +-- | The 'dropWhilePS' function is analogous to the 'dropWhile' function. dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString -dropWhilePS pred ps - = let - len = lengthPS# ps - break_pt = char_pos_that_dissatisfies - (\ c -> pred (C# c)) - ps - len - 0# - in - if len ==# break_pt then - nilPS - else - substrPS# ps break_pt (len -# 1#) +dropWhilePS pred ps = packString (dropWhile pred (unpackPS ps)) +-- | The 'elemPS' function returns True iff the given element is in the string. elemPS :: Char -> PackedString -> Bool -elemPS (C# ch) ps - = let - len = lengthPS# ps - break_pt = first_char_pos_that_satisfies - (`eqChar#` ch) - ps - len - 0# - in - break_pt <# len - -char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# - -char_pos_that_dissatisfies p ps len pos - | pos >=# len = pos -- end - | p (indexPS# ps pos) = -- predicate satisfied; keep going - char_pos_that_dissatisfies p ps len (pos +# 1#) - | otherwise = pos -- predicate not satisfied - -first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int# -first_char_pos_that_satisfies p ps len pos - | pos >=# len = pos -- end - | p (indexPS# ps pos) = pos -- got it! - | otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#) - --- ToDo: could certainly go quicker +elemPS c ps = c `elem` unpackPS ps + +-- | The 'spanPS' function returns a pair containing the result of +-- running both 'takeWhilePS' and 'dropWhilePS'. spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps) +-- | The 'breakPS' function breaks a string at the first position which +-- satisfies the predicate. breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) breakPS p ps = spanPS (not . p) ps +-- | The 'linesPS' function splits the input on line-breaks. linesPS :: PackedString -> [PackedString] linesPS ps = splitPS '\n' ps -wordsPS :: PackedString -> [PackedString] -wordsPS ps = splitWithPS isSpace ps +-- | The 'unlinesPS' function concatenates the input list after +-- interspersing newlines. +unlinesPS :: [PackedString] -> PackedString +unlinesPS = joinPS (packString "\n") -reversePS :: PackedString -> PackedString -reversePS ps = - if nullPS ps then -- don't create stuff unnecessarily. - ps - else - runST ( - new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte! - fill_in arr# (length -# 1#) 0# >> - freeze_ps_array arr# length >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length in - return (PS frozen# length has_null)) - where - length = lengthPS# ps - - fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s () - fill_in arr_in# n i = - let - ch = indexPS# ps n - in - write_ps_array arr_in# i ch >> - if n ==# 0# then - write_ps_array arr_in# (i +# 1#) (chr# 0#) >> - return () - else - fill_in arr_in# (n -# 1#) (i +# 1#) - -concatPS :: [PackedString] -> PackedString -concatPS [] = nilPS -concatPS pss - = let - tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x } - in - runST ( - new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte! - packum arr# pss 0# >> - freeze_ps_array arr# tot_len# >>= \ (ByteArray _ _ frozen#) -> - - let has_null = byteArrayHasNUL# frozen# tot_len# in - - return (PS frozen# tot_len# has_null) - ) - where - packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s () +-- | The 'wordsPS' function is analogous to the 'words' function. +wordsPS :: PackedString -> [PackedString] +wordsPS ps = filter (not.nullPS) (splitWithPS isSpace ps) - packum arr [] pos - = write_ps_array arr pos (chr# 0#) >> - return () - packum arr (ps : pss) pos - = fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) -> - packum arr pss next_pos +-- | The 'unwordsPS' function is analogous to the 'unwords' function. +unwordsPS :: [PackedString] -> PackedString +unwordsPS = joinPS (packString " ") - fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int +-- | The 'reversePS' function reverses the string. +reversePS :: PackedString -> PackedString +reversePS ps = packString (reverse (unpackPS ps)) - fill arr arr_i ps ps_i ps_len - | ps_i ==# ps_len - = return (I# (arr_i +# ps_len)) - | otherwise - = write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >> - fill arr arr_i ps (ps_i +# 1#) ps_len +-- | The 'concatPS' function concatenates a list of 'PackedString's. +concatPS :: [PackedString] -> PackedString +concatPS pss = packString (concat (map unpackPS pss)) ------------------------------------------------------------ + +-- | The 'joinPS' function takes a 'PackedString' and a list of 'PackedString's +-- and concatenates the list after interspersing the first argument between +-- each element of the list. joinPS :: PackedString -> [PackedString] -> PackedString joinPS filler pss = concatPS (splice pss) where @@ -795,43 +262,39 @@ joinPS filler pss = concatPS (splice pss) * splitPS x ls = ls' where False = any (map (x `elemPS`) ls') - False = any (map (nullPS) ls') - - * all x's have been chopped out. - * no empty PackedStrings in returned list. A conseq. - of this is: - splitPS x nilPS = [] - - - * joinPS (packString [x]) (_splitPS x ls) = ls + * joinPS (packString [x]) (splitPS x ls) = ls -} +-- | The 'splitPS' function splits the input string on each occurrence of the given 'Char'. splitPS :: Char -> PackedString -> [PackedString] -splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch) +splitPS c = splitWithPS (== c) +-- | The 'splitWithPS' function takes a character predicate and splits the input string +-- at each character which satisfies the predicate. splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] -splitWithPS pred ps = - splitify 0# +splitWithPS pred (PS ps) = + splitify 0 where - len = lengthPS# ps + len = lengthPS (PS ps) splitify n - | n >=# len = [] + | n >= len = [] | otherwise = let - break_pt = - first_char_pos_that_satisfies - (\ c -> pred (C# c)) - ps - len - n + break_pt = first_pos_that_satisfies pred ps len n in - if break_pt ==# n then -- immediate match, no substring to cut out. - splitify (break_pt +# 1#) + if break_pt == n then -- immediate match, empty substring + nilPS + : splitify (break_pt + 1) else - substrPS# ps n (break_pt -# 1#): -- leave out the matching character - splitify (break_pt +# 1#) + substrPS (PS ps) n (break_pt - 1) -- leave out the matching character + : splitify (break_pt + 1) + +first_pos_that_satisfies pred ps len n = + case [ m | m <- [n..len-1], pred (ps ! m) ] of + [] -> len + (m:_) -> m -- ----------------------------------------------------------------------------- -- Local utility functions @@ -839,76 +302,127 @@ splitWithPS pred ps = -- The definition of @_substrPS@ is essentially: -- @take (end - begin + 1) (drop begin str)@. +-- | The 'substrPS' function takes a 'PackedString' and two indices +-- and returns the substring of the input string between (and including) +-- these indices. substrPS :: PackedString -> Int -> Int -> PackedString -substrPS ps (I# begin) (I# end) = substrPS# ps begin end - -substrPS# :: PackedString -> Int# -> Int# -> PackedString -substrPS# ps s e - | s <# 0# || s >=# len || result_len# <=# 0# - = nilPS - - | otherwise - = runST ( - new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte! - fill_in ch_arr 0# >> - freeze_ps_array ch_arr result_len# >>= \ (ByteArray _ _ frozen#) -> - - let has_null = byteArrayHasNUL# frozen# result_len# in - - return (PS frozen# result_len# has_null) - ) - where - len = lengthPS# ps +substrPS (PS ps) begin end = packString [ ps ! i | i <- [begin..end] ] - result_len# = (if e <# len then (e +# 1#) else len) -# s - - ----------------------- - fill_in :: MutableByteArray s Int -> Int# -> ST s () +-- ----------------------------------------------------------------------------- +-- hPutPS - fill_in arr_in# idx - | idx ==# result_len# - = write_ps_array arr_in# idx (chr# 0#) >> - return () - | otherwise - = let - ch = indexPS# ps (s +# idx) - in - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) +-- | Outputs a 'PackedString' to the specified 'Handle'. +-- +-- NOTE: the representation of the 'PackedString' in the file is assumed to +-- be in the ISO-8859-1 encoding. In other words, only the least significant +-- byte is taken from each character in the 'PackedString'. +hPutPS :: Handle -> PackedString -> IO () +hPutPS h (PS ps) = do + let l = lengthPS (PS ps) + arr <- newArray_ (0, l-1) + sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ] + hPutArray h arr l -- ----------------------------------------------------------------------------- --- Packing and unpacking C strings +-- hGetPS -cStringToPS :: Ptr a -> PackedString -cStringToPS (Ptr a#) = -- the easy one; we just believe the caller - CPS a# len - where - len = case (strlen# a#) of { I# x -> x } - -packCBytes :: Int -> Ptr a -> PackedString -packCBytes len addr = runST (packCBytesST len addr) - -packCBytesST :: Int -> Ptr a -> ST s PackedString -packCBytesST (I# length#) (Ptr addr) = - {- - allocate an array that will hold the string - (not forgetting the NUL byte at the end) - -} - new_ps_array (length# +# 1#) >>= \ ch_array -> - -- fill in packed string from "addr" - fill_in ch_array 0# >> - -- freeze the puppy: - freeze_ps_array ch_array length# >>= \ (ByteArray _ _ frozen#) -> - let has_null = byteArrayHasNUL# frozen# length# in - return (PS frozen# length# has_null) +-- | Read a 'PackedString' directly from the specified 'Handle'. +-- This is far more efficient than reading the characters into a 'String' +-- and then using 'packString'. +-- +-- NOTE: as with 'hPutPS', the string representation in the file is +-- assumed to be ISO-8859-1. +hGetPS :: Handle -> Int -> IO PackedString +hGetPS h i = do + arr <- newArray_ (0, i-1) + l <- hGetArray h arr i + chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1] + return (packNChars l chars) + +#else /* __NHC__ */ + +--import Prelude hiding (append, break, concat, cons, drop, dropWhile, +-- filter, foldl, foldr, head, length, lines, map, +-- nil, null, reverse, span, splitAt, subst, tail, +-- take, takeWhile, unlines, unwords, words) +-- also hiding: Ix(..), Functor(..) +import qualified NHC.PackedString +import NHC.PackedString (PackedString,packString,unpackPS) +import List (intersperse) + + +nilPS :: PackedString +consPS :: Char -> PackedString -> PackedString +headPS :: PackedString -> Char +tailPS :: PackedString -> PackedString +nullPS :: PackedString -> Bool +appendPS :: PackedString -> PackedString -> PackedString +lengthPS :: PackedString -> Int +indexPS :: PackedString -> Int -> Char +mapPS :: (Char -> Char) -> PackedString -> PackedString +filterPS :: (Char -> Bool) -> PackedString -> PackedString +reversePS :: PackedString -> PackedString +concatPS :: [PackedString] -> PackedString +elemPS :: Char -> PackedString -> Bool +substrPS :: PackedString -> Int -> Int -> PackedString +takePS :: Int -> PackedString -> PackedString +dropPS :: Int -> PackedString -> PackedString +splitAtPS :: Int -> PackedString -> (PackedString, PackedString) + +foldlPS :: (a -> Char -> a) -> a -> PackedString -> a +foldrPS :: (Char -> a -> a) -> a -> PackedString -> a +takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString +dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString +spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) +breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) +linesPS :: PackedString -> [PackedString] +unlinesPS :: [PackedString] -> PackedString + +wordsPS :: PackedString -> [PackedString] +unwordsPS :: [PackedString] -> PackedString +splitPS :: Char -> PackedString -> [PackedString] +splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] +joinPS :: PackedString -> [PackedString] -> PackedString + +nilPS = NHC.PackedString.nil +consPS = NHC.PackedString.cons +headPS = NHC.PackedString.head +tailPS = NHC.PackedString.tail +nullPS = NHC.PackedString.null +appendPS = NHC.PackedString.append +lengthPS = NHC.PackedString.length +indexPS p i = (unpackPS p) !! i +mapPS = NHC.PackedString.map +filterPS = NHC.PackedString.filter +reversePS = NHC.PackedString.reverse +concatPS = NHC.PackedString.concat +elemPS c p = c `elem` unpackPS p +substrPS = NHC.PackedString.substr +takePS = NHC.PackedString.take +dropPS = NHC.PackedString.drop +splitAtPS = NHC.PackedString.splitAt + +foldlPS = NHC.PackedString.foldl +foldrPS = NHC.PackedString.foldr +takeWhilePS = NHC.PackedString.takeWhile +dropWhilePS = NHC.PackedString.dropWhile +spanPS = NHC.PackedString.span +breakPS = NHC.PackedString.break +linesPS = NHC.PackedString.lines +unlinesPS = NHC.PackedString.unlines + +wordsPS = NHC.PackedString.words +unwordsPS = NHC.PackedString.unwords +splitPS c = splitWithPS (==c) +splitWithPS p = + map packString . split' p [] . unpackPS where - fill_in :: MutableByteArray s Int -> Int# -> ST s () - - fill_in arr_in# idx - | idx ==# length# - = write_ps_array arr_in# idx (chr# 0#) >> - return () - | otherwise - = case (indexCharOffAddr# addr idx) of { ch -> - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) } + split' :: (Char->Bool) -> String -> String -> [String] + split' pred [] [] = [] + split' pred acc [] = [reverse acc] + split' pred acc (x:xs) | pred x = reverse acc: split' pred [] xs + | otherwise = split' pred (x:acc) xs + +joinPS sep = concatPS . intersperse sep + +#endif