From 4a1f785332f15f382c5f5a729bbdaba54d69870a Mon Sep 17 00:00:00 2001 From: Don Stewart Date: Sat, 1 Jul 2006 08:43:45 +0000 Subject: [PATCH] Import Data.ByteString.Lazy, improve ByteString Fusion, and resync with FPS head This patch imports the Data.ByteString.Lazy module, and its helpers, providing a ByteString implemented as a lazy list of strict cache-sized chunks. This type allows the usual lazy operations to be written on bytestrings, including lazy IO, with much improved space and time over the [Char] equivalents. --- Data/ByteString.hs | 1016 ++++++++++----------------------- Data/ByteString/Base.hs | 463 +++++++++++++++ Data/ByteString/Char8.hs | 205 +++---- Data/ByteString/Fusion.hs | 700 +++++++++++++++++++++++ Data/ByteString/Lazy.hs | 1246 +++++++++++++++++++++++++++++++++++++++++ Data/ByteString/Lazy/Char8.hs | 693 +++++++++++++++++++++++ Makefile | 1 + base.cabal | 4 + cbits/fpstring.c | 10 +- include/fpstring.h | 10 +- package.conf.in | 4 + 11 files changed, 3507 insertions(+), 845 deletions(-) create mode 100644 Data/ByteString/Base.hs create mode 100644 Data/ByteString/Fusion.hs create mode 100644 Data/ByteString/Lazy.hs create mode 100644 Data/ByteString/Lazy/Char8.hs diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 2001110..9187ff5 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -1,4 +1,4 @@ -{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-} +{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-} -- -- Module : ByteString -- Copyright : (c) The University of Glasgow 2001, @@ -39,11 +39,11 @@ module Data.ByteString ( -- * The @ByteString@ type - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - singleton, -- :: Word8 -> ByteString + singleton, -- :: Word8 -> ByteString pack, -- :: [Word8] -> ByteString unpack, -- :: ByteString -> [Word8] packWith, -- :: (a -> Word8) -> [a] -> ByteString @@ -87,10 +87,12 @@ module Data.ByteString ( -- ** Scans scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString + scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString + scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -- ** Accumulating maps mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) --- mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) + mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString -- ** Unfolding ByteStrings @@ -109,6 +111,7 @@ module Data.ByteString ( span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) group, -- :: ByteString -> [ByteString] groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] inits, -- :: ByteString -> [ByteString] @@ -164,47 +167,28 @@ module Data.ByteString ( -- * Zipping and unzipping ByteStrings zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] + zipWith', unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) -- * Ordered ByteStrings sort, -- :: ByteString -> ByteString - -- * Unchecked access - unsafeHead, -- :: ByteString -> Word8 - unsafeTail, -- :: ByteString -> ByteString - unsafeIndex, -- :: ByteString -> Int -> Word8 - unsafeTake, -- :: Int -> ByteString -> ByteString - unsafeDrop, -- :: Int -> ByteString -> ByteString - - -- * Low level introduction and elimination - generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString - create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString - fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString - toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int) - skipIndex, -- :: ByteString -> Int + -- * Low level CString conversions -- ** Packing CStrings and pointers packCString, -- :: CString -> ByteString packCStringLen, -- :: CString -> ByteString packMallocCString, -- :: CString -> ByteString -#if defined(__GLASGOW_HASKELL__) - packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString - packAddress, -- :: Addr# -> ByteString - unsafePackAddress, -- :: Int -> Addr# -> ByteString - unsafeFinalize, -- :: ByteString -> IO () -#endif - -- ** Using ByteStrings as CStrings useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a - unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a - unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a + useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a -- ** Copying ByteStrings -- | These functions perform memcpy(3) operations copy, -- :: ByteString -> ByteString - copyCString, -- :: CString -> ByteString - copyCStringLen, -- :: CStringLen -> ByteString + copyCString, -- :: CString -> IO ByteString + copyCStringLen, -- :: CStringLen -> IO ByteString -- * I\/O with 'ByteString's @@ -220,6 +204,7 @@ module Data.ByteString ( -- ** Files readFile, -- :: FilePath -> IO ByteString writeFile, -- :: FilePath -> ByteString -> IO () + appendFile, -- :: FilePath -> ByteString -> IO () -- mmapFile, -- :: FilePath -> IO ByteString -- ** I\/O with Handles @@ -232,16 +217,14 @@ module Data.ByteString ( hGetContents, -- :: Handle -> IO ByteString hGet, -- :: Handle -> Int -> IO ByteString hPut, -- :: Handle -> ByteString -> IO () + hPutStr, -- :: Handle -> ByteString -> IO () + hPutStrLn, -- :: Handle -> ByteString -> IO () -- * Fusion utilities #if defined(__GLASGOW_HASKELL__) unpackList, -- eek, otherwise it gets thrown away by the simplifier #endif - - noAL, NoAL, loopArr, loopAcc, loopSndAcc, - loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL, - mapAccumEFL, mapIndexEFL, - + lengthU, maximumU, minimumU ) where import qualified Prelude as P @@ -250,13 +233,16 @@ import Prelude hiding (reverse,head,tail,last,init,null ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,maximum ,minimum,all,concatMap,foldl1,foldr1 - ,scanl,scanl1,readFile,writeFile,replicate + ,scanl,scanl1,scanr,scanr1 + ,readFile,writeFile,appendFile,replicate ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) +import Data.ByteString.Base +import Data.ByteString.Fusion + import qualified Data.List as List -import Data.Char import Data.Word (Word8) import Data.Maybe (listToMaybe) import Data.Array (listArray) @@ -267,7 +253,7 @@ import Control.Exception (bracket, assert) import Control.Monad (when) import Foreign.C.String (CString, CStringLen) -import Foreign.C.Types (CSize,CInt) +import Foreign.C.Types (CSize) import Foreign.ForeignPtr import Foreign.Marshal.Array import Foreign.Ptr @@ -286,8 +272,6 @@ import System.IO.Unsafe #if defined(__GLASGOW_HASKELL__) -import Data.Generics (Data(..), Typeable(..)) - import System.IO (hGetBufNonBlocking) import System.IO.Error (isEOFError) @@ -295,8 +279,8 @@ import Foreign.Marshal (alloca) import qualified Foreign.Concurrent as FC (newForeignPtr) import GHC.Handle -import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#) -import GHC.Base (build, unsafeChr) +import GHC.Prim (Word#, (+#), writeWord8OffAddr#) +import GHC.Base (build) import GHC.Word hiding (Word8) import GHC.Ptr (Ptr(..)) import GHC.ST (ST(..)) @@ -304,9 +288,6 @@ import GHC.IOBase #endif --- CFILES stuff is Hugs only -{-# CFILES cbits/fpstring.c #-} - -- ----------------------------------------------------------------------------- -- -- Useful macros, until we have bang patterns @@ -320,19 +301,6 @@ import GHC.IOBase -- ----------------------------------------------------------------------------- --- | A space-efficient representation of a Word8 vector, supporting many --- efficient operations. A 'ByteString' contains 8-bit characters only. --- --- Instances of Eq, Ord, Read, Show, Data, Typeable --- -data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) - {-# UNPACK #-} !Int - {-# UNPACK #-} !Int - -#if defined(__GLASGOW_HASKELL__) - deriving (Data, Typeable) -#endif - instance Eq ByteString where (==) = eq @@ -373,7 +341,7 @@ compareBytes (PS x1 s1 l1) (PS x2 s2 l2) withForeignPtr x1 $ \p1 -> withForeignPtr x2 $ \p2 -> do i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2) - return $ case i `compare` 0 of + return $! case i `compare` 0 of EQ -> l1 `compare` l2 x -> x {-# INLINE compareBytes #-} @@ -409,14 +377,12 @@ cmp p1 p2 n len1 len2 -- | /O(1)/ The empty 'ByteString' empty :: ByteString -empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0 +empty = unsafeCreate 0 $ const $ return () {-# NOINLINE empty #-} -- | /O(1)/ Convert a 'Word8' into a 'ByteString' singleton :: Word8 -> ByteString -singleton c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do - withForeignPtr fp $ \p -> poke p c - return $ PS fp 0 1 +singleton c = unsafeCreate 1 $ \p -> poke p c {-# INLINE singleton #-} -- @@ -446,14 +412,14 @@ pack :: [Word8] -> ByteString #if !defined(__GLASGOW_HASKELL__) -pack str = create (P.length str) $ \p -> go p str +pack str = unsafeCreate (P.length str) $ \p -> go p str where go _ [] = return () go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff #else /* hack away */ -pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) +pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) where go _ _ [] = return () go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs @@ -506,6 +472,12 @@ unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do {-# INLINE [0] unpackFoldr #-} -- TODO just use normal foldr here. +-- +-- or +-- unpack xs | null xs = [] +-- | otherwise = unsafeHead xs : unpack (unsafeTail xs) +-- +-- ? #endif @@ -514,7 +486,7 @@ unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some -- conversion function packWith :: (a -> Word8) -> [a] -> ByteString -packWith k str = create (P.length str) $ \p -> go p str +packWith k str = unsafeCreate (P.length str) $ \p -> go p str where STRICT2(go) go _ [] = return () @@ -539,40 +511,50 @@ unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> -- | /O(1)/ Test whether a ByteString is empty. null :: ByteString -> Bool -null (PS _ _ l) = l == 0 +null (PS _ _ l) = assert (l >= 0) $ l <= 0 {-# INLINE null #-} +-- --------------------------------------------------------------------- -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. length :: ByteString -> Int -length (PS _ _ l) = l +length (PS _ _ l) = assert (l >= 0) $ l + +-- +-- length/loop fusion. When taking the length of any fuseable loop, +-- rewrite it as a foldl', and thus avoid allocating the result buffer +-- worth around 10% in speed testing. +-- #if defined(__GLASGOW_HASKELL__) {-# INLINE [1] length #-} #endif -{-# +lengthU :: ByteString -> Int +lengthU = foldl' (const . (+1)) (0::Int) +{-# INLINE lengthU #-} --- Translate length into a loop. --- Performace ok, but allocates too much, so disable for now. +{-# RULES - "length/loop" forall f acc s . - length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s)) +-- v2 fusion +"length/loop" forall loop s . + length (loopArr (loopWrapper loop s)) = + lengthU (loopArr (loopWrapper loop s)) #-} +------------------------------------------------------------------------ + -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different -- complexity, as it requires a memcpy. cons :: Word8 -> ByteString -> ByteString -cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do +cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do poke p c memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) {-# INLINE cons #-} --- todo fuse - -- | /O(n)/ Append a byte to the end of a 'ByteString' snoc :: ByteString -> Word8 -> ByteString -snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do +snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do memcpy p (f `plusPtr` s) (fromIntegral l) poke (p `plusPtr` l) c {-# INLINE snoc #-} @@ -580,13 +562,15 @@ snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do -- todo fuse -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. +-- An exception will be thrown in the case of an empty ByteString. head :: ByteString -> Word8 -head ps@(PS x s _) - | null ps = errorEmptyList "head" +head (PS x s l) + | l <= 0 = errorEmptyList "head" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s {-# INLINE head #-} -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. +-- An exception will be thrown in the case of an empty ByteString. tail :: ByteString -> ByteString tail (PS p s l) | l <= 0 = errorEmptyList "tail" @@ -594,6 +578,7 @@ tail (PS p s l) {-# INLINE tail #-} -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. +-- An exception will be thrown in the case of an empty ByteString. last :: ByteString -> Word8 last ps@(PS x s l) | null ps = errorEmptyList "last" @@ -601,9 +586,10 @@ last ps@(PS x s l) {-# INLINE last #-} -- | /O(1)/ Return all the elements of a 'ByteString' except the last one. +-- An exception will be thrown in the case of an empty ByteString. init :: ByteString -> ByteString -init (PS p s l) - | l <= 0 = errorEmptyList "init" +init ps@(PS p s l) + | null ps = errorEmptyList "init" | otherwise = PS p s (l-1) {-# INLINE init #-} @@ -620,19 +606,23 @@ append xs ys | null xs = ys -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each -- element of @xs@. This function is subject to array fusion. map :: (Word8 -> Word8) -> ByteString -> ByteString -map f = loopArr . loopU (mapEFL f) noAL +#if defined(LOOPU_FUSION) +map f = loopArr . loopU (mapEFL f) NoAcc +#elif defined(LOOPUP_FUSION) +map f = loopArr . loopUp (mapEFL f) NoAcc +#elif defined(LOOPNOACC_FUSION) +map f = loopArr . loopNoAcc (mapEFL f) +#else +map f = loopArr . loopMap f +#endif {-# INLINE map #-} -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is -- slightly faster for one-shot cases. map' :: (Word8 -> Word8) -> ByteString -> ByteString -map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do - np <- mallocByteString (len+1) - withForeignPtr np $ \p -> do - map_ 0 (a `plusPtr` s) p - return (PS np 0 len) +map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> + create len $ map_ 0 (a `plusPtr` s) where - map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () STRICT3(map_) map_ n p1 p2 @@ -645,12 +635,10 @@ map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. reverse :: ByteString -> ByteString -reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> +reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> c_reverse p (f `plusPtr` s) (fromIntegral l) -{- -reverse = pack . P.reverse . unpack --} +-- todo, fuseable version -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a -- 'ByteString' and \`intersperses\' that byte between the elements of @@ -659,7 +647,7 @@ reverse = pack . P.reverse . unpack intersperse :: Word8 -> ByteString -> ByteString intersperse c ps@(PS x s l) | length ps < 2 = ps - | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f -> + | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f -> c_intersperse p (f `plusPtr` s) (fromIntegral l) c {- @@ -679,7 +667,11 @@ transpose ps = P.map pack (List.transpose (P.map unpack ps)) -- ByteString using the binary operator, from left to right. -- This function is subject to array fusion. foldl :: (a -> Word8 -> a) -> a -> ByteString -> a +#if !defined(LOOPU_FUSION) +foldl f z = loopAcc . loopUp (foldEFL f) z +#else foldl f z = loopAcc . loopU (foldEFL f) z +#endif {-# INLINE foldl #-} {- @@ -697,43 +689,45 @@ foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> -} -- | 'foldl\'' is like 'foldl', but strict in the accumulator. +-- Though actually foldl is also strict in the accumulator. foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a -foldl' f z = loopAcc . loopU (foldEFL' f) z +foldl' = foldl +-- foldl' f z = loopAcc . loopU (foldEFL' f) z {-# INLINE foldl' #-} -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a ByteString, -- reduces the ByteString using the binary operator, from right to left. foldr :: (Word8 -> a -> a) -> a -> ByteString -> a -foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> - go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) - where - STRICT2(go) - go p q | p == q = return z - | otherwise = do c <- peek p - ws <- go (p `plusPtr` 1) q - return $ c `k` ws +foldr k z = loopAcc . loopDown (foldEFL (flip k)) z +{-# INLINE foldr #-} -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'ByteStrings'. --- This function is subject to array fusion. +-- This function is subject to array fusion. +-- An exception will be thrown in the case of an empty ByteString. foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldl1 f ps | null ps = errorEmptyList "foldl1" | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) +{-# INLINE foldl1 #-} -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. +-- An exception will be thrown in the case of an empty ByteString. foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldl1' f ps | null ps = errorEmptyList "foldl1'" | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) +{-# INLINE foldl1' #-} -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'ByteString's +-- An exception will be thrown in the case of an empty ByteString. foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldr1 f ps | null ps = errorEmptyList "foldr1" | otherwise = foldr f (last ps) (init ps) +{-# INLINE foldr1 #-} -- --------------------------------------------------------------------- -- Special folds @@ -742,7 +736,7 @@ foldr1 f ps concat :: [ByteString] -> ByteString concat [] = empty concat [ps] = ps -concat xs = create len $ \ptr -> go xs ptr +concat xs = unsafeCreate len $ \ptr -> go xs ptr where len = P.sum . P.map length $ xs STRICT2(go) go [] _ = return () @@ -752,8 +746,9 @@ concat xs = create len $ \ptr -> go xs ptr -- | Map a function over a 'ByteString' and concatenate the results concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString -concatMap f = foldr (append . f) empty --- A silly function for ByteStrings anyway. +concatMap f = concat . foldr ((:) . f) [] + +-- foldr (append . f) empty -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if -- any element of the 'ByteString' satisfies the predicate. @@ -783,62 +778,85 @@ all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> if f c then go (p `plusPtr` 1) q else return False --- todo fuse + +------------------------------------------------------------------------ -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' +-- This function will fuse. +-- An exception will be thrown in the case of an empty ByteString. maximum :: ByteString -> Word8 maximum xs@(PS x s l) | null xs = errorEmptyList "maximum" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> - return $ c_maximum (p `plusPtr` s) (fromIntegral l) -{-# INLINE maximum #-} + c_maximum (p `plusPtr` s) (fromIntegral l) -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' +-- This function will fuse. +-- An exception will be thrown in the case of an empty ByteString. minimum :: ByteString -> Word8 minimum xs@(PS x s l) | null xs = errorEmptyList "minimum" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> - return $ c_minimum (p `plusPtr` s) (fromIntegral l) -{-# INLINE minimum #-} + c_minimum (p `plusPtr` s) (fromIntegral l) + +-- +-- minimum/maximum/loop fusion. As for length (and other folds), when we +-- see we're applied after a fuseable op, switch from using the C +-- version, to the fuseable version. The result should then avoid +-- allocating a buffer. +-- --- fusion is too slow here (10x) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] minimum #-} +{-# INLINE [1] maximum #-} +#endif -{- -maximum xs@(PS x s l) - | null xs = errorEmptyList "maximum" - | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do - w <- peek p - maximum_ (p `plusPtr` s) 0 l w +maximumU :: ByteString -> Word8 +maximumU = foldl1' max +{-# INLINE maximumU #-} -maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 -STRICT4(maximum_) -maximum_ ptr n m c - | n >= m = return c - | otherwise = do w <- peekByteOff ptr n - maximum_ ptr (n+1) m (if w > c then w else c) +minimumU :: ByteString -> Word8 +minimumU = foldl1' min +{-# INLINE minimumU #-} -minimum xs@(PS x s l) - | null xs = errorEmptyList "minimum" - | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do - w <- peek p - minimum_ (p `plusPtr` s) 0 l w - -minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 -STRICT4(minimum_) -minimum_ ptr n m c - | n >= m = return c - | otherwise = do w <- peekByteOff ptr n - minimum_ ptr (n+1) m (if w < c then w else c) --} +{-# RULES + +"minimum/loop" forall loop s . + minimum (loopArr (loopWrapper loop s)) = + minimumU (loopArr (loopWrapper loop s)) + +"maximum/loop" forall loop s . + maximum (loopArr (loopWrapper loop s)) = + maximumU (loopArr (loopWrapper loop s)) + #-} + +------------------------------------------------------------------------ + +-- | The 'mapAccumL' function behaves like a combination of 'map' and +-- 'foldl'; it applies a function to each element of a ByteString, +-- passing an accumulating parameter from left to right, and returning a +-- final value of this accumulator together with the new list. mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumL f z = loopU (mapAccumEFL f) z +#if !defined(LOOPU_FUSION) +mapAccumL f z = unSP . loopUp (mapAccumEFL f) z +#else +mapAccumL f z = unSP . loopU (mapAccumEFL f) z +#endif +{-# INLINE mapAccumL #-} ---mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) +-- | The 'mapAccumR' function behaves like a combination of 'map' and +-- 'foldr'; it applies a function to each element of a ByteString, +-- passing an accumulating parameter from right to left, and returning a +-- final value of this accumulator together with the new ByteString. +mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) +mapAccumR f z = unSP . loopDown (mapAccumEFL f) z +{-# INLINE mapAccumR #-} -- | /O(n)/ map Word8 functions, provided with the index at each position mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString -mapIndexed f = loopArr . loopU (mapIndexEFL f) 0 +mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0 +{-# INLINE mapIndexed #-} -- --------------------------------------------------------------------- -- Building ByteStrings @@ -852,7 +870,15 @@ mapIndexed f = loopArr . loopU (mapIndexEFL f) 0 -- -- > last (scanl f z xs) == foldl f z xs. scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString -scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space +#if !defined(LOOPU_FUSION) +scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0) +#else +scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) +#endif + + -- n.b. haskell's List scan returns a list one bigger than the + -- input, so we need to snoc here to get some extra space, however, + -- it breaks map/up fusion (i.e. scanl . map no longer fuses) {-# INLINE scanl #-} -- | 'scanl1' is a variant of 'scanl' that has no starting value argument. @@ -865,6 +891,18 @@ scanl1 f ps | otherwise = scanl f (unsafeHead ps) (unsafeTail ps) {-# INLINE scanl1 #-} +-- | scanr is the right-to-left dual of scanl. +scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString +scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space +{-# INLINE scanr #-} + +-- | 'scanr1' is a variant of 'scanr' that has no starting value argument. +scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString +scanr1 f ps + | null ps = empty + | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions +{-# INLINE scanr1 #-} + -- --------------------------------------------------------------------- -- Unfolds and replicates @@ -875,17 +913,10 @@ scanl1 f ps -- -- This implemenation uses @memset(3)@ replicate :: Int -> Word8 -> ByteString -replicate w c | w <= 0 = empty - | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return () - -{- --- About 5x slower -replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w - where - STRICT2(go) - go _ 0 = return w - go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1) --} +replicate w c + | w <= 0 = empty + | otherwise = unsafeCreate w $ \ptr -> + memset ptr c (fromIntegral w) >> return () -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a @@ -918,18 +949,15 @@ unfoldr f = concat . unfoldChunk 32 64 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) unfoldrN i f x0 | i < 0 = (empty, Just x0) - | otherwise = inlinePerformIO $ do - fp <- mallocByteString i - withForeignPtr fp (\p -> go fp p x0 0) - where STRICT4(go) - go fp p x n = + | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 + where STRICT3(go) + go p x n = case f x of - Nothing -> let s = copy (PS fp 0 n) - in s `seq` return (s, Nothing) + Nothing -> return (0, n, Nothing) Just (w,x') - | n == i -> return (PS fp 0 i, Just x) + | n == i -> return (0, n, Just x) | otherwise -> do poke p w - go fp (p `plusPtr` 1) x' (n+1) + go (p `plusPtr` 1) x' (n+1) -- --------------------------------------------------------------------- -- Substrings @@ -977,6 +1005,12 @@ break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) {-# INLINE break #-} +-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' +-- +-- breakEnd p == spanEnd (not.p) +breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) +breakEnd p ps = splitAt (findFromEndUntil p ps) ps + -- | 'breakByte' breaks its ByteString argument at the first occurence -- of the specified byte. It is more efficient than 'break' as it is -- implemented with @memchr(3)@. I.e. @@ -1095,9 +1129,10 @@ split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do let ptr = p `plusPtr` s STRICT1(loop) - loop n = do - let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n)) - if q == nullPtr + loop n = + let q = inlinePerformIO $ memchr (ptr `plusPtr` n) + w (fromIntegral (l-n)) + in if q == nullPtr then [PS x (s+n) (l-n)] else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1) @@ -1175,7 +1210,7 @@ join s = concat . (List.intersperse s) -- with a char. Around 4 times faster than the generalised join. -- joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString -joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr -> +joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr -> withForeignPtr ffp $ \fp -> withForeignPtr fgp $ \gp -> do memcpy ptr (fp `plusPtr` s) (fromIntegral l) @@ -1204,8 +1239,8 @@ index ps n elemIndex :: Word8 -> ByteString -> Maybe Int elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do let p' = p `plusPtr` s - q = memchr p' c (fromIntegral l) - return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p' + q <- memchr p' c (fromIntegral l) + return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p' {-# INLINE elemIndex #-} -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the @@ -1236,12 +1271,13 @@ elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do let ptr = p `plusPtr` s STRICT1(loop) - loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n)) + loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n) + w (fromIntegral (l - n)) in if q == nullPtr then [] else let i = q `minusPtr` ptr in i : loop (i+1) - return (loop 0) + return $! loop 0 {-# INLINE elemIndices #-} {- @@ -1261,7 +1297,7 @@ elemIndices c ps = loop 0 ps -- But more efficiently than using length on the intermediate list. count :: Word8 -> ByteString -> Int count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> - return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w + fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w {-# INLINE count #-} {- @@ -1274,7 +1310,7 @@ count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> go :: Ptr Word8 -> CSize -> Int -> IO Int STRICT3(go) go p l i = do - let q = memchr p w l + q <- memchr p w l if q == nullPtr then return i else do let k = fromIntegral $ q `minusPtr` p @@ -1335,7 +1371,15 @@ notElem c ps = not (elem c ps) -- returns a ByteString containing those characters that satisfy the -- predicate. This function is subject to array fusion. filter :: (Word8 -> Bool) -> ByteString -> ByteString -filter p = loopArr . loopU (filterEFL p) noAL +#if defined(LOOPU_FUSION) +filter p = loopArr . loopU (filterEFL p) NoAcc +#elif defined(LOOPUP_FUSION) +filter p = loopArr . loopUp (filterEFL p) NoAcc +#elif defined(LOOPNOACC_FUSION) +filter p = loopArr . loopNoAcc (filterEFL p) +#else +filter f = loopArr . loopFilter f +#endif {-# INLINE filter #-} -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be @@ -1343,9 +1387,9 @@ filter p = loopArr . loopU (filterEFL p) noAL filter' :: (Word8 -> Bool) -> ByteString -> ByteString filter' k ps@(PS x s l) | null ps = ps - | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do + | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) - return (t `minusPtr` p) -- actual length + return $! t `minusPtr` p -- actual length where STRICT3(go) go f t end | f == end = return t @@ -1415,7 +1459,7 @@ isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2) | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> withForeignPtr x2 $ \p2 -> do i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1) - return (i == 0) + return $! i == 0 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' -- iff the first is a suffix of the second. @@ -1433,7 +1477,7 @@ isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2) | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> withForeignPtr x2 $ \p2 -> do i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1) - return (i == 0) + return $! i == 0 -- | Check whether one string is a substring of another. @isSubstringOf -- p s@ is equivalent to @not (null (findSubstrings p s))@. @@ -1491,12 +1535,44 @@ zip ps qs -- | 'zipWith' generalises 'zip' by zipping with the function given as -- the first argument, instead of a tupling function. For example, -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of --- corresponding sums. +-- corresponding sums. zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] zipWith f ps qs | null ps || null qs = [] | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs) +-- +-- | A specialised version of zipWith for the common case of a +-- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules +-- are used to automatically covert zipWith into zipWith' when a pack is +-- performed on the result of zipWith, but we also export it for +-- convenience. +-- +zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString +zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $ + withForeignPtr fp $ \a -> + withForeignPtr fq $ \b -> + create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t) + where + zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () + STRICT4(zipWith_) + zipWith_ n p1 p2 r + | n >= len = return () + | otherwise = do + x <- peekByteOff p1 n + y <- peekByteOff p2 n + pokeByteOff r n (f x y) + zipWith_ (n+1) p1 p2 r + + len = min l m +{-# INLINE zipWith' #-} + +{-# RULES + +"Specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q . + pack (zipWith f p q) = zipWith' f p q + #-} + -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of -- ByteStrings. Note that this performs two 'pack' operations. unzip :: [(Word8,Word8)] -> (ByteString,ByteString) @@ -1522,10 +1598,10 @@ tails p | null p = [empty] -- | /O(n)/ Sort a ByteString efficiently, using counting sort. sort :: ByteString -> ByteString -sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do +sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) - withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l) + withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l) let STRICT2(go) go 256 _ = return () @@ -1534,30 +1610,13 @@ sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do go (i + 1) (ptr `plusPtr` (fromIntegral n)) go 0 p --- "countEach counts str l" counts the number of occurences of each Word8 in --- str, and stores the result in counts. -countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO () -STRICT3(countEach) -countEach counts str l = go 0 - where - STRICT1(go) - go i | i == l = return () - | otherwise = do k <- fromIntegral `fmap` peekElemOff str i - x <- peekElemOff counts k - pokeElemOff counts k (x + 1) - go (i + 1) - {- sort :: ByteString -> ByteString -sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do +sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do memcpy p (f `plusPtr` s) l c_qsort p l -- inplace -} -{- -sort = pack . List.sort . unpack --} - -- | The 'sortBy' function is the non-overloaded version of 'sort'. -- -- Try some linear sorts: radix, counting @@ -1567,167 +1626,51 @@ sort = pack . List.sort . unpack -- sortBy f ps = undefined -- --------------------------------------------------------------------- --- --- Extensions to the basic interface --- - --- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the --- check for the empty case, so there is an obligation on the programmer --- to provide a proof that the ByteString is non-empty. -unsafeHead :: ByteString -> Word8 -unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s -{-# INLINE unsafeHead #-} - --- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the --- check for the empty case. As with 'unsafeHead', the programmer must --- provide a separate proof that the ByteString is non-empty. -unsafeTail :: ByteString -> ByteString -unsafeTail (PS ps s l) = PS ps (s+1) (l-1) -{-# INLINE unsafeTail #-} - --- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8' --- This omits the bounds check, which means there is an accompanying --- obligation on the programmer to ensure the bounds are checked in some --- other way. -unsafeIndex :: ByteString -> Int -> Word8 -unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i) -{-# INLINE unsafeIndex #-} - --- | A variety of 'take' which omits the checks on @n@ so there is an --- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. -unsafeTake :: Int -> ByteString -> ByteString -unsafeTake n (PS x s l) = - assert (0 <= n && n <= l) $ PS x s n -{-# INLINE unsafeTake #-} - --- | A variety of 'drop' which omits the checks on @n@ so there is an --- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. -unsafeDrop :: Int -> ByteString -> ByteString -unsafeDrop n (PS x s l) = - assert (0 <= n && n <= l) $ PS x (s+n) (l-n) -{-# INLINE unsafeDrop #-} - --- --------------------------------------------------------------------- -- Low level constructors -#if defined(__GLASGOW_HASKELL__) --- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an --- Addr\# (an arbitrary machine address assumed to point outside the --- garbage-collected heap) into a @ByteString@. A much faster way to --- create an Addr\# is with an unboxed string literal, than to pack a --- boxed string. A unboxed string literal is compiled to a static @char --- []@ by GHC. Establishing the length of the string requires a call to --- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as --- is the case with "string"# literals in GHC). Use 'unsafePackAddress' --- if you know the length of the string statically. --- --- An example: --- --- > literalFS = packAddress "literal"# --- -packAddress :: Addr# -> ByteString -packAddress addr# = inlinePerformIO $ do - p <- newForeignPtr_ cstr - return $ PS p 0 (fromIntegral $ c_strlen cstr) - where - cstr = Ptr addr# -{-# INLINE packAddress #-} - --- | /O(1)/ 'unsafePackAddress' provides constant-time construction of --- 'ByteStrings' -- which is ideal for string literals. It packs a --- null-terminated sequence of bytes into a 'ByteString', given a raw --- 'Addr\#' to the string, and the length of the string. Make sure the --- length is correct, otherwise use the safer 'packAddress' (where the --- length will be calculated once at runtime). -unsafePackAddress :: Int -> Addr# -> ByteString -unsafePackAddress len addr# = inlinePerformIO $ do - p <- newForeignPtr_ cstr - return $ PS p 0 len - where cstr = Ptr addr# - -#endif - --- | /O(1)/ Build a ByteString from a ForeignPtr -fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString -fromForeignPtr fp l = PS fp 0 l - --- | /O(1)/ Deconstruct a ForeignPtr from a ByteString -toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -toForeignPtr (PS ps s l) = (ps, s, l) - --- | /O(1)/ 'skipIndex' returns the internal skipped index of the --- current 'ByteString' from any larger string it was created from, as --- an 'Int'. -skipIndex :: ByteString -> Int -skipIndex (PS _ s _) = s -{-# INLINE skipIndex #-} - -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/ -- finalizer associated to it. The ByteString length is calculated using -- /strlen(3)/, and thus the complexity is a /O(n)/. packCString :: CString -> ByteString -packCString cstr = inlinePerformIO $ do +packCString cstr = unsafePerformIO $ do fp <- newForeignPtr_ (castPtr cstr) - return $ PS fp 0 (fromIntegral $ c_strlen cstr) + l <- c_strlen cstr + return $! PS fp 0 (fromIntegral l) -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will -- have /no/ finalizer associated with it. This operation has /O(1)/ -- complexity as we already know the final size, so no /strlen(3)/ is -- required. packCStringLen :: CStringLen -> ByteString -packCStringLen (ptr,len) = inlinePerformIO $ do +packCStringLen (ptr,len) = unsafePerformIO $ do fp <- newForeignPtr_ (castPtr ptr) - return $ PS fp 0 (fromIntegral len) + return $! PS fp 0 (fromIntegral len) -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will -- have a @free(3)@ finalizer associated to it. packMallocCString :: CString -> ByteString -packMallocCString cstr = inlinePerformIO $ do +packMallocCString cstr = unsafePerformIO $ do fp <- newForeignFreePtr (castPtr cstr) - return $ PS fp 0 (fromIntegral $ c_strlen cstr) - -#if defined(__GLASGOW_HASKELL__) --- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a --- length, and an IO action representing a finalizer. This function is --- not available on Hugs. --- -packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString -packCStringFinalizer p l f = do - fp <- FC.newForeignPtr p f - return $ PS fp 0 l - --- | Explicitly run the finaliser associated with a 'ByteString'. --- Further references to this value may generate invalid memory --- references. This operation is unsafe, as there may be other --- 'ByteStrings' referring to the same underlying pages. If you use --- this, you need to have a proof of some kind that all 'ByteString's --- ever generated from the underlying byte array are no longer live. -unsafeFinalize :: ByteString -> IO () -unsafeFinalize (PS p _ _) = finalizeForeignPtr p - -#endif + len <- c_strlen cstr + return $! PS fp 0 (fromIntegral len) --- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@. --- The @CString@ should not be freed afterwards. This is a memcpy(3). +-- | /O(n) construction/ Use a @ByteString@ with a function requiring a +-- null-terminated @CString@. The @CString@ will be freed +-- automatically. This is a memcpy(3). useAsCString :: ByteString -> (CString -> IO a) -> IO a -useAsCString (PS ps s l) = bracket alloc (c_free.castPtr) +useAsCString ps f = useAsCStringLen ps (\(s,_) -> f s) + +-- | /O(n) construction/ Use a @ByteString@ with a function requiring a +-- @CStringLen@. The @CStringLen@ will be freed automatically. This is a +-- memcpy(3). +useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a +useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst) where alloc = withForeignPtr ps $ \p -> do buf <- c_malloc (fromIntegral l+1) memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l) - poke (buf `plusPtr` l) (0::Word8) - return $ castPtr buf - --- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@. --- Warning: modifying the @CString@ will affect the @ByteString@. --- Why is this function unsafe? It relies on the null byte at the end of --- the ByteString to be there. This is /not/ the case if your ByteString --- has been spliced from a larger string (i.e. with take or drop). --- Unless you can guarantee the null byte, you should use the safe --- version, which will copy the string first. --- -unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a -unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s) + poke (buf `plusPtr` l) (0::Word8) -- n.b. + return $! (castPtr buf, l) -- | /O(n)/ Make a copy of the 'ByteString' with its own storage. -- This is mainly useful to allow the rest of the data pointed @@ -1735,64 +1678,20 @@ unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plu -- if a large string has been read in, and only a small part of it -- is needed in the rest of the program. copy :: ByteString -> ByteString -copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> +copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) (fromIntegral l) -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the -- CString is going to be deallocated from C land. copyCString :: CString -> IO ByteString -copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr)) +copyCString cstr = do + len <- c_strlen cstr + copyCStringLen (cstr, fromIntegral len) -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known. copyCStringLen :: CStringLen -> IO ByteString -copyCStringLen (cstr, len) = do - fp <- mallocForeignPtrArray (len+1) - withForeignPtr fp $ \p -> do - memcpy p (castPtr cstr) (fromIntegral len) - poke (p `plusPtr` len) (0 :: Word8) - return $! PS fp 0 len - --- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@. --- Warning: modifying the @CStringLen@ will affect the @ByteString@. --- This is analogous to unsafeUseAsCString, and comes with the same --- safety requirements. --- -unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a -unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l) - --- | Given the maximum size needed and a function to make the contents --- of a ByteString, generate makes the 'ByteString'. The generating --- function is required to return the actual final size (<= the maximum --- size), and the resulting byte array is realloced to this size. The --- string is padded at the end with a null byte. --- --- generate is the main mechanism for creating custom, efficient --- ByteString functions, using Haskell or C functions to fill the space. --- -generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString -generate i f = do - fp <- mallocByteString i - (ptr,n) <- withForeignPtr fp $ \p -> do - i' <- f p - if i' == i - then return (fp,i') - else do fp_ <- mallocByteString i' -- realloc - withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i') - return (fp_,i') - return (PS ptr 0 n) - -{- --- --- On the C malloc heap. Less fun. --- -generate i f = do - p <- mallocArray (i+1) - i' <- f p - p' <- reallocArray p (i'+1) - poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work - fp <- newForeignFreePtr p' - return $ PS fp 0 i' --} +copyCStringLen (cstr, len) = create len $ \p -> + memcpy p (castPtr cstr) (fromIntegral len) -- --------------------------------------------------------------------- -- line IO @@ -1873,12 +1772,11 @@ hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do -- TODO, rewrite to use normal memcpy mkPS :: RawBuffer -> Int -> Int -> IO ByteString -mkPS buf start end = do +mkPS buf start end = let len = end - start - fp <- mallocByteString len - withForeignPtr fp $ \p -> do + in create len $ \p -> do memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len) - return (PS fp 0 len) + return () mkBigPS :: Int -> [ByteString] -> IO ByteString mkBigPS _ [ps] = return ps @@ -1891,27 +1789,33 @@ mkBigPS _ pss = return $! concat (P.reverse pss) -- | Outputs a 'ByteString' to the specified 'Handle'. hPut :: Handle -> ByteString -> IO () -hPut _ (PS _ _ 0) = return () -hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l +hPut _ (PS _ _ 0) = return () hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l +-- | A synonym for @hPut@, for compatibility +hPutStr :: Handle -> ByteString -> IO () +hPutStr = hPut + +-- | Write a ByteString to a handle, appending a newline byte +hPutStrLn :: Handle -> ByteString -> IO () +hPutStrLn h ps + | length ps < 1024 = hPut h (ps `snoc` 0x0a) + | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy + -- | Write a ByteString to stdout putStr :: ByteString -> IO () putStr = hPut stdout -- | Write a ByteString to stdout, appending a newline byte putStrLn :: ByteString -> IO () -putStrLn ps = hPut stdout ps >> hPut stdout nl - where nl = singleton 0x0a +putStrLn = hPutStrLn stdout -- | Read a 'ByteString' directly from the specified 'Handle'. This -- is far more efficient than reading the characters into a 'String' -- and then using 'pack'. hGet :: Handle -> Int -> IO ByteString hGet _ 0 = return empty -hGet h i = do fp <- mallocByteString i - l <- withForeignPtr fp $ \p-> hGetBuf h p i - return $ PS fp 0 l +hGet h i = createAndTrim i $ \p -> hGetBuf h p i #if defined(__GLASGOW_HASKELL__) -- | hGetNonBlocking is identical to 'hGet', except that it will never block @@ -1919,10 +1823,7 @@ hGet h i = do fp <- mallocByteString i -- is available. hGetNonBlocking :: Handle -> Int -> IO ByteString hGetNonBlocking _ 0 = return empty -hGetNonBlocking h i = do - fp <- mallocByteString i - l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i - return $ PS fp 0 l +hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i #endif -- | Read entire handle contents into a 'ByteString'. @@ -1942,7 +1843,7 @@ hGetContents h = do if i < start_size then do p' <- reallocArray p i fp <- newForeignFreePtr p' - return $ PS fp 0 i + return $! PS fp 0 i else f p start_size where f p s = do @@ -1953,7 +1854,7 @@ hGetContents h = do then do let i' = s + i p'' <- reallocArray p' i' fp <- newForeignFreePtr p'' - return $ PS fp 0 i' + return $! PS fp 0 i' else f p' s' -- | getContents. Equivalent to hGetContents stdin @@ -1965,18 +1866,19 @@ getContents = hGetContents stdin -- 'pack'. It also may be more efficient than opening the file and -- reading it using hGet. readFile :: FilePath -> IO ByteString -readFile f = do - h <- openBinaryFile f ReadMode - l <- hFileSize h - s <- hGet h $ fromIntegral l - hClose h - return s +readFile f = bracket (openBinaryFile f ReadMode) hClose + (\h -> hFileSize h >>= hGet h . fromIntegral) -- | Write a 'ByteString' to a file. writeFile :: FilePath -> ByteString -> IO () writeFile f ps = bracket (openBinaryFile f WriteMode) hClose (\h -> hPut h ps) +-- | Append a 'ByteString' to a file. +appendFile :: FilePath -> ByteString -> IO () +appendFile f txt = bracket (openBinaryFile f AppendMode) hClose + (\hdl -> hPut hdl txt) + {- -- -- Disable until we can move it into a portable .hsc file @@ -1996,7 +1898,7 @@ writeFile f ps = bracket (openBinaryFile f WriteMode) hClose -- On systems without mmap, this is the same as a readFile. -- mmapFile :: FilePath -> IO ByteString -mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l +mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l mmap :: FilePath -> IO (ForeignPtr Word8, Int) mmap f = do @@ -2020,6 +1922,8 @@ mmap f = do else do -- The munmap leads to crashes on OpenBSD. -- maybe there's a use after unmap in there somewhere? + -- Bulat suggests adding the hClose to the + -- finalizer, excellent idea. #if !defined(__OpenBSD__) let unmap = c_munmap p l >> return () #else @@ -2050,41 +1954,6 @@ getArgs = -- --------------------------------------------------------------------- -- Internal utilities --- Unsafe conversion between 'Word8' and 'Char'. These are nops, and --- silently truncate to 8 bits Chars > '\255'. They are provided as --- convenience for ByteString construction. -w2c :: Word8 -> Char -#if !defined(__GLASGOW_HASKELL__) -w2c = chr . fromIntegral -#else -w2c = unsafeChr . fromIntegral -#endif -{-# INLINE w2c #-} - -c2w :: Char -> Word8 -c2w = fromIntegral . ord -{-# INLINE c2w #-} - --- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way --- is padded with a null byte. -mallocByteString :: Int -> IO (ForeignPtr Word8) -mallocByteString l = do - fp <- mallocForeignPtrArray (l+1) - withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8) - return fp - --- | A way of creating ForeignPtrs outside the IO monad. The @Int@ --- argument gives the final size of the ByteString. Unlike 'generate' --- the ByteString is not reallocated if the final size is less than the --- estimated size. Also, unlike 'generate' ByteString's created this way --- are managed on the Haskell heap. -create :: Int -> (Ptr Word8 -> IO ()) -> ByteString -create l write_ptr = inlinePerformIO $ do - fp <- mallocByteString (l+1) - withForeignPtr fp $ \p -> write_ptr p - return $ PS fp 0 l -{-# INLINE create #-} - -- | Perform an operation with a temporary ByteString withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b withPtr fp io = inlinePerformIO (withForeignPtr fp io) @@ -2108,17 +1977,6 @@ findFromEndUntil f ps@(PS x s l) = else if f (last ps) then l else findFromEndUntil f (PS x s (l-1)) --- Just like inlinePerformIO, but we inline it. Big performance gains as --- it exposes lots of things to further inlining --- -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -#if defined(__GLASGOW_HASKELL__) -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -#else -inlinePerformIO = unsafePerformIO -#endif - {-# INLINE newForeignFreePtr #-} newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8) #if defined(__GLASGOW_HASKELL__) @@ -2126,259 +1984,3 @@ newForeignFreePtr p = FC.newForeignPtr p (c_free p) #else newForeignFreePtr p = newForeignPtr c_free_finalizer p #endif - --- --------------------------------------------------------------------- --- --- Standard C functions --- - -foreign import ccall unsafe "string.h strlen" c_strlen - :: CString -> CInt - -foreign import ccall unsafe "stdlib.h malloc" c_malloc - :: CInt -> IO (Ptr Word8) - -foreign import ccall unsafe "static stdlib.h free" c_free - :: Ptr Word8 -> IO () - -#if !defined(__GLASGOW_HASKELL__) -foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer - :: FunPtr (Ptr Word8 -> IO ()) -#endif - -foreign import ccall unsafe "string.h memset" memset - :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) - -foreign import ccall unsafe "string.h memchr" memchr - :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8 - -foreign import ccall unsafe "string.h memcmp" memcmp - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt - -foreign import ccall unsafe "string.h memcpy" memcpy - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () - --- --------------------------------------------------------------------- --- --- Uses our C code --- - -foreign import ccall unsafe "static fpstring.h reverse" c_reverse - :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () - -foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse - :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO () - -foreign import ccall unsafe "static fpstring.h maximum" c_maximum - :: Ptr Word8 -> CInt -> Word8 - -foreign import ccall unsafe "static fpstring.h minimum" c_minimum - :: Ptr Word8 -> CInt -> Word8 - -foreign import ccall unsafe "static fpstring.h count" c_count - :: Ptr Word8 -> CInt -> Word8 -> CInt - --- --------------------------------------------------------------------- --- MMap - -{- -foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap - :: Int -> Int -> IO (Ptr Word8) - -foreign import ccall unsafe "static unistd.h close" c_close - :: Int -> IO Int - -# if !defined(__OpenBSD__) -foreign import ccall unsafe "static sys/mman.h munmap" c_munmap - :: Ptr Word8 -> Int -> IO Int -# endif --} - --- --------------------------------------------------------------------- --- Internal GHC Haskell magic - -#if defined(__GLASGOW_HASKELL__) -foreign import ccall unsafe "RtsAPI.h getProgArgv" - getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO () - -foreign import ccall unsafe "__hscore_memcpy_src_off" - memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) -#endif - --- --------------------------------------------------------------------- --- --- Functional array fusion for ByteStrings. --- --- From the Data Parallel Haskell project, --- http://www.cse.unsw.edu.au/~chak/project/dph/ --- - --- |Data type for accumulators which can be ignored. The rewrite rules rely on --- the fact that no bottoms of this type are ever constructed; hence, we can --- assume @(_ :: NoAL) `seq` x = x@. --- -data NoAL = NoAL - --- | Special forms of loop arguments --- --- * These are common special cases for the three function arguments of gen --- and loop; we give them special names to make it easier to trigger RULES --- applying in the special cases represented by these arguments. The --- "INLINE [1]" makes sure that these functions are only inlined in the last --- two simplifier phases. --- --- * In the case where the accumulator is not needed, it is better to always --- explicitly return a value `()', rather than just copy the input to the --- output, as the former gives GHC better local information. --- - --- | Element function expressing a mapping only -mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8)) -mapEFL f = \_ e -> (noAL, (Just $ f e)) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] mapEFL #-} -#endif - --- | Element function implementing a filter function only -filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8)) -filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] filterEFL #-} -#endif - --- |Element function expressing a reduction only -foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8)) -foldEFL f = \a e -> (f a e, Nothing) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] foldEFL #-} -#endif - --- | A strict foldEFL. -foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8)) -foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] foldEFL' #-} -#endif - --- | Element function expressing a prefix reduction only --- -scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8) -scanEFL f = \a e -> (f a e, Just a) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] scanEFL #-} -#endif - --- | Element function implementing a map and fold --- -mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> acc -> Word8 -> (acc, Maybe Word8) -mapAccumEFL f = \a e -> case f a e of (a', e') -> (a', Just e') -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] mapAccumEFL #-} -#endif - --- | Element function implementing a map with index --- -mapIndexEFL :: (Int -> Word8 -> Word8) -> Int -> Word8 -> (Int, Maybe Word8) -mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i', Just $ f i e) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] mapIndexEFL #-} -#endif - --- | No accumulator -noAL :: NoAL -noAL = NoAL -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] noAL #-} -#endif - --- | Projection functions that are fusion friendly (as in, we determine when --- they are inlined) -loopArr :: (acc, byteString) -> byteString -loopArr (_, arr) = arr -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopArr #-} -#endif - -loopAcc :: (acc, byteString) -> acc -loopAcc (acc, _) = acc -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopAcc #-} -#endif - -loopSndAcc :: ((acc1, acc2), byteString) -> (acc2, byteString) -loopSndAcc ((_, acc), arr) = (acc, arr) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopSndAcc #-} -#endif - ------------------------------------------------------------------------- - --- --- size, and then percentage. --- - --- | Iteration over over ByteStrings -loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem - -> acc -- ^ initial acc value - -> ByteString -- ^ input ByteString - -> (acc, ByteString) - -loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do - fp <- mallocByteString i - (ptr,n,acc) <- withForeignPtr fp $ \p -> do - (acc, i') <- go (a `plusPtr` s) p start - if i' == i - then return (fp,i',acc) -- no realloc for map - else do fp_ <- mallocByteString i' -- realloc - withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i') - return (fp_,i',acc) - - return (acc, PS ptr 0 n) - where - go p ma = trans 0 0 - where - STRICT3(trans) - trans a_off ma_off acc - | a_off >= i = return (acc, ma_off) - | otherwise = do - x <- peekByteOff p a_off - let (acc', oe) = f acc x - ma_off' <- case oe of - Nothing -> return ma_off - Just e -> do pokeByteOff ma ma_off e - return $ ma_off + 1 - trans (a_off+1) ma_off' acc' - -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopU #-} -#endif - -infixr 9 `fuseEFL` - --- |Fuse to flat loop functions -fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) - -> (a2 -> Word8 -> (a2, Maybe Word8)) - -> (a1, a2) - -> Word8 - -> ((a1, a2), Maybe Word8) -fuseEFL f g (acc1, acc2) e1 = - case f acc1 e1 of - (acc1', Nothing) -> ((acc1', acc2), Nothing) - (acc1', Just e2) -> - case g acc2 e2 of - (acc2', res) -> ((acc1', acc2'), res) - -{-# RULES - -"loop/loop fusion!" forall em1 em2 start1 start2 arr. - loopU em2 start2 (loopArr (loopU em1 start1 arr)) = - loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr) - -"loopArr/loopSndAcc" forall x. - loopArr (loopSndAcc x) = loopArr x - -"seq/NoAL" forall (u::NoAL) e. - u `seq` e = e - - #-} - diff --git a/Data/ByteString/Base.hs b/Data/ByteString/Base.hs new file mode 100644 index 0000000..dac2a16 --- /dev/null +++ b/Data/ByteString/Base.hs @@ -0,0 +1,463 @@ +{-# OPTIONS_GHC -cpp -fglasgow-exts #-} +-- +-- Module : ByteString.Base +-- License : BSD-style +-- Maintainer : dons@cse.unsw.edu.au +-- Stability : experimental +-- Portability : portable, requires ffi and cpp +-- Tested with : GHC 6.4.1 and Hugs March 2005 +-- + +-- | A module containing semi-public ByteString internals. This exposes +-- the ByteString representation and low level construction functions. +-- Modules which extend the ByteString system will need to use this module +-- while ideally most users will be able to make do with the public interface +-- modules. +-- +module Data.ByteString.Base ( + + -- * The @ByteString@ type and representation + ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + + -- * Unchecked access + unsafeHead, -- :: ByteString -> Word8 + unsafeTail, -- :: ByteString -> ByteString + unsafeIndex, -- :: ByteString -> Int -> Word8 + unsafeTake, -- :: Int -> ByteString -> ByteString + unsafeDrop, -- :: Int -> ByteString -> ByteString + + -- * Low level introduction and elimination + create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString + createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString + createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) + + unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString + unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a + unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a + + fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString + toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int) + +#if defined(__GLASGOW_HASKELL__) + packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString + packAddress, -- :: Addr# -> ByteString + unsafePackAddress, -- :: Int -> Addr# -> ByteString + unsafeFinalize, -- :: ByteString -> IO () +#endif + + -- * Utilities + inlinePerformIO, -- :: IO a -> a + + countOccurrences, -- :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO () + + -- * Standard C Functions + c_strlen, -- :: CString -> IO CInt + c_malloc, -- :: CInt -> IO (Ptr Word8) + c_free, -- :: Ptr Word8 -> IO () + +#if !defined(__GLASGOW_HASKELL__) + c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ()) +#endif + + memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8 + memcmp, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt + memcpy, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () + memmove, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () + memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) + + -- * cbits functions + c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () + c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO () + c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8 + c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8 + c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt + + -- * Internal GHC magic +#if defined(__GLASGOW_HASKELL__) + getProgArgv, -- :: Ptr CInt -> Ptr (Ptr CString) -> IO () + memcpy_ptr_baoff, -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) +#endif + + -- * Chars + w2c, c2w, isSpaceWord8 + + ) where + +import Foreign.ForeignPtr +import Foreign.Ptr +import Foreign.Storable (Storable(..)) +import Foreign.C.Types +import Foreign.C.String (CString, CStringLen) + +import Control.Exception (assert) + +import Data.Char (ord) +import Data.Word (Word8) + +#if defined(__GLASGOW_HASKELL__) +import qualified Foreign.Concurrent as FC (newForeignPtr) + +import Data.Generics (Data(..), Typeable(..)) +import GHC.Prim (Addr#) +import GHC.Ptr (Ptr(..)) +import GHC.Base (realWorld#,unsafeChr) +import GHC.IOBase + +#if defined(__GLASGOW_HASKELL__) && !defined(SLOW_FOREIGN_PTR) +import GHC.ForeignPtr (mallocPlainForeignPtrBytes) +#endif + +#else +import Data.Char (chr) +import System.IO.Unsafe (unsafePerformIO) +#endif + +-- CFILES stuff is Hugs only +{-# CFILES cbits/fpstring.c #-} + +-- ----------------------------------------------------------------------------- +-- +-- Useful macros, until we have bang patterns +-- + +#define STRICT1(f) f a | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined + +-- ----------------------------------------------------------------------------- + +-- | A space-efficient representation of a Word8 vector, supporting many +-- efficient operations. A 'ByteString' contains 8-bit characters only. +-- +-- Instances of Eq, Ord, Read, Show, Data, Typeable +-- +data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- length + +#if defined(__GLASGOW_HASKELL__) + deriving (Data, Typeable) +#endif + +-- --------------------------------------------------------------------- +-- +-- Extensions to the basic interface +-- + +-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the +-- check for the empty case, so there is an obligation on the programmer +-- to provide a proof that the ByteString is non-empty. +unsafeHead :: ByteString -> Word8 +unsafeHead (PS x s l) = assert (l > 0) $ + inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s +{-# INLINE unsafeHead #-} + +-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the +-- check for the empty case. As with 'unsafeHead', the programmer must +-- provide a separate proof that the ByteString is non-empty. +unsafeTail :: ByteString -> ByteString +unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1) +{-# INLINE unsafeTail #-} + +-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8' +-- This omits the bounds check, which means there is an accompanying +-- obligation on the programmer to ensure the bounds are checked in some +-- other way. +unsafeIndex :: ByteString -> Int -> Word8 +unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $ + inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i) +{-# INLINE unsafeIndex #-} + +-- | A variety of 'take' which omits the checks on @n@ so there is an +-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. +unsafeTake :: Int -> ByteString -> ByteString +unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n +{-# INLINE unsafeTake #-} + +-- | A variety of 'drop' which omits the checks on @n@ so there is an +-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. +unsafeDrop :: Int -> ByteString -> ByteString +unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n) +{-# INLINE unsafeDrop #-} + +-- --------------------------------------------------------------------- +-- Low level constructors + +-- | /O(1)/ Build a ByteString from a ForeignPtr +fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString +fromForeignPtr fp l = PS fp 0 l + +-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString +toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) +toForeignPtr (PS ps s l) = (ps, s, l) + +-- | A way of creating ByteStrings outside the IO monad. The @Int@ +-- argument gives the final size of the ByteString. Unlike +-- 'createAndTrim' the ByteString is not reallocated if the final size +-- is less than the estimated size. +unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString +unsafeCreate l f = unsafePerformIO (create l f) +{-# INLINE unsafeCreate #-} + +-- | Wrapper of mallocForeignPtrBytes. +create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString +create l f = do +#if defined(SLOW_FOREIGN_PTR) || !defined(__GLASGOW_HASKELL__) + fp <- mallocForeignPtrBytes l +#else + fp <- mallocPlainForeignPtrBytes l +#endif + withForeignPtr fp $ \p -> f p + return $! PS fp 0 l + +-- | Given the maximum size needed and a function to make the contents +-- of a ByteString, createAndTrim makes the 'ByteString'. The generating +-- function is required to return the actual final size (<= the maximum +-- size), and the resulting byte array is realloced to this size. +-- +-- createAndTrim is the main mechanism for creating custom, efficient +-- ByteString functions, using Haskell or C functions to fill the space. +-- +createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString +createAndTrim l f = do +#if defined(SLOW_FOREIGN_PTR) || !defined(__GLASGOW_HASKELL__) + fp <- mallocForeignPtrBytes l +#else + fp <- mallocPlainForeignPtrBytes l +#endif + withForeignPtr fp $ \p -> do + l' <- f p + if assert (l' <= l) $ l' >= l + then return $! PS fp 0 l + else create l' $ \p' -> memcpy p' p (fromIntegral l') + +createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) +createAndTrim' l f = do +#if defined(SLOW_FOREIGN_PTR) || !defined(__GLASGOW_HASKELL__) + fp <- mallocForeignPtrBytes l +#else + fp <- mallocPlainForeignPtrBytes l +#endif + withForeignPtr fp $ \p -> do + (off, l', res) <- f p + if assert (l' <= l) $ l' >= l + then return $! (PS fp 0 l, res) + else do ps <- create l' $ \p' -> + memcpy p' (p `plusPtr` off) (fromIntegral l') + return $! (ps, res) + +#if defined(__GLASGOW_HASKELL__) +-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an +-- Addr\# (an arbitrary machine address assumed to point outside the +-- garbage-collected heap) into a @ByteString@. A much faster way to +-- create an Addr\# is with an unboxed string literal, than to pack a +-- boxed string. A unboxed string literal is compiled to a static @char +-- []@ by GHC. Establishing the length of the string requires a call to +-- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as +-- is the case with "string"# literals in GHC). Use 'unsafePackAddress' +-- if you know the length of the string statically. +-- +-- An example: +-- +-- > literalFS = packAddress "literal"# +-- +packAddress :: Addr# -> ByteString +packAddress addr# = inlinePerformIO $ do + p <- newForeignPtr_ cstr + l <- c_strlen cstr + return $ PS p 0 (fromIntegral l) + where + cstr = Ptr addr# +{-# INLINE packAddress #-} + +-- | /O(1)/ 'unsafePackAddress' provides constant-time construction of +-- 'ByteStrings' -- which is ideal for string literals. It packs a +-- null-terminated sequence of bytes into a 'ByteString', given a raw +-- 'Addr\#' to the string, and the length of the string. Make sure the +-- length is correct, otherwise use the safer 'packAddress' (where the +-- length will be calculated once at runtime). +unsafePackAddress :: Int -> Addr# -> ByteString +unsafePackAddress len addr# = inlinePerformIO $ do + p <- newForeignPtr_ cstr + return $ PS p 0 len + where cstr = Ptr addr# + +-- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a +-- length, and an IO action representing a finalizer. This function is +-- not available on Hugs. +-- +packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString +packCStringFinalizer p l f = do + fp <- FC.newForeignPtr p f + return $ PS fp 0 l + +-- | Explicitly run the finaliser associated with a 'ByteString'. +-- Further references to this value may generate invalid memory +-- references. This operation is unsafe, as there may be other +-- 'ByteStrings' referring to the same underlying pages. If you use +-- this, you need to have a proof of some kind that all 'ByteString's +-- ever generated from the underlying byte array are no longer live. +unsafeFinalize :: ByteString -> IO () +unsafeFinalize (PS p _ _) = finalizeForeignPtr p + +#endif + +------------------------------------------------------------------------ + +-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. +w2c :: Word8 -> Char +#if !defined(__GLASGOW_HASKELL__) +w2c = chr . fromIntegral +#else +w2c = unsafeChr . fromIntegral +#endif +{-# INLINE w2c #-} + +-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and +-- silently truncates to 8 bits Chars > '\255'. It is provided as +-- convenience for ByteString construction. +c2w :: Char -> Word8 +c2w = fromIntegral . ord +{-# INLINE c2w #-} + +-- Selects white-space characters in the Latin-1 range +-- ordered by frequency +-- Idea from Ketil +isSpaceWord8 :: Word8 -> Bool +isSpaceWord8 w = case w of + 0x20 -> True -- SPACE + 0x0A -> True -- LF, \n + 0x09 -> True -- HT, \t + 0x0C -> True -- FF, \f + 0x0D -> True -- CR, \r + 0x0B -> True -- VT, \v + 0xA0 -> True -- spotted by QC.. + _ -> False +{-# INLINE isSpaceWord8 #-} + +------------------------------------------------------------------------ +-- | Just like unsafePerformIO, but we inline it. Big performance gains as +-- it exposes lots of things to further inlining +-- +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +#if defined(__GLASGOW_HASKELL__) +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r +#else +inlinePerformIO = unsafePerformIO +#endif + +-- | Count the number of occurrences of each byte. +-- +{-# SPECIALIZE countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () #-} +countOccurrences :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO () +STRICT3(countOccurrences) +countOccurrences counts str l = go 0 + where + STRICT1(go) + go i | i == l = return () + | otherwise = do k <- fromIntegral `fmap` peekElemOff str i + x <- peekElemOff counts k + pokeElemOff counts k (x + 1) + go (i + 1) + +-- | /O(1) construction/ Use a @ByteString@ with a function requiring a +-- @CString@. Warning: modifying the @CString@ will affect the +-- @ByteString@. Why is this function unsafe? It relies on the null +-- byte at the end of the ByteString to be there. Unless you can +-- guarantee the null byte, you should use the safe version, which will +-- copy the string first. +unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a +unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s) + +-- | /O(1) construction/ Use a @ByteString@ with a function requiring a +-- @CStringLen@. Warning: modifying the @CStringLen@ will affect the +-- @ByteString@. This is analogous to unsafeUseAsCString, and comes +-- with the same safety requirements. The user must ensure there is a +-- null byte at the end of the string. +unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a +unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l) + +-- --------------------------------------------------------------------- +-- +-- Standard C functions +-- + +foreign import ccall unsafe "string.h strlen" c_strlen + :: CString -> IO CInt + +foreign import ccall unsafe "stdlib.h malloc" c_malloc + :: CInt -> IO (Ptr Word8) + +foreign import ccall unsafe "static stdlib.h free" c_free + :: Ptr Word8 -> IO () + +#if !defined(__GLASGOW_HASKELL__) +foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer + :: FunPtr (Ptr Word8 -> IO ()) +#endif + +foreign import ccall unsafe "string.h memchr" memchr + :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) + +foreign import ccall unsafe "string.h memcmp" memcmp + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt + +foreign import ccall unsafe "string.h memcpy" memcpy + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () + +foreign import ccall unsafe "string.h memmove" memmove + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () + +foreign import ccall unsafe "string.h memset" memset + :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) + + +-- --------------------------------------------------------------------- +-- +-- Uses our C code +-- + +foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse + :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () + +foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse + :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO () + +foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum + :: Ptr Word8 -> CInt -> IO Word8 + +foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum + :: Ptr Word8 -> CInt -> IO Word8 + +foreign import ccall unsafe "static fpstring.h fps_count" c_count + :: Ptr Word8 -> CInt -> Word8 -> IO CInt + +-- --------------------------------------------------------------------- +-- MMap + +{- +foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap + :: Int -> Int -> IO (Ptr Word8) + +foreign import ccall unsafe "static unistd.h close" c_close + :: Int -> IO Int + +# if !defined(__OpenBSD__) +foreign import ccall unsafe "static sys/mman.h munmap" c_munmap + :: Ptr Word8 -> Int -> IO Int +# endif +-} + +-- --------------------------------------------------------------------- +-- Internal GHC Haskell magic + +#if defined(__GLASGOW_HASKELL__) +foreign import ccall unsafe "RtsAPI.h getProgArgv" + getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO () + +foreign import ccall unsafe "__hscore_memcpy_src_off" + memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) +#endif diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index 86916f2..71bf394 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -1,4 +1,4 @@ -{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-} +{-# OPTIONS_GHC -cpp -fglasgow-exts #-} -- -- Module : Data.ByteString.Char8 -- Copyright : (c) Don Stewart 2006 @@ -35,11 +35,11 @@ module Data.ByteString.Char8 ( -- * The @ByteString@ type - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - singleton, -- :: Char -> ByteString + singleton, -- :: Char -> ByteString pack, -- :: String -> ByteString unpack, -- :: ByteString -> String @@ -104,6 +104,7 @@ module Data.ByteString.Char8 ( span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + breakEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) group, -- :: ByteString -> [ByteString] groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString] inits, -- :: ByteString -> [ByteString] @@ -179,19 +180,28 @@ module Data.ByteString.Char8 ( -- * Ordered ByteStrings sort, -- :: ByteString -> ByteString - -- * Unchecked access - unsafeHead, -- :: ByteString -> Char - unsafeTail, -- :: ByteString -> ByteString - unsafeIndex, -- :: ByteString -> Int -> Char + -- * Conversion w2c, -- :: Word8 -> Char c2w, -- :: Char -> Word8 -- * Reading from ByteStrings readInt, -- :: ByteString -> Maybe Int - unsafeReadInt, -- :: ByteString -> Maybe Int + + -- * Low level CString conversions + + -- ** Packing CStrings and pointers + packCString, -- :: CString -> ByteString + packCStringLen, -- :: CString -> ByteString + packMallocCString, -- :: CString -> ByteString + + -- ** Using ByteStrings as CStrings + useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a + useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a -- * Copying ByteStrings copy, -- :: ByteString -> ByteString + copyCString, -- :: CString -> IO ByteString + copyCStringLen, -- :: CStringLen -> IO ByteString -- * I\/O with @ByteString@s @@ -207,17 +217,21 @@ module Data.ByteString.Char8 ( -- ** Files readFile, -- :: FilePath -> IO ByteString writeFile, -- :: FilePath -> ByteString -> IO () + appendFile, -- :: FilePath -> ByteString -> IO () -- mmapFile, -- :: FilePath -> IO ByteString -- ** I\/O with Handles #if defined(__GLASGOW_HASKELL__) getArgs, -- :: IO [ByteString] hGetLine, -- :: Handle -> IO ByteString + hGetLines, -- :: Handle -> IO ByteString hGetNonBlocking, -- :: Handle -> Int -> IO ByteString #endif hGetContents, -- :: Handle -> IO ByteString hGet, -- :: Handle -> Int -> IO ByteString hPut, -- :: Handle -> ByteString -> IO () + hPutStr, -- :: Handle -> ByteString -> IO () + hPutStrLn, -- :: Handle -> ByteString -> IO () #if defined(__GLASGOW_HASKELL__) -- * Low level construction @@ -230,8 +244,6 @@ module Data.ByteString.Char8 ( #if defined(__GLASGOW_HASKELL__) unpackList, #endif - noAL, NoAL, loopArr, loopAcc, loopSndAcc, - loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL, filter', map' ) where @@ -242,45 +254,47 @@ import Prelude hiding (reverse,head,tail,last,init,null ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,unwords ,words,maximum,minimum,all,concatMap,scanl,scanl1 - ,foldl1,foldr1,readFile,writeFile,replicate + ,foldl1,foldr1,readFile,writeFile,appendFile,replicate ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) import qualified Data.ByteString as B +import qualified Data.ByteString.Base as B -- Listy functions transparently exported -import Data.ByteString (ByteString(..) - ,empty,null,length,tail,init,append +import Data.ByteString (empty,null,length,tail,init,append ,inits,tails,reverse,transpose ,concat,take,drop,splitAt,join ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring - ,findSubstrings,unsafeTail,copy,group + ,findSubstrings,copy,group ,getContents, putStr, putStrLn - ,readFile, {-mmapFile,-} writeFile - ,hGetContents, hGet, hPut + ,readFile, {-mmapFile,-} writeFile, appendFile + ,hGetContents, hGet, hPut, hPutStr, hPutStrLn + ,packCString,packCStringLen, packMallocCString + ,useAsCString,useAsCStringLen, copyCString,copyCStringLen #if defined(__GLASGOW_HASKELL__) - ,getLine, getArgs, hGetLine, hGetNonBlocking - ,packAddress, unsafePackAddress + ,getLine, getArgs, hGetLine, hGetLines, hGetNonBlocking ,unpackList #endif - ,noAL, NoAL, loopArr, loopAcc, loopSndAcc - ,loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL - ,useAsCString, unsafeUseAsCString ) -import Data.Char +import Data.ByteString.Base ( + ByteString(..) +#if defined(__GLASGOW_HASKELL__) + ,packAddress, unsafePackAddress +#endif + ,c2w, w2c, unsafeTail, inlinePerformIO, isSpaceWord8 + ) import qualified Data.List as List (intersperse) import Foreign -import Foreign.C.Types (CLong) -import Foreign.Marshal.Utils (with) #if defined(__GLASGOW_HASKELL__) -import GHC.Base (Char(..),unsafeChr,unpackCString#,unsafeCoerce#) +import GHC.Base (Char(..),unpackCString#,unsafeCoerce#) import GHC.IOBase (IO(..),stToIO) -import GHC.Prim (Addr#,writeWord8OffAddr#,realWorld#,plusAddr#) +import GHC.Prim (Addr#,writeWord8OffAddr#,plusAddr#) import GHC.Ptr (Ptr(..)) import GHC.ST (ST(..)) #endif @@ -288,6 +302,7 @@ import GHC.ST (ST(..)) #define STRICT1(f) f a | a `seq` False = undefined #define STRICT2(f) f a b | a `seq` b `seq` False = undefined #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined ------------------------------------------------------------------------ @@ -303,13 +318,13 @@ singleton = B.singleton . c2w pack :: String -> ByteString #if !defined(__GLASGOW_HASKELL__) -pack str = B.create (P.length str) $ \p -> go p str +pack str = B.unsafeCreate (P.length str) $ \p -> go p str where go _ [] = return () go p (x:xs) = poke p (c2w x) >> go (p `plusPtr` 1) xs #else /* hack away */ -pack str = B.create (P.length str) $ \(Ptr p) -> stToIO (go p str) +pack str = B.unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p str) where go :: Addr# -> [Char] -> ST a () go _ [] = return () @@ -527,6 +542,13 @@ spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd f = B.spanEnd (f . w2c) {-# INLINE spanEnd #-} +-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' +-- +-- breakEnd p == spanEnd (not.p) +breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +breakEnd f = B.breakEnd (f . w2c) +{-# INLINE breakEnd #-} + -- | 'breakChar' breaks its ByteString argument at the first occurence -- of the specified Char. It is more efficient than 'break' as it is -- implemented with @memchr(3)@. I.e. @@ -742,22 +764,6 @@ unsafeIndex :: ByteString -> Int -> Char unsafeIndex = (w2c .) . B.unsafeIndex {-# INLINE unsafeIndex #-} --- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. -w2c :: Word8 -> Char -#if !defined(__GLASGOW_HASKELL__) -w2c = chr . fromIntegral -#else -w2c = unsafeChr . fromIntegral -#endif -{-# INLINE w2c #-} - --- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and --- silently truncates to 8 bits Chars > '\255'. It is provided as --- convenience for ByteString construction. -c2w :: Char -> Word8 -c2w = fromIntegral . ord -{-# INLINE c2w #-} - -- --------------------------------------------------------------------- -- Things that depend on the encoding @@ -769,7 +775,7 @@ c2w = fromIntegral . ord breakSpace :: ByteString -> (ByteString,ByteString) breakSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do i <- firstspace (p `plusPtr` s) 0 l - return $ case () of {_ + return $! case () of {_ | i == 0 -> (empty, PS x s l) | i == l -> (PS x s l, empty) | otherwise -> (PS x s i, PS x (s+i) (l-i)) @@ -792,7 +798,7 @@ firstspace ptr n m dropSpace :: ByteString -> ByteString dropSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do i <- firstnonspace (p `plusPtr` s) 0 l - return $ if i == l then empty else PS x (s+i) (l-i) + return $! if i == l then empty else PS x (s+i) (l-i) {-# INLINE dropSpace #-} firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int @@ -812,7 +818,7 @@ firstnonspace ptr n m dropSpaceEnd :: ByteString -> ByteString dropSpaceEnd (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do i <- lastnonspace (p `plusPtr` s) (l-1) - return $ if i == (-1) then empty else PS x s (i+1) + return $! if i == (-1) then empty else PS x s (i+1) {-# INLINE dropSpaceEnd #-} lastnonspace :: Ptr Word8 -> Int -> IO Int @@ -972,90 +978,33 @@ betweenLines start end ps = -- --------------------------------------------------------------------- -- Reading from ByteStrings --- | readInt skips any whitespace at the beginning of its argument, and --- reads an Int from the beginning of the ByteString. If there is no +-- | readInt reads an Int from the beginning of the ByteString. If there is no -- integer at the beginning of the string, it returns Nothing, otherwise -- it just returns the int read, and the rest of the string. readInt :: ByteString -> Maybe (Int, ByteString) -readInt p@(PS x s l) = inlinePerformIO $ useAsCString p $ \cstr -> - with (castPtr cstr) $ \endpp -> do - val <- c_strtol (castPtr cstr) endpp 0 - skipped <- (`minusPtr` cstr) `fmap` peek endpp - return $ if skipped == 0 - then Nothing - else Just (fromIntegral val, PS x (s+skipped) (l-skipped)) - --- | unsafeReadInt is like readInt, but requires a null terminated --- ByteString. It avoids a copy if this is the case. It returns the Int --- read, if any, and the rest of the string. -unsafeReadInt :: ByteString -> Maybe (Int, ByteString) -unsafeReadInt p@(PS x s l) = inlinePerformIO $ unsafeUseAsCString p $ \cstr -> - with (castPtr cstr) $ \endpp -> do - val <- c_strtol (castPtr cstr) endpp 0 - skipped <- (`minusPtr` cstr) `fmap` peek endpp - return $ if skipped == 0 - then Nothing - else Just (fromIntegral val, PS x (s+skipped) (l-skipped)) - -foreign import ccall unsafe "stdlib.h strtol" c_strtol - :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong - -{- --- --- not quite there yet --- -readInt :: ByteString -> Maybe (Int, ByteString) -readInt = go 0 - where - STRICT2(go) - go i ps - | B.null ps = Nothing - | x == '-' = neg 0 xs - | otherwise = pos (parse x) xs - where (x, xs) = (ps `unsafeIndex` 0, unsafeTail ps) - - STRICT2(neg) - neg n qs | isSpace x = return $ Just ((i-n),xs) - | otherwise = neg (parse x + (10 * n)) xs - where (x, xs) = (qs `unsafeIndex` 0, unsafeTail qs) - - STRICT2(pos) - pos n qs | isSpace x = go (i+n) xs - | otherwise = pos (parse x + (10 * n)) xs - where (x, xs) = (qs `unsafeIndexWord8` 0, unsafeTail qs) - - parse w = fromIntegral (w - 48) :: Int - {-# INLINE parse #-} --} - --- --------------------------------------------------------------------- --- Internals - --- Just like inlinePerformIO, but we inline it. Big performance gains as --- it exposes lots of things to further inlining --- -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -#if defined(__GLASGOW_HASKELL__) -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -#else -inlinePerformIO = unsafePerformIO -#endif - --- Selects white-space characters in the Latin-1 range --- ordered by frequency --- Idea from Ketil -isSpaceWord8 :: Word8 -> Bool -isSpaceWord8 w = case w of - 0x20 -> True -- SPACE - 0x0A -> True -- LF, \n - 0x09 -> True -- HT, \t - 0x0C -> True -- FF, \f - 0x0D -> True -- CR, \r - 0x0B -> True -- VT, \v - 0xA0 -> True -- spotted by QC.. - _ -> False -{-# INLINE isSpaceWord8 #-} +readInt as + | null as = Nothing + | otherwise = + case unsafeHead as of + '-' -> loop True 0 0 (unsafeTail as) + '+' -> loop False 0 0 (unsafeTail as) + _ -> loop False 0 0 as + + where loop :: Bool -> Int -> Int -> ByteString -> Maybe (Int, ByteString) + STRICT4(loop) + loop neg i n ps + | null ps = end neg i n ps + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 + && w <= 0x39 -> loop neg (i+1) + (n * 10 + (fromIntegral w - 0x30)) + (unsafeTail ps) + | otherwise -> end neg i n ps + + end _ 0 _ _ = Nothing + end True _ n ps = Just (negate n, ps) + end _ _ n ps = Just (n, ps) -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is -- slightly faster for one-shot cases. diff --git a/Data/ByteString/Fusion.hs b/Data/ByteString/Fusion.hs new file mode 100644 index 0000000..99cfa2b --- /dev/null +++ b/Data/ByteString/Fusion.hs @@ -0,0 +1,700 @@ +{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-} +-- +-- Module : Data.ByteString.Fusion +-- License : BSD-style +-- Maintainer : dons@cse.unsw.edu.au +-- Stability : experimental +-- Portability : portable, requires ffi and cpp +-- Tested with : GHC 6.4.1 and Hugs March 2005 +-- + +-- +-- | Functional array fusion for ByteStrings. +-- +-- Originally based on code from the Data Parallel Haskell project, +-- +-- +module Data.ByteString.Fusion ( + + -- * Fusion utilities + loopU, loopL, fuseEFL, + NoAcc(NoAcc), loopArr, loopAcc, loopSndAcc, unSP, + mapEFL, filterEFL, foldEFL, foldEFL', scanEFL, mapAccumEFL, mapIndexEFL, + + -- ** Alternative Fusion stuff + -- | This replaces 'loopU' with 'loopUp' + -- and adds several further special cases of loops. + loopUp, loopDown, loopNoAcc, loopMap, loopFilter, + loopWrapper, sequenceLoops, + doUpLoop, doDownLoop, doNoAccLoop, doMapLoop, doFilterLoop, + + -- | These are the special fusion cases for combining each loop form perfectly. + fuseAccAccEFL, fuseAccNoAccEFL, fuseNoAccAccEFL, fuseNoAccNoAccEFL, + fuseMapAccEFL, fuseAccMapEFL, fuseMapNoAccEFL, fuseNoAccMapEFL, + fuseMapMapEFL, fuseAccFilterEFL, fuseFilterAccEFL, fuseNoAccFilterEFL, + fuseFilterNoAccEFL, fuseFilterFilterEFL, fuseMapFilterEFL, fuseFilterMapEFL, + + -- * Strict pairs and sums + PairS(..), MaybeS(..) + + ) where + +import Data.ByteString.Base + +import Foreign.ForeignPtr +import Foreign.Ptr +import Foreign.Storable (Storable(..)) + +import Data.Word (Word8) +import System.IO.Unsafe (unsafePerformIO) + +-- ----------------------------------------------------------------------------- +-- +-- Useful macros, until we have bang patterns +-- + +#define STRICT1(f) f a | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined + +infixl 2 :*: + +-- |Strict pair +data PairS a b = !a :*: !b deriving (Eq,Ord,Show) + +-- |Strict Maybe +data MaybeS a = NothingS | JustS !a deriving (Eq,Ord,Show) + +-- |Data type for accumulators which can be ignored. The rewrite rules rely on +-- the fact that no bottoms of this type are ever constructed; hence, we can +-- assume @(_ :: NoAcc) `seq` x = x@. +-- +data NoAcc = NoAcc + +-- |Type of loop functions +type AccEFL acc = acc -> Word8 -> (PairS acc (MaybeS Word8)) +type NoAccEFL = Word8 -> MaybeS Word8 +type MapEFL = Word8 -> Word8 +type FilterEFL = Word8 -> Bool + +infixr 9 `fuseEFL` + +-- |Fuse to flat loop functions +fuseEFL :: AccEFL acc1 -> AccEFL acc2 -> AccEFL (PairS acc1 acc2) +fuseEFL f g (acc1 :*: acc2) e1 = + case f acc1 e1 of + acc1' :*: NothingS -> (acc1' :*: acc2) :*: NothingS + acc1' :*: JustS e2 -> + case g acc2 e2 of + acc2' :*: res -> (acc1' :*: acc2') :*: res +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] fuseEFL #-} +#endif + +-- | Special forms of loop arguments +-- +-- * These are common special cases for the three function arguments of gen +-- and loop; we give them special names to make it easier to trigger RULES +-- applying in the special cases represented by these arguments. The +-- "INLINE [1]" makes sure that these functions are only inlined in the last +-- two simplifier phases. +-- +-- * In the case where the accumulator is not needed, it is better to always +-- explicitly return a value `()', rather than just copy the input to the +-- output, as the former gives GHC better local information. +-- + +-- | Element function expressing a mapping only +#if !defined(LOOPNOACC_FUSION) +mapEFL :: (Word8 -> Word8) -> AccEFL NoAcc +mapEFL f = \_ e -> (NoAcc :*: (JustS $ f e)) +#else +mapEFL :: (Word8 -> Word8) -> NoAccEFL +mapEFL f = \e -> JustS (f e) +#endif +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] mapEFL #-} +#endif + +-- | Element function implementing a filter function only +#if !defined(LOOPNOACC_FUSION) +filterEFL :: (Word8 -> Bool) -> AccEFL NoAcc +filterEFL p = \_ e -> if p e then (NoAcc :*: JustS e) else (NoAcc :*: NothingS) +#else +filterEFL :: (Word8 -> Bool) -> NoAccEFL +filterEFL p = \e -> if p e then JustS e else NothingS +#endif + +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] filterEFL #-} +#endif + +-- |Element function expressing a reduction only +foldEFL :: (acc -> Word8 -> acc) -> AccEFL acc +foldEFL f = \a e -> (f a e :*: NothingS) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] foldEFL #-} +#endif + +-- | A strict foldEFL. +foldEFL' :: (acc -> Word8 -> acc) -> AccEFL acc +foldEFL' f = \a e -> let a' = f a e in a' `seq` (a' :*: NothingS) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] foldEFL' #-} +#endif + +-- | Element function expressing a prefix reduction only +-- +scanEFL :: (Word8 -> Word8 -> Word8) -> AccEFL Word8 +scanEFL f = \a e -> (f a e :*: JustS a) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] scanEFL #-} +#endif + +-- | Element function implementing a map and fold +-- +mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> AccEFL acc +mapAccumEFL f = \a e -> case f a e of (a', e') -> (a' :*: JustS e') +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] mapAccumEFL #-} +#endif + +-- | Element function implementing a map with index +-- +mapIndexEFL :: (Int -> Word8 -> Word8) -> AccEFL Int +mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i' :*: JustS (f i e)) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] mapIndexEFL #-} +#endif + +-- | Projection functions that are fusion friendly (as in, we determine when +-- they are inlined) +loopArr :: (PairS acc arr) -> arr +loopArr (_ :*: arr) = arr +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopArr #-} +#endif + +loopAcc :: (PairS acc arr) -> acc +loopAcc (acc :*: _) = acc +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopAcc #-} +#endif + +loopSndAcc :: (PairS (PairS acc1 acc2) arr) -> (PairS acc2 arr) +loopSndAcc ((_ :*: acc) :*: arr) = (acc :*: arr) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopSndAcc #-} +#endif + +unSP :: (PairS acc arr) -> (acc, arr) +unSP (acc :*: arr) = (acc, arr) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] unSP #-} +#endif + +------------------------------------------------------------------------ +-- +-- Loop combinator and fusion rules for flat arrays +-- |Iteration over over ByteStrings + +-- | Iteration over over ByteStrings +loopU :: AccEFL acc -- ^ mapping & folding, once per elem + -> acc -- ^ initial acc value + -> ByteString -- ^ input ByteString + -> (PairS acc ByteString) + +loopU f start (PS z s i) = unsafePerformIO $ withForeignPtr z $ \a -> do + (ps, acc) <- createAndTrim' i $ \p -> do + (acc' :*: i') <- go (a `plusPtr` s) p start + return (0, i', acc') + return (acc :*: ps) + + where + go p ma = trans 0 0 + where + STRICT3(trans) + trans a_off ma_off acc + | a_off >= i = return (acc :*: ma_off) + | otherwise = do + x <- peekByteOff p a_off + let (acc' :*: oe) = f acc x + ma_off' <- case oe of + NothingS -> return ma_off + JustS e -> do pokeByteOff ma ma_off e + return $ ma_off + 1 + trans (a_off+1) ma_off' acc' + +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopU #-} +#endif + +{-# RULES + +"loop/loop fusion!" forall em1 em2 start1 start2 arr. + loopU em2 start2 (loopArr (loopU em1 start1 arr)) = + loopSndAcc (loopU (em1 `fuseEFL` em2) (start1 :*: start2) arr) + + #-} + +-- +-- Functional list/array fusion for lazy ByteStrings. +-- +loopL :: AccEFL acc -- ^ mapping & folding, once per elem + -> acc -- ^ initial acc value + -> [ByteString] -- ^ input ByteString + -> PairS acc [ByteString] +loopL f = loop + where loop s [] = (s :*: []) + loop s (x:xs) + | l == 0 = (s'' :*: ys) + | otherwise = (s'' :*: y:ys) + where (s' :*: y@(PS _ _ l)) = loopU f s x -- avoid circular dep on P.null + (s'' :*: ys) = loop s' xs + +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopL #-} +#endif + +{-# RULES + +"lazy loop/loop fusion!" forall em1 em2 start1 start2 arr. + loopL em2 start2 (loopArr (loopL em1 start1 arr)) = + loopSndAcc (loopL (em1 `fuseEFL` em2) (start1 :*: start2) arr) + + #-} + + +{- + +Alternate experimental formulation of loopU which partitions it into +an allocating wrapper and an imperitive array-mutating loop. + +The point in doing this split is that we might be able to fuse multiple +loops into a single wrapper. This would save reallocating another buffer. +It should also give better cache locality by reusing the buffer. + +Note that this stuff needs ghc-6.5 from May 26 or later for the RULES to +really work reliably. + +-} + +loopUp :: AccEFL acc -> acc -> ByteString -> PairS acc ByteString +loopUp f a arr = loopWrapper (doUpLoop f a) arr +{-# INLINE loopUp #-} + +loopDown :: AccEFL acc -> acc -> ByteString -> PairS acc ByteString +loopDown f a arr = loopWrapper (doDownLoop f a) arr +{-# INLINE loopDown #-} + +loopNoAcc :: NoAccEFL -> ByteString -> PairS NoAcc ByteString +loopNoAcc f arr = loopWrapper (doNoAccLoop f NoAcc) arr +{-# INLINE loopNoAcc #-} + +loopMap :: MapEFL -> ByteString -> PairS NoAcc ByteString +loopMap f arr = loopWrapper (doMapLoop f NoAcc) arr +{-# INLINE loopMap #-} + +loopFilter :: FilterEFL -> ByteString -> PairS NoAcc ByteString +loopFilter f arr = loopWrapper (doFilterLoop f NoAcc) arr +{-# INLINE loopFilter #-} + +-- The type of imperitive loops that fill in a destination array by +-- reading a source array. They may not fill in the whole of the dest +-- array if the loop is behaving as a filter, this is why we return +-- the length that was filled in. The loop may also accumulate some +-- value as it loops over the source array. +-- +type ImperativeLoop acc = + Ptr Word8 -- pointer to the start of the source byte array + -> Ptr Word8 -- pointer to ther start of the destination byte array + -> Int -- length of the source byte array + -> IO (PairS (PairS acc Int) Int) -- result and offset, length of dest that was filled + +loopWrapper :: ImperativeLoop acc -> ByteString -> PairS acc ByteString +loopWrapper body (PS srcFPtr srcOffset srcLen) = unsafePerformIO $ + withForeignPtr srcFPtr $ \srcPtr -> do + (ps, acc) <- createAndTrim' srcLen $ \destPtr -> do + (acc :*: destOffset :*: destLen) <- + body (srcPtr `plusPtr` srcOffset) destPtr srcLen + return (destOffset, destLen, acc) + return (acc :*: ps) + +doUpLoop :: AccEFL acc -> acc -> ImperativeLoop acc +doUpLoop f acc0 src dest len = loop 0 0 acc0 + where STRICT3(loop) + loop src_off dest_off acc + | src_off >= len = return (acc :*: 0 :*: dest_off) + | otherwise = do + x <- peekByteOff src src_off + case f acc x of + (acc' :*: NothingS) -> loop (src_off+1) dest_off acc' + (acc' :*: JustS x') -> pokeByteOff dest dest_off x' + >> loop (src_off+1) (dest_off+1) acc' + +doDownLoop :: AccEFL acc -> acc -> ImperativeLoop acc +doDownLoop f acc0 src dest len = loop (len-1) (len-1) acc0 + where STRICT3(loop) + loop src_off dest_off acc + | src_off < 0 = return (acc :*: dest_off + 1 :*: len - (dest_off + 1)) + | otherwise = do + x <- peekByteOff src src_off + case f acc x of + (acc' :*: NothingS) -> loop (src_off-1) dest_off acc' + (acc' :*: JustS x') -> pokeByteOff dest dest_off x' + >> loop (src_off-1) (dest_off-1) acc' + +doNoAccLoop :: NoAccEFL -> noAcc -> ImperativeLoop noAcc +doNoAccLoop f noAcc src dest len = loop 0 0 + where STRICT2(loop) + loop src_off dest_off + | src_off >= len = return (noAcc :*: 0 :*: dest_off) + | otherwise = do + x <- peekByteOff src src_off + case f x of + NothingS -> loop (src_off+1) dest_off + JustS x' -> pokeByteOff dest dest_off x' + >> loop (src_off+1) (dest_off+1) + +doMapLoop :: MapEFL -> noAcc -> ImperativeLoop noAcc +doMapLoop f noAcc src dest len = loop 0 + where STRICT1(loop) + loop n + | n >= len = return (noAcc :*: 0 :*: len) + | otherwise = do + x <- peekByteOff src n + pokeByteOff dest n (f x) + loop (n+1) -- offset always the same, only pass 1 arg + +doFilterLoop :: FilterEFL -> noAcc -> ImperativeLoop noAcc +doFilterLoop f noAcc src dest len = loop 0 0 + where STRICT2(loop) + loop src_off dest_off + | src_off >= len = return (noAcc :*: 0 :*: dest_off) + | otherwise = do + x <- peekByteOff src src_off + if f x + then pokeByteOff dest dest_off x + >> loop (src_off+1) (dest_off+1) + else loop (src_off+1) dest_off + +-- run two loops in sequence, +-- think of it as: loop1 >> loop2 +sequenceLoops :: ImperativeLoop acc1 + -> ImperativeLoop acc2 + -> ImperativeLoop (PairS acc1 acc2) +sequenceLoops loop1 loop2 src dest len0 = do + (acc1 :*: off1 :*: len1) <- loop1 src dest len0 + (acc2 :*: off2 :*: len2) <- + let src' = dest `plusPtr` off1 + dest' = src' -- note that we are using dest == src + -- for the second loop as we are + -- mutating the dest array in-place! + in loop2 src' dest' len1 + return ((acc1 :*: acc2) :*: off1 + off2 :*: len2) + + -- TODO: prove that this is associative! (I think it is) + -- since we can't be sure how the RULES will combine loops. + +#if defined(__GLASGOW_HASKELL__) + +{-# INLINE [1] doUpLoop #-} +{-# INLINE [1] doDownLoop #-} +{-# INLINE [1] doNoAccLoop #-} +{-# INLINE [1] doMapLoop #-} +{-# INLINE [1] doFilterLoop #-} + +{-# INLINE [1] loopWrapper #-} +{-# INLINE [1] sequenceLoops #-} + +{-# INLINE [1] fuseAccAccEFL #-} +{-# INLINE [1] fuseAccNoAccEFL #-} +{-# INLINE [1] fuseNoAccAccEFL #-} +{-# INLINE [1] fuseNoAccNoAccEFL #-} +{-# INLINE [1] fuseMapAccEFL #-} +{-# INLINE [1] fuseAccMapEFL #-} +{-# INLINE [1] fuseMapNoAccEFL #-} +{-# INLINE [1] fuseNoAccMapEFL #-} +{-# INLINE [1] fuseMapMapEFL #-} +{-# INLINE [1] fuseAccFilterEFL #-} +{-# INLINE [1] fuseFilterAccEFL #-} +{-# INLINE [1] fuseNoAccFilterEFL #-} +{-# INLINE [1] fuseFilterNoAccEFL #-} +{-# INLINE [1] fuseFilterFilterEFL #-} +{-# INLINE [1] fuseMapFilterEFL #-} +{-# INLINE [1] fuseFilterMapEFL #-} + +#endif + +{-# RULES + +"loopArr/loopSndAcc" forall x. + loopArr (loopSndAcc x) = loopArr x + +"seq/NoAcc" forall (u::NoAcc) e. + u `seq` e = e + +"loop/loop wrapper elimination" forall loop1 loop2 arr. + loopWrapper loop2 (loopArr (loopWrapper loop1 arr)) = + loopSndAcc (loopWrapper (sequenceLoops loop1 loop2) arr) + +-- +-- n.b in the following, when reading n/m fusion, recall sequenceLoops +-- is monadic, so its really n >> m fusion (i.e. m.n), not n . m fusion. +-- + +"up/up loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doUpLoop f1 acc1) (doUpLoop f2 acc2) = + doUpLoop (f1 `fuseAccAccEFL` f2) (acc1 :*: acc2) + +"map/map loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doMapLoop f1 acc1) (doMapLoop f2 acc2) = + doMapLoop (f1 `fuseMapMapEFL` f2) (acc1 :*: acc2) + +"filter/filter loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doFilterLoop f1 acc1) (doFilterLoop f2 acc2) = + doFilterLoop (f1 `fuseFilterFilterEFL` f2) (acc1 :*: acc2) + +"map/filter loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doMapLoop f1 acc1) (doFilterLoop f2 acc2) = + doNoAccLoop (f1 `fuseMapFilterEFL` f2) (acc1 :*: acc2) + +"filter/map loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doFilterLoop f1 acc1) (doMapLoop f2 acc2) = + doNoAccLoop (f1 `fuseFilterMapEFL` f2) (acc1 :*: acc2) + +"map/up loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doMapLoop f1 acc1) (doUpLoop f2 acc2) = + doUpLoop (f1 `fuseMapAccEFL` f2) (acc1 :*: acc2) + +"up/map loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doUpLoop f1 acc1) (doMapLoop f2 acc2) = + doUpLoop (f1 `fuseAccMapEFL` f2) (acc1 :*: acc2) + +"filter/up loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doFilterLoop f1 acc1) (doUpLoop f2 acc2) = + doUpLoop (f1 `fuseFilterAccEFL` f2) (acc1 :*: acc2) + +"up/filter loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doUpLoop f1 acc1) (doFilterLoop f2 acc2) = + doUpLoop (f1 `fuseAccFilterEFL` f2) (acc1 :*: acc2) + +"down/down loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doDownLoop f1 acc1) (doDownLoop f2 acc2) = + doDownLoop (f1 `fuseAccAccEFL` f2) (acc1 :*: acc2) + +"map/down fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doMapLoop f1 acc1) (doDownLoop f2 acc2) = + doDownLoop (f1 `fuseMapAccEFL` f2) (acc1 :*: acc2) + +"down/map loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doDownLoop f1 acc1) (doMapLoop f2 acc2) = + doDownLoop (f1 `fuseAccMapEFL` f2) (acc1 :*: acc2) + +"filter/down fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doFilterLoop f1 acc1) (doDownLoop f2 acc2) = + doDownLoop (f1 `fuseFilterAccEFL` f2) (acc1 :*: acc2) + +"down/filter loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doDownLoop f1 acc1) (doFilterLoop f2 acc2) = + doDownLoop (f1 `fuseAccFilterEFL` f2) (acc1 :*: acc2) + +"noAcc/noAcc loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doNoAccLoop f1 acc1) (doNoAccLoop f2 acc2) = + doNoAccLoop (f1 `fuseNoAccNoAccEFL` f2) (acc1 :*: acc2) + +"noAcc/up loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doNoAccLoop f1 acc1) (doUpLoop f2 acc2) = + doUpLoop (f1 `fuseNoAccAccEFL` f2) (acc1 :*: acc2) + +"up/noAcc loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doUpLoop f1 acc1) (doNoAccLoop f2 acc2) = + doUpLoop (f1 `fuseAccNoAccEFL` f2) (acc1 :*: acc2) + +"map/noAcc loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doMapLoop f1 acc1) (doNoAccLoop f2 acc2) = + doNoAccLoop (f1 `fuseMapNoAccEFL` f2) (acc1 :*: acc2) + +"noAcc/map loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doNoAccLoop f1 acc1) (doMapLoop f2 acc2) = + doNoAccLoop (f1 `fuseNoAccMapEFL` f2) (acc1 :*: acc2) + +"filter/noAcc loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doFilterLoop f1 acc1) (doNoAccLoop f2 acc2) = + doNoAccLoop (f1 `fuseFilterNoAccEFL` f2) (acc1 :*: acc2) + +"noAcc/filter loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doNoAccLoop f1 acc1) (doFilterLoop f2 acc2) = + doNoAccLoop (f1 `fuseNoAccFilterEFL` f2) (acc1 :*: acc2) + +"noAcc/down loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doNoAccLoop f1 acc1) (doDownLoop f2 acc2) = + doDownLoop (f1 `fuseNoAccAccEFL` f2) (acc1 :*: acc2) + +"down/noAcc loop fusion" forall f1 f2 acc1 acc2. + sequenceLoops (doDownLoop f1 acc1) (doNoAccLoop f2 acc2) = + doDownLoop (f1 `fuseAccNoAccEFL` f2) (acc1 :*: acc2) + + #-} + +{- + +up = up loop +down = down loop +map = map special case +filter = filter special case +noAcc = noAcc undirectional loop (unused) + +heirarchy: + up down + ^ ^ + \ / + noAcc + ^ ^ + / \ + map filter + +each is a special case of the things above + +so we get rules that combine things on the same level +and rules that combine things on different levels +to get something on the higher level + +so all the cases: +up/up --> up fuseAccAccEFL +down/down --> down fuseAccAccEFL +noAcc/noAcc --> noAcc fuseNoAccNoAccEFL + +noAcc/up --> up fuseNoAccAccEFL +up/noAcc --> up fuseAccNoAccEFL +noAcc/down --> down fuseNoAccAccEFL +down/noAcc --> down fuseAccNoAccEFL + +and if we do the map, filter special cases then it adds a load more: + +map/map --> map fuseMapMapEFL +filter/filter --> filter fuseFilterFilterEFL + +map/filter --> noAcc fuseMapFilterEFL +filter/map --> noAcc fuseFilterMapEFL + +map/noAcc --> noAcc fuseMapNoAccEFL +noAcc/map --> noAcc fuseNoAccMapEFL + +map/up --> up fuseMapAccEFL +up/map --> up fuseAccMapEFL + +map/down --> down fuseMapAccEFL +down/map --> down fuseAccMapEFL + +filter/noAcc --> noAcc fuseNoAccFilterEFL +noAcc/filter --> noAcc fuseFilterNoAccEFL + +filter/up --> up fuseFilterAccEFL +up/filter --> up fuseAccFilterEFL + +filter/down --> down fuseFilterAccEFL +down/filter --> down fuseAccFilterEFL +-} + +fuseAccAccEFL :: AccEFL acc1 -> AccEFL acc2 -> AccEFL (PairS acc1 acc2) +fuseAccAccEFL f g (acc1 :*: acc2) e1 = + case f acc1 e1 of + acc1' :*: NothingS -> (acc1' :*: acc2) :*: NothingS + acc1' :*: JustS e2 -> + case g acc2 e2 of + acc2' :*: res -> (acc1' :*: acc2') :*: res + +fuseAccNoAccEFL :: AccEFL acc -> NoAccEFL -> AccEFL (PairS acc noAcc) +fuseAccNoAccEFL f g (acc :*: noAcc) e1 = + case f acc e1 of + acc' :*: NothingS -> (acc' :*: noAcc) :*: NothingS + acc' :*: JustS e2 -> (acc' :*: noAcc) :*: g e2 + +fuseNoAccAccEFL :: NoAccEFL -> AccEFL acc -> AccEFL (PairS noAcc acc) +fuseNoAccAccEFL f g (noAcc :*: acc) e1 = + case f e1 of + NothingS -> (noAcc :*: acc) :*: NothingS + JustS e2 -> + case g acc e2 of + acc' :*: res -> (noAcc :*: acc') :*: res + +fuseNoAccNoAccEFL :: NoAccEFL -> NoAccEFL -> NoAccEFL +fuseNoAccNoAccEFL f g e1 = + case f e1 of + NothingS -> NothingS + JustS e2 -> g e2 + +fuseMapAccEFL :: MapEFL -> AccEFL acc -> AccEFL (PairS noAcc acc) +fuseMapAccEFL f g (noAcc :*: acc) e1 = + case g acc (f e1) of + (acc' :*: res) -> (noAcc :*: acc') :*: res + +fuseAccMapEFL :: AccEFL acc -> MapEFL -> AccEFL (PairS acc noAcc) +fuseAccMapEFL f g (acc :*: noAcc) e1 = + case f acc e1 of + (acc' :*: NothingS) -> (acc' :*: noAcc) :*: NothingS + (acc' :*: JustS e2) -> (acc' :*: noAcc) :*: JustS (g e2) + +fuseMapMapEFL :: MapEFL -> MapEFL -> MapEFL +fuseMapMapEFL f g e1 = g (f e1) -- n.b. perfect fusion + +fuseMapNoAccEFL :: MapEFL -> NoAccEFL -> NoAccEFL +fuseMapNoAccEFL f g e1 = g (f e1) + +fuseNoAccMapEFL :: NoAccEFL -> MapEFL -> NoAccEFL +fuseNoAccMapEFL f g e1 = + case f e1 of + NothingS -> NothingS + JustS e2 -> JustS (g e2) + +fuseAccFilterEFL :: AccEFL acc -> FilterEFL -> AccEFL (PairS acc noAcc) +fuseAccFilterEFL f g (acc :*: noAcc) e1 = + case f acc e1 of + acc' :*: NothingS -> (acc' :*: noAcc) :*: NothingS + acc' :*: JustS e2 -> + case g e2 of + False -> (acc' :*: noAcc) :*: NothingS + True -> (acc' :*: noAcc) :*: JustS e2 + +fuseFilterAccEFL :: FilterEFL -> AccEFL acc -> AccEFL (PairS noAcc acc) +fuseFilterAccEFL f g (noAcc :*: acc) e1 = + case f e1 of + False -> (noAcc :*: acc) :*: NothingS + True -> + case g acc e1 of + acc' :*: res -> (noAcc :*: acc') :*: res + +fuseNoAccFilterEFL :: NoAccEFL -> FilterEFL -> NoAccEFL +fuseNoAccFilterEFL f g e1 = + case f e1 of + NothingS -> NothingS + JustS e2 -> + case g e2 of + False -> NothingS + True -> JustS e2 + +fuseFilterNoAccEFL :: FilterEFL -> NoAccEFL -> NoAccEFL +fuseFilterNoAccEFL f g e1 = + case f e1 of + False -> NothingS + True -> g e1 + +fuseFilterFilterEFL :: FilterEFL -> FilterEFL -> FilterEFL +fuseFilterFilterEFL f g e1 = f e1 && g e1 + +fuseMapFilterEFL :: MapEFL -> FilterEFL -> NoAccEFL +fuseMapFilterEFL f g e1 = + case f e1 of + e2 -> case g e2 of + False -> NothingS + True -> JustS e2 + +fuseFilterMapEFL :: FilterEFL -> MapEFL -> NoAccEFL +fuseFilterMapEFL f g e1 = + case f e1 of + False -> NothingS + True -> JustS (g e1) + diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs new file mode 100644 index 0000000..17e181f --- /dev/null +++ b/Data/ByteString/Lazy.hs @@ -0,0 +1,1246 @@ +{-# OPTIONS_GHC -cpp -optc-O1 -fffi -fglasgow-exts -fno-warn-incomplete-patterns #-} +-- +-- -optc-O2 breaks with 4.0.4 gcc on debian +-- +-- Module : ByteString.Lazy +-- Copyright : (c) Don Stewart 2006 +-- (c) Duncan Coutts 2006 +-- License : BSD-style +-- +-- Maintainer : dons@cse.unsw.edu.au +-- Stability : experimental +-- Portability : portable, requires ffi and cpp +-- Tested with : GHC 6.4.1 and Hugs March 2005 +-- + +-- +-- | A time and space-efficient implementation of lazy byte vectors +-- using lists of packed 'Word8' arrays, suitable for high performance +-- use, both in terms of large data quantities, or high speed +-- requirements. Byte vectors are encoded as lazy lists of strict 'Word8' +-- arrays of bytes. They provide a means to manipulate large byte vectors +-- without requiring the entire vector be resident in memory. +-- +-- Some operations, such as concat, append, reverse and cons, have +-- better complexity than their "Data.ByteString" equivalents, as due to +-- optimisations resulting from the list spine structure. And for other +-- operations Lazy ByteStrings are usually within a few percent of +-- strict ones, but with better heap usage. For data larger than the +-- available memory, or if you have tight memory constraints, this +-- module will be the only option. The default chunk size is 64k, which +-- should be good in most circumstances. For people with large L2 +-- caches, you may want to increase this to fit your cache. +-- +-- This module is intended to be imported @qualified@, to avoid name +-- clashes with "Prelude" functions. eg. +-- +-- > import qualified Data.ByteString.Lazy as B +-- +-- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use +-- UArray by Simon Marlow. Rewritten to support slices and use +-- ForeignPtr by David Roundy. Polished and extended by Don Stewart. +-- Lazy variant by Duncan Coutts and Don Stewart. +-- + +module Data.ByteString.Lazy ( + + -- * The @ByteString@ type + ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + + -- * Introducing and eliminating 'ByteString's + empty, -- :: ByteString + singleton, -- :: Word8 -> ByteString + pack, -- :: [Word8] -> ByteString + unpack, -- :: ByteString -> [Word8] + packWith, -- :: (a -> Word8) -> [a] -> ByteString + unpackWith, -- :: (Word8 -> a) -> ByteString -> [a] + + -- * Basic interface + cons, -- :: Word8 -> ByteString -> ByteString + snoc, -- :: ByteString -> Word8 -> ByteString + append, -- :: ByteString -> ByteString -> ByteString + head, -- :: ByteString -> Word8 + last, -- :: ByteString -> Word8 + tail, -- :: ByteString -> ByteString + init, -- :: ByteString -> ByteString + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int64 + + -- * Transformating ByteStrings + map, -- :: (Word8 -> Word8) -> ByteString -> ByteString + reverse, -- :: ByteString -> ByteString +-- intersperse, -- :: Word8 -> ByteString -> ByteString + transpose, -- :: [ByteString] -> [ByteString] + + -- * Reducing 'ByteString's (folds) + foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a + foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a + foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a + foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + + -- ** Special folds + concat, -- :: [ByteString] -> ByteString + concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString + any, -- :: (Word8 -> Bool) -> ByteString -> Bool + all, -- :: (Word8 -> Bool) -> ByteString -> Bool + maximum, -- :: ByteString -> Word8 + minimum, -- :: ByteString -> Word8 + + -- * Building ByteStrings + -- ** Scans + scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString +-- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString +-- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString +-- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString + + -- ** Accumulating maps + mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) + mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString + + -- ** Infinite ByteStrings + repeat, -- :: Word8 -> ByteString + replicate, -- :: Int64 -> Word8 -> ByteString + cycle, -- :: ByteString -> ByteString + iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString + + -- ** Unfolding + unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString + + -- * Substrings + + -- ** Breaking strings + take, -- :: Int64 -> ByteString -> ByteString + drop, -- :: Int64 -> ByteString -> ByteString + splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString) + takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString + dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString + span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + group, -- :: ByteString -> [ByteString] + groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] + + -- ** Breaking and dropping on specific bytes + breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) + spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) + + -- ** Breaking into many substrings + split, -- :: Word8 -> ByteString -> [ByteString] + splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] + tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] + + -- ** Joining strings + join, -- :: ByteString -> [ByteString] -> ByteString + joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString + + -- * Predicates + isPrefixOf, -- :: ByteString -> ByteString -> Bool +-- isSuffixOf, -- :: ByteString -> ByteString -> Bool + + -- * Searching ByteStrings + + -- ** Searching by equality + elem, -- :: Word8 -> ByteString -> Bool + notElem, -- :: Word8 -> ByteString -> Bool + filterByte, -- :: Word8 -> ByteString -> ByteString + filterNotByte, -- :: Word8 -> ByteString -> ByteString + + -- ** Searching with a predicate + find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 + filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString +-- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + + -- * Indexing ByteStrings + index, -- :: ByteString -> Int64 -> Word8 + elemIndex, -- :: Word8 -> ByteString -> Maybe Int64 + elemIndices, -- :: Word8 -> ByteString -> [Int64] + findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64 + findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64] + count, -- :: Word8 -> ByteString -> Int64 + + -- * Zipping and unzipping ByteStrings + zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] + zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] +-- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) + + -- * Ordered ByteStrings +-- sort, -- :: ByteString -> ByteString + + -- * I\/O with 'ByteString's + + -- ** Standard input and output + getContents, -- :: IO ByteString + putStr, -- :: ByteString -> IO () + putStrLn, -- :: ByteString -> IO () + interact, -- :: (ByteString -> ByteString) -> IO () + + -- ** Files + readFile, -- :: FilePath -> IO ByteString + writeFile, -- :: FilePath -> ByteString -> IO () + appendFile, -- :: FilePath -> ByteString -> IO () + + -- ** I\/O with Handles + hGetContents, -- :: Handle -> IO ByteString + hGetContentsN, -- :: Int -> Handle -> IO ByteString + hGet, -- :: Handle -> Int -> IO ByteString + hGetN, -- :: Int -> Handle -> Int -> IO ByteString + hPut, -- :: Handle -> ByteString -> IO () +#if defined(__GLASGOW_HASKELL__) + hGetNonBlocking, -- :: Handle -> IO ByteString + hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString +#endif + + ) where + +import qualified Prelude +import Prelude hiding + (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines + ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum + ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1 + ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate + ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) + +import qualified Data.List as L -- L for list/lazy +import qualified Data.ByteString as P -- P for packed +import qualified Data.ByteString.Base as P +import qualified Data.ByteString.Fusion as P +import Data.ByteString.Fusion (PairS(..),loopL) + +import Data.Monoid (Monoid(..)) + +import Data.Word (Word8) +import Data.Int (Int64) +import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..),hClose) +import System.IO.Unsafe +import Control.Exception (bracket) + +#if defined(__GLASGOW_HASKELL__) +import Data.Generics (Data(..), Typeable(..)) +#endif + +-- ----------------------------------------------------------------------------- +-- +-- Useful macros, until we have bang patterns +-- + +#define STRICT1(f) f a | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined + +-- ----------------------------------------------------------------------------- + +-- | A space-efficient representation of a Word8 vector, supporting many +-- efficient operations. A 'ByteString' contains 8-bit characters only. +-- +-- Instances of Eq, Ord, Read, Show, Data, Typeable +-- +newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string + deriving (Show,Read +#if defined(__GLASGOW_HASKELL__) + ,Data, Typeable +#endif + ) + +-- +-- hmm, what about getting the PS constructor unpacked into the cons cell? +-- +-- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List +-- +-- Would avoid one indirection per chunk. +-- + +unLPS :: ByteString -> [P.ByteString] +unLPS (LPS xs) = xs +{-# INLINE unLPS #-} + +instance Eq ByteString + where (==) = eq + +instance Ord ByteString + where compare = compareBytes + +instance Monoid ByteString where + mempty = empty + mappend = append + mconcat = concat + +------------------------------------------------------------------------ + +-- XXX +-- The data type invariant: +-- Every ByteString is either empty or consists of non-null ByteStrings. +-- All functions must preserve this, and the QC properties must check this. +-- +_invariant :: ByteString -> Bool +_invariant (LPS []) = True +_invariant (LPS xs) = L.all (not . P.null) xs + +-- In a form useful for QC testing +_checkInvariant :: ByteString -> ByteString +_checkInvariant lps + | _invariant lps = lps + | otherwise = moduleError "invariant" ("violation: " ++ show lps) + +-- The Data abstraction function +-- +_abstr :: ByteString -> P.ByteString +_abstr (LPS []) = P.empty +_abstr (LPS xs) = P.concat xs + +-- The representation uses lists of packed chunks. When we have to convert from +-- a lazy list to the chunked representation, then by default we'll use this +-- chunk size. Some functions give you more control over the chunk size. +-- +-- Measurements here: +-- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png +-- +-- indicate that a value around 0.5 to 1 x your L2 cache is best. +-- The following value assumes people have something greater than 128k, +-- and need to share the cache with other programs. +-- +defaultChunkSize :: Int +defaultChunkSize = 64 * k + where k = 1024 + +smallChunkSize :: Int +smallChunkSize = 4 * k + where k = 1024 + +-- defaultChunkSize = 1 + +------------------------------------------------------------------------ + +eq :: ByteString -> ByteString -> Bool +eq (LPS xs) (LPS ys) = eq' xs ys + where eq' [] [] = True + eq' [] _ = False + eq' _ [] = False + eq' (a:as) (b:bs) = + case compare (P.length a) (P.length b) of + LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs) + EQ -> a == b && eq' as bs + GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs + +compareBytes :: ByteString -> ByteString -> Ordering +compareBytes (LPS xs) (LPS ys) = cmp xs ys + where cmp [] [] = EQ + cmp [] _ = LT + cmp _ [] = GT + cmp (a:as) (b:bs) = + case compare (P.length a) (P.length b) of + LT -> case compare a (P.take (P.length a) b) of + EQ -> cmp as (P.drop (P.length a) b : bs) + result -> result + EQ -> case compare a b of + EQ -> cmp as bs + result -> result + GT -> case compare (P.take (P.length b) a) b of + EQ -> cmp (P.drop (P.length b) a : as) bs + result -> result + +-- ----------------------------------------------------------------------------- +-- Introducing and eliminating 'ByteString's + +-- | /O(1)/ The empty 'ByteString' +empty :: ByteString +empty = LPS [] +{-# NOINLINE empty #-} + +-- | /O(1)/ Convert a 'Word8' into a 'ByteString' +singleton :: Word8 -> ByteString +singleton c = LPS [P.singleton c] +{-# NOINLINE singleton #-} + +-- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. +pack :: [Word8] -> ByteString +pack str = LPS $ L.map P.pack (chunk defaultChunkSize str) + +-- ? +chunk :: Int -> [a] -> [[a]] +chunk _ [] = [] +chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs'' + +-- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. +unpack :: ByteString -> [Word8] +unpack (LPS ss) = L.concatMap P.unpack ss +{-# INLINE unpack #-} + +------------------------------------------------------------------------ + +-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some +-- conversion function +packWith :: (a -> Word8) -> [a] -> ByteString +packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str) +{-# INLINE packWith #-} +{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} + +-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. +unpackWith :: (Word8 -> a) -> ByteString -> [a] +unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss +{-# INLINE unpackWith #-} +{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} + +-- --------------------------------------------------------------------- +-- Basic interface + +-- | /O(1)/ Test whether a ByteString is empty. +null :: ByteString -> Bool +null (LPS []) = True +null (_) = False -- TODO: guarantee this invariant is maintained +{-# INLINE null #-} + +-- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64' +length :: ByteString -> Int64 +length (LPS ss) = L.sum (L.map (fromIntegral.P.length) ss) + +-- avoid the intermediate list? +-- length (LPS ss) = L.foldl lengthF 0 ss +-- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m +{-# INLINE length #-} + +-- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is +-- strict in the ByteString that we are consing onto. More precisely, it forces +-- the head and the first chunk. It does this because, for space efficiency, it +-- may coalesce the new byte onto the first \'chunk\' rather than starting a +-- new \'chunk\'. +-- +-- So that means you can't use a lazy recursive contruction like this: +-- +-- > let xs = cons c xs in xs +-- +-- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings. +-- +cons :: Word8 -> ByteString -> ByteString +cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss) +cons c (LPS ss) = LPS (P.singleton c : ss) +{-# INLINE cons #-} + +-- | /O(n\/c)/ Append a byte to the end of a 'ByteString' +snoc :: ByteString -> Word8 -> ByteString +snoc (LPS ss) c = LPS (ss ++ [P.singleton c]) +{-# INLINE snoc #-} + +-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. +head :: ByteString -> Word8 +head (LPS []) = errorEmptyList "head" +head (LPS (x:_)) = P.unsafeHead x +{-# INLINE head #-} + +-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. +tail :: ByteString -> ByteString +tail (LPS []) = errorEmptyList "tail" +tail (LPS (x:xs)) + | P.length x == 1 = LPS xs + | otherwise = LPS (P.unsafeTail x : xs) +{-# INLINE tail #-} + +-- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty. +last :: ByteString -> Word8 +last (LPS []) = errorEmptyList "last" +last (LPS xs) = P.last (L.last xs) +{-# INLINE last #-} + +-- | /O(1)/ Return all the elements of a 'ByteString' except the last one. +init :: ByteString -> ByteString +init (LPS []) = errorEmptyList "init" +init (LPS xs) + | P.length y == 1 = LPS ys + | otherwise = LPS (ys ++ [P.init y]) + where (y,ys) = (L.last xs, L.init xs) +{-# INLINE init #-} + +-- | /O(n)/ Append two ByteStrings +append :: ByteString -> ByteString -> ByteString +append (LPS []) (LPS ys) = LPS ys +append (LPS xs) (LPS []) = LPS xs +append (LPS xs) (LPS ys) = LPS (xs ++ ys) +{-# INLINE append #-} + +-- --------------------------------------------------------------------- +-- Transformations + +-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each +-- element of @xs@. +map :: (Word8 -> Word8) -> ByteString -> ByteString +--map f (LPS xs) = LPS (L.map (P.map' f) xs) +map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS +{-# INLINE map #-} + +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. +reverse :: ByteString -> ByteString +reverse (LPS xs) = LPS (L.reverse . L.map P.reverse $ xs) +{-# INLINE reverse #-} + +-- The 'intersperse' function takes a 'Word8' and a 'ByteString' and +-- \`intersperses\' that byte between the elements of the 'ByteString'. +-- It is analogous to the intersperse function on Lists. +-- intersperse :: Word8 -> ByteString -> ByteString +-- intersperse = error "FIXME: not yet implemented" + +{- +intersperse c (LPS []) = LPS [] +intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse') + where intersperse' c ps@(PS x s l) = + P.create (2*l) $ \p -> withForeignPtr x $ \f -> + poke p c + c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c +-} + +-- | The 'transpose' function transposes the rows and columns of its +-- 'ByteString' argument. +transpose :: [ByteString] -> [ByteString] +transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s)) + +-- --------------------------------------------------------------------- +-- Reducing 'ByteString's + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a ByteString, reduces the +-- ByteString using the binary operator, from left to right. +foldl :: (a -> Word8 -> a) -> a -> ByteString -> a +--foldl f z (LPS xs) = L.foldl (P.foldl f) z xs +foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS +{-# INLINE foldl #-} + +-- | 'foldl\'' is like 'foldl', but strict in the accumulator. +foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a +--foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs +foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS +{-# INLINE foldl' #-} + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a ByteString, +-- reduces the ByteString using the binary operator, from right to left. +foldr :: (Word8 -> a -> a) -> a -> ByteString -> a +foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs +{-# INLINE foldr #-} + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'ByteStrings'. +-- This function is subject to array fusion. +foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldl1 _ (LPS []) = errorEmptyList "foldl1" +foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs)) + +-- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. +foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldl1' _ (LPS []) = errorEmptyList "foldl1'" +foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs)) + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'ByteString's +foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldr1 _ (LPS []) = errorEmptyList "foldr1" +foldr1 f (LPS ps) = foldr1' ps + where foldr1' (x:[]) = P.foldr1 f x + foldr1' (x:xs) = P.foldr f (foldr1' xs) x + +-- --------------------------------------------------------------------- +-- Special folds + +-- | /O(n)/ Concatenate a list of ByteStrings. +concat :: [ByteString] -> ByteString +concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss) + +-- | Map a function over a 'ByteString' and concatenate the results +concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString +concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps) + where + k w = case f w of LPS xs -> P.concat xs + +-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if +-- any element of the 'ByteString' satisfies the predicate. +any :: (Word8 -> Bool) -> ByteString -> Bool +any f (LPS xs) = L.or (L.map (P.any f) xs) +-- todo fuse + +-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines +-- if all elements of the 'ByteString' satisfy the predicate. +all :: (Word8 -> Bool) -> ByteString -> Bool +all f (LPS xs) = L.and (L.map (P.all f) xs) +-- todo fuse + +-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' +maximum :: ByteString -> Word8 +maximum (LPS []) = errorEmptyList "maximum" +maximum (LPS xs) = L.maximum (L.map P.maximum xs) +{-# INLINE maximum #-} + +-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' +minimum :: ByteString -> Word8 +minimum (LPS []) = errorEmptyList "minimum" +minimum (LPS xs) = L.minimum (L.map P.minimum xs) +{-# INLINE minimum #-} + +-- | The 'mapAccumL' function behaves like a combination of 'map' and +-- 'foldl'; it applies a function to each element of a ByteString, +-- passing an accumulating parameter from left to right, and returning a +-- final value of this accumulator together with the new ByteString. +mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) +mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS + +-- | /O(n)/ map Word8 functions, provided with the index at each position +mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString +mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS + +-- --------------------------------------------------------------------- +-- Building ByteStrings + +-- | 'scanl' is similar to 'foldl', but returns a list of successive +-- reduced values from the left. This function will fuse. +-- +-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] +-- +-- Note that +-- +-- > last (scanl f z xs) == foldl f z xs. +scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString +scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0) +{-# INLINE scanl #-} + +-- --------------------------------------------------------------------- +-- Unfolds and replicates + +-- | @'iterate' f x@ returns an infinite ByteString of repeated applications +-- of @f@ to @x@: +-- +-- > iterate f x == [x, f x, f (f x), ...] +-- +iterate :: (Word8 -> Word8) -> Word8 -> ByteString +iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x')) + +-- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every +-- element. +-- +repeat :: Word8 -> ByteString +repeat c = LPS (L.repeat block) + where block = P.replicate smallChunkSize c + +-- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@ +-- the value of every element. +-- +replicate :: Int64 -> Word8 -> ByteString +replicate w c + | w <= 0 = empty + | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c] + | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant + | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s) + where + s = P.replicate smallChunkSize c + (q, r) = quotRem w (fromIntegral smallChunkSize) + +-- | 'cycle' ties a finite ByteString into a circular one, or equivalently, +-- the infinite repetition of the original ByteString. +-- +cycle :: ByteString -> ByteString +cycle (LPS []) = errorEmptyList "cycle" +cycle (LPS xs) = LPS (L.cycle xs) + +-- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'. +-- 'unfoldr' builds a ByteString from a seed value. The function takes +-- the element and returns 'Nothing' if it is done producing the +-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a +-- prepending to the ByteString and @b@ is used as the next element in a +-- recursive call. +unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString +unfoldr f = LPS . unfoldChunk 32 + where unfoldChunk n x = + case P.unfoldrN n f x of + (s, Nothing) + | P.null s -> [] + | otherwise -> s : [] + (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x' + +-- --------------------------------------------------------------------- +-- Substrings + +-- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. +take :: Int64 -> ByteString -> ByteString +take n _ | n < 0 = empty +take i (LPS ps) = LPS (take' i ps) + where take' _ [] = [] + take' 0 _ = [] + take' n (x:xs) = + if n < fromIntegral (P.length x) + then P.take (fromIntegral n) x : [] + else x : take' (n - fromIntegral (P.length x)) xs + +-- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ +-- elements, or @[]@ if @n > 'length' xs@. +drop :: Int64 -> ByteString -> ByteString +drop i p | i <= 0 = p +drop i (LPS ps) = LPS (drop' i ps) + where drop' _ [] = [] + drop' 0 xs = xs + drop' n (x:xs) = + if n < fromIntegral (P.length x) + then P.drop (fromIntegral n) x : xs + else drop' (n - fromIntegral (P.length x)) xs + +-- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. +splitAt :: Int64 -> ByteString -> (ByteString, ByteString) +splitAt i p | i <= 0 = (empty, p) +splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b) + where splitAt' _ [] = ([], []) + splitAt' 0 xs = ([], xs) + splitAt' n (x:xs) = + if n < fromIntegral (P.length x) + then (P.take (fromIntegral n) x : [], + P.drop (fromIntegral n) x : xs) + else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs + in (x:xs', xs'') + + +-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, +-- returns the longest prefix (possibly empty) of @xs@ of elements that +-- satisfy @p@. +takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString +takeWhile f (LPS ps) = LPS (takeWhile' ps) + where takeWhile' [] = [] + takeWhile' (x:xs) = + case P.findIndexOrEnd (not . f) x of + 0 -> [] + n | n < P.length x -> P.take n x : [] + | otherwise -> x : takeWhile' xs + +-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. +dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString +dropWhile f (LPS ps) = LPS (dropWhile' ps) + where dropWhile' [] = [] + dropWhile' (x:xs) = + case P.findIndexOrEnd (not . f) x of + n | n < P.length x -> P.drop n x : xs + | otherwise -> dropWhile' xs + +-- | 'break' @p@ is equivalent to @'span' ('not' . p)@. +break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) +break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b) + where break' [] = ([], []) + break' (x:xs) = + case P.findIndexOrEnd f x of + 0 -> ([], x : xs) + n | n < P.length x -> (P.take n x : [], P.drop n x : xs) + | otherwise -> let (xs', xs'') = break' xs + in (x : xs', xs'') + +-- | 'breakByte' breaks its ByteString argument at the first occurence +-- of the specified byte. It is more efficient than 'break' as it is +-- implemented with @memchr(3)@. I.e. +-- +-- > break (=='c') "abcd" == breakByte 'c' "abcd" +-- +breakByte :: Word8 -> ByteString -> (ByteString, ByteString) +breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b) + where breakByte' [] = ([], []) + breakByte' (x:xs) = + case P.elemIndex c x of + Just 0 -> ([], x : xs) + Just n -> (P.take n x : [], P.drop n x : xs) + Nothing -> let (xs', xs'') = breakByte' xs + in (x : xs', xs'') + +-- | 'spanByte' breaks its ByteString argument at the first +-- occurence of a byte other than its argument. It is more efficient +-- than 'span (==)' +-- +-- > span (=='c') "abcd" == spanByte 'c' "abcd" +-- +spanByte :: Word8 -> ByteString -> (ByteString, ByteString) +spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b) + where spanByte' [] = ([], []) + spanByte' (x:xs) = + case P.spanByte c x of + (x', x'') | P.null x' -> ([], x : xs) + | P.null x'' -> let (xs', xs'') = spanByte' xs + in (x : xs', xs'') + | otherwise -> (x' : [], x'' : xs) + +-- | 'span' @p xs@ breaks the ByteString into two segments. It is +-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ +span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) +span p = break (not . p) + +-- | /O(n)/ Splits a 'ByteString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] +-- > splitWith (=='a') [] == [] +-- +splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] +splitWith _ (LPS []) = [] +splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as + + where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString] + comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : [] + comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs + comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs + + cons' x xs | P.null x = xs + | otherwise = x:xs + {-# INLINE cons' #-} +{-# INLINE splitWith #-} + +-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] +-- > split 'a' "aXaXaXa" == ["","X","X","X"] +-- > split 'x' "x" == ["",""] +-- +-- and +-- +-- > join [c] . split c == id +-- > split == splitWith . (==) +-- +-- As for all splitting functions in this library, this function does +-- not copy the substrings, it just constructs new 'ByteStrings' that +-- are slices of the original. +-- +split :: Word8 -> ByteString -> [ByteString] +split _ (LPS []) = [] +split c (LPS (a:as)) = comb [] (P.split c a) as + + where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString] + comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : [] + comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs + comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs + + cons' x xs | P.null x = xs + | otherwise = x:xs + {-# INLINE cons' #-} +{-# INLINE split #-} + +-- | Like 'splitWith', except that sequences of adjacent separators are +-- treated as a single separator. eg. +-- +-- > tokens (=='a') "aabbaca" == ["bb","c"] +-- +tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] +tokens f = L.filter (not.null) . splitWith f + +-- | The 'group' function takes a ByteString and returns a list of +-- ByteStrings such that the concatenation of the result is equal to the +-- argument. Moreover, each sublist in the result contains only equal +-- elements. For example, +-- +-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] +-- +-- It is a special case of 'groupBy', which allows the programmer to +-- supply their own equality test. +group :: ByteString -> [ByteString] +group (LPS []) = [] +group (LPS (a:as)) = group' [] (P.group a) as + where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString] + group' acc@(s':_) ss@(s:_) xs + | P.unsafeHead s' + /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs + group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : [] + group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs + group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs + +{- +TODO: check if something like this might be faster + +group :: ByteString -> [ByteString] +group xs + | null xs = [] + | otherwise = ys : group zs + where + (ys, zs) = spanByte (unsafeHead xs) xs +-} + +-- | The 'groupBy' function is the non-overloaded version of 'group'. +-- +groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] +groupBy _ (LPS []) = [] +groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as + where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString] + groupBy' acc@(_:_) c ss@(s:_) xs + | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs + groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : [] + groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs + groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs + groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs + +{- +TODO: check if something like this might be faster + +groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] +groupBy k xs + | null xs = [] + | otherwise = take n xs : groupBy k (drop n xs) + where + n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs) +-} + +-- | /O(n)/ The 'join' function takes a 'ByteString' and a list of +-- 'ByteString's and concatenates the list after interspersing the first +-- argument between each element of the list. +join :: ByteString -> [ByteString] -> ByteString +join s = concat . (L.intersperse s) + +-- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings +-- with a char. +-- +joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString +joinWithByte c x y = append x (cons c y) + +-- --------------------------------------------------------------------- +-- Indexing ByteStrings + +-- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0. +index :: ByteString -> Int64 -> Word8 +index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i) +index (LPS ps) i = index' ps i + where index' [] n = moduleError "index" ("index too large: " ++ show n) + index' (x:xs) n + | n >= fromIntegral (P.length x) = + index' xs (n - fromIntegral (P.length x)) + | otherwise = P.unsafeIndex x (fromIntegral n) + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'ByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. +-- This implementation uses memchr(3). +elemIndex :: Word8 -> ByteString -> Maybe Int64 +elemIndex c (LPS ps) = elemIndex' 0 ps + where elemIndex' _ [] = Nothing + elemIndex' n (x:xs) = + case P.elemIndex c x of + Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs + Just i -> Just (n + fromIntegral i) + +{- +-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the +-- element in the given 'ByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. The following +-- holds: +-- +-- > elemIndexEnd c xs == +-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) +-- +elemIndexEnd :: Word8 -> ByteString -> Maybe Int +elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> + go (p `plusPtr` s) (l-1) + where + STRICT2(go) + go p i | i < 0 = return Nothing + | otherwise = do ch' <- peekByteOff p i + if ch == ch' + then return $ Just i + else go p (i-1) +-} +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +-- This implementation uses memchr(3). +elemIndices :: Word8 -> ByteString -> [Int64] +elemIndices c (LPS ps) = elemIndices' 0 ps + where elemIndices' _ [] = [] + elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x) + ++ elemIndices' (n + fromIntegral (P.length x)) xs + +-- | count returns the number of times its argument appears in the ByteString +-- +-- > count = length . elemIndices +-- +-- But more efficiently than using length on the intermediate list. +count :: Word8 -> ByteString -> Int64 +count w (LPS xs) = L.sum (L.map (fromIntegral . P.count w) xs) + +-- | The 'findIndex' function takes a predicate and a 'ByteString' and +-- returns the index of the first element in the ByteString +-- satisfying the predicate. +findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64 +findIndex k (LPS ps) = findIndex' 0 ps + where findIndex' _ [] = Nothing + findIndex' n (x:xs) = + case P.findIndex k x of + Nothing -> findIndex' (n + fromIntegral (P.length x)) xs + Just i -> Just (n + fromIntegral i) +{-# INLINE findIndex #-} + +-- | /O(n)/ The 'find' function takes a predicate and a ByteString, +-- and returns the first element in matching the predicate, or 'Nothing' +-- if there is no such element. +-- +-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing +-- +find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 +find f (LPS ps) = find' ps + where find' [] = Nothing + find' (x:xs) = case P.find f x of + Nothing -> find' xs + Just w -> Just w +{-# INLINE find #-} + +-- | The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +findIndices :: (Word8 -> Bool) -> ByteString -> [Int64] +findIndices k (LPS ps) = findIndices' 0 ps + where findIndices' _ [] = [] + findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x) + ++ findIndices' (n + fromIntegral (P.length x)) xs + +-- --------------------------------------------------------------------- +-- Searching ByteStrings + +-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. +elem :: Word8 -> ByteString -> Bool +elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True + +-- | /O(n)/ 'notElem' is the inverse of 'elem' +notElem :: Word8 -> ByteString -> Bool +notElem c ps = not (elem c ps) + +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. +filter :: (Word8 -> Bool) -> ByteString -> ByteString +--filter f (LPS xs) = LPS (filterMap (P.filter' f) xs) +filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS +{-# INLINE filter #-} + +-- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter . +-- (==)/, for the common case of filtering a single byte. It is more +-- efficient to use /filterByte/ in this case. +-- +-- > filterByte == filter . (==) +-- +-- filterByte is around 10x faster, and uses much less space, than its +-- filter equivalent +filterByte :: Word8 -> ByteString -> ByteString +filterByte w ps = replicate (count w ps) w +-- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs) + +-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common +-- case of filtering a single byte out of a list. It is more efficient +-- to use /filterNotByte/ in this case. +-- +-- > filterNotByte == filter . (/=) +-- +-- filterNotByte is around 2x faster than its filter equivalent. +filterNotByte :: Word8 -> ByteString -> ByteString +filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs) + +-- --------------------------------------------------------------------- +-- Searching for substrings + +-- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' +-- iff the first is a prefix of the second. +isPrefixOf :: ByteString -> ByteString -> Bool +isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs + where isPrefixL [] _ = True + isPrefixL _ [] = False + isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys + | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys) + | otherwise = xh == y && isPrefixL (xt:xs) ys + where (xh,xt) = P.splitAt (P.length y) x + (yh,yt) = P.splitAt (P.length x) y + +-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' +-- iff the first is a suffix of the second. +-- +-- The following holds: +-- +-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y +-- +-- However, the real implemenation uses memcmp to compare the end of the +-- string only, with no reverse required.. +-- +--isSuffixOf :: ByteString -> ByteString -> Bool +--isSuffixOf = error "not yet implemented" + +-- --------------------------------------------------------------------- +-- Zipping + +-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of +-- corresponding pairs of bytes. If one input ByteString is short, +-- excess elements of the longer ByteString are discarded. This is +-- equivalent to a pair of 'unpack' operations. +zip :: ByteString -> ByteString -> [(Word8,Word8)] +zip = zipWith (,) + +-- | 'zipWith' generalises 'zip' by zipping with the function given as +-- the first argument, instead of a tupling function. For example, +-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of +-- corresponding sums. +zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] +zipWith _ (LPS []) (LPS _) = [] +zipWith _ (LPS _) (LPS []) = [] +zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs + where zipWith' x xs y ys = + (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys) + + zipWith'' x [] _ _ | P.null x = [] + zipWith'' _ _ y [] | P.null y = [] + zipWith'' x xs y ys | not (P.null x) + && not (P.null y) = zipWith' x xs y ys + zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys + zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys + zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys + +-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of +-- ByteStrings. Note that this performs two 'pack' operations. +{- +unzip :: [(Word8,Word8)] -> (ByteString,ByteString) +unzip _ls = error "not yet implemented" +{-# INLINE unzip #-} +-} + +-- --------------------------------------------------------------------- +-- Special lists + +-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. +inits :: ByteString -> [ByteString] +inits = (LPS [] :) . inits' . unLPS + where inits' [] = [] + inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x)) + ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs) + +-- | /O(n)/ Return all final segments of the given 'ByteString', longest first. +tails :: ByteString -> [ByteString] +tails = tails' . unLPS + where tails' [] = LPS [] : [] + tails' xs@(x:xs') + | P.length x == 1 = LPS xs : tails' xs' + | otherwise = LPS xs : tails' (P.unsafeTail x : xs') + +-- --------------------------------------------------------------------- + +-- TODO defrag func that concatenates block together that are below a threshold +-- defrag :: Int -> ByteString -> ByteString + +-- --------------------------------------------------------------------- +-- Lazy ByteString IO + +-- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks +-- are read on demand, in @k@-sized chunks. +hGetContentsN :: Int -> Handle -> IO ByteString +hGetContentsN k h = lazyRead >>= return . LPS + where + lazyRead = unsafeInterleaveIO $ do + ps <- P.hGet h k + case P.length ps of + 0 -> return [] + n | n < k -> return [ps] + _ -> do pss <- lazyRead + return (ps : pss) + +-- | Read @n@ bytes into a 'ByteString', directly from the +-- specified 'Handle', in chunks of size @k@. +hGetN :: Int -> Handle -> Int -> IO ByteString +hGetN _ _ 0 = return empty +hGetN k h n = readChunks n >>= return . LPS + where + STRICT1(readChunks) + readChunks i = do + ps <- P.hGet h (min k i) + case P.length ps of + 0 -> return [] + m | m == i -> return [ps] + m -> do pss <- readChunks (i - m) + return (ps : pss) + +#if defined(__GLASGOW_HASKELL__) +-- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block +-- waiting for data to become available, instead it returns only whatever data +-- is available. Chunks are read on demand, in @k@-sized chunks. +hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString +hGetNonBlockingN _ _ 0 = return empty +hGetNonBlockingN k h n = readChunks n >>= return . LPS + where + readChunks i = do + ps <- P.hGetNonBlocking h (min k i) + case P.length ps of + 0 -> return [] + m | fromIntegral m < i -> return [ps] + m -> do pss <- readChunks (i - m) + return (ps : pss) +#endif + +-- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks +-- are read on demand, using the default chunk size. +hGetContents :: Handle -> IO ByteString +hGetContents = hGetContentsN defaultChunkSize + +-- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'. +hGet :: Handle -> Int -> IO ByteString +hGet = hGetN defaultChunkSize + +#if defined(__GLASGOW_HASKELL__) +-- | hGetNonBlocking is similar to 'hGet', except that it will never block +-- waiting for data to become available, instead it returns only whatever data +-- is available. +hGetNonBlocking :: Handle -> Int -> IO ByteString +hGetNonBlocking = hGetNonBlockingN defaultChunkSize +#endif + + +-- | Read an entire file /lazily/ into a 'ByteString'. +readFile :: FilePath -> IO ByteString +readFile f = openBinaryFile f ReadMode >>= hGetContents + +-- | Write a 'ByteString' to a file. +writeFile :: FilePath -> ByteString -> IO () +writeFile f txt = bracket (openBinaryFile f WriteMode) hClose + (\hdl -> hPut hdl txt) + +-- | Append a 'ByteString' to a file. +appendFile :: FilePath -> ByteString -> IO () +appendFile f txt = bracket (openBinaryFile f AppendMode) hClose + (\hdl -> hPut hdl txt) + +-- | getContents. Equivalent to hGetContents stdin. Will read /lazily/ +getContents :: IO ByteString +getContents = hGetContents stdin + +-- | Outputs a 'ByteString' to the specified 'Handle'. +hPut :: Handle -> ByteString -> IO () +hPut h (LPS xs) = mapM_ (P.hPut h) xs + +-- | Write a ByteString to stdout +putStr :: ByteString -> IO () +putStr = hPut stdout + +-- | Write a ByteString to stdout, appending a newline byte +putStrLn :: ByteString -> IO () +putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a) + +-- | The interact function takes a function of type @ByteString -> ByteString@ +-- as its argument. The entire input from the standard input device is passed +-- to this function as its argument, and the resulting string is output on the +-- standard output device. It's great for writing one line programs! +interact :: (ByteString -> ByteString) -> IO () +interact transformer = putStr . transformer =<< getContents + +-- --------------------------------------------------------------------- +-- Internal utilities + +-- Common up near identical calls to `error' to reduce the number +-- constant strings created when compiled: +errorEmptyList :: String -> a +errorEmptyList fun = moduleError fun "empty ByteString" + +moduleError :: String -> String -> a +moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg) + +-- A manually fused version of "filter (not.null) . map f", since they +-- don't seem to fuse themselves. Really helps out filter*, concatMap. +-- +-- TODO fuse. +-- +filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString] +filterMap _ [] = [] +filterMap f (x:xs) = case f x of + y | P.null y -> filterMap f xs -- manually fuse the invariant filter + | otherwise -> y : filterMap f xs +{-# INLINE filterMap #-} + diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs new file mode 100644 index 0000000..15af132 --- /dev/null +++ b/Data/ByteString/Lazy/Char8.hs @@ -0,0 +1,693 @@ +{-# OPTIONS_GHC -cpp -optc-O1 -fno-warn-orphans #-} +-- +-- -optc-O2 breaks with 4.0.4 gcc on debian +-- +-- Module : Data.ByteString.Lazy.Char8 +-- Copyright : (c) Don Stewart 2006 +-- License : BSD-style +-- +-- Maintainer : dons@cse.unsw.edu.au +-- Stability : experimental +-- Portability : portable (tested with GHC>=6.4.1 and Hugs 2005) +-- + +-- +-- | Manipulate /lazy/ 'ByteString's using 'Char' operations. All Chars will +-- be truncated to 8 bits. It can be expected that these functions will +-- run at identical speeds to their Word8 equivalents in +-- "Data.ByteString.Lazy". +-- +-- This module is intended to be imported @qualified@, to avoid name +-- clashes with "Prelude" functions. eg. +-- +-- > import qualified Data.ByteString.Lazy.Char8 as C +-- + +module Data.ByteString.Lazy.Char8 ( + + -- * The @ByteString@ type + ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + + -- * Introducing and eliminating 'ByteString's + empty, -- :: ByteString + singleton, -- :: Char -> ByteString + pack, -- :: String -> ByteString + unpack, -- :: ByteString -> String + + -- * Basic interface + cons, -- :: Char -> ByteString -> ByteString + snoc, -- :: ByteString -> Char -> ByteString + append, -- :: ByteString -> ByteString -> ByteString + head, -- :: ByteString -> Char + last, -- :: ByteString -> Char + tail, -- :: ByteString -> ByteString + init, -- :: ByteString -> ByteString + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int64 + + -- * Transformating ByteStrings + map, -- :: (Char -> Char) -> ByteString -> ByteString + reverse, -- :: ByteString -> ByteString +-- intersperse, -- :: Char -> ByteString -> ByteString + transpose, -- :: [ByteString] -> [ByteString] + + -- * Reducing 'ByteString's (folds) + foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a + foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a + foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char + foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char + foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a + foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char + + -- ** Special folds + concat, -- :: [ByteString] -> ByteString + concatMap, -- :: (Char -> ByteString) -> ByteString -> ByteString + any, -- :: (Char -> Bool) -> ByteString -> Bool + all, -- :: (Char -> Bool) -> ByteString -> Bool + maximum, -- :: ByteString -> Char + minimum, -- :: ByteString -> Char + + -- * Building ByteStrings + -- ** Scans + scanl, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +-- scanl1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString +-- scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +-- scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString + + -- ** Accumulating maps + mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) + mapIndexed, -- :: (Int64 -> Char -> Char) -> ByteString -> ByteString + + -- ** Infinite ByteStrings + repeat, -- :: Char -> ByteString + replicate, -- :: Int64 -> Char -> ByteString + cycle, -- :: ByteString -> ByteString + iterate, -- :: (Char -> Char) -> Char -> ByteString + + -- ** Unfolding + unfoldr, -- :: (a -> Maybe (Char, a)) -> a -> ByteString + + -- * Substrings + + -- ** Breaking strings + take, -- :: Int64 -> ByteString -> ByteString + drop, -- :: Int64 -> ByteString -> ByteString + splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString) + takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString + dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString + span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + group, -- :: ByteString -> [ByteString] + groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString] + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] + + -- ** Breaking and dropping on specific Chars + breakChar, -- :: Char -> ByteString -> (ByteString, ByteString) + spanChar, -- :: Char -> ByteString -> (ByteString, ByteString) + + -- ** Breaking into many substrings + split, -- :: Char -> ByteString -> [ByteString] + splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] + tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString] + + -- ** Breaking into lines and words + lines, -- :: ByteString -> [ByteString] + words, -- :: ByteString -> [ByteString] + unlines, -- :: [ByteString] -> ByteString + unwords, -- :: ByteString -> [ByteString] + + -- ** Joining strings + join, -- :: ByteString -> [ByteString] -> ByteString + joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString + + -- * Predicates + isPrefixOf, -- :: ByteString -> ByteString -> Bool +-- isSuffixOf, -- :: ByteString -> ByteString -> Bool + + -- * Searching ByteStrings + + -- ** Searching by equality + elem, -- :: Char -> ByteString -> Bool + notElem, -- :: Char -> ByteString -> Bool + filterChar, -- :: Char -> ByteString -> ByteString + filterNotChar, -- :: Char -> ByteString -> ByteString + + -- ** Searching with a predicate + find, -- :: (Char -> Bool) -> ByteString -> Maybe Char + filter, -- :: (Char -> Bool) -> ByteString -> ByteString +-- partition -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + + -- * Indexing ByteStrings + index, -- :: ByteString -> Int64 -> Char + elemIndex, -- :: Char -> ByteString -> Maybe Int64 + elemIndices, -- :: Char -> ByteString -> [Int64] + findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int64 + findIndices, -- :: (Char -> Bool) -> ByteString -> [Int64] + count, -- :: Char -> ByteString -> Int64 + + -- * Zipping and unzipping ByteStrings + zip, -- :: ByteString -> ByteString -> [(Char,Char)] + zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c] +-- unzip, -- :: [(Char,Char)] -> (ByteString,ByteString) + + -- * Ordered ByteStrings +-- sort, -- :: ByteString -> ByteString + + -- * Reading from ByteStrings + readInt, + + -- * I\/O with 'ByteString's + + -- ** Standard input and output + getContents, -- :: IO ByteString + putStr, -- :: ByteString -> IO () + putStrLn, -- :: ByteString -> IO () + interact, -- :: (ByteString -> ByteString) -> IO () + + -- ** Files + readFile, -- :: FilePath -> IO ByteString + writeFile, -- :: FilePath -> ByteString -> IO () + appendFile, -- :: FilePath -> ByteString -> IO () + + -- ** I\/O with Handles + hGetContents, -- :: Handle -> IO ByteString + hGetContentsN, -- :: Int -> Handle -> IO ByteString + hGet, -- :: Handle -> Int64 -> IO ByteString + hGetN, -- :: Int -> Handle -> Int64 -> IO ByteString + hPut, -- :: Handle -> ByteString -> IO () +#if defined(__GLASGOW_HASKELL__) + hGetNonBlocking, -- :: Handle -> IO ByteString + hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString +#endif + ) where + +-- Functions transparently exported +import Data.ByteString.Lazy + (ByteString(..) + ,empty,null,length,tail,init,append,reverse,transpose + ,concat,take,drop,splitAt,join,isPrefixOf,group,inits, tails + ,hGetContentsN, hGetN, hGetContents, hGet, hPut, getContents +#if defined(__GLASGOW_HASKELL__) + ,hGetNonBlocking, hGetNonBlockingN +#endif + ,putStr, putStrLn + ,readFile, writeFile, appendFile) + +-- Functions we need to wrap. +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as B +import qualified Data.ByteString.Base as B +import Data.ByteString.Base (w2c, c2w, isSpaceWord8) + +import Data.Int (Int64) +import qualified Data.List as List (intersperse) + +import qualified Prelude as P +import Prelude hiding + (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines + ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter + ,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,foldl1,foldr1 + ,readFile,writeFile,appendFile,replicate,getContents,getLine,putStr,putStrLn + ,zip,zipWith,unzip,notElem,repeat,iterate) + +#define STRICT1(f) f a | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined + +------------------------------------------------------------------------ + +-- | /O(1)/ Convert a 'Char' into a 'ByteString' +singleton :: Char -> ByteString +singleton = L.singleton . c2w +{-# INLINE singleton #-} + +-- | /O(n)/ Convert a 'String' into a 'ByteString'. +pack :: [Char] -> ByteString +pack = L.packWith c2w + +-- | /O(n)/ Converts a 'ByteString' to a 'String'. +unpack :: ByteString -> [Char] +unpack = L.unpackWith w2c +{-# INLINE unpack #-} + +-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different +-- complexity, as it requires a memcpy. +cons :: Char -> ByteString -> ByteString +cons = L.cons . c2w +{-# INLINE cons #-} + +-- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to +-- 'cons', this function performs a memcpy. +snoc :: ByteString -> Char -> ByteString +snoc p = L.snoc p . c2w +{-# INLINE snoc #-} + +-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. +head :: ByteString -> Char +head = w2c . L.head +{-# INLINE head #-} + +-- | /O(1)/ Extract the last element of a packed string, which must be non-empty. +last :: ByteString -> Char +last = w2c . L.last +{-# INLINE last #-} + +-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@ +map :: (Char -> Char) -> ByteString -> ByteString +map f = L.map (c2w . f . w2c) +{-# INLINE map #-} + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a ByteString, reduces the +-- ByteString using the binary operator, from left to right. +foldl :: (a -> Char -> a) -> a -> ByteString -> a +foldl f = L.foldl (\a c -> f a (w2c c)) +{-# INLINE foldl #-} + +-- | 'foldl\'' is like foldl, but strict in the accumulator. +foldl' :: (a -> Char -> a) -> a -> ByteString -> a +foldl' f = L.foldl' (\a c -> f a (w2c c)) +{-# INLINE foldl' #-} + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a packed string, +-- reduces the packed string using the binary operator, from right to left. +foldr :: (Char -> a -> a) -> a -> ByteString -> a +foldr f = L.foldr (\c a -> f (w2c c) a) +{-# INLINE foldr #-} + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'ByteStrings'. +foldl1 :: (Char -> Char -> Char) -> ByteString -> Char +foldl1 f ps = w2c (L.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldl1 #-} + +-- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. +foldl1' :: (Char -> Char -> Char) -> ByteString -> Char +foldl1' f ps = w2c (L.foldl1' (\x y -> c2w (f (w2c x) (w2c y))) ps) + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'ByteString's +foldr1 :: (Char -> Char -> Char) -> ByteString -> Char +foldr1 f ps = w2c (L.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldr1 #-} + +-- | Map a function over a 'ByteString' and concatenate the results +concatMap :: (Char -> ByteString) -> ByteString -> ByteString +concatMap f = L.concatMap (f . w2c) +{-# INLINE concatMap #-} + +-- | Applied to a predicate and a ByteString, 'any' determines if +-- any element of the 'ByteString' satisfies the predicate. +any :: (Char -> Bool) -> ByteString -> Bool +any f = L.any (f . w2c) +{-# INLINE any #-} + +-- | Applied to a predicate and a 'ByteString', 'all' determines if +-- all elements of the 'ByteString' satisfy the predicate. +all :: (Char -> Bool) -> ByteString -> Bool +all f = L.all (f . w2c) +{-# INLINE all #-} + +-- | 'maximum' returns the maximum value from a 'ByteString' +maximum :: ByteString -> Char +maximum = w2c . L.maximum +{-# INLINE maximum #-} + +-- | 'minimum' returns the minimum value from a 'ByteString' +minimum :: ByteString -> Char +minimum = w2c . L.minimum +{-# INLINE minimum #-} + +-- --------------------------------------------------------------------- +-- Building ByteStrings + +-- | 'scanl' is similar to 'foldl', but returns a list of successive +-- reduced values from the left. This function will fuse. +-- +-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] +-- +-- Note that +-- +-- > last (scanl f z xs) == foldl f z xs. +scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +scanl f z = L.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) + +-- | The 'mapAccumL' function behaves like a combination of 'map' and +-- 'foldl'; it applies a function to each element of a ByteString, +-- passing an accumulating parameter from left to right, and returning a +-- final value of this accumulator together with the new ByteString. +mapAccumL :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) +mapAccumL f = L.mapAccumL (\a w -> case f a (w2c w) of (a',c) -> (a', c2w c)) + +-- | /O(n)/ map Char functions, provided with the index at each position +mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString +mapIndexed f = L.mapIndexed (\i w -> c2w (f i (w2c w))) + +------------------------------------------------------------------------ +-- Generating and unfolding ByteStrings + +-- | @'iterate' f x@ returns an infinite ByteString of repeated applications +-- of @f@ to @x@: +-- +-- > iterate f x == [x, f x, f (f x), ...] +-- +iterate :: (Char -> Char) -> Char -> ByteString +iterate f = L.iterate (c2w . f . w2c) . c2w + +-- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every +-- element. +-- +repeat :: Char -> ByteString +repeat = L.repeat . c2w + +-- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@ +-- the value of every element. +-- +replicate :: Int64 -> Char -> ByteString +replicate w c = L.replicate w (c2w c) + +-- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'. +-- 'unfoldr' builds a ByteString from a seed value. The function takes +-- the element and returns 'Nothing' if it is done producing the +-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a +-- prepending to the ByteString and @b@ is used as the next element in a +-- recursive call. +unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString +unfoldr f = L.unfoldr $ \a -> case f a of + Nothing -> Nothing + Just (c, a') -> Just (c2w c, a') + +------------------------------------------------------------------------ + +-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, +-- returns the longest prefix (possibly empty) of @xs@ of elements that +-- satisfy @p@. +takeWhile :: (Char -> Bool) -> ByteString -> ByteString +takeWhile f = L.takeWhile (f . w2c) +{-# INLINE takeWhile #-} + +-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. +dropWhile :: (Char -> Bool) -> ByteString -> ByteString +dropWhile f = L.dropWhile (f . w2c) +{-# INLINE dropWhile #-} + +-- | 'break' @p@ is equivalent to @'span' ('not' . p)@. +break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +break f = L.break (f . w2c) +{-# INLINE break #-} + +-- | 'span' @p xs@ breaks the ByteString into two segments. It is +-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ +span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +span f = L.span (f . w2c) +{-# INLINE span #-} + +-- | 'breakChar' breaks its ByteString argument at the first occurence +-- of the specified Char. It is more efficient than 'break' as it is +-- implemented with @memchr(3)@. I.e. +-- +-- > break (=='c') "abcd" == breakChar 'c' "abcd" +-- +breakChar :: Char -> ByteString -> (ByteString, ByteString) +breakChar = L.breakByte . c2w +{-# INLINE breakChar #-} + +-- | 'spanChar' breaks its ByteString argument at the first +-- occurence of a Char other than its argument. It is more efficient +-- than 'span (==)' +-- +-- > span (=='c') "abcd" == spanByte 'c' "abcd" +-- +spanChar :: Char -> ByteString -> (ByteString, ByteString) +spanChar = L.spanByte . c2w +{-# INLINE spanChar #-} + +-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] +-- > split 'a' "aXaXaXa" == ["","X","X","X"] +-- > split 'x' "x" == ["",""] +-- +-- and +-- +-- > join [c] . split c == id +-- > split == splitWith . (==) +-- +-- As for all splitting functions in this library, this function does +-- not copy the substrings, it just constructs new 'ByteStrings' that +-- are slices of the original. +-- +split :: Char -> ByteString -> [ByteString] +split = L.split . c2w +{-# INLINE split #-} + +-- | /O(n)/ Splits a 'ByteString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] +-- +splitWith :: (Char -> Bool) -> ByteString -> [ByteString] +splitWith f = L.splitWith (f . w2c) +{-# INLINE splitWith #-} + +-- | Like 'splitWith', except that sequences of adjacent separators are +-- treated as a single separator. eg. +-- +-- > tokens (=='a') "aabbaca" == ["bb","c"] +-- +tokens :: (Char -> Bool) -> ByteString -> [ByteString] +tokens f = L.tokens (f . w2c) +{-# INLINE tokens #-} + +-- | The 'groupBy' function is the non-overloaded version of 'group'. +groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString] +groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b)) + +-- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a +-- char. Around 4 times faster than the generalised join. +-- +joinWithChar :: Char -> ByteString -> ByteString -> ByteString +joinWithChar = L.joinWithByte . c2w +{-# INLINE joinWithChar #-} + +-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. +index :: ByteString -> Int64 -> Char +index = (w2c .) . L.index +{-# INLINE index #-} + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'ByteString' which is equal (by memchr) to the +-- query element, or 'Nothing' if there is no such element. +elemIndex :: Char -> ByteString -> Maybe Int64 +elemIndex = L.elemIndex . c2w +{-# INLINE elemIndex #-} + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +elemIndices :: Char -> ByteString -> [Int64] +elemIndices = L.elemIndices . c2w +{-# INLINE elemIndices #-} + +-- | The 'findIndex' function takes a predicate and a 'ByteString' and +-- returns the index of the first element in the ByteString satisfying the predicate. +findIndex :: (Char -> Bool) -> ByteString -> Maybe Int64 +findIndex f = L.findIndex (f . w2c) +{-# INLINE findIndex #-} + +-- | The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +findIndices :: (Char -> Bool) -> ByteString -> [Int64] +findIndices f = L.findIndices (f . w2c) + +-- | count returns the number of times its argument appears in the ByteString +-- +-- > count == length . elemIndices +-- > count '\n' == length . lines +-- +-- But more efficiently than using length on the intermediate list. +count :: Char -> ByteString -> Int64 +count c = L.count (c2w c) + +-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This +-- implementation uses @memchr(3)@. +elem :: Char -> ByteString -> Bool +elem c = L.elem (c2w c) +{-# INLINE elem #-} + +-- | /O(n)/ 'notElem' is the inverse of 'elem' +notElem :: Char -> ByteString -> Bool +notElem c = L.notElem (c2w c) +{-# INLINE notElem #-} + +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. +filter :: (Char -> Bool) -> ByteString -> ByteString +filter f = L.filter (f . w2c) +{-# INLINE filter #-} + +-- | /O(n)/ The 'find' function takes a predicate and a ByteString, +-- and returns the first element in matching the predicate, or 'Nothing' +-- if there is no such element. +find :: (Char -> Bool) -> ByteString -> Maybe Char +find f ps = w2c `fmap` L.find (f . w2c) ps +{-# INLINE find #-} + +-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common +-- case of filtering a single Char. It is more efficient to use +-- filterChar in this case. +-- +-- > filterChar == filter . (==) +-- +-- filterChar is around 10x faster, and uses much less space, than its +-- filter equivalent +-- +filterChar :: Char -> ByteString -> ByteString +filterChar c = L.filterByte (c2w c) +{-# INLINE filterChar #-} + +-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common +-- case of filtering a single Char out of a list. It is more efficient +-- to use /filterNotChar/ in this case. +-- +-- > filterNotChar == filter . (/=) +-- +-- filterNotChar is around 3x faster, and uses much less space, than its +-- filter equivalent +-- +filterNotChar :: Char -> ByteString -> ByteString +filterNotChar c = L.filterNotByte (c2w c) +{-# INLINE filterNotChar #-} + +-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of +-- corresponding pairs of Chars. If one input ByteString is short, +-- excess elements of the longer ByteString are discarded. This is +-- equivalent to a pair of 'unpack' operations, and so space +-- usage may be large for multi-megabyte ByteStrings +zip :: ByteString -> ByteString -> [(Char,Char)] +zip ps qs + | L.null ps || L.null qs = [] + | otherwise = (head ps, head qs) : zip (L.tail ps) (L.tail qs) + +-- | 'zipWith' generalises 'zip' by zipping with the function given as +-- the first argument, instead of a tupling function. For example, +-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list +-- of corresponding sums. +zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a] +zipWith f = L.zipWith ((. w2c) . f . w2c) + +-- | 'lines' breaks a ByteString up into a list of ByteStrings at +-- newline Chars. The resulting strings do not contain newlines. +-- +lines :: ByteString -> [ByteString] +lines (LPS []) = [] +lines (LPS (x:xs)) = loop0 x xs + where + -- this is a really performance sensitive function but the + -- chunked representation makes the general case a bit expensive + -- however assuming a large chunk size and normalish line lengths + -- we will find line endings much more frequently than chunk + -- endings so it makes sense to optimise for that common case. + -- So we partition into two special cases depending on whether we + -- are keeping back a list of chunks that will eventually be output + -- once we get to the end of the current line. + + -- the common special case where we have no existing chunks of + -- the current line + loop0 :: B.ByteString -> [B.ByteString] -> [ByteString] + STRICT2(loop0) + loop0 ps pss = + case B.elemIndex (c2w '\n') ps of + Nothing -> case pss of + [] | B.null ps -> [] + | otherwise -> LPS [ps] : [] + (ps':pss') + | B.null ps -> loop0 ps' pss' + | otherwise -> loop ps' [ps] pss' + + Just n | n /= 0 -> LPS [B.unsafeTake n ps] + : loop0 (B.unsafeDrop (n+1) ps) pss + | otherwise -> loop0 (B.unsafeTail ps) pss + + -- the general case when we are building a list of chunks that are + -- part of the same line + loop :: B.ByteString -> [B.ByteString] -> [B.ByteString] -> [ByteString] + STRICT3(loop) + loop ps line pss = + case B.elemIndex (c2w '\n') ps of + Nothing -> + case pss of + [] -> let ps' | B.null ps = P.reverse line + | otherwise = P.reverse (ps : line) + in ps' `seq` (LPS ps' : []) + + (ps':pss') + | B.null ps -> loop ps' line pss' + | otherwise -> loop ps' (ps : line) pss' + + Just n -> + let ps' | n == 0 = P.reverse line + | otherwise = P.reverse (B.unsafeTake n ps : line) + in ps' `seq` (LPS ps' : loop0 (B.unsafeDrop (n+1) ps) pss) + +-- | 'unlines' is an inverse operation to 'lines'. It joins lines, +-- after appending a terminating newline to each. +unlines :: [ByteString] -> ByteString +unlines [] = empty +unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space + where nl = singleton '\n' + +-- | 'words' breaks a ByteString up into a list of words, which +-- were delimited by Chars representing white space. And +-- +-- > tokens isSpace = words +-- +words :: ByteString -> [ByteString] +words = L.tokens isSpaceWord8 +{-# INLINE words #-} + +-- | The 'unwords' function is analogous to the 'unlines' function, on words. +unwords :: [ByteString] -> ByteString +unwords = join (singleton ' ') +{-# INLINE unwords #-} + +-- | readInt reads an Int from the beginning of the ByteString. If +-- there is no integer at the beginning of the string, it returns +-- Nothing, otherwise it just returns the int read, and the rest of the +-- string. +readInt :: ByteString -> Maybe (Int, ByteString) +readInt (LPS []) = Nothing +readInt (LPS (x:xs)) = + case w2c (B.unsafeHead x) of + '-' -> loop True 0 0 (B.unsafeTail x) xs + '+' -> loop False 0 0 (B.unsafeTail x) xs + _ -> loop False 0 0 x xs + + where loop :: Bool -> Int -> Int -> B.ByteString -> [B.ByteString] -> Maybe (Int, ByteString) + STRICT5(loop) + loop neg i n ps pss + | B.null ps = case pss of + [] -> end neg i n ps pss + (ps':pss') -> loop neg i n ps' pss' + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 + && w <= 0x39 -> loop neg (i+1) + (n * 10 + (fromIntegral w - 0x30)) + (B.unsafeTail ps) pss + | otherwise -> end neg i n ps pss + + end _ 0 _ _ _ = Nothing + end neg _ n ps pss = let n' | neg = negate n + | otherwise = n + ps' | B.null ps = pss + | otherwise = ps:pss + in n' `seq` ps' `seq` Just $! (n', LPS ps') + diff --git a/Makefile b/Makefile index d00bffe..7d96616 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ ALL_DIRS = \ Control/Monad/ST \ Data \ Data/ByteString \ + Data/ByteString/Lazy \ Data/Generics \ Data/Array \ Data/Array/IO \ diff --git a/base.cabal b/base.cabal index 4aead21..cd40db8 100644 --- a/base.cabal +++ b/base.cabal @@ -40,6 +40,10 @@ exposed-modules: Data.Bool, Data.ByteString, Data.ByteString.Char8, + Data.ByteString.Lazy + Data.ByteString.Lazy.Char8 + Data.ByteString.Base + Data.ByteString.Fusion Data.Char, Data.Complex, Data.Dynamic, diff --git a/cbits/fpstring.c b/cbits/fpstring.c index b8fc540..d42ebe5 100644 --- a/cbits/fpstring.c +++ b/cbits/fpstring.c @@ -32,7 +32,7 @@ #include "fpstring.h" /* copy a string in reverse */ -void reverse(unsigned char *dest, unsigned char *from, int len) { +void fps_reverse(unsigned char *dest, unsigned char *from, int len) { unsigned char *p, *q; p = from + len - 1; q = dest; @@ -43,7 +43,7 @@ void reverse(unsigned char *dest, unsigned char *from, int len) { /* duplicate a string, interspersing the character through the elements of the duplicated string */ -void intersperse(unsigned char *dest, unsigned char *from, int len, char c) { +void fps_intersperse(unsigned char *dest, unsigned char *from, int len, char c) { unsigned char *p, *q; p = from; q = dest; @@ -55,7 +55,7 @@ void intersperse(unsigned char *dest, unsigned char *from, int len, char c) { } /* find maximum char in a packed string */ -unsigned char maximum(unsigned char *p, int len) { +unsigned char fps_maximum(unsigned char *p, int len) { unsigned char *q, c = *p; for (q = p; q < p + len; q++) if (*q > c) @@ -64,7 +64,7 @@ unsigned char maximum(unsigned char *p, int len) { } /* find minimum char in a packed string */ -unsigned char minimum(unsigned char *p, int len) { +unsigned char fps_minimum(unsigned char *p, int len) { unsigned char *q, c = *p; for (q = p; q < p + len; q++) if (*q < c) @@ -73,7 +73,7 @@ unsigned char minimum(unsigned char *p, int len) { } /* count the number of occurences of a char in a string */ -int count(unsigned char *p, int len, unsigned char w) { +int fps_count(unsigned char *p, int len, unsigned char w) { int c; for (c = 0; len--; ++p) if (*p == w) diff --git a/include/fpstring.h b/include/fpstring.h index 614162d..42e8346 100644 --- a/include/fpstring.h +++ b/include/fpstring.h @@ -1,6 +1,6 @@ -void reverse(unsigned char *dest, unsigned char *from, int len); -void intersperse(unsigned char *dest, unsigned char *from, int len, char c); -unsigned char maximum(unsigned char *p, int len); -unsigned char minimum(unsigned char *p, int len); -int count(unsigned char *p, int len, unsigned char w); +void fps_reverse(unsigned char *dest, unsigned char *from, int len); +void fps_intersperse(unsigned char *dest, unsigned char *from, int len, char c); +unsigned char fps_maximum(unsigned char *p, int len); +unsigned char fps_minimum(unsigned char *p, int len); +int fps_count(unsigned char *p, int len, unsigned char w); diff --git a/package.conf.in b/package.conf.in index ad57400..577f50b 100644 --- a/package.conf.in +++ b/package.conf.in @@ -38,6 +38,10 @@ exposed-modules: Data.Bool, Data.ByteString, Data.ByteString.Char8, + Data.ByteString.Lazy + Data.ByteString.Lazy.Char8 + Data.ByteString.Base + Data.ByteString.Fusion Data.Char, Data.Complex, Data.Dynamic, -- 1.7.10.4