From 7de48e78ab0b12be788962faa8ec6270f48c7d00 Mon Sep 17 00:00:00 2001 From: Ian Lynagh Date: Sun, 29 Jul 2007 13:22:15 +0000 Subject: [PATCH] bytestring is now in its own package --- Data/ByteString.hs | 2020 ----------------------------------------- Data/ByteString/Base.hs | 514 ----------- Data/ByteString/Char8.hs | 995 -------------------- Data/ByteString/Fusion.hs | 699 -------------- Data/ByteString/Lazy.hs | 1293 -------------------------- Data/ByteString/Lazy/Char8.hs | 748 --------------- base.cabal | 7 - cbits/fpstring.c | 82 -- 8 files changed, 6358 deletions(-) delete mode 100644 Data/ByteString.hs delete mode 100644 Data/ByteString/Base.hs delete mode 100644 Data/ByteString/Char8.hs delete mode 100644 Data/ByteString/Fusion.hs delete mode 100644 Data/ByteString/Lazy.hs delete mode 100644 Data/ByteString/Lazy/Char8.hs delete mode 100644 cbits/fpstring.c diff --git a/Data/ByteString.hs b/Data/ByteString.hs deleted file mode 100644 index 8e9e919..0000000 --- a/Data/ByteString.hs +++ /dev/null @@ -1,2020 +0,0 @@ -{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-} --- | --- Module : Data.ByteString --- Copyright : (c) The University of Glasgow 2001, --- (c) David Roundy 2003-2005, --- (c) Simon Marlow 2005 --- (c) Don Stewart 2005-2006 --- (c) Bjorn Bringert 2006 --- Array fusion code: --- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller --- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy --- --- License : BSD-style --- --- Maintainer : dons@cse.unsw.edu.au --- Stability : experimental --- Portability : portable --- --- A time and space-efficient implementation of byte vectors using --- packed Word8 arrays, suitable for high performance use, both in terms --- of large data quantities, or high speed requirements. Byte vectors --- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr', --- and can be passed between C and Haskell with little effort. --- --- This module is intended to be imported @qualified@, to avoid name --- clashes with "Prelude" functions. eg. --- --- > import qualified Data.ByteString as B --- --- Original GHC implementation by Bryan O\'Sullivan. --- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. --- Rewritten to support slices and use 'ForeignPtr' by David Roundy. --- Polished and extended by Don Stewart. --- - -module Data.ByteString ( - - -- * The @ByteString@ type - ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid - - -- * Introducing and eliminating 'ByteString's - empty, -- :: ByteString - singleton, -- :: Word8 -> ByteString - pack, -- :: [Word8] -> ByteString - unpack, -- :: ByteString -> [Word8] - - -- * Basic interface - cons, -- :: Word8 -> ByteString -> ByteString - snoc, -- :: ByteString -> Word8 -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - head, -- :: ByteString -> Word8 - last, -- :: ByteString -> Word8 - tail, -- :: ByteString -> ByteString - init, -- :: ByteString -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int - - -- * Transformating ByteStrings - map, -- :: (Word8 -> Word8) -> ByteString -> ByteString - reverse, -- :: ByteString -> ByteString - intersperse, -- :: Word8 -> ByteString -> ByteString - transpose, -- :: [ByteString] -> [ByteString] - - -- * Reducing 'ByteString's (folds) - foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a - foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a - foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - - foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a - foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a - foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - - -- ** Special folds - concat, -- :: [ByteString] -> ByteString - concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString - any, -- :: (Word8 -> Bool) -> ByteString -> Bool - all, -- :: (Word8 -> Bool) -> ByteString -> Bool - maximum, -- :: ByteString -> Word8 - minimum, -- :: ByteString -> Word8 - - -- * Building ByteStrings - -- ** Scans - scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString - scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString - scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString - scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString - - -- ** Accumulating maps - mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) - mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) - mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString - - -- ** Unfolding ByteStrings - replicate, -- :: Int -> Word8 -> ByteString - unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString - unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) - - -- * Substrings - - -- ** Breaking strings - take, -- :: Int -> ByteString -> ByteString - drop, -- :: Int -> ByteString -> ByteString - splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) - takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString - dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString - span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - - -- ** Breaking into many substrings - split, -- :: Word8 -> ByteString -> [ByteString] - splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] - - -- ** Joining strings - join, -- :: ByteString -> [ByteString] -> ByteString - - -- * Predicates - isPrefixOf, -- :: ByteString -> ByteString -> Bool - isSuffixOf, -- :: ByteString -> ByteString -> Bool - - -- ** Search for arbitrary substrings - isSubstringOf, -- :: ByteString -> ByteString -> Bool - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] - - -- * Searching ByteStrings - - -- ** Searching by equality - -- | These functions use memchr(3) to efficiently search the ByteString - elem, -- :: Word8 -> ByteString -> Bool - notElem, -- :: Word8 -> ByteString -> Bool - - -- ** Searching with a predicate - find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 - filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString --- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - - -- * Indexing ByteStrings - index, -- :: ByteString -> Int -> Word8 - elemIndex, -- :: Word8 -> ByteString -> Maybe Int - elemIndices, -- :: Word8 -> ByteString -> [Int] - elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int - findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int - findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] - count, -- :: Word8 -> ByteString -> Int - - -- * Zipping and unzipping ByteStrings - zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] - zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] - unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) - - -- * Ordered ByteStrings - sort, -- :: ByteString -> ByteString - - -- * Low level CString conversions - - -- ** Packing CStrings and pointers - packCString, -- :: CString -> ByteString - packCStringLen, -- :: CString -> ByteString - packMallocCString, -- :: CString -> ByteString - - -- ** Using ByteStrings as CStrings - useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a - useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a - - -- ** Copying ByteStrings - -- | These functions perform memcpy(3) operations - copy, -- :: ByteString -> ByteString - copyCString, -- :: CString -> IO ByteString - copyCStringLen, -- :: CStringLen -> IO ByteString - - -- * I\/O with 'ByteString's - - -- ** Standard input and output - getLine, -- :: IO ByteString - getContents, -- :: IO ByteString - putStr, -- :: ByteString -> IO () - putStrLn, -- :: ByteString -> IO () - interact, -- :: (ByteString -> ByteString) -> IO () - - -- ** Files - readFile, -- :: FilePath -> IO ByteString - writeFile, -- :: FilePath -> ByteString -> IO () - appendFile, -- :: FilePath -> ByteString -> IO () --- mmapFile, -- :: FilePath -> IO ByteString - - -- ** I\/O with Handles - hGetLine, -- :: Handle -> IO ByteString - hGetContents, -- :: Handle -> IO ByteString - hGet, -- :: Handle -> Int -> IO ByteString - hGetNonBlocking, -- :: Handle -> Int -> IO ByteString - hPut, -- :: Handle -> ByteString -> IO () - hPutStr, -- :: Handle -> ByteString -> IO () - hPutStrLn, -- :: Handle -> ByteString -> IO () - -#if defined(__GLASGOW_HASKELL__) - -- * Fusion utilities - unpackList, -- eek, otherwise it gets thrown away by the simplifier - lengthU, maximumU, minimumU -#endif - - ) where - -import qualified Prelude as P -import Prelude hiding (reverse,head,tail,last,init,null - ,length,map,lines,foldl,foldr,unlines - ,concat,any,take,drop,splitAt,takeWhile - ,dropWhile,span,break,elem,filter,maximum - ,minimum,all,concatMap,foldl1,foldr1 - ,scanl,scanl1,scanr,scanr1 - ,readFile,writeFile,appendFile,replicate - ,getContents,getLine,putStr,putStrLn,interact - ,zip,zipWith,unzip,notElem) - -import Data.ByteString.Base -import Data.ByteString.Fusion - -import qualified Data.List as List - -import Data.Word (Word8) -import Data.Maybe (listToMaybe) -import Data.Array (listArray) -import qualified Data.Array as Array ((!)) - --- Control.Exception.bracket not available in yhc or nhc -import Control.Exception (bracket, assert) -import qualified Control.Exception as Exception -import Control.Monad (when) - -import Foreign.C.String (CString, CStringLen) -import Foreign.C.Types (CSize) -import Foreign.ForeignPtr -import Foreign.Marshal.Array -import Foreign.Ptr -import Foreign.Storable (Storable(..)) - --- hGetBuf and hPutBuf not available in yhc or nhc -import System.IO (stdin,stdout,hClose,hFileSize - ,hGetBuf,hPutBuf,openBinaryFile - ,Handle,IOMode(..)) - -import Data.Monoid (Monoid, mempty, mappend, mconcat) - -#if !defined(__GLASGOW_HASKELL__) -import System.IO.Unsafe -import qualified System.Environment -import qualified System.IO (hGetLine) -#endif - -#if defined(__GLASGOW_HASKELL__) - -import System.IO (hGetBufNonBlocking) -import System.IO.Error (isEOFError) - -import GHC.Handle -import GHC.Prim (Word#, (+#), writeWord8OffAddr#) -import GHC.Base (build) -import GHC.Word hiding (Word8) -import GHC.Ptr (Ptr(..)) -import GHC.ST (ST(..)) -import GHC.IOBase - -#endif - --- ----------------------------------------------------------------------------- --- --- Useful macros, until we have bang patterns --- - -#define STRICT1(f) f a | a `seq` False = undefined -#define STRICT2(f) f a b | a `seq` b `seq` False = undefined -#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined -#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined -#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined - --- ----------------------------------------------------------------------------- - -instance Eq ByteString - where (==) = eq - -instance Ord ByteString - where compare = compareBytes - -instance Monoid ByteString where - mempty = empty - mappend = append - mconcat = concat - -{- -instance Arbitrary PackedString where - arbitrary = P.pack `fmap` arbitrary - coarbitrary s = coarbitrary (P.unpack s) --} - --- | /O(n)/ Equality on the 'ByteString' type. -eq :: ByteString -> ByteString -> Bool -eq a@(PS p s l) b@(PS p' s' l') - | l /= l' = False -- short cut on length - | p == p' && s == s' = True -- short cut for the same string - | otherwise = compareBytes a b == EQ -{-# INLINE eq #-} - --- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. -compareBytes :: ByteString -> ByteString -> Ordering -compareBytes (PS x1 s1 l1) (PS x2 s2 l2) - | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings - | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string - | otherwise = inlinePerformIO $ - withForeignPtr x1 $ \p1 -> - withForeignPtr x2 $ \p2 -> do - i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2) - return $! case i `compare` 0 of - EQ -> l1 `compare` l2 - x -> x -{-# INLINE compareBytes #-} - -{- --- --- About 4x slower over 32M --- -compareBytes :: ByteString -> ByteString -> Ordering -compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $ - withForeignPtr fp1 $ \p1 -> - withForeignPtr fp2 $ \p2 -> - cmp (p1 `plusPtr` off1) - (p2 `plusPtr` off2) 0 len1 len2 - -cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering -STRICT5(cmp) -cmp p1 p2 n len1 len2 - | n == len1 = if n == len2 then return EQ else return LT - | n == len2 = return GT - | otherwise = do - (a :: Word8) <- peekByteOff p1 n - (b :: Word8) <- peekByteOff p2 n - case a `compare` b of - EQ -> cmp p1 p2 (n+1) len1 len2 - LT -> return LT - GT -> return GT -{-# INLINE compareBytes #-} --} - --- ----------------------------------------------------------------------------- --- Introducing and eliminating 'ByteString's - --- | /O(1)/ Convert a 'Word8' into a 'ByteString' -singleton :: Word8 -> ByteString -singleton c = unsafeCreate 1 $ \p -> poke p c -{-# INLINE [1] singleton #-} - --- --- XXX The unsafePerformIO is critical! --- --- Otherwise: --- --- singleton 255 `compare` singleton 127 --- --- is compiled to: --- --- case mallocByteString 2 of --- ForeignPtr f internals -> --- case writeWord8OffAddr# f 0 255 of _ -> --- case writeWord8OffAddr# f 0 127 of _ -> --- case eqAddr# f f of --- False -> case compare (GHC.Prim.plusAddr# f 0) --- (GHC.Prim.plusAddr# f 0) --- --- - --- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. --- --- For applications with large numbers of string literals, pack can be a --- bottleneck. In such cases, consider using packAddress (GHC only). -pack :: [Word8] -> ByteString - -#if !defined(__GLASGOW_HASKELL__) - -pack str = unsafeCreate (P.length str) $ \p -> go p str - where - go _ [] = return () - go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff - -#else /* hack away */ - -pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) - where - go _ _ [] = return () - go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs - - writeByte p i c = ST $ \s# -> - case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #) - -#endif - --- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. -unpack :: ByteString -> [Word8] - -#if !defined(__GLASGOW_HASKELL__) - -unpack (PS _ _ 0) = [] -unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> - go (p `plusPtr` s) (l - 1) [] - where - STRICT3(go) - go p 0 acc = peek p >>= \e -> return (e : acc) - go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc) -{-# INLINE unpack #-} - -#else - -unpack ps = build (unpackFoldr ps) -{-# INLINE unpack #-} - --- --- critical this isn't strict in the acc --- as it will break in the presence of list fusion. this is a known --- issue with seq and build/foldr rewrite rules, which rely on lazy --- demanding to avoid bottoms in the list. --- -unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a -unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do - let loop q n _ | q `seq` n `seq` False = undefined -- n.b. - loop _ (-1) acc = return acc - loop q n acc = do - a <- peekByteOff q n - loop q (n-1) (a `f` acc) - loop (p `plusPtr` off) (len-1) ch -{-# INLINE [0] unpackFoldr #-} - -unpackList :: ByteString -> [Word8] -unpackList (PS fp off len) = withPtr fp $ \p -> do - let STRICT3(loop) - loop _ (-1) acc = return acc - loop q n acc = do - a <- peekByteOff q n - loop q (n-1) (a : acc) - loop (p `plusPtr` off) (len-1) [] - -{-# RULES - "FPS unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p - #-} - -#endif - --- --------------------------------------------------------------------- --- Basic interface - --- | /O(1)/ Test whether a ByteString is empty. -null :: ByteString -> Bool -null (PS _ _ l) = assert (l >= 0) $ l <= 0 -{-# INLINE null #-} - --- --------------------------------------------------------------------- --- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. -length :: ByteString -> Int -length (PS _ _ l) = assert (l >= 0) $ l - --- --- length/loop fusion. When taking the length of any fuseable loop, --- rewrite it as a foldl', and thus avoid allocating the result buffer --- worth around 10% in speed testing. --- - -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] length #-} -#endif - -lengthU :: ByteString -> Int -lengthU = foldl' (const . (+1)) (0::Int) -{-# INLINE lengthU #-} - -{-# RULES - --- v2 fusion -"FPS length/loop" forall loop s . - length (loopArr (loopWrapper loop s)) = - lengthU (loopArr (loopWrapper loop s)) - - #-} - ------------------------------------------------------------------------- - --- | /O(n)/ 'cons' is analogous to (:) for lists, but of different --- complexity, as it requires a memcpy. -cons :: Word8 -> ByteString -> ByteString -cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do - poke p c - memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) -{-# INLINE cons #-} - --- | /O(n)/ Append a byte to the end of a 'ByteString' -snoc :: ByteString -> Word8 -> ByteString -snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do - memcpy p (f `plusPtr` s) (fromIntegral l) - poke (p `plusPtr` l) c -{-# INLINE snoc #-} - --- todo fuse - --- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. --- An exception will be thrown in the case of an empty ByteString. -head :: ByteString -> Word8 -head (PS x s l) - | l <= 0 = errorEmptyList "head" - | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s -{-# INLINE head #-} - --- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. --- An exception will be thrown in the case of an empty ByteString. -tail :: ByteString -> ByteString -tail (PS p s l) - | l <= 0 = errorEmptyList "tail" - | otherwise = PS p (s+1) (l-1) -{-# INLINE tail #-} - --- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. --- An exception will be thrown in the case of an empty ByteString. -last :: ByteString -> Word8 -last ps@(PS x s l) - | null ps = errorEmptyList "last" - | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1) -{-# INLINE last #-} - --- | /O(1)/ Return all the elements of a 'ByteString' except the last one. --- An exception will be thrown in the case of an empty ByteString. -init :: ByteString -> ByteString -init ps@(PS p s l) - | null ps = errorEmptyList "init" - | otherwise = PS p s (l-1) -{-# INLINE init #-} - --- | /O(n)/ Append two ByteStrings -append :: ByteString -> ByteString -> ByteString -append xs ys | null xs = ys - | null ys = xs - | otherwise = concat [xs,ys] -{-# INLINE append #-} - --- --------------------------------------------------------------------- --- Transformations - --- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each --- element of @xs@. This function is subject to array fusion. -map :: (Word8 -> Word8) -> ByteString -> ByteString -#if defined(LOOPU_FUSION) -map f = loopArr . loopU (mapEFL f) NoAcc -#elif defined(LOOPUP_FUSION) -map f = loopArr . loopUp (mapEFL f) NoAcc -#elif defined(LOOPNOACC_FUSION) -map f = loopArr . loopNoAcc (mapEFL f) -#else -map f = loopArr . loopMap f -#endif -{-# INLINE map #-} - -{- --- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is --- slightly faster for one-shot cases. -map' :: (Word8 -> Word8) -> ByteString -> ByteString -map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> - create len $ map_ 0 (a `plusPtr` s) - where - map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () - STRICT3(map_) - map_ n p1 p2 - | n >= len = return () - | otherwise = do - x <- peekByteOff p1 n - pokeByteOff p2 n (f x) - map_ (n+1) p1 p2 -{-# INLINE map' #-} --} - --- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. -reverse :: ByteString -> ByteString -reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> - c_reverse p (f `plusPtr` s) (fromIntegral l) - --- todo, fuseable version - --- | /O(n)/ The 'intersperse' function takes a 'Word8' and a --- 'ByteString' and \`intersperses\' that byte between the elements of --- the 'ByteString'. It is analogous to the intersperse function on --- Lists. -intersperse :: Word8 -> ByteString -> ByteString -intersperse c ps@(PS x s l) - | length ps < 2 = ps - | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f -> - c_intersperse p (f `plusPtr` s) (fromIntegral l) c - -{- -intersperse c = pack . List.intersperse c . unpack --} - --- | The 'transpose' function transposes the rows and columns of its --- 'ByteString' argument. -transpose :: [ByteString] -> [ByteString] -transpose ps = P.map pack (List.transpose (P.map unpack ps)) - --- --------------------------------------------------------------------- --- Reducing 'ByteString's - --- | 'foldl', applied to a binary operator, a starting value (typically --- the left-identity of the operator), and a ByteString, reduces the --- ByteString using the binary operator, from left to right. --- This function is subject to array fusion. -foldl :: (a -> Word8 -> a) -> a -> ByteString -> a -#if !defined(LOOPU_FUSION) -foldl f z = loopAcc . loopUp (foldEFL f) z -#else -foldl f z = loopAcc . loopU (foldEFL f) z -#endif -{-# INLINE foldl #-} - -{- --- --- About twice as fast with 6.4.1, but not fuseable --- A simple fold . map is enough to make it worth while. --- -foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> - lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) - where - STRICT3(lgo) - lgo z p q | p == q = return z - | otherwise = do c <- peek p - lgo (f z c) (p `plusPtr` 1) q --} - --- | 'foldl\'' is like 'foldl', but strict in the accumulator. --- Though actually foldl is also strict in the accumulator. -foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a -foldl' = foldl --- foldl' f z = loopAcc . loopU (foldEFL' f) z -{-# INLINE foldl' #-} - --- | 'foldr', applied to a binary operator, a starting value --- (typically the right-identity of the operator), and a ByteString, --- reduces the ByteString using the binary operator, from right to left. -foldr :: (Word8 -> a -> a) -> a -> ByteString -> a -foldr k z = loopAcc . loopDown (foldEFL (flip k)) z -{-# INLINE foldr #-} - --- | 'foldr\'' is like 'foldr', but strict in the accumulator. -foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a -foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> - go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1)) - where - STRICT3(go) - go z p q | p == q = return z - | otherwise = do c <- peek p - go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive -{-# INLINE [1] foldr' #-} - --- | 'foldl1' is a variant of 'foldl' that has no starting value --- argument, and thus must be applied to non-empty 'ByteStrings'. --- This function is subject to array fusion. --- An exception will be thrown in the case of an empty ByteString. -foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldl1 f ps - | null ps = errorEmptyList "foldl1" - | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) -{-# INLINE foldl1 #-} - --- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. --- An exception will be thrown in the case of an empty ByteString. -foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldl1' f ps - | null ps = errorEmptyList "foldl1'" - | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) -{-# INLINE foldl1' #-} - --- | 'foldr1' is a variant of 'foldr' that has no starting value argument, --- and thus must be applied to non-empty 'ByteString's --- An exception will be thrown in the case of an empty ByteString. -foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldr1 f ps - | null ps = errorEmptyList "foldr1" - | otherwise = foldr f (last ps) (init ps) -{-# INLINE foldr1 #-} - --- | 'foldr1\'' is a variant of 'foldr1', but is strict in the --- accumulator. -foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldr1' f ps - | null ps = errorEmptyList "foldr1" - | otherwise = foldr' f (last ps) (init ps) -{-# INLINE [1] foldr1' #-} - --- --------------------------------------------------------------------- --- Special folds - --- | /O(n)/ Concatenate a list of ByteStrings. -concat :: [ByteString] -> ByteString -concat [] = empty -concat [ps] = ps -concat xs = unsafeCreate len $ \ptr -> go xs ptr - where len = P.sum . P.map length $ xs - STRICT2(go) - go [] _ = return () - go (PS p s l:ps) ptr = do - withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l) - go ps (ptr `plusPtr` l) - --- | Map a function over a 'ByteString' and concatenate the results -concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString -concatMap f = concat . foldr ((:) . f) [] - --- foldr (append . f) empty - --- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if --- any element of the 'ByteString' satisfies the predicate. -any :: (Word8 -> Bool) -> ByteString -> Bool -any _ (PS _ _ 0) = False -any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> - go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) - where - STRICT2(go) - go p q | p == q = return False - | otherwise = do c <- peek p - if f c then return True - else go (p `plusPtr` 1) q - --- todo fuse - --- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines --- if all elements of the 'ByteString' satisfy the predicate. -all :: (Word8 -> Bool) -> ByteString -> Bool -all _ (PS _ _ 0) = True -all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> - go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) - where - STRICT2(go) - go p q | p == q = return True -- end of list - | otherwise = do c <- peek p - if f c - then go (p `plusPtr` 1) q - else return False - ------------------------------------------------------------------------- - --- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' --- This function will fuse. --- An exception will be thrown in the case of an empty ByteString. -maximum :: ByteString -> Word8 -maximum xs@(PS x s l) - | null xs = errorEmptyList "maximum" - | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> - c_maximum (p `plusPtr` s) (fromIntegral l) - --- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' --- This function will fuse. --- An exception will be thrown in the case of an empty ByteString. -minimum :: ByteString -> Word8 -minimum xs@(PS x s l) - | null xs = errorEmptyList "minimum" - | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> - c_minimum (p `plusPtr` s) (fromIntegral l) - --- --- minimum/maximum/loop fusion. As for length (and other folds), when we --- see we're applied after a fuseable op, switch from using the C --- version, to the fuseable version. The result should then avoid --- allocating a buffer. --- - -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] minimum #-} -{-# INLINE [1] maximum #-} -#endif - -maximumU :: ByteString -> Word8 -maximumU = foldl1' max -{-# INLINE maximumU #-} - -minimumU :: ByteString -> Word8 -minimumU = foldl1' min -{-# INLINE minimumU #-} - -{-# RULES - -"FPS minimum/loop" forall loop s . - minimum (loopArr (loopWrapper loop s)) = - minimumU (loopArr (loopWrapper loop s)) - -"FPS maximum/loop" forall loop s . - maximum (loopArr (loopWrapper loop s)) = - maximumU (loopArr (loopWrapper loop s)) - - #-} - ------------------------------------------------------------------------- - --- | The 'mapAccumL' function behaves like a combination of 'map' and --- 'foldl'; it applies a function to each element of a ByteString, --- passing an accumulating parameter from left to right, and returning a --- final value of this accumulator together with the new list. -mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -#if !defined(LOOPU_FUSION) -mapAccumL f z = unSP . loopUp (mapAccumEFL f) z -#else -mapAccumL f z = unSP . loopU (mapAccumEFL f) z -#endif -{-# INLINE mapAccumL #-} - --- | The 'mapAccumR' function behaves like a combination of 'map' and --- 'foldr'; it applies a function to each element of a ByteString, --- passing an accumulating parameter from right to left, and returning a --- final value of this accumulator together with the new ByteString. -mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumR f z = unSP . loopDown (mapAccumEFL f) z -{-# INLINE mapAccumR #-} - --- | /O(n)/ map Word8 functions, provided with the index at each position -mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString -mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0 -{-# INLINE mapIndexed #-} - --- --------------------------------------------------------------------- --- Building ByteStrings - --- | 'scanl' is similar to 'foldl', but returns a list of successive --- reduced values from the left. This function will fuse. --- --- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] --- --- Note that --- --- > last (scanl f z xs) == foldl f z xs. -scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString -#if !defined(LOOPU_FUSION) -scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0) -#else -scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -#endif - - -- n.b. haskell's List scan returns a list one bigger than the - -- input, so we need to snoc here to get some extra space, however, - -- it breaks map/up fusion (i.e. scanl . map no longer fuses) -{-# INLINE scanl #-} - --- | 'scanl1' is a variant of 'scanl' that has no starting value argument. --- This function will fuse. --- --- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] -scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -scanl1 f ps - | null ps = empty - | otherwise = scanl f (unsafeHead ps) (unsafeTail ps) -{-# INLINE scanl1 #-} - --- | scanr is the right-to-left dual of scanl. -scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString -scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space -{-# INLINE scanr #-} - --- | 'scanr1' is a variant of 'scanr' that has no starting value argument. -scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -scanr1 f ps - | null ps = empty - | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions -{-# INLINE scanr1 #-} - --- --------------------------------------------------------------------- --- Unfolds and replicates - --- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ --- the value of every element. The following holds: --- --- > replicate w c = unfoldr w (\u -> Just (u,u)) c --- --- This implemenation uses @memset(3)@ -replicate :: Int -> Word8 -> ByteString -replicate w c - | w <= 0 = empty - | otherwise = unsafeCreate w $ \ptr -> - memset ptr c (fromIntegral w) >> return () - --- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' --- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a --- ByteString from a seed value. The function takes the element and --- returns 'Nothing' if it is done producing the ByteString or returns --- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, --- and @b@ is the seed value for further production. --- --- Examples: --- --- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 --- > == pack [0, 1, 2, 3, 4, 5] --- -unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString -unfoldr f = concat . unfoldChunk 32 64 - where unfoldChunk n n' x = - case unfoldrN n f x of - (s, Nothing) -> s : [] - (s, Just x') -> s : unfoldChunk n' (n+n') x' - --- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed --- value. However, the length of the result is limited by the first --- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' --- when the maximum length of the result is known. --- --- The following equation relates 'unfoldrN' and 'unfoldr': --- --- > unfoldrN n f s == take n (unfoldr f s) --- -unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) -unfoldrN i f x0 - | i < 0 = (empty, Just x0) - | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 - where STRICT3(go) - go p x n = - case f x of - Nothing -> return (0, n, Nothing) - Just (w,x') - | n == i -> return (0, n, Just x) - | otherwise -> do poke p w - go (p `plusPtr` 1) x' (n+1) - --- --------------------------------------------------------------------- --- Substrings - --- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix --- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. -take :: Int -> ByteString -> ByteString -take n ps@(PS x s l) - | n <= 0 = empty - | n >= l = ps - | otherwise = PS x s n -{-# INLINE take #-} - --- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ --- elements, or @[]@ if @n > 'length' xs@. -drop :: Int -> ByteString -> ByteString -drop n ps@(PS x s l) - | n <= 0 = ps - | n >= l = empty - | otherwise = PS x (s+n) (l-n) -{-# INLINE drop #-} - --- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. -splitAt :: Int -> ByteString -> (ByteString, ByteString) -splitAt n ps@(PS x s l) - | n <= 0 = (empty, ps) - | n >= l = (ps, empty) - | otherwise = (PS x s n, PS x (s+n) (l-n)) -{-# INLINE splitAt #-} - --- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, --- returns the longest prefix (possibly empty) of @xs@ of elements that --- satisfy @p@. -takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString -takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps -{-# INLINE takeWhile #-} - --- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. -dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString -dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps -{-# INLINE dropWhile #-} - --- | 'break' @p@ is equivalent to @'span' ('not' . p)@. -break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) -{-# INLINE [1] break #-} - -{-# RULES -"FPS specialise break (x==)" forall x. - break ((==) x) = breakByte x - #-} - -#if __GLASGOW_HASKELL__ >= 605 -{-# RULES -"FPS specialise break (==x)" forall x. - break (==x) = breakByte x - #-} -#endif - --- | 'breakByte' breaks its ByteString argument at the first occurence --- of the specified byte. It is more efficient than 'break' as it is --- implemented with @memchr(3)@. I.e. --- --- > break (=='c') "abcd" == breakByte 'c' "abcd" --- -breakByte :: Word8 -> ByteString -> (ByteString, ByteString) -breakByte c p = case elemIndex c p of - Nothing -> (p,empty) - Just n -> (unsafeTake n p, unsafeDrop n p) -{-# INLINE breakByte #-} - --- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' --- --- breakEnd p == spanEnd (not.p) -breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -breakEnd p ps = splitAt (findFromEndUntil p ps) ps - --- | 'span' @p xs@ breaks the ByteString into two segments. It is --- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ -span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -span p ps = break (not . p) ps -{-# INLINE [1] span #-} - --- | 'spanByte' breaks its ByteString argument at the first --- occurence of a byte other than its argument. It is more efficient --- than 'span (==)' --- --- > span (=='c') "abcd" == spanByte 'c' "abcd" --- -spanByte :: Word8 -> ByteString -> (ByteString, ByteString) -spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> - go (p `plusPtr` s) 0 - where - STRICT2(go) - go p i | i >= l = return (ps, empty) - | otherwise = do c' <- peekByteOff p i - if c /= c' - then return (unsafeTake i ps, unsafeDrop i ps) - else go p (i+1) -{-# INLINE spanByte #-} - -{-# RULES -"FPS specialise span (x==)" forall x. - span ((==) x) = spanByte x - #-} - -#if __GLASGOW_HASKELL__ >= 605 -{-# RULES -"FPS specialise span (==x)" forall x. - span (==x) = spanByte x - #-} -#endif - --- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. --- We have --- --- > spanEnd (not.isSpace) "x y z" == ("x y ","z") --- --- and --- --- > spanEnd (not . isSpace) ps --- > == --- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) --- -spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps - --- | /O(n)/ Splits a 'ByteString' into components delimited by --- separators, where the predicate returns True for a separator element. --- The resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] --- > splitWith (=='a') [] == [] --- -splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] - -#if defined(__GLASGOW_HASKELL__) -splitWith _pred (PS _ _ 0) = [] -splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp - where pred# c# = pred_ (W8# c#) - - STRICT4(splitWith0) - splitWith0 pred' off' len' fp' = withPtr fp $ \p -> - splitLoop pred' p 0 off' len' fp' - - splitLoop :: (Word# -> Bool) - -> Ptr Word8 - -> Int -> Int -> Int - -> ForeignPtr Word8 - -> IO [ByteString] - - splitLoop pred' p idx' off' len' fp' - | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined - | idx' >= len' = return [PS fp' off' idx'] - | otherwise = do - w <- peekElemOff p (off'+idx') - if pred' (case w of W8# w# -> w#) - then return (PS fp' off' idx' : - splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp') - else splitLoop pred' p (idx'+1) off' len' fp' -{-# INLINE splitWith #-} - -#else -splitWith _ (PS _ _ 0) = [] -splitWith p ps = loop p ps - where - STRICT2(loop) - loop q qs = if null rest then [chunk] - else chunk : loop q (unsafeTail rest) - where (chunk,rest) = break q qs -#endif - --- | /O(n)/ Break a 'ByteString' into pieces separated by the byte --- argument, consuming the delimiter. I.e. --- --- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] --- > split 'a' "aXaXaXa" == ["","X","X","X",""] --- > split 'x' "x" == ["",""] --- --- and --- --- > join [c] . split c == id --- > split == splitWith . (==) --- --- As for all splitting functions in this library, this function does --- not copy the substrings, it just constructs new 'ByteStrings' that --- are slices of the original. --- -split :: Word8 -> ByteString -> [ByteString] -split _ (PS _ _ 0) = [] -split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - let ptr = p `plusPtr` s - - STRICT1(loop) - loop n = - let q = inlinePerformIO $ memchr (ptr `plusPtr` n) - w (fromIntegral (l-n)) - in if q == nullPtr - then [PS x (s+n) (l-n)] - else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1) - - return (loop 0) -{-# INLINE split #-} - -{- --- slower. but stays inside Haskell. -split _ (PS _ _ 0) = [] -split (W8# w#) (PS fp off len) = splitWith' off len fp - where - splitWith' off' len' fp' = withPtr fp $ \p -> - splitLoop p 0 off' len' fp' - - splitLoop :: Ptr Word8 - -> Int -> Int -> Int - -> ForeignPtr Word8 - -> IO [ByteString] - - STRICT5(splitLoop) - splitLoop p idx' off' len' fp' - | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined - | idx' >= len' = return [PS fp' off' idx'] - | otherwise = do - (W8# x#) <- peekElemOff p (off'+idx') - if word2Int# w# ==# word2Int# x# - then return (PS fp' off' idx' : - splitWith' (off'+idx'+1) (len'-idx'-1) fp') - else splitLoop p (idx'+1) off' len' fp' --} - -{- --- | Like 'splitWith', except that sequences of adjacent separators are --- treated as a single separator. eg. --- --- > tokens (=='a') "aabbaca" == ["bb","c"] --- -tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] -tokens f = P.filter (not.null) . splitWith f -{-# INLINE tokens #-} --} - --- | The 'group' function takes a ByteString and returns a list of --- ByteStrings such that the concatenation of the result is equal to the --- argument. Moreover, each sublist in the result contains only equal --- elements. For example, --- --- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] --- --- It is a special case of 'groupBy', which allows the programmer to --- supply their own equality test. It is about 40% faster than --- /groupBy (==)/ -group :: ByteString -> [ByteString] -group xs - | null xs = [] - | otherwise = ys : group zs - where - (ys, zs) = spanByte (unsafeHead xs) xs - --- | The 'groupBy' function is the non-overloaded version of 'group'. -groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -groupBy k xs - | null xs = [] - | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs) - where - n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs) - --- | /O(n)/ The 'join' function takes a 'ByteString' and a list of --- 'ByteString's and concatenates the list after interspersing the first --- argument between each element of the list. -join :: ByteString -> [ByteString] -> ByteString -join s = concat . (List.intersperse s) -{-# INLINE [1] join #-} - -{-# RULES -"FPS specialise join c -> joinByte" forall c s1 s2 . - join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2 - #-} - --- --- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings --- with a char. Around 4 times faster than the generalised join. --- -joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString -joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr -> - withForeignPtr ffp $ \fp -> - withForeignPtr fgp $ \gp -> do - memcpy ptr (fp `plusPtr` s) (fromIntegral l) - poke (ptr `plusPtr` l) c - memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m) - where - len = length f + length g + 1 -{-# INLINE joinWithByte #-} - --- --------------------------------------------------------------------- --- Indexing ByteStrings - --- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. -index :: ByteString -> Int -> Word8 -index ps n - | n < 0 = moduleError "index" ("negative index: " ++ show n) - | n >= length ps = moduleError "index" ("index too large: " ++ show n - ++ ", length = " ++ show (length ps)) - | otherwise = ps `unsafeIndex` n -{-# INLINE index #-} - --- | /O(n)/ The 'elemIndex' function returns the index of the first --- element in the given 'ByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. --- This implementation uses memchr(3). -elemIndex :: Word8 -> ByteString -> Maybe Int -elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - let p' = p `plusPtr` s - q <- memchr p' c (fromIntegral l) - return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p' -{-# INLINE elemIndex #-} - --- | /O(n)/ The 'elemIndexEnd' function returns the last index of the --- element in the given 'ByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. The following --- holds: --- --- > elemIndexEnd c xs == --- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) --- -elemIndexEnd :: Word8 -> ByteString -> Maybe Int -elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> - go (p `plusPtr` s) (l-1) - where - STRICT2(go) - go p i | i < 0 = return Nothing - | otherwise = do ch' <- peekByteOff p i - if ch == ch' - then return $ Just i - else go p (i-1) -{-# INLINE elemIndexEnd #-} - --- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning --- the indices of all elements equal to the query element, in ascending order. --- This implementation uses memchr(3). -elemIndices :: Word8 -> ByteString -> [Int] -elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - let ptr = p `plusPtr` s - - STRICT1(loop) - loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n) - w (fromIntegral (l - n)) - in if q == nullPtr - then [] - else let i = q `minusPtr` ptr - in i : loop (i+1) - return $! loop 0 -{-# INLINE elemIndices #-} - -{- --- much slower -elemIndices :: Word8 -> ByteString -> [Int] -elemIndices c ps = loop 0 ps - where STRICT2(loop) - loop _ ps' | null ps' = [] - loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps') - | otherwise = loop (n+1) (unsafeTail ps') --} - --- | count returns the number of times its argument appears in the ByteString --- --- > count = length . elemIndices --- --- But more efficiently than using length on the intermediate list. -count :: Word8 -> ByteString -> Int -count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> - fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w -{-# INLINE count #-} - -{- --- --- around 30% slower --- -count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> - go (p `plusPtr` s) (fromIntegral m) 0 - where - go :: Ptr Word8 -> CSize -> Int -> IO Int - STRICT3(go) - go p l i = do - q <- memchr p w l - if q == nullPtr - then return i - else do let k = fromIntegral $ q `minusPtr` p - go (q `plusPtr` 1) (l-k-1) (i+1) --} - --- | The 'findIndex' function takes a predicate and a 'ByteString' and --- returns the index of the first element in the ByteString --- satisfying the predicate. -findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int -findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 - where - STRICT2(go) - go ptr n | n >= l = return Nothing - | otherwise = do w <- peek ptr - if k w - then return (Just n) - else go (ptr `plusPtr` 1) (n+1) -{-# INLINE findIndex #-} - --- | The 'findIndices' function extends 'findIndex', by returning the --- indices of all elements satisfying the predicate, in ascending order. -findIndices :: (Word8 -> Bool) -> ByteString -> [Int] -findIndices p ps = loop 0 ps - where - STRICT2(loop) - loop n qs | null qs = [] - | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) - | otherwise = loop (n+1) (unsafeTail qs) - --- --------------------------------------------------------------------- --- Searching ByteStrings - --- | /O(n)/ 'elem' is the 'ByteString' membership predicate. -elem :: Word8 -> ByteString -> Bool -elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True -{-# INLINE elem #-} - --- | /O(n)/ 'notElem' is the inverse of 'elem' -notElem :: Word8 -> ByteString -> Bool -notElem c ps = not (elem c ps) -{-# INLINE notElem #-} - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. This function is subject to array fusion. -filter :: (Word8 -> Bool) -> ByteString -> ByteString -#if defined(LOOPU_FUSION) -filter p = loopArr . loopU (filterEFL p) NoAcc -#elif defined(LOOPUP_FUSION) -filter p = loopArr . loopUp (filterEFL p) NoAcc -#elif defined(LOOPNOACC_FUSION) -filter p = loopArr . loopNoAcc (filterEFL p) -#else -filter f = loopArr . loopFilter f -#endif -{-# INLINE filter #-} - -{- --- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be --- around 2x faster for some one-shot applications. -filter' :: (Word8 -> Bool) -> ByteString -> ByteString -filter' k ps@(PS x s l) - | null ps = ps - | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do - t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) - return $! t `minusPtr` p -- actual length - where - STRICT3(go) - go f t end | f == end = return t - | otherwise = do - w <- peek f - if k w - then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end - else go (f `plusPtr` 1) t end -{-# INLINE filter' #-} --} - --- --- | /O(n)/ A first order equivalent of /filter . (==)/, for the common --- case of filtering a single byte. It is more efficient to use --- /filterByte/ in this case. --- --- > filterByte == filter . (==) --- --- filterByte is around 10x faster, and uses much less space, than its --- filter equivalent -filterByte :: Word8 -> ByteString -> ByteString -filterByte w ps = replicate (count w ps) w -{-# INLINE filterByte #-} - -{-# RULES - "FPS specialise filter (== x)" forall x. - filter ((==) x) = filterByte x - #-} - -#if __GLASGOW_HASKELL__ >= 605 -{-# RULES - "FPS specialise filter (== x)" forall x. - filter (== x) = filterByte x - #-} -#endif - --- --- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common --- case of filtering a single byte out of a list. It is more efficient --- to use /filterNotByte/ in this case. --- --- > filterNotByte == filter . (/=) --- --- filterNotByte is around 2x faster than its filter equivalent. -filterNotByte :: Word8 -> ByteString -> ByteString -filterNotByte w = filter (/= w) -{-# INLINE filterNotByte #-} - -{-# RULES -"FPS specialise filter (x /=)" forall x. - filter ((/=) x) = filterNotByte x - #-} - -#if __GLASGOW_HASKELL__ >= 605 -{-# RULES -"FPS specialise filter (/= x)" forall x. - filter (/= x) = filterNotByte x - #-} -#endif - --- | /O(n)/ The 'find' function takes a predicate and a ByteString, --- and returns the first element in matching the predicate, or 'Nothing' --- if there is no such element. --- --- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing --- -find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 -find f p = case findIndex f p of - Just n -> Just (p `unsafeIndex` n) - _ -> Nothing -{-# INLINE find #-} - -{- --- --- fuseable, but we don't want to walk the whole array. --- -find k = foldl findEFL Nothing - where findEFL a@(Just _) _ = a - findEFL _ c | k c = Just c - | otherwise = Nothing --} - --- --------------------------------------------------------------------- --- Searching for substrings - --- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' --- iff the first is a prefix of the second. -isPrefixOf :: ByteString -> ByteString -> Bool -isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2) - | l1 == 0 = True - | l2 < l1 = False - | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> - withForeignPtr x2 $ \p2 -> do - i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1) - return $! i == 0 - --- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' --- iff the first is a suffix of the second. --- --- The following holds: --- --- > isSuffixOf x y == reverse x `isPrefixOf` reverse y --- --- However, the real implemenation uses memcmp to compare the end of the --- string only, with no reverse required.. -isSuffixOf :: ByteString -> ByteString -> Bool -isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2) - | l1 == 0 = True - | l2 < l1 = False - | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> - withForeignPtr x2 $ \p2 -> do - i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1) - return $! i == 0 - --- | Check whether one string is a substring of another. @isSubstringOf --- p s@ is equivalent to @not (null (findSubstrings p s))@. -isSubstringOf :: ByteString -- ^ String to search for. - -> ByteString -- ^ String to search in. - -> Bool -isSubstringOf p s = not $ P.null $ findSubstrings p s - --- | Get the first index of a substring in another string, --- or 'Nothing' if the string is not found. --- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@. -findSubstring :: ByteString -- ^ String to search for. - -> ByteString -- ^ String to seach in. - -> Maybe Int -findSubstring = (listToMaybe .) . findSubstrings - --- | Find the indexes of all (possibly overlapping) occurances of a --- substring in a string. This function uses the Knuth-Morris-Pratt --- string matching algorithm. -findSubstrings :: ByteString -- ^ String to search for. - -> ByteString -- ^ String to seach in. - -> [Int] - -findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0 - where - patc x = pat `unsafeIndex` x - strc x = str `unsafeIndex` x - - -- maybe we should make kmpNext a UArray before using it in search? - kmpNext = listArray (0,m) (-1:kmpNextL pat (-1)) - kmpNextL p _ | null p = [] - kmpNextL p j = let j' = next (unsafeHead p) j + 1 - ps = unsafeTail p - x = if not (null ps) && unsafeHead ps == patc j' - then kmpNext Array.! j' else j' - in x:kmpNextL ps j' - search i j = match ++ rest -- i: position in string, j: position in pattern - where match = if j == m then [(i - j)] else [] - rest = if i == n then [] else search (i+1) (next (strc i) j + 1) - next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j) - | otherwise = j - --- --------------------------------------------------------------------- --- Zipping - --- | /O(n)/ 'zip' takes two ByteStrings and returns a list of --- corresponding pairs of bytes. If one input ByteString is short, --- excess elements of the longer ByteString are discarded. This is --- equivalent to a pair of 'unpack' operations. -zip :: ByteString -> ByteString -> [(Word8,Word8)] -zip ps qs - | null ps || null qs = [] - | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs) - --- | 'zipWith' generalises 'zip' by zipping with the function given as --- the first argument, instead of a tupling function. For example, --- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of --- corresponding sums. -zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] -zipWith f ps qs - | null ps || null qs = [] - | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] zipWith #-} -#endif - --- --- | A specialised version of zipWith for the common case of a --- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules --- are used to automatically covert zipWith into zipWith' when a pack is --- performed on the result of zipWith, but we also export it for --- convenience. --- -zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString -zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $ - withForeignPtr fp $ \a -> - withForeignPtr fq $ \b -> - create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t) - where - zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () - STRICT4(zipWith_) - zipWith_ n p1 p2 r - | n >= len = return () - | otherwise = do - x <- peekByteOff p1 n - y <- peekByteOff p2 n - pokeByteOff r n (f x y) - zipWith_ (n+1) p1 p2 r - - len = min l m -{-# INLINE zipWith' #-} - -{-# RULES - -"FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q . - zipWith f p q = unpack (zipWith' f p q) - - #-} - --- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of --- ByteStrings. Note that this performs two 'pack' operations. -unzip :: [(Word8,Word8)] -> (ByteString,ByteString) -unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) -{-# INLINE unzip #-} - --- --------------------------------------------------------------------- --- Special lists - --- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. -inits :: ByteString -> [ByteString] -inits (PS x s l) = [PS x s n | n <- [0..l]] - --- | /O(n)/ Return all final segments of the given 'ByteString', longest first. -tails :: ByteString -> [ByteString] -tails p | null p = [empty] - | otherwise = p : tails (unsafeTail p) - --- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]] - --- --------------------------------------------------------------------- --- ** Ordered 'ByteString's - --- | /O(n)/ Sort a ByteString efficiently, using counting sort. -sort :: ByteString -> ByteString -sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do - - memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) - withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l) - - let STRICT2(go) - go 256 _ = return () - go i ptr = do n <- peekElemOff arr i - when (n /= 0) $ memset ptr (fromIntegral i) n >> return () - go (i + 1) (ptr `plusPtr` (fromIntegral n)) - go 0 p - -{- -sort :: ByteString -> ByteString -sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do - memcpy p (f `plusPtr` s) l - c_qsort p l -- inplace --} - --- | The 'sortBy' function is the non-overloaded version of 'sort'. --- --- Try some linear sorts: radix, counting --- Or mergesort. --- --- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString --- sortBy f ps = undefined - --- --------------------------------------------------------------------- --- Low level constructors - --- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/ --- finalizer associated to it. The ByteString length is calculated using --- /strlen(3)/, and thus the complexity is a /O(n)/. -packCString :: CString -> ByteString -packCString cstr = unsafePerformIO $ do - fp <- newForeignPtr_ (castPtr cstr) - l <- c_strlen cstr - return $! PS fp 0 (fromIntegral l) - --- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will --- have /no/ finalizer associated with it. This operation has /O(1)/ --- complexity as we already know the final size, so no /strlen(3)/ is --- required. -packCStringLen :: CStringLen -> ByteString -packCStringLen (ptr,len) = unsafePerformIO $ do - fp <- newForeignPtr_ (castPtr ptr) - return $! PS fp 0 (fromIntegral len) - --- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will --- have a @free(3)@ finalizer associated to it. -packMallocCString :: CString -> ByteString -packMallocCString cstr = unsafePerformIO $ do - fp <- newForeignFreePtr (castPtr cstr) - len <- c_strlen cstr - return $! PS fp 0 (fromIntegral len) - --- | /O(n) construction/ Use a @ByteString@ with a function requiring a --- null-terminated @CString@. The @CString@ will be freed --- automatically. This is a memcpy(3). -useAsCString :: ByteString -> (CString -> IO a) -> IO a -useAsCString (PS ps s l) = bracket alloc (c_free.castPtr) - where alloc = withForeignPtr ps $ \p -> do - buf <- c_malloc (fromIntegral l+1) - memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l) - poke (buf `plusPtr` l) (0::Word8) -- n.b. - return (castPtr buf) - --- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@. -useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a -useAsCStringLen = unsafeUseAsCStringLen - --- --- why were we doing this? --- --- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a --- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst) --- where --- alloc = withForeignPtr ps $ \p -> do --- buf <- c_malloc (fromIntegral l+1) --- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l) --- poke (buf `plusPtr` l) (0::Word8) -- n.b. --- return $! (castPtr buf, l) --- - --- | /O(n)/ Make a copy of the 'ByteString' with its own storage. --- This is mainly useful to allow the rest of the data pointed --- to by the 'ByteString' to be garbage collected, for example --- if a large string has been read in, and only a small part of it --- is needed in the rest of the program. -copy :: ByteString -> ByteString -copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> - memcpy p (f `plusPtr` s) (fromIntegral l) - --- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the --- CString is going to be deallocated from C land. -copyCString :: CString -> IO ByteString -copyCString cstr = do - len <- c_strlen cstr - copyCStringLen (cstr, fromIntegral len) - --- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known. -copyCStringLen :: CStringLen -> IO ByteString -copyCStringLen (cstr, len) = create len $ \p -> - memcpy p (castPtr cstr) (fromIntegral len) - --- --------------------------------------------------------------------- --- line IO - --- | Read a line from stdin. -getLine :: IO ByteString -getLine = hGetLine stdin - -{- --- | Lazily construct a list of lines of ByteStrings. This will be much --- better on memory consumption than using 'hGetContents >>= lines' --- If you're considering this, a better choice might be to use --- Data.ByteString.Lazy -hGetLines :: Handle -> IO [ByteString] -hGetLines h = go - where - go = unsafeInterleaveIO $ do - e <- hIsEOF h - if e - then return [] - else do - x <- hGetLine h - xs <- go - return (x:xs) --} - --- | Read a line from a handle - -hGetLine :: Handle -> IO ByteString -#if !defined(__GLASGOW_HASKELL__) -hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w -#else -hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do - case haBufferMode handle_ of - NoBuffering -> error "no buffering" - _other -> hGetLineBuffered handle_ - - where - hGetLineBuffered handle_ = do - let ref = haBuffer handle_ - buf <- readIORef ref - hGetLineBufferedLoop handle_ ref buf 0 [] - - hGetLineBufferedLoop handle_ ref - buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss = - len `seq` do - off <- findEOL r w raw - let new_len = len + off - r - xs <- mkPS raw r off - - -- if eol == True, then off is the offset of the '\n' - -- otherwise off == w and the buffer is now empty. - if off /= w - then do if (w == off + 1) - then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 } - else writeIORef ref buf{ bufRPtr = off + 1 } - mkBigPS new_len (xs:xss) - else do - maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_) - buf{ bufWPtr=0, bufRPtr=0 } - case maybe_buf of - -- Nothing indicates we caught an EOF, and we may have a - -- partial line to return. - Nothing -> do - writeIORef ref buf{ bufRPtr=0, bufWPtr=0 } - if new_len > 0 - then mkBigPS new_len (xs:xss) - else ioe_EOF - Just new_buf -> - hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss) - - -- find the end-of-line character, if there is one - findEOL r w raw - | r == w = return w - | otherwise = do - (c,r') <- readCharFromBuffer raw r - if c == '\n' - then return r -- NB. not r': don't include the '\n' - else findEOL r' w raw - - maybeFillReadBuffer fd is_line is_stream buf = catch - (do buf' <- fillReadBuffer fd is_line is_stream buf - return (Just buf')) - (\e -> if isEOFError e then return Nothing else ioError e) - --- TODO, rewrite to use normal memcpy -mkPS :: RawBuffer -> Int -> Int -> IO ByteString -mkPS buf start end = - let len = end - start - in create len $ \p -> do - memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len) - return () - -mkBigPS :: Int -> [ByteString] -> IO ByteString -mkBigPS _ [ps] = return ps -mkBigPS _ pss = return $! concat (P.reverse pss) - -#endif - --- --------------------------------------------------------------------- --- Block IO - --- | Outputs a 'ByteString' to the specified 'Handle'. -hPut :: Handle -> ByteString -> IO () -hPut _ (PS _ _ 0) = return () -hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l - --- | A synonym for @hPut@, for compatibility -hPutStr :: Handle -> ByteString -> IO () -hPutStr = hPut - --- | Write a ByteString to a handle, appending a newline byte -hPutStrLn :: Handle -> ByteString -> IO () -hPutStrLn h ps - | length ps < 1024 = hPut h (ps `snoc` 0x0a) - | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy - --- | Write a ByteString to stdout -putStr :: ByteString -> IO () -putStr = hPut stdout - --- | Write a ByteString to stdout, appending a newline byte -putStrLn :: ByteString -> IO () -putStrLn = hPutStrLn stdout - --- | Read a 'ByteString' directly from the specified 'Handle'. This --- is far more efficient than reading the characters into a 'String' --- and then using 'pack'. -hGet :: Handle -> Int -> IO ByteString -hGet _ 0 = return empty -hGet h i = createAndTrim i $ \p -> hGetBuf h p i - --- | hGetNonBlocking is identical to 'hGet', except that it will never block --- waiting for data to become available, instead it returns only whatever data --- is available. -hGetNonBlocking :: Handle -> Int -> IO ByteString -#if defined(__GLASGOW_HASKELL__) -hGetNonBlocking _ 0 = return empty -hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i -#else -hGetNonBlocking = hGet -#endif - --- | Read entire handle contents into a 'ByteString'. --- This function reads chunks at a time, doubling the chunksize on each --- read. The final buffer is then realloced to the appropriate size. For --- files > half of available memory, this may lead to memory exhaustion. --- Consider using 'readFile' in this case. --- --- As with 'hGet', the string representation in the file is assumed to --- be ISO-8859-1. --- -hGetContents :: Handle -> IO ByteString -hGetContents h = do - let start_size = 1024 - p <- mallocArray start_size - i <- hGetBuf h p start_size - if i < start_size - then do p' <- reallocArray p i - fp <- newForeignFreePtr p' - return $! PS fp 0 i - else f p start_size - where - f p s = do - let s' = 2 * s - p' <- reallocArray p s' - i <- hGetBuf h (p' `plusPtr` s) s - if i < s - then do let i' = s + i - p'' <- reallocArray p' i' - fp <- newForeignFreePtr p'' - return $! PS fp 0 i' - else f p' s' - --- | getContents. Equivalent to hGetContents stdin -getContents :: IO ByteString -getContents = hGetContents stdin - --- | The interact function takes a function of type @ByteString -> ByteString@ --- as its argument. The entire input from the standard input device is passed --- to this function as its argument, and the resulting string is output on the --- standard output device. It's great for writing one line programs! -interact :: (ByteString -> ByteString) -> IO () -interact transformer = putStr . transformer =<< getContents - --- | Read an entire file strictly into a 'ByteString'. This is far more --- efficient than reading the characters into a 'String' and then using --- 'pack'. It also may be more efficient than opening the file and --- reading it using hGet. Files are read using 'binary mode' on Windows, --- for 'text mode' use the Char8 version of this function. -readFile :: FilePath -> IO ByteString -readFile f = bracket (openBinaryFile f ReadMode) hClose - (\h -> hFileSize h >>= hGet h . fromIntegral) - --- | Write a 'ByteString' to a file. -writeFile :: FilePath -> ByteString -> IO () -writeFile f txt = bracket (openBinaryFile f WriteMode) hClose - (\h -> hPut h txt) - --- | Append a 'ByteString' to a file. -appendFile :: FilePath -> ByteString -> IO () -appendFile f txt = bracket (openBinaryFile f AppendMode) hClose - (\h -> hPut h txt) - -{- --- --- Disable until we can move it into a portable .hsc file --- - --- | Like readFile, this reads an entire file directly into a --- 'ByteString', but it is even more efficient. It involves directly --- mapping the file to memory. This has the advantage that the contents --- of the file never need to be copied. Also, under memory pressure the --- page may simply be discarded, while in the case of readFile it would --- need to be written to swap. If you read many small files, mmapFile --- will be less memory-efficient than readFile, since each mmapFile --- takes up a separate page of memory. Also, you can run into bus --- errors if the file is modified. As with 'readFile', the string --- representation in the file is assumed to be ISO-8859-1. --- --- On systems without mmap, this is the same as a readFile. --- -mmapFile :: FilePath -> IO ByteString -mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l - -mmap :: FilePath -> IO (ForeignPtr Word8, Int) -mmap f = do - h <- openBinaryFile f ReadMode - l <- fromIntegral `fmap` hFileSize h - -- Don't bother mmaping small files because each mmapped file takes up - -- at least one full VM block. - if l < mmap_limit - then do thefp <- mallocByteString l - withForeignPtr thefp $ \p-> hGetBuf h p l - hClose h - return (thefp, l) - else do - -- unix only :( - fd <- fromIntegral `fmap` handleToFd h - p <- my_mmap l fd - fp <- if p == nullPtr - then do thefp <- mallocByteString l - withForeignPtr thefp $ \p' -> hGetBuf h p' l - return thefp - else do - -- The munmap leads to crashes on OpenBSD. - -- maybe there's a use after unmap in there somewhere? - -- Bulat suggests adding the hClose to the - -- finalizer, excellent idea. -#if !defined(__OpenBSD__) - let unmap = c_munmap p l >> return () -#else - let unmap = return () -#endif - fp <- newForeignPtr p unmap - return fp - c_close fd - hClose h - return (fp, l) - where mmap_limit = 16*1024 --} - --- --------------------------------------------------------------------- --- Internal utilities - --- | 'findIndexOrEnd' is a variant of findIndex, that returns the length --- of the string if no element is found, rather than Nothing. -findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int -findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 - where - STRICT2(go) - go ptr n | n >= l = return l - | otherwise = do w <- peek ptr - if k w - then return n - else go (ptr `plusPtr` 1) (n+1) -{-# INLINE findIndexOrEnd #-} - --- | Perform an operation with a temporary ByteString -withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b -withPtr fp io = inlinePerformIO (withForeignPtr fp io) -{-# INLINE withPtr #-} - --- Common up near identical calls to `error' to reduce the number --- constant strings created when compiled: -errorEmptyList :: String -> a -errorEmptyList fun = moduleError fun "empty ByteString" -{-# NOINLINE errorEmptyList #-} - -moduleError :: String -> String -> a -moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg) -{-# NOINLINE moduleError #-} - --- Find from the end of the string using predicate -findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int -STRICT2(findFromEndUntil) -findFromEndUntil f ps@(PS x s l) = - if null ps then 0 - else if f (last ps) then l - else findFromEndUntil f (PS x s (l-1)) - -{-# INLINE newForeignFreePtr #-} -newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8) -newForeignFreePtr p = newForeignPtr c_free_finalizer p diff --git a/Data/ByteString/Base.hs b/Data/ByteString/Base.hs deleted file mode 100644 index a125812..0000000 --- a/Data/ByteString/Base.hs +++ /dev/null @@ -1,514 +0,0 @@ -{-# OPTIONS_GHC -cpp -fglasgow-exts #-} --- | --- Module : Data.ByteString.Base --- License : BSD-style --- Maintainer : dons@cse.unsw.edu.au --- Stability : experimental --- Portability : portable --- --- A module containing semi-public 'ByteString' internals. This exposes --- the 'ByteString' representation and low level construction functions. --- Modules which extend the 'ByteString' system will need to use this module --- while ideally most users will be able to make do with the public interface --- modules. --- -module Data.ByteString.Base ( - - -- * The @ByteString@ type and representation - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable - LazyByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable - - -- * Unchecked access - unsafeHead, -- :: ByteString -> Word8 - unsafeTail, -- :: ByteString -> ByteString - unsafeIndex, -- :: ByteString -> Int -> Word8 - unsafeTake, -- :: Int -> ByteString -> ByteString - unsafeDrop, -- :: Int -> ByteString -> ByteString - - -- * Low level introduction and elimination - empty, -- :: ByteString - create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString - createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString - createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) - mallocByteString, -- :: Int -> IO (ForeignPtr a) - - unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString - unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a - unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a - - fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString - toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int) - -#if defined(__GLASGOW_HASKELL__) - packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString - packAddress, -- :: Addr# -> ByteString - unsafePackAddress, -- :: Int -> Addr# -> ByteString - unsafeFinalize, -- :: ByteString -> IO () -#endif - - -- * Utilities - inlinePerformIO, -- :: IO a -> a - nullForeignPtr, -- :: ForeignPtr Word8 - - countOccurrences, -- :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO () - - -- * Standard C Functions - c_strlen, -- :: CString -> IO CInt - c_malloc, -- :: CInt -> IO (Ptr Word8) - c_free, -- :: Ptr Word8 -> IO () - c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ()) - - memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8 - memcmp, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt - memcpy, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () - memmove, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () - memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) - - -- * cbits functions - c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () - c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO () - c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8 - c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8 - c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt - - -- * Internal GHC magic -#if defined(__GLASGOW_HASKELL__) - memcpy_ptr_baoff, -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) -#endif - - -- * Chars - w2c, c2w, isSpaceWord8 - - ) where - -import Foreign.ForeignPtr (ForeignPtr, newForeignPtr_, withForeignPtr) -import Foreign.Ptr (Ptr, FunPtr, plusPtr, castPtr) -import Foreign.Storable (Storable(..)) -import Foreign.C.Types (CInt, CSize, CULong) -import Foreign.C.String (CString, CStringLen) - -import Control.Exception (assert) - -import Data.Char (ord) -import Data.Word (Word8) - -#if defined(__GLASGOW_HASKELL__) -import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr) -import qualified Foreign.Concurrent as FC (newForeignPtr) - -import Data.Generics (Data(..), Typeable(..)) -import GHC.Prim (Addr#) -import GHC.Ptr (Ptr(..)) -import GHC.Base (realWorld#,unsafeChr) -import GHC.IOBase (IO(IO), unsafePerformIO, RawBuffer) -#else -import Data.Char (chr) -import System.IO.Unsafe (unsafePerformIO) -#endif - -#if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR) -import GHC.ForeignPtr (mallocPlainForeignPtrBytes) -#else -import Foreign.ForeignPtr (mallocForeignPtrBytes) -#endif - -#if __GLASGOW_HASKELL__>=605 -import GHC.ForeignPtr (ForeignPtr(ForeignPtr)) -import GHC.Base (nullAddr#) -#else -import Foreign.Ptr (nullPtr) -#endif - --- CFILES stuff is Hugs only -{-# CFILES cbits/fpstring.c #-} - --- ----------------------------------------------------------------------------- --- --- Useful macros, until we have bang patterns --- - -#define STRICT1(f) f a | a `seq` False = undefined -#define STRICT2(f) f a b | a `seq` b `seq` False = undefined -#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined -#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined -#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined - --- ----------------------------------------------------------------------------- - --- | A space-efficient representation of a Word8 vector, supporting many --- efficient operations. A 'ByteString' contains 8-bit characters only. --- --- Instances of Eq, Ord, Read, Show, Data, Typeable --- -data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) - {-# UNPACK #-} !Int -- offset - {-# UNPACK #-} !Int -- length - -#if defined(__GLASGOW_HASKELL__) - deriving (Data, Typeable) -#endif - -instance Show ByteString where - showsPrec p ps r = showsPrec p (unpackWith w2c ps) r - -instance Read ByteString where - readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ] - --- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. -unpackWith :: (Word8 -> a) -> ByteString -> [a] -unpackWith _ (PS _ _ 0) = [] -unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> - go (p `plusPtr` s) (l - 1) [] - where - STRICT3(go) - go p 0 acc = peek p >>= \e -> return (k e : acc) - go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc) -{-# INLINE unpackWith #-} -{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} - --- | /O(n)/ Convert a '[a]' into a 'ByteString' using some --- conversion function -packWith :: (a -> Word8) -> [a] -> ByteString -packWith k str = unsafeCreate (length str) $ \p -> go p str - where - STRICT2(go) - go _ [] = return () - go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff -{-# INLINE packWith #-} -{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} - ------------------------------------------------------------------------- - --- | A space-efficient representation of a Word8 vector, supporting many --- efficient operations. A 'ByteString' contains 8-bit characters only. --- --- Instances of Eq, Ord, Read, Show, Data, Typeable --- -newtype LazyByteString = LPS [ByteString] -- LPS for lazy packed string - deriving (Show,Read -#if defined(__GLASGOW_HASKELL__) - ,Data, Typeable -#endif - ) - ------------------------------------------------------------------------- - --- | /O(1)/ The empty 'ByteString' -empty :: ByteString -empty = PS nullForeignPtr 0 0 - -nullForeignPtr :: ForeignPtr Word8 -#if __GLASGOW_HASKELL__>=605 -nullForeignPtr = ForeignPtr nullAddr# undefined --TODO: should ForeignPtrContents be strict? -#else -nullForeignPtr = unsafePerformIO $ newForeignPtr_ nullPtr -{-# NOINLINE nullForeignPtr #-} -#endif - --- --------------------------------------------------------------------- --- --- Extensions to the basic interface --- - --- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the --- check for the empty case, so there is an obligation on the programmer --- to provide a proof that the ByteString is non-empty. -unsafeHead :: ByteString -> Word8 -unsafeHead (PS x s l) = assert (l > 0) $ - inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s -{-# INLINE unsafeHead #-} - --- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the --- check for the empty case. As with 'unsafeHead', the programmer must --- provide a separate proof that the ByteString is non-empty. -unsafeTail :: ByteString -> ByteString -unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1) -{-# INLINE unsafeTail #-} - --- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8' --- This omits the bounds check, which means there is an accompanying --- obligation on the programmer to ensure the bounds are checked in some --- other way. -unsafeIndex :: ByteString -> Int -> Word8 -unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $ - inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i) -{-# INLINE unsafeIndex #-} - --- | A variety of 'take' which omits the checks on @n@ so there is an --- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. -unsafeTake :: Int -> ByteString -> ByteString -unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n -{-# INLINE unsafeTake #-} - --- | A variety of 'drop' which omits the checks on @n@ so there is an --- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. -unsafeDrop :: Int -> ByteString -> ByteString -unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n) -{-# INLINE unsafeDrop #-} - --- --------------------------------------------------------------------- --- Low level constructors - --- | /O(1)/ Build a ByteString from a ForeignPtr -fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString -fromForeignPtr fp l = PS fp 0 l - --- | /O(1)/ Deconstruct a ForeignPtr from a ByteString -toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -toForeignPtr (PS ps s l) = (ps, s, l) - --- | A way of creating ByteStrings outside the IO monad. The @Int@ --- argument gives the final size of the ByteString. Unlike --- 'createAndTrim' the ByteString is not reallocated if the final size --- is less than the estimated size. -unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString -unsafeCreate l f = unsafePerformIO (create l f) -{-# INLINE unsafeCreate #-} - --- | Create ByteString of size @l@ and use action @f@ to fill it's contents. -create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString -create l f = do - fp <- mallocByteString l - withForeignPtr fp $ \p -> f p - return $! PS fp 0 l - --- | Given the maximum size needed and a function to make the contents --- of a ByteString, createAndTrim makes the 'ByteString'. The generating --- function is required to return the actual final size (<= the maximum --- size), and the resulting byte array is realloced to this size. --- --- createAndTrim is the main mechanism for creating custom, efficient --- ByteString functions, using Haskell or C functions to fill the space. --- -createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString -createAndTrim l f = do - fp <- mallocByteString l - withForeignPtr fp $ \p -> do - l' <- f p - if assert (l' <= l) $ l' >= l - then return $! PS fp 0 l - else create l' $ \p' -> memcpy p' p (fromIntegral l') - -createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) -createAndTrim' l f = do - fp <- mallocByteString l - withForeignPtr fp $ \p -> do - (off, l', res) <- f p - if assert (l' <= l) $ l' >= l - then return $! (PS fp 0 l, res) - else do ps <- create l' $ \p' -> - memcpy p' (p `plusPtr` off) (fromIntegral l') - return $! (ps, res) - --- | Wrapper of mallocForeignPtrBytes with faster implementation --- for GHC 6.5 builds newer than 06/06/06 -mallocByteString :: Int -> IO (ForeignPtr a) -mallocByteString l = do -#if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR) - mallocPlainForeignPtrBytes l -#else - mallocForeignPtrBytes l -#endif - -#if defined(__GLASGOW_HASKELL__) --- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an --- Addr\# (an arbitrary machine address assumed to point outside the --- garbage-collected heap) into a @ByteString@. A much faster way to --- create an Addr\# is with an unboxed string literal, than to pack a --- boxed string. A unboxed string literal is compiled to a static @char --- []@ by GHC. Establishing the length of the string requires a call to --- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as --- is the case with "string"# literals in GHC). Use 'unsafePackAddress' --- if you know the length of the string statically. --- --- An example: --- --- > literalFS = packAddress "literal"# --- -packAddress :: Addr# -> ByteString -packAddress addr# = inlinePerformIO $ do - p <- newForeignPtr_ cstr - l <- c_strlen cstr - return $ PS p 0 (fromIntegral l) - where - cstr = Ptr addr# -{-# INLINE packAddress #-} - --- | /O(1)/ 'unsafePackAddress' provides constant-time construction of --- 'ByteStrings' -- which is ideal for string literals. It packs a --- null-terminated sequence of bytes into a 'ByteString', given a raw --- 'Addr\#' to the string, and the length of the string. Make sure the --- length is correct, otherwise use the safer 'packAddress' (where the --- length will be calculated once at runtime). -unsafePackAddress :: Int -> Addr# -> ByteString -unsafePackAddress len addr# = inlinePerformIO $ do - p <- newForeignPtr_ cstr - return $ PS p 0 len - where cstr = Ptr addr# - --- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a --- length, and an IO action representing a finalizer. This function is --- not available on Hugs. --- -packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString -packCStringFinalizer p l f = do - fp <- FC.newForeignPtr p f - return $ PS fp 0 l - --- | Explicitly run the finaliser associated with a 'ByteString'. --- Further references to this value may generate invalid memory --- references. This operation is unsafe, as there may be other --- 'ByteStrings' referring to the same underlying pages. If you use --- this, you need to have a proof of some kind that all 'ByteString's --- ever generated from the underlying byte array are no longer live. -unsafeFinalize :: ByteString -> IO () -unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p - -#endif - ------------------------------------------------------------------------- - --- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. -w2c :: Word8 -> Char -#if !defined(__GLASGOW_HASKELL__) -w2c = chr . fromIntegral -#else -w2c = unsafeChr . fromIntegral -#endif -{-# INLINE w2c #-} - --- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and --- silently truncates to 8 bits Chars > '\255'. It is provided as --- convenience for ByteString construction. -c2w :: Char -> Word8 -c2w = fromIntegral . ord -{-# INLINE c2w #-} - --- Selects white-space characters in the Latin-1 range --- ordered by frequency --- Idea from Ketil -isSpaceWord8 :: Word8 -> Bool -isSpaceWord8 w = case w of - 0x20 -> True -- SPACE - 0x0A -> True -- LF, \n - 0x09 -> True -- HT, \t - 0x0C -> True -- FF, \f - 0x0D -> True -- CR, \r - 0x0B -> True -- VT, \v - 0xA0 -> True -- spotted by QC.. - _ -> False -{-# INLINE isSpaceWord8 #-} - ------------------------------------------------------------------------- --- | Just like unsafePerformIO, but we inline it. Big performance gains as --- it exposes lots of things to further inlining --- -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -#if defined(__GLASGOW_HASKELL__) -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -#else -inlinePerformIO = unsafePerformIO -#endif - --- | Count the number of occurrences of each byte. --- -{-# SPECIALIZE countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () #-} -countOccurrences :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO () -STRICT3(countOccurrences) -countOccurrences counts str l = go 0 - where - STRICT1(go) - go i | i == l = return () - | otherwise = do k <- fromIntegral `fmap` peekElemOff str i - x <- peekElemOff counts k - pokeElemOff counts k (x + 1) - go (i + 1) - --- | /O(1) construction/ Use a @ByteString@ with a function requiring a --- @CString@. Warning: modifying the @CString@ will affect the --- @ByteString@. Why is this function unsafe? It relies on the null --- byte at the end of the ByteString to be there. Unless you can --- guarantee the null byte, you should use the safe version, which will --- copy the string first. -unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a -unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s) - --- | /O(1) construction/ Use a @ByteString@ with a function requiring a --- @CStringLen@. -unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a -unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l) - --- --------------------------------------------------------------------- --- --- Standard C functions --- - -foreign import ccall unsafe "string.h strlen" c_strlen - :: CString -> IO CSize - -foreign import ccall unsafe "stdlib.h malloc" c_malloc - :: CSize -> IO (Ptr Word8) - -foreign import ccall unsafe "static stdlib.h free" c_free - :: Ptr Word8 -> IO () - -foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer - :: FunPtr (Ptr Word8 -> IO ()) - -foreign import ccall unsafe "string.h memchr" c_memchr - :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) - -memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) -memchr p w s = c_memchr p (fromIntegral w) s - -foreign import ccall unsafe "string.h memcmp" memcmp - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt - -foreign import ccall unsafe "string.h memcpy" c_memcpy - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) - -memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () -memcpy p q s = do c_memcpy p q s - return () - -foreign import ccall unsafe "string.h memmove" c_memmove - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) - -memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () -memmove p q s = do c_memmove p q s - return () - -foreign import ccall unsafe "string.h memset" c_memset - :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) - -memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) -memset p w s = c_memset p (fromIntegral w) s - --- --------------------------------------------------------------------- --- --- Uses our C code --- - -foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse - :: Ptr Word8 -> Ptr Word8 -> CULong -> IO () - -foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse - :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO () - -foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum - :: Ptr Word8 -> CULong -> IO Word8 - -foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum - :: Ptr Word8 -> CULong -> IO Word8 - -foreign import ccall unsafe "static fpstring.h fps_count" c_count - :: Ptr Word8 -> CULong -> Word8 -> IO CULong - --- --------------------------------------------------------------------- --- Internal GHC Haskell magic - -#if defined(__GLASGOW_HASKELL__) -foreign import ccall unsafe "__hscore_memcpy_src_off" - memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) -#endif diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs deleted file mode 100644 index a62ae57..0000000 --- a/Data/ByteString/Char8.hs +++ /dev/null @@ -1,995 +0,0 @@ -{-# OPTIONS_GHC -cpp -fglasgow-exts #-} --- | --- Module : Data.ByteString.Char8 --- Copyright : (c) Don Stewart 2006 --- License : BSD-style --- --- Maintainer : dons@cse.unsw.edu.au --- Stability : experimental --- Portability : portable --- --- Manipulate 'ByteString's using 'Char' operations. All Chars will be --- truncated to 8 bits. It can be expected that these functions will run --- at identical speeds to their 'Word8' equivalents in "Data.ByteString". --- --- More specifically these byte strings are taken to be in the --- subset of Unicode covered by code points 0-255. This covers --- Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls. --- --- See: --- --- * --- --- * --- --- * --- --- This module is intended to be imported @qualified@, to avoid name --- clashes with "Prelude" functions. eg. --- --- > import qualified Data.ByteString.Char8 as B --- - -module Data.ByteString.Char8 ( - - -- * The @ByteString@ type - ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid - - -- * Introducing and eliminating 'ByteString's - empty, -- :: ByteString - singleton, -- :: Char -> ByteString - pack, -- :: String -> ByteString - unpack, -- :: ByteString -> String - - -- * Basic interface - cons, -- :: Char -> ByteString -> ByteString - snoc, -- :: ByteString -> Char -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - head, -- :: ByteString -> Char - last, -- :: ByteString -> Char - tail, -- :: ByteString -> ByteString - init, -- :: ByteString -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int - - -- * Transformating ByteStrings - map, -- :: (Char -> Char) -> ByteString -> ByteString - reverse, -- :: ByteString -> ByteString - intersperse, -- :: Char -> ByteString -> ByteString - transpose, -- :: [ByteString] -> [ByteString] - - -- * Reducing 'ByteString's (folds) - foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a - foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a - foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char - foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char - - foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a - foldr', -- :: (Char -> a -> a) -> a -> ByteString -> a - foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char - foldr1', -- :: (Char -> Char -> Char) -> ByteString -> Char - - -- ** Special folds - concat, -- :: [ByteString] -> ByteString - concatMap, -- :: (Char -> ByteString) -> ByteString -> ByteString - any, -- :: (Char -> Bool) -> ByteString -> Bool - all, -- :: (Char -> Bool) -> ByteString -> Bool - maximum, -- :: ByteString -> Char - minimum, -- :: ByteString -> Char - - -- * Building ByteStrings - -- ** Scans - scanl, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString - scanl1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString - scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString - scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString - - -- ** Accumulating maps - mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) - mapAccumR, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) - mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString - - -- * Generating and unfolding ByteStrings - replicate, -- :: Int -> Char -> ByteString - unfoldr, -- :: (a -> Maybe (Char, a)) -> a -> ByteString - unfoldrN, -- :: Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a) - - -- * Substrings - - -- ** Breaking strings - take, -- :: Int -> ByteString -> ByteString - drop, -- :: Int -> ByteString -> ByteString - splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) - takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString - dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString - span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - spanEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - breakEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString] - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - - -- ** Breaking into many substrings - split, -- :: Char -> ByteString -> [ByteString] - splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] - - -- ** Breaking into lines and words - lines, -- :: ByteString -> [ByteString] - words, -- :: ByteString -> [ByteString] - unlines, -- :: [ByteString] -> ByteString - unwords, -- :: ByteString -> [ByteString] - - -- ** Joining strings - join, -- :: ByteString -> [ByteString] -> ByteString - - -- ** Searching for substrings - isPrefixOf, -- :: ByteString -> ByteString -> Bool - isSuffixOf, -- :: ByteString -> ByteString -> Bool - isSubstringOf, -- :: ByteString -> ByteString -> Bool - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] - - -- * Searching ByteStrings - - -- ** Searching by equality - elem, -- :: Char -> ByteString -> Bool - notElem, -- :: Char -> ByteString -> Bool - - -- ** Searching with a predicate - find, -- :: (Char -> Bool) -> ByteString -> Maybe Char - filter, -- :: (Char -> Bool) -> ByteString -> ByteString --- partition -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - - -- * Indexing ByteStrings - index, -- :: ByteString -> Int -> Char - elemIndex, -- :: Char -> ByteString -> Maybe Int - elemIndices, -- :: Char -> ByteString -> [Int] - elemIndexEnd, -- :: Char -> ByteString -> Maybe Int - findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int - findIndices, -- :: (Char -> Bool) -> ByteString -> [Int] - count, -- :: Char -> ByteString -> Int - - -- * Zipping and unzipping ByteStrings - zip, -- :: ByteString -> ByteString -> [(Char,Char)] - zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c] - unzip, -- :: [(Char,Char)] -> (ByteString,ByteString) - - -- * Ordered ByteStrings - sort, -- :: ByteString -> ByteString - - -- * Reading from ByteStrings - readInt, -- :: ByteString -> Maybe (Int, ByteString) - readInteger, -- :: ByteString -> Maybe (Integer, ByteString) - - -- * Low level CString conversions - - -- ** Packing CStrings and pointers - packCString, -- :: CString -> ByteString - packCStringLen, -- :: CString -> ByteString - packMallocCString, -- :: CString -> ByteString - - -- ** Using ByteStrings as CStrings - useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a - useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a - - -- * Copying ByteStrings - copy, -- :: ByteString -> ByteString - copyCString, -- :: CString -> IO ByteString - copyCStringLen, -- :: CStringLen -> IO ByteString - - -- * I\/O with @ByteString@s - - -- ** Standard input and output - getLine, -- :: IO ByteString - getContents, -- :: IO ByteString - putStr, -- :: ByteString -> IO () - putStrLn, -- :: ByteString -> IO () - interact, -- :: (ByteString -> ByteString) -> IO () - - -- ** Files - readFile, -- :: FilePath -> IO ByteString - writeFile, -- :: FilePath -> ByteString -> IO () - appendFile, -- :: FilePath -> ByteString -> IO () --- mmapFile, -- :: FilePath -> IO ByteString - - -- ** I\/O with Handles - hGetLine, -- :: Handle -> IO ByteString - hGetNonBlocking, -- :: Handle -> Int -> IO ByteString - hGetContents, -- :: Handle -> IO ByteString - hGet, -- :: Handle -> Int -> IO ByteString - hPut, -- :: Handle -> ByteString -> IO () - hPutStr, -- :: Handle -> ByteString -> IO () - hPutStrLn, -- :: Handle -> ByteString -> IO () - -#if defined(__GLASGOW_HASKELL__) - -- * Low level construction - -- | For constructors from foreign language types see "Data.ByteString" - packAddress, -- :: Addr# -> ByteString - unsafePackAddress, -- :: Int -> Addr# -> ByteString -#endif - - -- * Utilities (needed for array fusion) -#if defined(__GLASGOW_HASKELL__) - unpackList, -#endif - - ) where - -import qualified Prelude as P -import Prelude hiding (reverse,head,tail,last,init,null - ,length,map,lines,foldl,foldr,unlines - ,concat,any,take,drop,splitAt,takeWhile - ,dropWhile,span,break,elem,filter,unwords - ,words,maximum,minimum,all,concatMap - ,scanl,scanl1,scanr,scanr1 - ,appendFile,readFile,writeFile - ,foldl1,foldr1,replicate - ,getContents,getLine,putStr,putStrLn,interact - ,zip,zipWith,unzip,notElem) - -import qualified Data.ByteString as B -import qualified Data.ByteString.Base as B - --- Listy functions transparently exported -import Data.ByteString (empty,null,length,tail,init,append - ,inits,tails,reverse,transpose - ,concat,take,drop,splitAt,join - ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring - ,findSubstrings,copy,group - - ,getLine, getContents, putStr, putStrLn, interact - ,hGetContents, hGet, hPut, hPutStr, hPutStrLn - ,hGetLine, hGetNonBlocking - ,packCString,packCStringLen, packMallocCString - ,useAsCString,useAsCStringLen, copyCString,copyCStringLen -#if defined(__GLASGOW_HASKELL__) - ,unpackList -#endif - ) - -import Data.ByteString.Base ( - ByteString(..) -#if defined(__GLASGOW_HASKELL__) - ,packAddress, unsafePackAddress -#endif - ,c2w, w2c, unsafeTail, isSpaceWord8, inlinePerformIO - ) - -import Data.Char ( isSpace ) -import qualified Data.List as List (intersperse) - -import System.IO (openFile,hClose,hFileSize,IOMode(..)) -import Control.Exception (bracket) -import Foreign - -#if defined(__GLASGOW_HASKELL__) -import GHC.Base (Char(..),unpackCString#,ord#,int2Word#) -import GHC.IOBase (IO(..),stToIO) -import GHC.Prim (Addr#,writeWord8OffAddr#,plusAddr#) -import GHC.Ptr (Ptr(..)) -import GHC.ST (ST(..)) -#endif - -#define STRICT1(f) f a | a `seq` False = undefined -#define STRICT2(f) f a b | a `seq` b `seq` False = undefined -#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined -#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined - ------------------------------------------------------------------------- - --- | /O(1)/ Convert a 'Char' into a 'ByteString' -singleton :: Char -> ByteString -singleton = B.singleton . c2w -{-# INLINE singleton #-} - --- | /O(n)/ Convert a 'String' into a 'ByteString' --- --- For applications with large numbers of string literals, pack can be a --- bottleneck. In such cases, consider using packAddress (GHC only). -pack :: String -> ByteString -#if !defined(__GLASGOW_HASKELL__) - -pack str = B.unsafeCreate (P.length str) $ \p -> go p str - where go _ [] = return () - go p (x:xs) = poke p (c2w x) >> go (p `plusPtr` 1) xs - -#else /* hack away */ - -pack str = B.unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p str) - where - go :: Addr# -> [Char] -> ST a () - go _ [] = return () - go p (C# c:cs) = writeByte p (int2Word# (ord# c)) >> go (p `plusAddr#` 1#) cs - - writeByte p c = ST $ \s# -> - case writeWord8OffAddr# p 0# c s# of s2# -> (# s2#, () #) - {-# INLINE writeByte #-} -{-# INLINE [1] pack #-} - -{-# RULES - "FPS pack/packAddress" forall s . - pack (unpackCString# s) = B.packAddress s - #-} - -#endif - --- | /O(n)/ Converts a 'ByteString' to a 'String'. -unpack :: ByteString -> [Char] -unpack = P.map w2c . B.unpack -{-# INLINE unpack #-} - --- | /O(n)/ 'cons' is analogous to (:) for lists, but of different --- complexity, as it requires a memcpy. -cons :: Char -> ByteString -> ByteString -cons = B.cons . c2w -{-# INLINE cons #-} - --- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to --- 'cons', this function performs a memcpy. -snoc :: ByteString -> Char -> ByteString -snoc p = B.snoc p . c2w -{-# INLINE snoc #-} - --- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. -head :: ByteString -> Char -head = w2c . B.head -{-# INLINE head #-} - --- | /O(1)/ Extract the last element of a packed string, which must be non-empty. -last :: ByteString -> Char -last = w2c . B.last -{-# INLINE last #-} - --- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@ -map :: (Char -> Char) -> ByteString -> ByteString -map f = B.map (c2w . f . w2c) -{-# INLINE map #-} - --- | /O(n)/ The 'intersperse' function takes a Char and a 'ByteString' --- and \`intersperses\' that Char between the elements of the --- 'ByteString'. It is analogous to the intersperse function on Lists. -intersperse :: Char -> ByteString -> ByteString -intersperse = B.intersperse . c2w -{-# INLINE intersperse #-} - --- | 'foldl', applied to a binary operator, a starting value (typically --- the left-identity of the operator), and a ByteString, reduces the --- ByteString using the binary operator, from left to right. -foldl :: (a -> Char -> a) -> a -> ByteString -> a -foldl f = B.foldl (\a c -> f a (w2c c)) -{-# INLINE foldl #-} - --- | 'foldl\'' is like foldl, but strict in the accumulator. -foldl' :: (a -> Char -> a) -> a -> ByteString -> a -foldl' f = B.foldl' (\a c -> f a (w2c c)) -{-# INLINE foldl' #-} - --- | 'foldr', applied to a binary operator, a starting value --- (typically the right-identity of the operator), and a packed string, --- reduces the packed string using the binary operator, from right to left. -foldr :: (Char -> a -> a) -> a -> ByteString -> a -foldr f = B.foldr (\c a -> f (w2c c) a) -{-# INLINE foldr #-} - --- | 'foldr\'' is a strict variant of foldr -foldr' :: (Char -> a -> a) -> a -> ByteString -> a -foldr' f = B.foldr' (\c a -> f (w2c c) a) -{-# INLINE foldr' #-} - --- | 'foldl1' is a variant of 'foldl' that has no starting value --- argument, and thus must be applied to non-empty 'ByteStrings'. -foldl1 :: (Char -> Char -> Char) -> ByteString -> Char -foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) -{-# INLINE foldl1 #-} - --- | A strict version of 'foldl1' -foldl1' :: (Char -> Char -> Char) -> ByteString -> Char -foldl1' f ps = w2c (B.foldl1' (\x y -> c2w (f (w2c x) (w2c y))) ps) -{-# INLINE foldl1' #-} - --- | 'foldr1' is a variant of 'foldr' that has no starting value argument, --- and thus must be applied to non-empty 'ByteString's -foldr1 :: (Char -> Char -> Char) -> ByteString -> Char -foldr1 f ps = w2c (B.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps) -{-# INLINE foldr1 #-} - --- | A strict variant of foldr1 -foldr1' :: (Char -> Char -> Char) -> ByteString -> Char -foldr1' f ps = w2c (B.foldr1' (\x y -> c2w (f (w2c x) (w2c y))) ps) -{-# INLINE foldr1' #-} - --- | Map a function over a 'ByteString' and concatenate the results -concatMap :: (Char -> ByteString) -> ByteString -> ByteString -concatMap f = B.concatMap (f . w2c) -{-# INLINE concatMap #-} - --- | Applied to a predicate and a ByteString, 'any' determines if --- any element of the 'ByteString' satisfies the predicate. -any :: (Char -> Bool) -> ByteString -> Bool -any f = B.any (f . w2c) -{-# INLINE any #-} - --- | Applied to a predicate and a 'ByteString', 'all' determines if --- all elements of the 'ByteString' satisfy the predicate. -all :: (Char -> Bool) -> ByteString -> Bool -all f = B.all (f . w2c) -{-# INLINE all #-} - --- | 'maximum' returns the maximum value from a 'ByteString' -maximum :: ByteString -> Char -maximum = w2c . B.maximum -{-# INLINE maximum #-} - --- | 'minimum' returns the minimum value from a 'ByteString' -minimum :: ByteString -> Char -minimum = w2c . B.minimum -{-# INLINE minimum #-} - --- | /O(n)/ map Char functions, provided with the index at each position -mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString -mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c))) -{-# INLINE mapIndexed #-} - --- | The 'mapAccumL' function behaves like a combination of 'map' and --- 'foldl'; it applies a function to each element of a ByteString, --- passing an accumulating parameter from left to right, and returning a --- final value of this accumulator together with the new list. -mapAccumL :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) -mapAccumL f = B.mapAccumL (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c)) - --- | The 'mapAccumR' function behaves like a combination of 'map' and --- 'foldr'; it applies a function to each element of a ByteString, --- passing an accumulating parameter from right to left, and returning a --- final value of this accumulator together with the new ByteString. -mapAccumR :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) -mapAccumR f = B.mapAccumR (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c)) - --- | 'scanl' is similar to 'foldl', but returns a list of successive --- reduced values from the left: --- --- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] --- --- Note that --- --- > last (scanl f z xs) == foldl f z xs. -scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString -scanl f z = B.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) - --- | 'scanl1' is a variant of 'scanl' that has no starting value argument: --- --- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] -scanl1 :: (Char -> Char -> Char) -> ByteString -> ByteString -scanl1 f = B.scanl1 (\a b -> c2w (f (w2c a) (w2c b))) - --- | scanr is the right-to-left dual of scanl. -scanr :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString -scanr f z = B.scanr (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) - --- | 'scanr1' is a variant of 'scanr' that has no starting value argument. -scanr1 :: (Char -> Char -> Char) -> ByteString -> ByteString -scanr1 f = B.scanr1 (\a b -> c2w (f (w2c a) (w2c b))) - --- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ --- the value of every element. The following holds: --- --- > replicate w c = unfoldr w (\u -> Just (u,u)) c --- --- This implemenation uses @memset(3)@ -replicate :: Int -> Char -> ByteString -replicate w = B.replicate w . c2w -{-# INLINE replicate #-} - --- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' --- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a --- ByteString from a seed value. The function takes the element and --- returns 'Nothing' if it is done producing the ByteString or returns --- 'Just' @(a,b)@, in which case, @a@ is the next character in the string, --- and @b@ is the seed value for further production. --- --- Examples: --- --- > unfoldr (\x -> if x <= '9' then Just (x, succ x) else Nothing) '0' == "0123456789" -unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString -unfoldr f x0 = B.unfoldr (fmap k . f) x0 - where k (i, j) = (c2w i, j) - --- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed --- value. However, the length of the result is limited by the first --- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' --- when the maximum length of the result is known. --- --- The following equation relates 'unfoldrN' and 'unfoldr': --- --- > unfoldrN n f s == take n (unfoldr f s) -unfoldrN :: Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a) -unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f) w - where k (i,j) = (c2w i, j) -{-# INLINE unfoldrN #-} - --- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, --- returns the longest prefix (possibly empty) of @xs@ of elements that --- satisfy @p@. -takeWhile :: (Char -> Bool) -> ByteString -> ByteString -takeWhile f = B.takeWhile (f . w2c) -{-# INLINE takeWhile #-} - --- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. -dropWhile :: (Char -> Bool) -> ByteString -> ByteString -dropWhile f = B.dropWhile (f . w2c) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] dropWhile #-} -#endif - --- | 'break' @p@ is equivalent to @'span' ('not' . p)@. -break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -break f = B.break (f . w2c) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] break #-} -#endif - --- | 'span' @p xs@ breaks the ByteString into two segments. It is --- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ -span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -span f = B.span (f . w2c) -{-# INLINE span #-} - --- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. --- We have --- --- > spanEnd (not.isSpace) "x y z" == ("x y ","z") --- --- and --- --- > spanEnd (not . isSpace) ps --- > == --- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) --- -spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -spanEnd f = B.spanEnd (f . w2c) -{-# INLINE spanEnd #-} - --- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' --- --- breakEnd p == spanEnd (not.p) -breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -breakEnd f = B.breakEnd (f . w2c) -{-# INLINE breakEnd #-} - -{- --- | 'breakChar' breaks its ByteString argument at the first occurence --- of the specified Char. It is more efficient than 'break' as it is --- implemented with @memchr(3)@. I.e. --- --- > break (=='c') "abcd" == breakChar 'c' "abcd" --- -breakChar :: Char -> ByteString -> (ByteString, ByteString) -breakChar = B.breakByte . c2w -{-# INLINE breakChar #-} - --- | 'spanChar' breaks its ByteString argument at the first --- occurence of a Char other than its argument. It is more efficient --- than 'span (==)' --- --- > span (=='c') "abcd" == spanByte 'c' "abcd" --- -spanChar :: Char -> ByteString -> (ByteString, ByteString) -spanChar = B.spanByte . c2w -{-# INLINE spanChar #-} --} - --- | /O(n)/ Break a 'ByteString' into pieces separated by the byte --- argument, consuming the delimiter. I.e. --- --- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] --- > split 'a' "aXaXaXa" == ["","X","X","X",""] --- > split 'x' "x" == ["",""] --- --- and --- --- > join [c] . split c == id --- > split == splitWith . (==) --- --- As for all splitting functions in this library, this function does --- not copy the substrings, it just constructs new 'ByteStrings' that --- are slices of the original. --- -split :: Char -> ByteString -> [ByteString] -split = B.split . c2w -{-# INLINE split #-} - --- | /O(n)/ Splits a 'ByteString' into components delimited by --- separators, where the predicate returns True for a separator element. --- The resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] --- -splitWith :: (Char -> Bool) -> ByteString -> [ByteString] -splitWith f = B.splitWith (f . w2c) -{-# INLINE splitWith #-} --- the inline makes a big difference here. - -{- --- | Like 'splitWith', except that sequences of adjacent separators are --- treated as a single separator. eg. --- --- > tokens (=='a') "aabbaca" == ["bb","c"] --- -tokens :: (Char -> Bool) -> ByteString -> [ByteString] -tokens f = B.tokens (f . w2c) -{-# INLINE tokens #-} --} - --- | The 'groupBy' function is the non-overloaded version of 'group'. -groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString] -groupBy k = B.groupBy (\a b -> k (w2c a) (w2c b)) - -{- --- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a --- char. Around 4 times faster than the generalised join. --- -joinWithChar :: Char -> ByteString -> ByteString -> ByteString -joinWithChar = B.joinWithByte . c2w -{-# INLINE joinWithChar #-} --} - --- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. -index :: ByteString -> Int -> Char -index = (w2c .) . B.index -{-# INLINE index #-} - --- | /O(n)/ The 'elemIndex' function returns the index of the first --- element in the given 'ByteString' which is equal (by memchr) to the --- query element, or 'Nothing' if there is no such element. -elemIndex :: Char -> ByteString -> Maybe Int -elemIndex = B.elemIndex . c2w -{-# INLINE elemIndex #-} - --- | /O(n)/ The 'elemIndexEnd' function returns the last index of the --- element in the given 'ByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. The following --- holds: --- --- > elemIndexEnd c xs == --- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) --- -elemIndexEnd :: Char -> ByteString -> Maybe Int -elemIndexEnd = B.elemIndexEnd . c2w -{-# INLINE elemIndexEnd #-} - --- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning --- the indices of all elements equal to the query element, in ascending order. -elemIndices :: Char -> ByteString -> [Int] -elemIndices = B.elemIndices . c2w -{-# INLINE elemIndices #-} - --- | The 'findIndex' function takes a predicate and a 'ByteString' and --- returns the index of the first element in the ByteString satisfying the predicate. -findIndex :: (Char -> Bool) -> ByteString -> Maybe Int -findIndex f = B.findIndex (f . w2c) -{-# INLINE findIndex #-} - --- | The 'findIndices' function extends 'findIndex', by returning the --- indices of all elements satisfying the predicate, in ascending order. -findIndices :: (Char -> Bool) -> ByteString -> [Int] -findIndices f = B.findIndices (f . w2c) - --- | count returns the number of times its argument appears in the ByteString --- --- > count = length . elemIndices --- --- Also --- --- > count '\n' == length . lines --- --- But more efficiently than using length on the intermediate list. -count :: Char -> ByteString -> Int -count c = B.count (c2w c) - --- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This --- implementation uses @memchr(3)@. -elem :: Char -> ByteString -> Bool -elem c = B.elem (c2w c) -{-# INLINE elem #-} - --- | /O(n)/ 'notElem' is the inverse of 'elem' -notElem :: Char -> ByteString -> Bool -notElem c = B.notElem (c2w c) -{-# INLINE notElem #-} - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. -filter :: (Char -> Bool) -> ByteString -> ByteString -filter f = B.filter (f . w2c) -{-# INLINE filter #-} - --- | /O(n)/ The 'find' function takes a predicate and a ByteString, --- and returns the first element in matching the predicate, or 'Nothing' --- if there is no such element. -find :: (Char -> Bool) -> ByteString -> Maybe Char -find f ps = w2c `fmap` B.find (f . w2c) ps -{-# INLINE find #-} - -{- --- | /O(n)/ A first order equivalent of /filter . (==)/, for the common --- case of filtering a single Char. It is more efficient to use --- filterChar in this case. --- --- > filterChar == filter . (==) --- --- filterChar is around 10x faster, and uses much less space, than its --- filter equivalent --- -filterChar :: Char -> ByteString -> ByteString -filterChar c = B.filterByte (c2w c) -{-# INLINE filterChar #-} - --- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common --- case of filtering a single Char out of a list. It is more efficient --- to use /filterNotChar/ in this case. --- --- > filterNotChar == filter . (/=) --- --- filterNotChar is around 3x faster, and uses much less space, than its --- filter equivalent --- -filterNotChar :: Char -> ByteString -> ByteString -filterNotChar c = B.filterNotByte (c2w c) -{-# INLINE filterNotChar #-} --} - --- | /O(n)/ 'zip' takes two ByteStrings and returns a list of --- corresponding pairs of Chars. If one input ByteString is short, --- excess elements of the longer ByteString are discarded. This is --- equivalent to a pair of 'unpack' operations, and so space --- usage may be large for multi-megabyte ByteStrings -zip :: ByteString -> ByteString -> [(Char,Char)] -zip ps qs - | B.null ps || B.null qs = [] - | otherwise = (unsafeHead ps, unsafeHead qs) : zip (B.unsafeTail ps) (B.unsafeTail qs) - --- | 'zipWith' generalises 'zip' by zipping with the function given as --- the first argument, instead of a tupling function. For example, --- @'zipWith' (+)@ is applied to two ByteStrings to produce the list --- of corresponding sums. -zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a] -zipWith f = B.zipWith ((. w2c) . f . w2c) - --- | 'unzip' transforms a list of pairs of Chars into a pair of --- ByteStrings. Note that this performs two 'pack' operations. -unzip :: [(Char,Char)] -> (ByteString,ByteString) -unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) -{-# INLINE unzip #-} - --- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits --- the check for the empty case, which is good for performance, but --- there is an obligation on the programmer to provide a proof that the --- ByteString is non-empty. -unsafeHead :: ByteString -> Char -unsafeHead = w2c . B.unsafeHead -{-# INLINE unsafeHead #-} - --- --------------------------------------------------------------------- --- Things that depend on the encoding - -{-# RULES - "FPS specialise break -> breakSpace" - break isSpace = breakSpace - #-} - --- | 'breakSpace' returns the pair of ByteStrings when the argument is --- broken at the first whitespace byte. I.e. --- --- > break isSpace == breakSpace --- -breakSpace :: ByteString -> (ByteString,ByteString) -breakSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - i <- firstspace (p `plusPtr` s) 0 l - return $! case () of {_ - | i == 0 -> (empty, PS x s l) - | i == l -> (PS x s l, empty) - | otherwise -> (PS x s i, PS x (s+i) (l-i)) - } -{-# INLINE breakSpace #-} - -firstspace :: Ptr Word8 -> Int -> Int -> IO Int -STRICT3(firstspace) -firstspace ptr n m - | n >= m = return n - | otherwise = do w <- peekByteOff ptr n - if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n - -{-# RULES - "FPS specialise dropWhile isSpace -> dropSpace" - dropWhile isSpace = dropSpace - #-} - --- | 'dropSpace' efficiently returns the 'ByteString' argument with --- white space Chars removed from the front. It is more efficient than --- calling dropWhile for removing whitespace. I.e. --- --- > dropWhile isSpace == dropSpace --- -dropSpace :: ByteString -> ByteString -dropSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - i <- firstnonspace (p `plusPtr` s) 0 l - return $! if i == l then empty else PS x (s+i) (l-i) -{-# INLINE dropSpace #-} - -firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int -STRICT3(firstnonspace) -firstnonspace ptr n m - | n >= m = return n - | otherwise = do w <- peekElemOff ptr n - if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n - -{- --- | 'dropSpaceEnd' efficiently returns the 'ByteString' argument with --- white space removed from the end. I.e. --- --- > reverse . (dropWhile isSpace) . reverse == dropSpaceEnd --- --- but it is more efficient than using multiple reverses. --- -dropSpaceEnd :: ByteString -> ByteString -dropSpaceEnd (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - i <- lastnonspace (p `plusPtr` s) (l-1) - return $! if i == (-1) then empty else PS x s (i+1) -{-# INLINE dropSpaceEnd #-} - -lastnonspace :: Ptr Word8 -> Int -> IO Int -STRICT2(lastnonspace) -lastnonspace ptr n - | n < 0 = return n - | otherwise = do w <- peekElemOff ptr n - if isSpaceWord8 w then lastnonspace ptr (n-1) else return n --} - --- | 'lines' breaks a ByteString up into a list of ByteStrings at --- newline Chars. The resulting strings do not contain newlines. --- -lines :: ByteString -> [ByteString] -lines ps - | null ps = [] - | otherwise = case search ps of - Nothing -> [ps] - Just n -> take n ps : lines (drop (n+1) ps) - where search = elemIndex '\n' -{-# INLINE lines #-} - -{- --- Just as fast, but more complex. Should be much faster, I thought. -lines :: ByteString -> [ByteString] -lines (PS _ _ 0) = [] -lines (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do - let ptr = p `plusPtr` s - - STRICT1(loop) - loop n = do - let q = memchr (ptr `plusPtr` n) 0x0a (fromIntegral (l-n)) - if q == nullPtr - then return [PS x (s+n) (l-n)] - else do let i = q `minusPtr` ptr - ls <- loop (i+1) - return $! PS x (s+n) (i-n) : ls - loop 0 --} - --- | 'unlines' is an inverse operation to 'lines'. It joins lines, --- after appending a terminating newline to each. -unlines :: [ByteString] -> ByteString -unlines [] = empty -unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space - where nl = singleton '\n' - --- | 'words' breaks a ByteString up into a list of words, which --- were delimited by Chars representing white space. And --- --- > tokens isSpace = words --- -words :: ByteString -> [ByteString] -words = P.filter (not . B.null) . B.splitWith isSpaceWord8 -{-# INLINE words #-} - --- | The 'unwords' function is analogous to the 'unlines' function, on words. -unwords :: [ByteString] -> ByteString -unwords = join (singleton ' ') -{-# INLINE unwords #-} - --- --------------------------------------------------------------------- --- Reading from ByteStrings - --- | readInt reads an Int from the beginning of the ByteString. If there is no --- integer at the beginning of the string, it returns Nothing, otherwise --- it just returns the int read, and the rest of the string. -readInt :: ByteString -> Maybe (Int, ByteString) -readInt as - | null as = Nothing - | otherwise = - case unsafeHead as of - '-' -> loop True 0 0 (unsafeTail as) - '+' -> loop False 0 0 (unsafeTail as) - _ -> loop False 0 0 as - - where loop :: Bool -> Int -> Int -> ByteString -> Maybe (Int, ByteString) - STRICT4(loop) - loop neg i n ps - | null ps = end neg i n ps - | otherwise = - case B.unsafeHead ps of - w | w >= 0x30 - && w <= 0x39 -> loop neg (i+1) - (n * 10 + (fromIntegral w - 0x30)) - (unsafeTail ps) - | otherwise -> end neg i n ps - - end _ 0 _ _ = Nothing - end True _ n ps = Just (negate n, ps) - end _ _ n ps = Just (n, ps) - --- | readInteger reads an Integer from the beginning of the ByteString. If --- there is no integer at the beginning of the string, it returns Nothing, --- otherwise it just returns the int read, and the rest of the string. -readInteger :: ByteString -> Maybe (Integer, ByteString) -readInteger as - | null as = Nothing - | otherwise = - case unsafeHead as of - '-' -> first (unsafeTail as) >>= \(n, bs) -> return (-n, bs) - '+' -> first (unsafeTail as) - _ -> first as - - where first ps | null ps = Nothing - | otherwise = - case B.unsafeHead ps of - w | w >= 0x30 && w <= 0x39 -> Just $ - loop 1 (fromIntegral w - 0x30) [] (unsafeTail ps) - | otherwise -> Nothing - - loop :: Int -> Int -> [Integer] - -> ByteString -> (Integer, ByteString) - STRICT4(loop) - loop d acc ns ps - | null ps = combine d acc ns empty - | otherwise = - case B.unsafeHead ps of - w | w >= 0x30 && w <= 0x39 -> - if d == 9 then loop 1 (fromIntegral w - 0x30) - (toInteger acc : ns) - (unsafeTail ps) - else loop (d+1) - (10*acc + (fromIntegral w - 0x30)) - ns (unsafeTail ps) - | otherwise -> combine d acc ns ps - - combine _ acc [] ps = (toInteger acc, ps) - combine d acc ns ps = - ((10^d * combine1 1000000000 ns + toInteger acc), ps) - - combine1 _ [n] = n - combine1 b ns = combine1 (b*b) $ combine2 b ns - - combine2 b (n:m:ns) = let t = m*b + n in t `seq` (t : combine2 b ns) - combine2 _ ns = ns - --- | Read an entire file strictly into a 'ByteString'. This is far more --- efficient than reading the characters into a 'String' and then using --- 'pack'. It also may be more efficient than opening the file and --- reading it using hGet. -readFile :: FilePath -> IO ByteString -readFile f = bracket (openFile f ReadMode) hClose - (\h -> hFileSize h >>= hGet h . fromIntegral) - --- | Write a 'ByteString' to a file. -writeFile :: FilePath -> ByteString -> IO () -writeFile f txt = bracket (openFile f WriteMode) hClose - (\h -> hPut h txt) - --- | Append a 'ByteString' to a file. -appendFile :: FilePath -> ByteString -> IO () -appendFile f txt = bracket (openFile f AppendMode) hClose - (\h -> hPut h txt) - diff --git a/Data/ByteString/Fusion.hs b/Data/ByteString/Fusion.hs deleted file mode 100644 index 7862c91..0000000 --- a/Data/ByteString/Fusion.hs +++ /dev/null @@ -1,699 +0,0 @@ -{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-} --- | --- Module : Data.ByteString.Fusion --- License : BSD-style --- Maintainer : dons@cse.unsw.edu.au --- Stability : experimental --- Portability : portable --- --- Functional array fusion for ByteStrings. --- --- Originally based on code from the Data Parallel Haskell project, --- --- - --- #hide -module Data.ByteString.Fusion ( - - -- * Fusion utilities - loopU, loopL, fuseEFL, - NoAcc(NoAcc), loopArr, loopAcc, loopSndAcc, unSP, - mapEFL, filterEFL, foldEFL, foldEFL', scanEFL, mapAccumEFL, mapIndexEFL, - - -- ** Alternative Fusion stuff - -- | This replaces 'loopU' with 'loopUp' - -- and adds several further special cases of loops. - loopUp, loopDown, loopNoAcc, loopMap, loopFilter, - loopWrapper, sequenceLoops, - doUpLoop, doDownLoop, doNoAccLoop, doMapLoop, doFilterLoop, - - -- | These are the special fusion cases for combining each loop form perfectly. - fuseAccAccEFL, fuseAccNoAccEFL, fuseNoAccAccEFL, fuseNoAccNoAccEFL, - fuseMapAccEFL, fuseAccMapEFL, fuseMapNoAccEFL, fuseNoAccMapEFL, - fuseMapMapEFL, fuseAccFilterEFL, fuseFilterAccEFL, fuseNoAccFilterEFL, - fuseFilterNoAccEFL, fuseFilterFilterEFL, fuseMapFilterEFL, fuseFilterMapEFL, - - -- * Strict pairs and sums - PairS(..), MaybeS(..) - - ) where - -import Data.ByteString.Base - -import Foreign.ForeignPtr -import Foreign.Ptr -import Foreign.Storable (Storable(..)) - -import Data.Word (Word8) -import System.IO.Unsafe (unsafePerformIO) - --- ----------------------------------------------------------------------------- --- --- Useful macros, until we have bang patterns --- - -#define STRICT1(f) f a | a `seq` False = undefined -#define STRICT2(f) f a b | a `seq` b `seq` False = undefined -#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined -#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined -#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined - -infixl 2 :*: - --- |Strict pair -data PairS a b = !a :*: !b deriving (Eq,Ord,Show) - --- |Strict Maybe -data MaybeS a = NothingS | JustS !a deriving (Eq,Ord,Show) - --- |Data type for accumulators which can be ignored. The rewrite rules rely on --- the fact that no bottoms of this type are ever constructed; hence, we can --- assume @(_ :: NoAcc) `seq` x = x@. --- -data NoAcc = NoAcc - --- |Type of loop functions -type AccEFL acc = acc -> Word8 -> (PairS acc (MaybeS Word8)) -type NoAccEFL = Word8 -> MaybeS Word8 -type MapEFL = Word8 -> Word8 -type FilterEFL = Word8 -> Bool - -infixr 9 `fuseEFL` - --- |Fuse to flat loop functions -fuseEFL :: AccEFL acc1 -> AccEFL acc2 -> AccEFL (PairS acc1 acc2) -fuseEFL f g (acc1 :*: acc2) e1 = - case f acc1 e1 of - acc1' :*: NothingS -> (acc1' :*: acc2) :*: NothingS - acc1' :*: JustS e2 -> - case g acc2 e2 of - acc2' :*: res -> (acc1' :*: acc2') :*: res -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] fuseEFL #-} -#endif - --- | Special forms of loop arguments --- --- * These are common special cases for the three function arguments of gen --- and loop; we give them special names to make it easier to trigger RULES --- applying in the special cases represented by these arguments. The --- "INLINE [1]" makes sure that these functions are only inlined in the last --- two simplifier phases. --- --- * In the case where the accumulator is not needed, it is better to always --- explicitly return a value `()', rather than just copy the input to the --- output, as the former gives GHC better local information. --- - --- | Element function expressing a mapping only -#if !defined(LOOPNOACC_FUSION) -mapEFL :: (Word8 -> Word8) -> AccEFL NoAcc -mapEFL f = \_ e -> (NoAcc :*: (JustS $ f e)) -#else -mapEFL :: (Word8 -> Word8) -> NoAccEFL -mapEFL f = \e -> JustS (f e) -#endif -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] mapEFL #-} -#endif - --- | Element function implementing a filter function only -#if !defined(LOOPNOACC_FUSION) -filterEFL :: (Word8 -> Bool) -> AccEFL NoAcc -filterEFL p = \_ e -> if p e then (NoAcc :*: JustS e) else (NoAcc :*: NothingS) -#else -filterEFL :: (Word8 -> Bool) -> NoAccEFL -filterEFL p = \e -> if p e then JustS e else NothingS -#endif - -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] filterEFL #-} -#endif - --- |Element function expressing a reduction only -foldEFL :: (acc -> Word8 -> acc) -> AccEFL acc -foldEFL f = \a e -> (f a e :*: NothingS) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] foldEFL #-} -#endif - --- | A strict foldEFL. -foldEFL' :: (acc -> Word8 -> acc) -> AccEFL acc -foldEFL' f = \a e -> let a' = f a e in a' `seq` (a' :*: NothingS) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] foldEFL' #-} -#endif - --- | Element function expressing a prefix reduction only --- -scanEFL :: (Word8 -> Word8 -> Word8) -> AccEFL Word8 -scanEFL f = \a e -> (f a e :*: JustS a) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] scanEFL #-} -#endif - --- | Element function implementing a map and fold --- -mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> AccEFL acc -mapAccumEFL f = \a e -> case f a e of (a', e') -> (a' :*: JustS e') -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] mapAccumEFL #-} -#endif - --- | Element function implementing a map with index --- -mapIndexEFL :: (Int -> Word8 -> Word8) -> AccEFL Int -mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i' :*: JustS (f i e)) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] mapIndexEFL #-} -#endif - --- | Projection functions that are fusion friendly (as in, we determine when --- they are inlined) -loopArr :: (PairS acc arr) -> arr -loopArr (_ :*: arr) = arr -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopArr #-} -#endif - -loopAcc :: (PairS acc arr) -> acc -loopAcc (acc :*: _) = acc -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopAcc #-} -#endif - -loopSndAcc :: (PairS (PairS acc1 acc2) arr) -> (PairS acc2 arr) -loopSndAcc ((_ :*: acc) :*: arr) = (acc :*: arr) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopSndAcc #-} -#endif - -unSP :: (PairS acc arr) -> (acc, arr) -unSP (acc :*: arr) = (acc, arr) -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] unSP #-} -#endif - ------------------------------------------------------------------------- --- --- Loop combinator and fusion rules for flat arrays --- |Iteration over over ByteStrings - --- | Iteration over over ByteStrings -loopU :: AccEFL acc -- ^ mapping & folding, once per elem - -> acc -- ^ initial acc value - -> ByteString -- ^ input ByteString - -> (PairS acc ByteString) - -loopU f start (PS z s i) = unsafePerformIO $ withForeignPtr z $ \a -> do - (ps, acc) <- createAndTrim' i $ \p -> do - (acc' :*: i') <- go (a `plusPtr` s) p start - return (0, i', acc') - return (acc :*: ps) - - where - go p ma = trans 0 0 - where - STRICT3(trans) - trans a_off ma_off acc - | a_off >= i = return (acc :*: ma_off) - | otherwise = do - x <- peekByteOff p a_off - let (acc' :*: oe) = f acc x - ma_off' <- case oe of - NothingS -> return ma_off - JustS e -> do pokeByteOff ma ma_off e - return $ ma_off + 1 - trans (a_off+1) ma_off' acc' - -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopU #-} -#endif - -{-# RULES - -"FPS loop/loop fusion!" forall em1 em2 start1 start2 arr. - loopU em2 start2 (loopArr (loopU em1 start1 arr)) = - loopSndAcc (loopU (em1 `fuseEFL` em2) (start1 :*: start2) arr) - - #-} - --- --- Functional list/array fusion for lazy ByteStrings. --- -loopL :: AccEFL acc -- ^ mapping & folding, once per elem - -> acc -- ^ initial acc value - -> [ByteString] -- ^ input ByteString - -> PairS acc [ByteString] -loopL f = loop - where loop s [] = (s :*: []) - loop s (x:xs) - | l == 0 = (s'' :*: ys) - | otherwise = (s'' :*: y:ys) - where (s' :*: y@(PS _ _ l)) = loopU f s x -- avoid circular dep on P.null - (s'' :*: ys) = loop s' xs - -#if defined(__GLASGOW_HASKELL__) -{-# INLINE [1] loopL #-} -#endif - -{-# RULES - -"FPS lazy loop/loop fusion!" forall em1 em2 start1 start2 arr. - loopL em2 start2 (loopArr (loopL em1 start1 arr)) = - loopSndAcc (loopL (em1 `fuseEFL` em2) (start1 :*: start2) arr) - - #-} - - -{- - -Alternate experimental formulation of loopU which partitions it into -an allocating wrapper and an imperitive array-mutating loop. - -The point in doing this split is that we might be able to fuse multiple -loops into a single wrapper. This would save reallocating another buffer. -It should also give better cache locality by reusing the buffer. - -Note that this stuff needs ghc-6.5 from May 26 or later for the RULES to -really work reliably. - --} - -loopUp :: AccEFL acc -> acc -> ByteString -> PairS acc ByteString -loopUp f a arr = loopWrapper (doUpLoop f a) arr -{-# INLINE loopUp #-} - -loopDown :: AccEFL acc -> acc -> ByteString -> PairS acc ByteString -loopDown f a arr = loopWrapper (doDownLoop f a) arr -{-# INLINE loopDown #-} - -loopNoAcc :: NoAccEFL -> ByteString -> PairS NoAcc ByteString -loopNoAcc f arr = loopWrapper (doNoAccLoop f NoAcc) arr -{-# INLINE loopNoAcc #-} - -loopMap :: MapEFL -> ByteString -> PairS NoAcc ByteString -loopMap f arr = loopWrapper (doMapLoop f NoAcc) arr -{-# INLINE loopMap #-} - -loopFilter :: FilterEFL -> ByteString -> PairS NoAcc ByteString -loopFilter f arr = loopWrapper (doFilterLoop f NoAcc) arr -{-# INLINE loopFilter #-} - --- The type of imperitive loops that fill in a destination array by --- reading a source array. They may not fill in the whole of the dest --- array if the loop is behaving as a filter, this is why we return --- the length that was filled in. The loop may also accumulate some --- value as it loops over the source array. --- -type ImperativeLoop acc = - Ptr Word8 -- pointer to the start of the source byte array - -> Ptr Word8 -- pointer to ther start of the destination byte array - -> Int -- length of the source byte array - -> IO (PairS (PairS acc Int) Int) -- result and offset, length of dest that was filled - -loopWrapper :: ImperativeLoop acc -> ByteString -> PairS acc ByteString -loopWrapper body (PS srcFPtr srcOffset srcLen) = unsafePerformIO $ - withForeignPtr srcFPtr $ \srcPtr -> do - (ps, acc) <- createAndTrim' srcLen $ \destPtr -> do - (acc :*: destOffset :*: destLen) <- - body (srcPtr `plusPtr` srcOffset) destPtr srcLen - return (destOffset, destLen, acc) - return (acc :*: ps) - -doUpLoop :: AccEFL acc -> acc -> ImperativeLoop acc -doUpLoop f acc0 src dest len = loop 0 0 acc0 - where STRICT3(loop) - loop src_off dest_off acc - | src_off >= len = return (acc :*: 0 :*: dest_off) - | otherwise = do - x <- peekByteOff src src_off - case f acc x of - (acc' :*: NothingS) -> loop (src_off+1) dest_off acc' - (acc' :*: JustS x') -> pokeByteOff dest dest_off x' - >> loop (src_off+1) (dest_off+1) acc' - -doDownLoop :: AccEFL acc -> acc -> ImperativeLoop acc -doDownLoop f acc0 src dest len = loop (len-1) (len-1) acc0 - where STRICT3(loop) - loop src_off dest_off acc - | src_off < 0 = return (acc :*: dest_off + 1 :*: len - (dest_off + 1)) - | otherwise = do - x <- peekByteOff src src_off - case f acc x of - (acc' :*: NothingS) -> loop (src_off-1) dest_off acc' - (acc' :*: JustS x') -> pokeByteOff dest dest_off x' - >> loop (src_off-1) (dest_off-1) acc' - -doNoAccLoop :: NoAccEFL -> noAcc -> ImperativeLoop noAcc -doNoAccLoop f noAcc src dest len = loop 0 0 - where STRICT2(loop) - loop src_off dest_off - | src_off >= len = return (noAcc :*: 0 :*: dest_off) - | otherwise = do - x <- peekByteOff src src_off - case f x of - NothingS -> loop (src_off+1) dest_off - JustS x' -> pokeByteOff dest dest_off x' - >> loop (src_off+1) (dest_off+1) - -doMapLoop :: MapEFL -> noAcc -> ImperativeLoop noAcc -doMapLoop f noAcc src dest len = loop 0 - where STRICT1(loop) - loop n - | n >= len = return (noAcc :*: 0 :*: len) - | otherwise = do - x <- peekByteOff src n - pokeByteOff dest n (f x) - loop (n+1) -- offset always the same, only pass 1 arg - -doFilterLoop :: FilterEFL -> noAcc -> ImperativeLoop noAcc -doFilterLoop f noAcc src dest len = loop 0 0 - where STRICT2(loop) - loop src_off dest_off - | src_off >= len = return (noAcc :*: 0 :*: dest_off) - | otherwise = do - x <- peekByteOff src src_off - if f x - then pokeByteOff dest dest_off x - >> loop (src_off+1) (dest_off+1) - else loop (src_off+1) dest_off - --- run two loops in sequence, --- think of it as: loop1 >> loop2 -sequenceLoops :: ImperativeLoop acc1 - -> ImperativeLoop acc2 - -> ImperativeLoop (PairS acc1 acc2) -sequenceLoops loop1 loop2 src dest len0 = do - (acc1 :*: off1 :*: len1) <- loop1 src dest len0 - (acc2 :*: off2 :*: len2) <- - let src' = dest `plusPtr` off1 - dest' = src' -- note that we are using dest == src - -- for the second loop as we are - -- mutating the dest array in-place! - in loop2 src' dest' len1 - return ((acc1 :*: acc2) :*: off1 + off2 :*: len2) - - -- TODO: prove that this is associative! (I think it is) - -- since we can't be sure how the RULES will combine loops. - -#if defined(__GLASGOW_HASKELL__) - -{-# INLINE [1] doUpLoop #-} -{-# INLINE [1] doDownLoop #-} -{-# INLINE [1] doNoAccLoop #-} -{-# INLINE [1] doMapLoop #-} -{-# INLINE [1] doFilterLoop #-} - -{-# INLINE [1] loopWrapper #-} -{-# INLINE [1] sequenceLoops #-} - -{-# INLINE [1] fuseAccAccEFL #-} -{-# INLINE [1] fuseAccNoAccEFL #-} -{-# INLINE [1] fuseNoAccAccEFL #-} -{-# INLINE [1] fuseNoAccNoAccEFL #-} -{-# INLINE [1] fuseMapAccEFL #-} -{-# INLINE [1] fuseAccMapEFL #-} -{-# INLINE [1] fuseMapNoAccEFL #-} -{-# INLINE [1] fuseNoAccMapEFL #-} -{-# INLINE [1] fuseMapMapEFL #-} -{-# INLINE [1] fuseAccFilterEFL #-} -{-# INLINE [1] fuseFilterAccEFL #-} -{-# INLINE [1] fuseNoAccFilterEFL #-} -{-# INLINE [1] fuseFilterNoAccEFL #-} -{-# INLINE [1] fuseFilterFilterEFL #-} -{-# INLINE [1] fuseMapFilterEFL #-} -{-# INLINE [1] fuseFilterMapEFL #-} - -#endif - -{-# RULES - -"FPS loopArr/loopSndAcc" forall x. - loopArr (loopSndAcc x) = loopArr x - -"FPS seq/NoAcc" forall (u::NoAcc) e. - u `seq` e = e - -"FPS loop/loop wrapper elimination" forall loop1 loop2 arr. - loopWrapper loop2 (loopArr (loopWrapper loop1 arr)) = - loopSndAcc (loopWrapper (sequenceLoops loop1 loop2) arr) - --- --- n.b in the following, when reading n/m fusion, recall sequenceLoops --- is monadic, so its really n >> m fusion (i.e. m.n), not n . m fusion. --- - -"FPS up/up loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doUpLoop f1 acc1) (doUpLoop f2 acc2) = - doUpLoop (f1 `fuseAccAccEFL` f2) (acc1 :*: acc2) - -"FPS map/map loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doMapLoop f1 acc1) (doMapLoop f2 acc2) = - doMapLoop (f1 `fuseMapMapEFL` f2) (acc1 :*: acc2) - -"FPS filter/filter loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doFilterLoop f1 acc1) (doFilterLoop f2 acc2) = - doFilterLoop (f1 `fuseFilterFilterEFL` f2) (acc1 :*: acc2) - -"FPS map/filter loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doMapLoop f1 acc1) (doFilterLoop f2 acc2) = - doNoAccLoop (f1 `fuseMapFilterEFL` f2) (acc1 :*: acc2) - -"FPS filter/map loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doFilterLoop f1 acc1) (doMapLoop f2 acc2) = - doNoAccLoop (f1 `fuseFilterMapEFL` f2) (acc1 :*: acc2) - -"FPS map/up loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doMapLoop f1 acc1) (doUpLoop f2 acc2) = - doUpLoop (f1 `fuseMapAccEFL` f2) (acc1 :*: acc2) - -"FPS up/map loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doUpLoop f1 acc1) (doMapLoop f2 acc2) = - doUpLoop (f1 `fuseAccMapEFL` f2) (acc1 :*: acc2) - -"FPS filter/up loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doFilterLoop f1 acc1) (doUpLoop f2 acc2) = - doUpLoop (f1 `fuseFilterAccEFL` f2) (acc1 :*: acc2) - -"FPS up/filter loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doUpLoop f1 acc1) (doFilterLoop f2 acc2) = - doUpLoop (f1 `fuseAccFilterEFL` f2) (acc1 :*: acc2) - -"FPS down/down loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doDownLoop f1 acc1) (doDownLoop f2 acc2) = - doDownLoop (f1 `fuseAccAccEFL` f2) (acc1 :*: acc2) - -"FPS map/down fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doMapLoop f1 acc1) (doDownLoop f2 acc2) = - doDownLoop (f1 `fuseMapAccEFL` f2) (acc1 :*: acc2) - -"FPS down/map loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doDownLoop f1 acc1) (doMapLoop f2 acc2) = - doDownLoop (f1 `fuseAccMapEFL` f2) (acc1 :*: acc2) - -"FPS filter/down fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doFilterLoop f1 acc1) (doDownLoop f2 acc2) = - doDownLoop (f1 `fuseFilterAccEFL` f2) (acc1 :*: acc2) - -"FPS down/filter loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doDownLoop f1 acc1) (doFilterLoop f2 acc2) = - doDownLoop (f1 `fuseAccFilterEFL` f2) (acc1 :*: acc2) - -"FPS noAcc/noAcc loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doNoAccLoop f1 acc1) (doNoAccLoop f2 acc2) = - doNoAccLoop (f1 `fuseNoAccNoAccEFL` f2) (acc1 :*: acc2) - -"FPS noAcc/up loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doNoAccLoop f1 acc1) (doUpLoop f2 acc2) = - doUpLoop (f1 `fuseNoAccAccEFL` f2) (acc1 :*: acc2) - -"FPS up/noAcc loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doUpLoop f1 acc1) (doNoAccLoop f2 acc2) = - doUpLoop (f1 `fuseAccNoAccEFL` f2) (acc1 :*: acc2) - -"FPS map/noAcc loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doMapLoop f1 acc1) (doNoAccLoop f2 acc2) = - doNoAccLoop (f1 `fuseMapNoAccEFL` f2) (acc1 :*: acc2) - -"FPS noAcc/map loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doNoAccLoop f1 acc1) (doMapLoop f2 acc2) = - doNoAccLoop (f1 `fuseNoAccMapEFL` f2) (acc1 :*: acc2) - -"FPS filter/noAcc loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doFilterLoop f1 acc1) (doNoAccLoop f2 acc2) = - doNoAccLoop (f1 `fuseFilterNoAccEFL` f2) (acc1 :*: acc2) - -"FPS noAcc/filter loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doNoAccLoop f1 acc1) (doFilterLoop f2 acc2) = - doNoAccLoop (f1 `fuseNoAccFilterEFL` f2) (acc1 :*: acc2) - -"FPS noAcc/down loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doNoAccLoop f1 acc1) (doDownLoop f2 acc2) = - doDownLoop (f1 `fuseNoAccAccEFL` f2) (acc1 :*: acc2) - -"FPS down/noAcc loop fusion" forall f1 f2 acc1 acc2. - sequenceLoops (doDownLoop f1 acc1) (doNoAccLoop f2 acc2) = - doDownLoop (f1 `fuseAccNoAccEFL` f2) (acc1 :*: acc2) - - #-} - -{- - -up = up loop -down = down loop -map = map special case -filter = filter special case -noAcc = noAcc undirectional loop (unused) - -heirarchy: - up down - ^ ^ - \ / - noAcc - ^ ^ - / \ - map filter - -each is a special case of the things above - -so we get rules that combine things on the same level -and rules that combine things on different levels -to get something on the higher level - -so all the cases: -up/up --> up fuseAccAccEFL -down/down --> down fuseAccAccEFL -noAcc/noAcc --> noAcc fuseNoAccNoAccEFL - -noAcc/up --> up fuseNoAccAccEFL -up/noAcc --> up fuseAccNoAccEFL -noAcc/down --> down fuseNoAccAccEFL -down/noAcc --> down fuseAccNoAccEFL - -and if we do the map, filter special cases then it adds a load more: - -map/map --> map fuseMapMapEFL -filter/filter --> filter fuseFilterFilterEFL - -map/filter --> noAcc fuseMapFilterEFL -filter/map --> noAcc fuseFilterMapEFL - -map/noAcc --> noAcc fuseMapNoAccEFL -noAcc/map --> noAcc fuseNoAccMapEFL - -map/up --> up fuseMapAccEFL -up/map --> up fuseAccMapEFL - -map/down --> down fuseMapAccEFL -down/map --> down fuseAccMapEFL - -filter/noAcc --> noAcc fuseNoAccFilterEFL -noAcc/filter --> noAcc fuseFilterNoAccEFL - -filter/up --> up fuseFilterAccEFL -up/filter --> up fuseAccFilterEFL - -filter/down --> down fuseFilterAccEFL -down/filter --> down fuseAccFilterEFL --} - -fuseAccAccEFL :: AccEFL acc1 -> AccEFL acc2 -> AccEFL (PairS acc1 acc2) -fuseAccAccEFL f g (acc1 :*: acc2) e1 = - case f acc1 e1 of - acc1' :*: NothingS -> (acc1' :*: acc2) :*: NothingS - acc1' :*: JustS e2 -> - case g acc2 e2 of - acc2' :*: res -> (acc1' :*: acc2') :*: res - -fuseAccNoAccEFL :: AccEFL acc -> NoAccEFL -> AccEFL (PairS acc noAcc) -fuseAccNoAccEFL f g (acc :*: noAcc) e1 = - case f acc e1 of - acc' :*: NothingS -> (acc' :*: noAcc) :*: NothingS - acc' :*: JustS e2 -> (acc' :*: noAcc) :*: g e2 - -fuseNoAccAccEFL :: NoAccEFL -> AccEFL acc -> AccEFL (PairS noAcc acc) -fuseNoAccAccEFL f g (noAcc :*: acc) e1 = - case f e1 of - NothingS -> (noAcc :*: acc) :*: NothingS - JustS e2 -> - case g acc e2 of - acc' :*: res -> (noAcc :*: acc') :*: res - -fuseNoAccNoAccEFL :: NoAccEFL -> NoAccEFL -> NoAccEFL -fuseNoAccNoAccEFL f g e1 = - case f e1 of - NothingS -> NothingS - JustS e2 -> g e2 - -fuseMapAccEFL :: MapEFL -> AccEFL acc -> AccEFL (PairS noAcc acc) -fuseMapAccEFL f g (noAcc :*: acc) e1 = - case g acc (f e1) of - (acc' :*: res) -> (noAcc :*: acc') :*: res - -fuseAccMapEFL :: AccEFL acc -> MapEFL -> AccEFL (PairS acc noAcc) -fuseAccMapEFL f g (acc :*: noAcc) e1 = - case f acc e1 of - (acc' :*: NothingS) -> (acc' :*: noAcc) :*: NothingS - (acc' :*: JustS e2) -> (acc' :*: noAcc) :*: JustS (g e2) - -fuseMapMapEFL :: MapEFL -> MapEFL -> MapEFL -fuseMapMapEFL f g e1 = g (f e1) -- n.b. perfect fusion - -fuseMapNoAccEFL :: MapEFL -> NoAccEFL -> NoAccEFL -fuseMapNoAccEFL f g e1 = g (f e1) - -fuseNoAccMapEFL :: NoAccEFL -> MapEFL -> NoAccEFL -fuseNoAccMapEFL f g e1 = - case f e1 of - NothingS -> NothingS - JustS e2 -> JustS (g e2) - -fuseAccFilterEFL :: AccEFL acc -> FilterEFL -> AccEFL (PairS acc noAcc) -fuseAccFilterEFL f g (acc :*: noAcc) e1 = - case f acc e1 of - acc' :*: NothingS -> (acc' :*: noAcc) :*: NothingS - acc' :*: JustS e2 -> - case g e2 of - False -> (acc' :*: noAcc) :*: NothingS - True -> (acc' :*: noAcc) :*: JustS e2 - -fuseFilterAccEFL :: FilterEFL -> AccEFL acc -> AccEFL (PairS noAcc acc) -fuseFilterAccEFL f g (noAcc :*: acc) e1 = - case f e1 of - False -> (noAcc :*: acc) :*: NothingS - True -> - case g acc e1 of - acc' :*: res -> (noAcc :*: acc') :*: res - -fuseNoAccFilterEFL :: NoAccEFL -> FilterEFL -> NoAccEFL -fuseNoAccFilterEFL f g e1 = - case f e1 of - NothingS -> NothingS - JustS e2 -> - case g e2 of - False -> NothingS - True -> JustS e2 - -fuseFilterNoAccEFL :: FilterEFL -> NoAccEFL -> NoAccEFL -fuseFilterNoAccEFL f g e1 = - case f e1 of - False -> NothingS - True -> g e1 - -fuseFilterFilterEFL :: FilterEFL -> FilterEFL -> FilterEFL -fuseFilterFilterEFL f g e1 = f e1 && g e1 - -fuseMapFilterEFL :: MapEFL -> FilterEFL -> NoAccEFL -fuseMapFilterEFL f g e1 = - case f e1 of - e2 -> case g e2 of - False -> NothingS - True -> JustS e2 - -fuseFilterMapEFL :: FilterEFL -> MapEFL -> NoAccEFL -fuseFilterMapEFL f g e1 = - case f e1 of - False -> NothingS - True -> JustS (g e1) - diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs deleted file mode 100644 index c9d3bdb..0000000 --- a/Data/ByteString/Lazy.hs +++ /dev/null @@ -1,1293 +0,0 @@ -{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-} --- | --- Module : Data.ByteString.Lazy --- Copyright : (c) Don Stewart 2006 --- (c) Duncan Coutts 2006 --- License : BSD-style --- --- Maintainer : dons@cse.unsw.edu.au --- Stability : experimental --- Portability : non-portable (instance of type synonym) --- --- A time and space-efficient implementation of lazy byte vectors --- using lists of packed 'Word8' arrays, suitable for high performance --- use, both in terms of large data quantities, or high speed --- requirements. Byte vectors are encoded as lazy lists of strict 'Word8' --- arrays of bytes. They provide a means to manipulate large byte vectors --- without requiring the entire vector be resident in memory. --- --- Some operations, such as concat, append, reverse and cons, have --- better complexity than their "Data.ByteString" equivalents, due to --- optimisations resulting from the list spine structure. And for other --- operations lazy ByteStrings are usually within a few percent of --- strict ones, but with better heap usage. For data larger than the --- available memory, or if you have tight memory constraints, this --- module will be the only option. The default chunk size is 64k, which --- should be good in most circumstances. For people with large L2 --- caches, you may want to increase this to fit your cache. --- --- This module is intended to be imported @qualified@, to avoid name --- clashes with "Prelude" functions. eg. --- --- > import qualified Data.ByteString.Lazy as B --- --- Original GHC implementation by Bryan O\'Sullivan. --- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. --- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr' --- by David Roundy. --- Polished and extended by Don Stewart. --- Lazy variant by Duncan Coutts and Don Stewart. --- - -module Data.ByteString.Lazy ( - - -- * The @ByteString@ type - ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable - - -- * Introducing and eliminating 'ByteString's - empty, -- :: ByteString - singleton, -- :: Word8 -> ByteString - pack, -- :: [Word8] -> ByteString - unpack, -- :: ByteString -> [Word8] - fromChunks, -- :: [Strict.ByteString] -> ByteString - toChunks, -- :: ByteString -> [Strict.ByteString] - - -- * Basic interface - cons, -- :: Word8 -> ByteString -> ByteString - snoc, -- :: ByteString -> Word8 -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - head, -- :: ByteString -> Word8 - last, -- :: ByteString -> Word8 - tail, -- :: ByteString -> ByteString - init, -- :: ByteString -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int64 - - -- * Transformating ByteStrings - map, -- :: (Word8 -> Word8) -> ByteString -> ByteString - reverse, -- :: ByteString -> ByteString --- intersperse, -- :: Word8 -> ByteString -> ByteString - transpose, -- :: [ByteString] -> [ByteString] - - -- * Reducing 'ByteString's (folds) - foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a - foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a - foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a - foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - - -- ** Special folds - concat, -- :: [ByteString] -> ByteString - concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString - any, -- :: (Word8 -> Bool) -> ByteString -> Bool - all, -- :: (Word8 -> Bool) -> ByteString -> Bool - maximum, -- :: ByteString -> Word8 - minimum, -- :: ByteString -> Word8 - - -- * Building ByteStrings - -- ** Scans - scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString --- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString --- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString --- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString - - -- ** Accumulating maps - mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) - mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString - - -- ** Infinite ByteStrings - repeat, -- :: Word8 -> ByteString - replicate, -- :: Int64 -> Word8 -> ByteString - cycle, -- :: ByteString -> ByteString - iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString - - -- ** Unfolding - unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString - - -- * Substrings - - -- ** Breaking strings - take, -- :: Int64 -> ByteString -> ByteString - drop, -- :: Int64 -> ByteString -> ByteString - splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString) - takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString - dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString - span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - - -- ** Breaking into many substrings - split, -- :: Word8 -> ByteString -> [ByteString] - splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] - - -- ** Joining strings - join, -- :: ByteString -> [ByteString] -> ByteString - - -- * Predicates - isPrefixOf, -- :: ByteString -> ByteString -> Bool --- isSuffixOf, -- :: ByteString -> ByteString -> Bool - - -- * Searching ByteStrings - - -- ** Searching by equality - elem, -- :: Word8 -> ByteString -> Bool - notElem, -- :: Word8 -> ByteString -> Bool - - -- ** Searching with a predicate - find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 - filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString --- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - - -- * Indexing ByteStrings - index, -- :: ByteString -> Int64 -> Word8 - elemIndex, -- :: Word8 -> ByteString -> Maybe Int64 - elemIndices, -- :: Word8 -> ByteString -> [Int64] - findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64 - findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64] - count, -- :: Word8 -> ByteString -> Int64 - - -- * Zipping and unzipping ByteStrings - zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] - zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] --- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) - - -- * Ordered ByteStrings --- sort, -- :: ByteString -> ByteString - - copy, -- :: ByteString -> ByteString - - -- * I\/O with 'ByteString's - - -- ** Standard input and output - getContents, -- :: IO ByteString - putStr, -- :: ByteString -> IO () - putStrLn, -- :: ByteString -> IO () - interact, -- :: (ByteString -> ByteString) -> IO () - - -- ** Files - readFile, -- :: FilePath -> IO ByteString - writeFile, -- :: FilePath -> ByteString -> IO () - appendFile, -- :: FilePath -> ByteString -> IO () - - -- ** I\/O with Handles - hGetContents, -- :: Handle -> IO ByteString - hGet, -- :: Handle -> Int -> IO ByteString - hPut, -- :: Handle -> ByteString -> IO () - hGetNonBlocking, -- :: Handle -> IO ByteString - --- hGetN, -- :: Int -> Handle -> Int -> IO ByteString --- hGetContentsN, -- :: Int -> Handle -> IO ByteString --- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString - - ) where - -import qualified Prelude -import Prelude hiding - (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines - ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum - ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1 - ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate - ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) - -import qualified Data.List as L -- L for list/lazy -import qualified Data.ByteString as P -- P for packed -import qualified Data.ByteString.Base as P -import Data.ByteString.Base (LazyByteString(..)) -import qualified Data.ByteString.Fusion as P -import Data.ByteString.Fusion (PairS(..),loopL) - -import Data.Monoid (Monoid(..)) - -import Data.Word (Word8) -import Data.Int (Int64) -import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..) - ,hClose,hWaitForInput,hIsEOF) -import System.IO.Unsafe -import Control.Exception (bracket) - -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr -import Foreign.Storable - --- ----------------------------------------------------------------------------- --- --- Useful macros, until we have bang patterns --- - -#define STRICT1(f) f a | a `seq` False = undefined -#define STRICT2(f) f a b | a `seq` b `seq` False = undefined -#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined -#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined -#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined - --- ----------------------------------------------------------------------------- - -type ByteString = LazyByteString - --- --- hmm, what about getting the PS constructor unpacked into the cons cell? --- --- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List --- --- Would avoid one indirection per chunk. --- - -unLPS :: ByteString -> [P.ByteString] -unLPS (LPS xs) = xs -{-# INLINE unLPS #-} - -instance Eq ByteString - where (==) = eq - -instance Ord ByteString - where compare = compareBytes - -instance Monoid ByteString where - mempty = empty - mappend = append - mconcat = concat - ------------------------------------------------------------------------- - --- XXX --- The data type invariant: --- Every ByteString is either empty or consists of non-null ByteStrings. --- All functions must preserve this, and the QC properties must check this. --- -_invariant :: ByteString -> Bool -_invariant (LPS []) = True -_invariant (LPS xs) = L.all (not . P.null) xs - --- In a form useful for QC testing -_checkInvariant :: ByteString -> ByteString -_checkInvariant lps - | _invariant lps = lps - | otherwise = moduleError "invariant" ("violation: " ++ show lps) - --- The Data abstraction function --- -_abstr :: ByteString -> P.ByteString -_abstr (LPS []) = P.empty -_abstr (LPS xs) = P.concat xs - --- The representation uses lists of packed chunks. When we have to convert from --- a lazy list to the chunked representation, then by default we'll use this --- chunk size. Some functions give you more control over the chunk size. --- --- Measurements here: --- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png --- --- indicate that a value around 0.5 to 1 x your L2 cache is best. --- The following value assumes people have something greater than 128k, --- and need to share the cache with other programs. --- -defaultChunkSize :: Int -defaultChunkSize = 32 * k - overhead - where k = 1024 - overhead = 2 * sizeOf (undefined :: Int) - -smallChunkSize :: Int -smallChunkSize = 4 * k - overhead - where k = 1024 - overhead = 2 * sizeOf (undefined :: Int) - --- defaultChunkSize = 1 - ------------------------------------------------------------------------- - -eq :: ByteString -> ByteString -> Bool -eq (LPS xs) (LPS ys) = eq' xs ys - where eq' [] [] = True - eq' [] _ = False - eq' _ [] = False - eq' (a:as) (b:bs) = - case compare (P.length a) (P.length b) of - LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs) - EQ -> a == b && eq' as bs - GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs - -compareBytes :: ByteString -> ByteString -> Ordering -compareBytes (LPS xs) (LPS ys) = cmp xs ys - where cmp [] [] = EQ - cmp [] _ = LT - cmp _ [] = GT - cmp (a:as) (b:bs) = - case compare (P.length a) (P.length b) of - LT -> case compare a (P.take (P.length a) b) of - EQ -> cmp as (P.drop (P.length a) b : bs) - result -> result - EQ -> case compare a b of - EQ -> cmp as bs - result -> result - GT -> case compare (P.take (P.length b) a) b of - EQ -> cmp (P.drop (P.length b) a : as) bs - result -> result - --- ----------------------------------------------------------------------------- --- Introducing and eliminating 'ByteString's - --- | /O(1)/ The empty 'ByteString' -empty :: ByteString -empty = LPS [] -{-# NOINLINE empty #-} - --- | /O(1)/ Convert a 'Word8' into a 'ByteString' -singleton :: Word8 -> ByteString -singleton c = LPS [P.singleton c] -{-# NOINLINE singleton #-} - --- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. -pack :: [Word8] -> ByteString -pack str = LPS $ L.map P.pack (chunk defaultChunkSize str) - --- ? -chunk :: Int -> [a] -> [[a]] -chunk _ [] = [] -chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs'' - --- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. -unpack :: ByteString -> [Word8] -unpack (LPS ss) = L.concatMap P.unpack ss -{-# INLINE unpack #-} - --- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString' -fromChunks :: [P.ByteString] -> ByteString -fromChunks ls = LPS $ L.filter (not . P.null) ls - --- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString' -toChunks :: ByteString -> [P.ByteString] -toChunks (LPS s) = s - ------------------------------------------------------------------------- - -{- --- | /O(n)/ Convert a '[a]' into a 'ByteString' using some --- conversion function -packWith :: (a -> Word8) -> [a] -> ByteString -packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str) -{-# INLINE packWith #-} -{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} - --- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. -unpackWith :: (Word8 -> a) -> ByteString -> [a] -unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss -{-# INLINE unpackWith #-} -{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} --} - --- --------------------------------------------------------------------- --- Basic interface - --- | /O(1)/ Test whether a ByteString is empty. -null :: ByteString -> Bool -null (LPS []) = True -null (_) = False -{-# INLINE null #-} - --- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64' -length :: ByteString -> Int64 -length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss - --- avoid the intermediate list? --- length (LPS ss) = L.foldl lengthF 0 ss --- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m -{-# INLINE length #-} - --- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is --- strict in the ByteString that we are consing onto. More precisely, it forces --- the head and the first chunk. It does this because, for space efficiency, it --- may coalesce the new byte onto the first \'chunk\' rather than starting a --- new \'chunk\'. --- --- So that means you can't use a lazy recursive contruction like this: --- --- > let xs = cons c xs in xs --- --- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings. --- -cons :: Word8 -> ByteString -> ByteString -cons c (LPS (s:ss)) | P.length s < 16 = LPS (P.cons c s : ss) -cons c (LPS ss) = LPS (P.singleton c : ss) -{-# INLINE cons #-} - --- | /O(n\/c)/ Append a byte to the end of a 'ByteString' -snoc :: ByteString -> Word8 -> ByteString -snoc (LPS ss) c = LPS (ss ++ [P.singleton c]) -{-# INLINE snoc #-} - --- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. -head :: ByteString -> Word8 -head (LPS []) = errorEmptyList "head" -head (LPS (x:_)) = P.unsafeHead x -{-# INLINE head #-} - --- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. -tail :: ByteString -> ByteString -tail (LPS []) = errorEmptyList "tail" -tail (LPS (x:xs)) - | P.length x == 1 = LPS xs - | otherwise = LPS (P.unsafeTail x : xs) -{-# INLINE tail #-} - --- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty. -last :: ByteString -> Word8 -last (LPS []) = errorEmptyList "last" -last (LPS xs) = P.last (L.last xs) -{-# INLINE last #-} - --- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one. -init :: ByteString -> ByteString -init (LPS []) = errorEmptyList "init" -init (LPS xs) - | P.length y == 1 = LPS ys - | otherwise = LPS (ys ++ [P.init y]) - where (y,ys) = (L.last xs, L.init xs) -{-# INLINE init #-} - --- | /O(n)/ Append two ByteStrings -append :: ByteString -> ByteString -> ByteString -append (LPS []) (LPS ys) = LPS ys -append (LPS xs) (LPS []) = LPS xs -append (LPS xs) (LPS ys) = LPS (xs ++ ys) -{-# INLINE append #-} - --- --------------------------------------------------------------------- --- Transformations - --- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each --- element of @xs@. -map :: (Word8 -> Word8) -> ByteString -> ByteString ---map f (LPS xs) = LPS (L.map (P.map' f) xs) -map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS -{-# INLINE map #-} - --- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. -reverse :: ByteString -> ByteString -reverse (LPS ps) = LPS (rev [] ps) - where rev a [] = a - rev a (x:xs) = rev (P.reverse x:a) xs --- note, here is one example where the extra element lazyness is an advantage. --- we can reerse the list of chunks strictly but reverse each chunk lazily --- so while we may force the whole lot into memory we do not need to copy --- each chunk until it is used. -{-# INLINE reverse #-} - --- The 'intersperse' function takes a 'Word8' and a 'ByteString' and --- \`intersperses\' that byte between the elements of the 'ByteString'. --- It is analogous to the intersperse function on Lists. --- intersperse :: Word8 -> ByteString -> ByteString --- intersperse = error "FIXME: not yet implemented" - -{- -intersperse c (LPS []) = LPS [] -intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse') - where intersperse' c ps@(PS x s l) = - P.create (2*l) $ \p -> withForeignPtr x $ \f -> - poke p c - c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c --} - --- | The 'transpose' function transposes the rows and columns of its --- 'ByteString' argument. -transpose :: [ByteString] -> [ByteString] -transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s)) - --- --------------------------------------------------------------------- --- Reducing 'ByteString's - --- | 'foldl', applied to a binary operator, a starting value (typically --- the left-identity of the operator), and a ByteString, reduces the --- ByteString using the binary operator, from left to right. -foldl :: (a -> Word8 -> a) -> a -> ByteString -> a ---foldl f z (LPS xs) = L.foldl (P.foldl f) z xs -foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS -{-# INLINE foldl #-} - --- | 'foldl\'' is like 'foldl', but strict in the accumulator. -foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a ---foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs -foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS -{-# INLINE foldl' #-} - --- | 'foldr', applied to a binary operator, a starting value --- (typically the right-identity of the operator), and a ByteString, --- reduces the ByteString using the binary operator, from right to left. -foldr :: (Word8 -> a -> a) -> a -> ByteString -> a -foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs -{-# INLINE foldr #-} - --- | 'foldl1' is a variant of 'foldl' that has no starting value --- argument, and thus must be applied to non-empty 'ByteStrings'. --- This function is subject to array fusion. -foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldl1 _ (LPS []) = errorEmptyList "foldl1" -foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs)) - --- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. -foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldl1' _ (LPS []) = errorEmptyList "foldl1'" -foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs)) - --- | 'foldr1' is a variant of 'foldr' that has no starting value argument, --- and thus must be applied to non-empty 'ByteString's -foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 -foldr1 _ (LPS []) = errorEmptyList "foldr1" -foldr1 f (LPS ps) = foldr1' ps - where foldr1' (x:[]) = P.foldr1 f x - foldr1' (x:xs) = P.foldr f (foldr1' xs) x - --- --------------------------------------------------------------------- --- Special folds - --- | /O(n)/ Concatenate a list of ByteStrings. -concat :: [ByteString] -> ByteString -concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss) - --- | Map a function over a 'ByteString' and concatenate the results -concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString -concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps) - where - k w = case f w of LPS xs -> P.concat xs - --- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if --- any element of the 'ByteString' satisfies the predicate. -any :: (Word8 -> Bool) -> ByteString -> Bool -any f (LPS xs) = L.or (L.map (P.any f) xs) --- todo fuse - --- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines --- if all elements of the 'ByteString' satisfy the predicate. -all :: (Word8 -> Bool) -> ByteString -> Bool -all f (LPS xs) = L.and (L.map (P.all f) xs) --- todo fuse - --- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' -maximum :: ByteString -> Word8 -maximum (LPS []) = errorEmptyList "maximum" -maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs -{-# INLINE maximum #-} - --- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' -minimum :: ByteString -> Word8 -minimum (LPS []) = errorEmptyList "minimum" -minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs -{-# INLINE minimum #-} - --- | The 'mapAccumL' function behaves like a combination of 'map' and --- 'foldl'; it applies a function to each element of a ByteString, --- passing an accumulating parameter from left to right, and returning a --- final value of this accumulator together with the new ByteString. -mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS - --- | /O(n)/ map Word8 functions, provided with the index at each position -mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString -mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS - --- --------------------------------------------------------------------- --- Building ByteStrings - --- | 'scanl' is similar to 'foldl', but returns a list of successive --- reduced values from the left. This function will fuse. --- --- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] --- --- Note that --- --- > last (scanl f z xs) == foldl f z xs. -scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString -scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0) -{-# INLINE scanl #-} - --- --------------------------------------------------------------------- --- Unfolds and replicates - --- | @'iterate' f x@ returns an infinite ByteString of repeated applications --- of @f@ to @x@: --- --- > iterate f x == [x, f x, f (f x), ...] --- -iterate :: (Word8 -> Word8) -> Word8 -> ByteString -iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x')) - --- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every --- element. --- -repeat :: Word8 -> ByteString -repeat c = LPS (L.repeat block) - where block = P.replicate smallChunkSize c - --- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@ --- the value of every element. --- -replicate :: Int64 -> Word8 -> ByteString -replicate w c - | w <= 0 = empty - | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c] - | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant - | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s) - where - s = P.replicate smallChunkSize c - (q, r) = quotRem w (fromIntegral smallChunkSize) - --- | 'cycle' ties a finite ByteString into a circular one, or equivalently, --- the infinite repetition of the original ByteString. --- -cycle :: ByteString -> ByteString -cycle (LPS []) = errorEmptyList "cycle" -cycle (LPS xs) = LPS (L.cycle xs) - --- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'. --- 'unfoldr' builds a ByteString from a seed value. The function takes --- the element and returns 'Nothing' if it is done producing the --- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a --- prepending to the ByteString and @b@ is used as the next element in a --- recursive call. -unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString -unfoldr f = LPS . unfoldChunk 32 - where unfoldChunk n x = - case P.unfoldrN n f x of - (s, Nothing) - | P.null s -> [] - | otherwise -> s : [] - (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x' - --- --------------------------------------------------------------------- --- Substrings - --- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix --- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. -take :: Int64 -> ByteString -> ByteString -take i _ | i <= 0 = empty -take i (LPS ps) = LPS (take' i ps) - where take' 0 _ = [] - take' _ [] = [] - take' n (x:xs) = - if n < fromIntegral (P.length x) - then P.take (fromIntegral n) x : [] - else x : take' (n - fromIntegral (P.length x)) xs - --- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ --- elements, or @[]@ if @n > 'length' xs@. -drop :: Int64 -> ByteString -> ByteString -drop i p | i <= 0 = p -drop i (LPS ps) = LPS (drop' i ps) - where drop' 0 xs = xs - drop' _ [] = [] - drop' n (x:xs) = - if n < fromIntegral (P.length x) - then P.drop (fromIntegral n) x : xs - else drop' (n - fromIntegral (P.length x)) xs - --- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. -splitAt :: Int64 -> ByteString -> (ByteString, ByteString) -splitAt i p | i <= 0 = (empty, p) -splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b) - where splitAt' 0 xs = ([], xs) - splitAt' _ [] = ([], []) - splitAt' n (x:xs) = - if n < fromIntegral (P.length x) - then (P.take (fromIntegral n) x : [], - P.drop (fromIntegral n) x : xs) - else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs - in (x:xs', xs'') - - --- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, --- returns the longest prefix (possibly empty) of @xs@ of elements that --- satisfy @p@. -takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString -takeWhile f (LPS ps) = LPS (takeWhile' ps) - where takeWhile' [] = [] - takeWhile' (x:xs) = - case findIndexOrEnd (not . f) x of - 0 -> [] - n | n < P.length x -> P.take n x : [] - | otherwise -> x : takeWhile' xs - --- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. -dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString -dropWhile f (LPS ps) = LPS (dropWhile' ps) - where dropWhile' [] = [] - dropWhile' (x:xs) = - case findIndexOrEnd (not . f) x of - n | n < P.length x -> P.drop n x : xs - | otherwise -> dropWhile' xs - --- | 'break' @p@ is equivalent to @'span' ('not' . p)@. -break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b) - where break' [] = ([], []) - break' (x:xs) = - case findIndexOrEnd f x of - 0 -> ([], x : xs) - n | n < P.length x -> (P.take n x : [], P.drop n x : xs) - | otherwise -> let (xs', xs'') = break' xs - in (x : xs', xs'') - --- --- TODO --- --- Add rules --- - -{- --- | 'breakByte' breaks its ByteString argument at the first occurence --- of the specified byte. It is more efficient than 'break' as it is --- implemented with @memchr(3)@. I.e. --- --- > break (=='c') "abcd" == breakByte 'c' "abcd" --- -breakByte :: Word8 -> ByteString -> (ByteString, ByteString) -breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b) - where breakByte' [] = ([], []) - breakByte' (x:xs) = - case P.elemIndex c x of - Just 0 -> ([], x : xs) - Just n -> (P.take n x : [], P.drop n x : xs) - Nothing -> let (xs', xs'') = breakByte' xs - in (x : xs', xs'') - --- | 'spanByte' breaks its ByteString argument at the first --- occurence of a byte other than its argument. It is more efficient --- than 'span (==)' --- --- > span (=='c') "abcd" == spanByte 'c' "abcd" --- -spanByte :: Word8 -> ByteString -> (ByteString, ByteString) -spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b) - where spanByte' [] = ([], []) - spanByte' (x:xs) = - case P.spanByte c x of - (x', x'') | P.null x' -> ([], x : xs) - | P.null x'' -> let (xs', xs'') = spanByte' xs - in (x : xs', xs'') - | otherwise -> (x' : [], x'' : xs) --} - --- | 'span' @p xs@ breaks the ByteString into two segments. It is --- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ -span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -span p = break (not . p) - --- | /O(n)/ Splits a 'ByteString' into components delimited by --- separators, where the predicate returns True for a separator element. --- The resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] --- > splitWith (=='a') [] == [] --- -splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] -splitWith _ (LPS []) = [] -splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as - - where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString] - comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : [] - comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs - comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs - - cons' x xs | P.null x = xs - | otherwise = x:xs - {-# INLINE cons' #-} -{-# INLINE splitWith #-} - --- | /O(n)/ Break a 'ByteString' into pieces separated by the byte --- argument, consuming the delimiter. I.e. --- --- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] --- > split 'a' "aXaXaXa" == ["","X","X","X",""] --- > split 'x' "x" == ["",""] --- --- and --- --- > join [c] . split c == id --- > split == splitWith . (==) --- --- As for all splitting functions in this library, this function does --- not copy the substrings, it just constructs new 'ByteStrings' that --- are slices of the original. --- -split :: Word8 -> ByteString -> [ByteString] -split _ (LPS []) = [] -split c (LPS (a:as)) = comb [] (P.split c a) as - - where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString] - comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : [] - comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs - comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs - - cons' x xs | P.null x = xs - | otherwise = x:xs - {-# INLINE cons' #-} -{-# INLINE split #-} - -{- --- | Like 'splitWith', except that sequences of adjacent separators are --- treated as a single separator. eg. --- --- > tokens (=='a') "aabbaca" == ["bb","c"] --- -tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] -tokens f = L.filter (not.null) . splitWith f --} - --- | The 'group' function takes a ByteString and returns a list of --- ByteStrings such that the concatenation of the result is equal to the --- argument. Moreover, each sublist in the result contains only equal --- elements. For example, --- --- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] --- --- It is a special case of 'groupBy', which allows the programmer to --- supply their own equality test. -group :: ByteString -> [ByteString] -group (LPS []) = [] -group (LPS (a:as)) = group' [] (P.group a) as - where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString] - group' acc@(s':_) ss@(s:_) xs - | P.unsafeHead s' - /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs - group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : [] - group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs - group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs - -{- -TODO: check if something like this might be faster - -group :: ByteString -> [ByteString] -group xs - | null xs = [] - | otherwise = ys : group zs - where - (ys, zs) = spanByte (unsafeHead xs) xs --} - --- | The 'groupBy' function is the non-overloaded version of 'group'. --- -groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented" -{- -groupBy _ (LPS []) = [] -groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as - where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString] - groupBy' acc@(_:_) c ss@(s:_) xs - | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs - groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : [] - groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs - groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs - groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs --} - -{- -TODO: check if something like this might be faster - -groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -groupBy k xs - | null xs = [] - | otherwise = take n xs : groupBy k (drop n xs) - where - n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs) --} - --- | /O(n)/ The 'join' function takes a 'ByteString' and a list of --- 'ByteString's and concatenates the list after interspersing the first --- argument between each element of the list. -join :: ByteString -> [ByteString] -> ByteString -join s = concat . (L.intersperse s) - --- --------------------------------------------------------------------- --- Indexing ByteStrings - --- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0. -index :: ByteString -> Int64 -> Word8 -index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i) -index (LPS ps) i = index' ps i - where index' [] n = moduleError "index" ("index too large: " ++ show n) - index' (x:xs) n - | n >= fromIntegral (P.length x) = - index' xs (n - fromIntegral (P.length x)) - | otherwise = P.unsafeIndex x (fromIntegral n) - --- | /O(n)/ The 'elemIndex' function returns the index of the first --- element in the given 'ByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. --- This implementation uses memchr(3). -elemIndex :: Word8 -> ByteString -> Maybe Int64 -elemIndex c (LPS ps) = elemIndex' 0 ps - where elemIndex' _ [] = Nothing - elemIndex' n (x:xs) = - case P.elemIndex c x of - Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs - Just i -> Just (n + fromIntegral i) - -{- --- | /O(n)/ The 'elemIndexEnd' function returns the last index of the --- element in the given 'ByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. The following --- holds: --- --- > elemIndexEnd c xs == --- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) --- -elemIndexEnd :: Word8 -> ByteString -> Maybe Int -elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> - go (p `plusPtr` s) (l-1) - where - STRICT2(go) - go p i | i < 0 = return Nothing - | otherwise = do ch' <- peekByteOff p i - if ch == ch' - then return $ Just i - else go p (i-1) --} --- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning --- the indices of all elements equal to the query element, in ascending order. --- This implementation uses memchr(3). -elemIndices :: Word8 -> ByteString -> [Int64] -elemIndices c (LPS ps) = elemIndices' 0 ps - where elemIndices' _ [] = [] - elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x) - ++ elemIndices' (n + fromIntegral (P.length x)) xs - --- | count returns the number of times its argument appears in the ByteString --- --- > count = length . elemIndices --- --- But more efficiently than using length on the intermediate list. -count :: Word8 -> ByteString -> Int64 -count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs - --- | The 'findIndex' function takes a predicate and a 'ByteString' and --- returns the index of the first element in the ByteString --- satisfying the predicate. -findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64 -findIndex k (LPS ps) = findIndex' 0 ps - where findIndex' _ [] = Nothing - findIndex' n (x:xs) = - case P.findIndex k x of - Nothing -> findIndex' (n + fromIntegral (P.length x)) xs - Just i -> Just (n + fromIntegral i) -{-# INLINE findIndex #-} - --- | /O(n)/ The 'find' function takes a predicate and a ByteString, --- and returns the first element in matching the predicate, or 'Nothing' --- if there is no such element. --- --- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing --- -find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 -find f (LPS ps) = find' ps - where find' [] = Nothing - find' (x:xs) = case P.find f x of - Nothing -> find' xs - Just w -> Just w -{-# INLINE find #-} - --- | The 'findIndices' function extends 'findIndex', by returning the --- indices of all elements satisfying the predicate, in ascending order. -findIndices :: (Word8 -> Bool) -> ByteString -> [Int64] -findIndices k (LPS ps) = findIndices' 0 ps - where findIndices' _ [] = [] - findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x) - ++ findIndices' (n + fromIntegral (P.length x)) xs - --- --------------------------------------------------------------------- --- Searching ByteStrings - --- | /O(n)/ 'elem' is the 'ByteString' membership predicate. -elem :: Word8 -> ByteString -> Bool -elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True - --- | /O(n)/ 'notElem' is the inverse of 'elem' -notElem :: Word8 -> ByteString -> Bool -notElem c ps = not (elem c ps) - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. -filter :: (Word8 -> Bool) -> ByteString -> ByteString ---filter f (LPS xs) = LPS (filterMap (P.filter' f) xs) -filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS -{-# INLINE filter #-} - -{- --- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter . --- (==)/, for the common case of filtering a single byte. It is more --- efficient to use /filterByte/ in this case. --- --- > filterByte == filter . (==) --- --- filterByte is around 10x faster, and uses much less space, than its --- filter equivalent -filterByte :: Word8 -> ByteString -> ByteString -filterByte w ps = replicate (count w ps) w --- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs) - --- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common --- case of filtering a single byte out of a list. It is more efficient --- to use /filterNotByte/ in this case. --- --- > filterNotByte == filter . (/=) --- --- filterNotByte is around 2x faster than its filter equivalent. -filterNotByte :: Word8 -> ByteString -> ByteString -filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs) --} - --- --------------------------------------------------------------------- --- Searching for substrings - --- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' --- iff the first is a prefix of the second. -isPrefixOf :: ByteString -> ByteString -> Bool -isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs - where isPrefixL [] _ = True - isPrefixL _ [] = False - isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys - | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys) - | otherwise = xh == y && isPrefixL (xt:xs) ys - where (xh,xt) = P.splitAt (P.length y) x - (yh,yt) = P.splitAt (P.length x) y - --- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' --- iff the first is a suffix of the second. --- --- The following holds: --- --- > isSuffixOf x y == reverse x `isPrefixOf` reverse y --- --- However, the real implemenation uses memcmp to compare the end of the --- string only, with no reverse required.. --- ---isSuffixOf :: ByteString -> ByteString -> Bool ---isSuffixOf = error "not yet implemented" - --- --------------------------------------------------------------------- --- Zipping - --- | /O(n)/ 'zip' takes two ByteStrings and returns a list of --- corresponding pairs of bytes. If one input ByteString is short, --- excess elements of the longer ByteString are discarded. This is --- equivalent to a pair of 'unpack' operations. -zip :: ByteString -> ByteString -> [(Word8,Word8)] -zip = zipWith (,) - --- | 'zipWith' generalises 'zip' by zipping with the function given as --- the first argument, instead of a tupling function. For example, --- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of --- corresponding sums. -zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] -zipWith _ (LPS []) (LPS _) = [] -zipWith _ (LPS _) (LPS []) = [] -zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs - where zipWith' x xs y ys = - (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys) - - zipWith'' x [] _ _ | P.null x = [] - zipWith'' _ _ y [] | P.null y = [] - zipWith'' x xs y ys | not (P.null x) - && not (P.null y) = zipWith' x xs y ys - zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys - zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys - zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys - --- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of --- ByteStrings. Note that this performs two 'pack' operations. -{- -unzip :: [(Word8,Word8)] -> (ByteString,ByteString) -unzip _ls = error "not yet implemented" -{-# INLINE unzip #-} --} - --- --------------------------------------------------------------------- --- Special lists - --- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. -inits :: ByteString -> [ByteString] -inits = (LPS [] :) . inits' . unLPS - where inits' [] = [] - inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x)) - ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs) - --- | /O(n)/ Return all final segments of the given 'ByteString', longest first. -tails :: ByteString -> [ByteString] -tails = tails' . unLPS - where tails' [] = LPS [] : [] - tails' xs@(x:xs') - | P.length x == 1 = LPS xs : tails' xs' - | otherwise = LPS xs : tails' (P.unsafeTail x : xs') - --- --------------------------------------------------------------------- --- Low level constructors - --- | /O(n)/ Make a copy of the 'ByteString' with its own storage. --- This is mainly useful to allow the rest of the data pointed --- to by the 'ByteString' to be garbage collected, for example --- if a large string has been read in, and only a small part of it --- is needed in the rest of the program. -copy :: ByteString -> ByteString -copy (LPS lps) = LPS (L.map P.copy lps) ---TODO, we could coalese small blocks here ---FIXME: probably not strict enough, if we're doing this to avoid retaining --- the parent blocks then we'd better copy strictly. - --- --------------------------------------------------------------------- - --- TODO defrag func that concatenates block together that are below a threshold --- defrag :: Int -> ByteString -> ByteString - --- --------------------------------------------------------------------- --- Lazy ByteString IO - --- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks --- are read on demand, in at most @k@-sized chunks. It does not block --- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are --- available then they will be returned immediately as a smaller chunk. -hGetContentsN :: Int -> Handle -> IO ByteString -hGetContentsN k h = lazyRead >>= return . LPS - where - lazyRead = unsafeInterleaveIO loop - - loop = do - ps <- P.hGetNonBlocking h k - --TODO: I think this should distinguish EOF from no data available - -- the otherlying POSIX call makes this distincion, returning either - -- 0 or EAGAIN - if P.null ps - then do eof <- hIsEOF h - if eof then return [] - else hWaitForInput h (-1) - >> loop - else do pss <- lazyRead - return (ps : pss) - --- | Read @n@ bytes into a 'ByteString', directly from the --- specified 'Handle', in chunks of size @k@. -hGetN :: Int -> Handle -> Int -> IO ByteString -hGetN _ _ 0 = return empty -hGetN k h n = readChunks n >>= return . LPS - where - STRICT1(readChunks) - readChunks i = do - ps <- P.hGet h (min k i) - case P.length ps of - 0 -> return [] - m -> do pss <- readChunks (i - m) - return (ps : pss) - --- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block --- waiting for data to become available, instead it returns only whatever data --- is available. Chunks are read on demand, in @k@-sized chunks. -hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString -#if defined(__GLASGOW_HASKELL__) -hGetNonBlockingN _ _ 0 = return empty -hGetNonBlockingN k h n = readChunks n >>= return . LPS - where - STRICT1(readChunks) - readChunks i = do - ps <- P.hGetNonBlocking h (min k i) - case P.length ps of - 0 -> return [] - m -> do pss <- readChunks (i - m) - return (ps : pss) -#else -hGetNonBlockingN = hGetN -#endif - --- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks --- are read on demand, using the default chunk size. -hGetContents :: Handle -> IO ByteString -hGetContents = hGetContentsN defaultChunkSize - --- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'. -hGet :: Handle -> Int -> IO ByteString -hGet = hGetN defaultChunkSize - --- | hGetNonBlocking is similar to 'hGet', except that it will never block --- waiting for data to become available, instead it returns only whatever data --- is available. -#if defined(__GLASGOW_HASKELL__) -hGetNonBlocking :: Handle -> Int -> IO ByteString -hGetNonBlocking = hGetNonBlockingN defaultChunkSize -#else -hGetNonBlocking = hGet -#endif - --- | Read an entire file /lazily/ into a 'ByteString'. -readFile :: FilePath -> IO ByteString -readFile f = openBinaryFile f ReadMode >>= hGetContents - --- | Write a 'ByteString' to a file. -writeFile :: FilePath -> ByteString -> IO () -writeFile f txt = bracket (openBinaryFile f WriteMode) hClose - (\hdl -> hPut hdl txt) - --- | Append a 'ByteString' to a file. -appendFile :: FilePath -> ByteString -> IO () -appendFile f txt = bracket (openBinaryFile f AppendMode) hClose - (\hdl -> hPut hdl txt) - --- | getContents. Equivalent to hGetContents stdin. Will read /lazily/ -getContents :: IO ByteString -getContents = hGetContents stdin - --- | Outputs a 'ByteString' to the specified 'Handle'. -hPut :: Handle -> ByteString -> IO () -hPut h (LPS xs) = mapM_ (P.hPut h) xs - --- | Write a ByteString to stdout -putStr :: ByteString -> IO () -putStr = hPut stdout - --- | Write a ByteString to stdout, appending a newline byte -putStrLn :: ByteString -> IO () -putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a) - --- | The interact function takes a function of type @ByteString -> ByteString@ --- as its argument. The entire input from the standard input device is passed --- to this function as its argument, and the resulting string is output on the --- standard output device. It's great for writing one line programs! -interact :: (ByteString -> ByteString) -> IO () -interact transformer = putStr . transformer =<< getContents - --- --------------------------------------------------------------------- --- Internal utilities - --- Common up near identical calls to `error' to reduce the number --- constant strings created when compiled: -errorEmptyList :: String -> a -errorEmptyList fun = moduleError fun "empty ByteString" - -moduleError :: String -> String -> a -moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg) - --- A manually fused version of "filter (not.null) . map f", since they --- don't seem to fuse themselves. Really helps out filter*, concatMap. --- --- TODO fuse. --- -filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString] -filterMap _ [] = [] -filterMap f (x:xs) = case f x of - y | P.null y -> filterMap f xs -- manually fuse the invariant filter - | otherwise -> y : filterMap f xs -{-# INLINE filterMap #-} - - --- | 'findIndexOrEnd' is a variant of findIndex, that returns the length --- of the string if no element is found, rather than Nothing. -findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int -findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 - where - STRICT2(go) - go ptr n | n >= l = return l - | otherwise = do w <- peek ptr - if k w - then return n - else go (ptr `plusPtr` 1) (n+1) -{-# INLINE findIndexOrEnd #-} diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs deleted file mode 100644 index 250a659..0000000 --- a/Data/ByteString/Lazy/Char8.hs +++ /dev/null @@ -1,748 +0,0 @@ -{-# OPTIONS_GHC -cpp -fno-warn-orphans #-} --- | --- Module : Data.ByteString.Lazy.Char8 --- Copyright : (c) Don Stewart 2006 --- License : BSD-style --- --- Maintainer : dons@cse.unsw.edu.au --- Stability : experimental --- Portability : non-portable (imports Data.ByteString.Lazy) --- --- Manipulate /lazy/ 'ByteString's using 'Char' operations. All Chars will --- be truncated to 8 bits. It can be expected that these functions will --- run at identical speeds to their 'Data.Word.Word8' equivalents in --- "Data.ByteString.Lazy". --- --- This module is intended to be imported @qualified@, to avoid name --- clashes with "Prelude" functions. eg. --- --- > import qualified Data.ByteString.Lazy.Char8 as C --- - -module Data.ByteString.Lazy.Char8 ( - - -- * The @ByteString@ type - ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable - - -- * Introducing and eliminating 'ByteString's - empty, -- :: ByteString - singleton, -- :: Char -> ByteString - pack, -- :: String -> ByteString - unpack, -- :: ByteString -> String - fromChunks, -- :: [Strict.ByteString] -> ByteString - toChunks, -- :: ByteString -> [Strict.ByteString] - - -- * Basic interface - cons, -- :: Char -> ByteString -> ByteString - snoc, -- :: ByteString -> Char -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - head, -- :: ByteString -> Char - last, -- :: ByteString -> Char - tail, -- :: ByteString -> ByteString - init, -- :: ByteString -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int64 - - -- * Transformating ByteStrings - map, -- :: (Char -> Char) -> ByteString -> ByteString - reverse, -- :: ByteString -> ByteString --- intersperse, -- :: Char -> ByteString -> ByteString - transpose, -- :: [ByteString] -> [ByteString] - - -- * Reducing 'ByteString's (folds) - foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a - foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a - foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char - foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char - foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a - foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char - - -- ** Special folds - concat, -- :: [ByteString] -> ByteString - concatMap, -- :: (Char -> ByteString) -> ByteString -> ByteString - any, -- :: (Char -> Bool) -> ByteString -> Bool - all, -- :: (Char -> Bool) -> ByteString -> Bool - maximum, -- :: ByteString -> Char - minimum, -- :: ByteString -> Char - - -- * Building ByteStrings - -- ** Scans - scanl, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString --- scanl1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString --- scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString --- scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString - - -- ** Accumulating maps - mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) - mapIndexed, -- :: (Int64 -> Char -> Char) -> ByteString -> ByteString - - -- ** Infinite ByteStrings - repeat, -- :: Char -> ByteString - replicate, -- :: Int64 -> Char -> ByteString - cycle, -- :: ByteString -> ByteString - iterate, -- :: (Char -> Char) -> Char -> ByteString - - -- ** Unfolding - unfoldr, -- :: (a -> Maybe (Char, a)) -> a -> ByteString - - -- * Substrings - - -- ** Breaking strings - take, -- :: Int64 -> ByteString -> ByteString - drop, -- :: Int64 -> ByteString -> ByteString - splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString) - takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString - dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString - span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString] - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - - -- ** Breaking into many substrings - split, -- :: Char -> ByteString -> [ByteString] - splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] - - -- ** Breaking into lines and words - lines, -- :: ByteString -> [ByteString] - words, -- :: ByteString -> [ByteString] - unlines, -- :: [ByteString] -> ByteString - unwords, -- :: ByteString -> [ByteString] - - -- ** Joining strings - join, -- :: ByteString -> [ByteString] -> ByteString - - -- * Predicates - isPrefixOf, -- :: ByteString -> ByteString -> Bool --- isSuffixOf, -- :: ByteString -> ByteString -> Bool - - -- * Searching ByteStrings - - -- ** Searching by equality - elem, -- :: Char -> ByteString -> Bool - notElem, -- :: Char -> ByteString -> Bool - - -- ** Searching with a predicate - find, -- :: (Char -> Bool) -> ByteString -> Maybe Char - filter, -- :: (Char -> Bool) -> ByteString -> ByteString --- partition -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - - -- * Indexing ByteStrings - index, -- :: ByteString -> Int64 -> Char - elemIndex, -- :: Char -> ByteString -> Maybe Int64 - elemIndices, -- :: Char -> ByteString -> [Int64] - findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int64 - findIndices, -- :: (Char -> Bool) -> ByteString -> [Int64] - count, -- :: Char -> ByteString -> Int64 - - -- * Zipping and unzipping ByteStrings - zip, -- :: ByteString -> ByteString -> [(Char,Char)] - zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c] --- unzip, -- :: [(Char,Char)] -> (ByteString,ByteString) - - -- * Ordered ByteStrings --- sort, -- :: ByteString -> ByteString - - copy, -- :: ByteString -> ByteString - - -- * Reading from ByteStrings - readInt, - readInteger, - - -- * I\/O with 'ByteString's - - -- ** Standard input and output - getContents, -- :: IO ByteString - putStr, -- :: ByteString -> IO () - putStrLn, -- :: ByteString -> IO () - interact, -- :: (ByteString -> ByteString) -> IO () - - -- ** Files - readFile, -- :: FilePath -> IO ByteString - writeFile, -- :: FilePath -> ByteString -> IO () - appendFile, -- :: FilePath -> ByteString -> IO () - - -- ** I\/O with Handles - hGetContents, -- :: Handle -> IO ByteString - hGet, -- :: Handle -> Int64 -> IO ByteString - hPut, -- :: Handle -> ByteString -> IO () - hGetNonBlocking, -- :: Handle -> IO ByteString - --- hGetN, -- :: Int -> Handle -> Int64 -> IO ByteString --- hGetContentsN, -- :: Int -> Handle -> IO ByteString --- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString - ) where - --- Functions transparently exported -import Data.ByteString.Lazy - (ByteString, fromChunks, toChunks - ,empty,null,length,tail,init,append,reverse,transpose,cycle - ,concat,take,drop,splitAt,join,isPrefixOf,group,inits,tails,copy - ,hGetContents, hGet, hPut, getContents - ,hGetNonBlocking - ,putStr, putStrLn, interact) - --- Functions we need to wrap. -import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString as B -import qualified Data.ByteString.Base as B -import Data.ByteString.Base (LazyByteString(LPS)) - -import Data.ByteString.Base (w2c, c2w, isSpaceWord8) - -import Data.Int (Int64) -import qualified Data.List as List (intersperse) - -import qualified Prelude as P -import Prelude hiding - (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines - ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter - ,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,foldl1,foldr1 - ,readFile,writeFile,appendFile,replicate,getContents,getLine,putStr,putStrLn - ,zip,zipWith,unzip,notElem,repeat,iterate,interact,cycle) - -import System.IO (hClose,openFile,IOMode(..)) -import Control.Exception (bracket) - -#define STRICT1(f) f a | a `seq` False = undefined -#define STRICT2(f) f a b | a `seq` b `seq` False = undefined -#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined -#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined -#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined - ------------------------------------------------------------------------- - --- | /O(1)/ Convert a 'Char' into a 'ByteString' -singleton :: Char -> ByteString -singleton = L.singleton . c2w -{-# INLINE singleton #-} - --- | /O(n)/ Convert a 'String' into a 'ByteString'. -pack :: [Char] -> ByteString -pack = L.pack. P.map c2w - --- | /O(n)/ Converts a 'ByteString' to a 'String'. -unpack :: ByteString -> [Char] -unpack = P.map w2c . L.unpack -{-# INLINE unpack #-} - --- | /O(n)/ 'cons' is analogous to (:) for lists, but of different --- complexity, as it requires a memcpy. -cons :: Char -> ByteString -> ByteString -cons = L.cons . c2w -{-# INLINE cons #-} - --- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to --- 'cons', this function performs a memcpy. -snoc :: ByteString -> Char -> ByteString -snoc p = L.snoc p . c2w -{-# INLINE snoc #-} - --- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. -head :: ByteString -> Char -head = w2c . L.head -{-# INLINE head #-} - --- | /O(1)/ Extract the last element of a packed string, which must be non-empty. -last :: ByteString -> Char -last = w2c . L.last -{-# INLINE last #-} - --- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@ -map :: (Char -> Char) -> ByteString -> ByteString -map f = L.map (c2w . f . w2c) -{-# INLINE map #-} - --- | 'foldl', applied to a binary operator, a starting value (typically --- the left-identity of the operator), and a ByteString, reduces the --- ByteString using the binary operator, from left to right. -foldl :: (a -> Char -> a) -> a -> ByteString -> a -foldl f = L.foldl (\a c -> f a (w2c c)) -{-# INLINE foldl #-} - --- | 'foldl\'' is like foldl, but strict in the accumulator. -foldl' :: (a -> Char -> a) -> a -> ByteString -> a -foldl' f = L.foldl' (\a c -> f a (w2c c)) -{-# INLINE foldl' #-} - --- | 'foldr', applied to a binary operator, a starting value --- (typically the right-identity of the operator), and a packed string, --- reduces the packed string using the binary operator, from right to left. -foldr :: (Char -> a -> a) -> a -> ByteString -> a -foldr f = L.foldr (\c a -> f (w2c c) a) -{-# INLINE foldr #-} - --- | 'foldl1' is a variant of 'foldl' that has no starting value --- argument, and thus must be applied to non-empty 'ByteStrings'. -foldl1 :: (Char -> Char -> Char) -> ByteString -> Char -foldl1 f ps = w2c (L.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) -{-# INLINE foldl1 #-} - --- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. -foldl1' :: (Char -> Char -> Char) -> ByteString -> Char -foldl1' f ps = w2c (L.foldl1' (\x y -> c2w (f (w2c x) (w2c y))) ps) - --- | 'foldr1' is a variant of 'foldr' that has no starting value argument, --- and thus must be applied to non-empty 'ByteString's -foldr1 :: (Char -> Char -> Char) -> ByteString -> Char -foldr1 f ps = w2c (L.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps) -{-# INLINE foldr1 #-} - --- | Map a function over a 'ByteString' and concatenate the results -concatMap :: (Char -> ByteString) -> ByteString -> ByteString -concatMap f = L.concatMap (f . w2c) -{-# INLINE concatMap #-} - --- | Applied to a predicate and a ByteString, 'any' determines if --- any element of the 'ByteString' satisfies the predicate. -any :: (Char -> Bool) -> ByteString -> Bool -any f = L.any (f . w2c) -{-# INLINE any #-} - --- | Applied to a predicate and a 'ByteString', 'all' determines if --- all elements of the 'ByteString' satisfy the predicate. -all :: (Char -> Bool) -> ByteString -> Bool -all f = L.all (f . w2c) -{-# INLINE all #-} - --- | 'maximum' returns the maximum value from a 'ByteString' -maximum :: ByteString -> Char -maximum = w2c . L.maximum -{-# INLINE maximum #-} - --- | 'minimum' returns the minimum value from a 'ByteString' -minimum :: ByteString -> Char -minimum = w2c . L.minimum -{-# INLINE minimum #-} - --- --------------------------------------------------------------------- --- Building ByteStrings - --- | 'scanl' is similar to 'foldl', but returns a list of successive --- reduced values from the left. This function will fuse. --- --- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] --- --- Note that --- --- > last (scanl f z xs) == foldl f z xs. -scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString -scanl f z = L.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) - --- | The 'mapAccumL' function behaves like a combination of 'map' and --- 'foldl'; it applies a function to each element of a ByteString, --- passing an accumulating parameter from left to right, and returning a --- final value of this accumulator together with the new ByteString. -mapAccumL :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) -mapAccumL f = L.mapAccumL (\a w -> case f a (w2c w) of (a',c) -> (a', c2w c)) - --- | /O(n)/ map Char functions, provided with the index at each position -mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString -mapIndexed f = L.mapIndexed (\i w -> c2w (f i (w2c w))) - ------------------------------------------------------------------------- --- Generating and unfolding ByteStrings - --- | @'iterate' f x@ returns an infinite ByteString of repeated applications --- of @f@ to @x@: --- --- > iterate f x == [x, f x, f (f x), ...] --- -iterate :: (Char -> Char) -> Char -> ByteString -iterate f = L.iterate (c2w . f . w2c) . c2w - --- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every --- element. --- -repeat :: Char -> ByteString -repeat = L.repeat . c2w - --- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@ --- the value of every element. --- -replicate :: Int64 -> Char -> ByteString -replicate w c = L.replicate w (c2w c) - --- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'. --- 'unfoldr' builds a ByteString from a seed value. The function takes --- the element and returns 'Nothing' if it is done producing the --- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a --- prepending to the ByteString and @b@ is used as the next element in a --- recursive call. -unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString -unfoldr f = L.unfoldr $ \a -> case f a of - Nothing -> Nothing - Just (c, a') -> Just (c2w c, a') - ------------------------------------------------------------------------- - --- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, --- returns the longest prefix (possibly empty) of @xs@ of elements that --- satisfy @p@. -takeWhile :: (Char -> Bool) -> ByteString -> ByteString -takeWhile f = L.takeWhile (f . w2c) -{-# INLINE takeWhile #-} - --- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. -dropWhile :: (Char -> Bool) -> ByteString -> ByteString -dropWhile f = L.dropWhile (f . w2c) -{-# INLINE dropWhile #-} - --- | 'break' @p@ is equivalent to @'span' ('not' . p)@. -break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -break f = L.break (f . w2c) -{-# INLINE break #-} - --- | 'span' @p xs@ breaks the ByteString into two segments. It is --- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ -span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -span f = L.span (f . w2c) -{-# INLINE span #-} - -{- --- | 'breakChar' breaks its ByteString argument at the first occurence --- of the specified Char. It is more efficient than 'break' as it is --- implemented with @memchr(3)@. I.e. --- --- > break (=='c') "abcd" == breakChar 'c' "abcd" --- -breakChar :: Char -> ByteString -> (ByteString, ByteString) -breakChar = L.breakByte . c2w -{-# INLINE breakChar #-} - --- | 'spanChar' breaks its ByteString argument at the first --- occurence of a Char other than its argument. It is more efficient --- than 'span (==)' --- --- > span (=='c') "abcd" == spanByte 'c' "abcd" --- -spanChar :: Char -> ByteString -> (ByteString, ByteString) -spanChar = L.spanByte . c2w -{-# INLINE spanChar #-} --} - --- --- TODO, more rules for breakChar* --- - --- | /O(n)/ Break a 'ByteString' into pieces separated by the byte --- argument, consuming the delimiter. I.e. --- --- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] --- > split 'a' "aXaXaXa" == ["","X","X","X"] --- > split 'x' "x" == ["",""] --- --- and --- --- > join [c] . split c == id --- > split == splitWith . (==) --- --- As for all splitting functions in this library, this function does --- not copy the substrings, it just constructs new 'ByteStrings' that --- are slices of the original. --- -split :: Char -> ByteString -> [ByteString] -split = L.split . c2w -{-# INLINE split #-} - --- | /O(n)/ Splits a 'ByteString' into components delimited by --- separators, where the predicate returns True for a separator element. --- The resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] --- -splitWith :: (Char -> Bool) -> ByteString -> [ByteString] -splitWith f = L.splitWith (f . w2c) -{-# INLINE splitWith #-} - --- | The 'groupBy' function is the non-overloaded version of 'group'. -groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString] -groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b)) - --- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. -index :: ByteString -> Int64 -> Char -index = (w2c .) . L.index -{-# INLINE index #-} - --- | /O(n)/ The 'elemIndex' function returns the index of the first --- element in the given 'ByteString' which is equal (by memchr) to the --- query element, or 'Nothing' if there is no such element. -elemIndex :: Char -> ByteString -> Maybe Int64 -elemIndex = L.elemIndex . c2w -{-# INLINE elemIndex #-} - --- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning --- the indices of all elements equal to the query element, in ascending order. -elemIndices :: Char -> ByteString -> [Int64] -elemIndices = L.elemIndices . c2w -{-# INLINE elemIndices #-} - --- | The 'findIndex' function takes a predicate and a 'ByteString' and --- returns the index of the first element in the ByteString satisfying the predicate. -findIndex :: (Char -> Bool) -> ByteString -> Maybe Int64 -findIndex f = L.findIndex (f . w2c) -{-# INLINE findIndex #-} - --- | The 'findIndices' function extends 'findIndex', by returning the --- indices of all elements satisfying the predicate, in ascending order. -findIndices :: (Char -> Bool) -> ByteString -> [Int64] -findIndices f = L.findIndices (f . w2c) - --- | count returns the number of times its argument appears in the ByteString --- --- > count == length . elemIndices --- > count '\n' == length . lines --- --- But more efficiently than using length on the intermediate list. -count :: Char -> ByteString -> Int64 -count c = L.count (c2w c) - --- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This --- implementation uses @memchr(3)@. -elem :: Char -> ByteString -> Bool -elem c = L.elem (c2w c) -{-# INLINE elem #-} - --- | /O(n)/ 'notElem' is the inverse of 'elem' -notElem :: Char -> ByteString -> Bool -notElem c = L.notElem (c2w c) -{-# INLINE notElem #-} - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. -filter :: (Char -> Bool) -> ByteString -> ByteString -filter f = L.filter (f . w2c) -{-# INLINE filter #-} - --- | /O(n)/ The 'find' function takes a predicate and a ByteString, --- and returns the first element in matching the predicate, or 'Nothing' --- if there is no such element. -find :: (Char -> Bool) -> ByteString -> Maybe Char -find f ps = w2c `fmap` L.find (f . w2c) ps -{-# INLINE find #-} - -{- --- | /O(n)/ A first order equivalent of /filter . (==)/, for the common --- case of filtering a single Char. It is more efficient to use --- filterChar in this case. --- --- > filterChar == filter . (==) --- --- filterChar is around 10x faster, and uses much less space, than its --- filter equivalent --- -filterChar :: Char -> ByteString -> ByteString -filterChar c = L.filterByte (c2w c) -{-# INLINE filterChar #-} - --- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common --- case of filtering a single Char out of a list. It is more efficient --- to use /filterNotChar/ in this case. --- --- > filterNotChar == filter . (/=) --- --- filterNotChar is around 3x faster, and uses much less space, than its --- filter equivalent --- -filterNotChar :: Char -> ByteString -> ByteString -filterNotChar c = L.filterNotByte (c2w c) -{-# INLINE filterNotChar #-} --} - --- | /O(n)/ 'zip' takes two ByteStrings and returns a list of --- corresponding pairs of Chars. If one input ByteString is short, --- excess elements of the longer ByteString are discarded. This is --- equivalent to a pair of 'unpack' operations, and so space --- usage may be large for multi-megabyte ByteStrings -zip :: ByteString -> ByteString -> [(Char,Char)] -zip ps qs - | L.null ps || L.null qs = [] - | otherwise = (head ps, head qs) : zip (L.tail ps) (L.tail qs) - --- | 'zipWith' generalises 'zip' by zipping with the function given as --- the first argument, instead of a tupling function. For example, --- @'zipWith' (+)@ is applied to two ByteStrings to produce the list --- of corresponding sums. -zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a] -zipWith f = L.zipWith ((. w2c) . f . w2c) - --- | 'lines' breaks a ByteString up into a list of ByteStrings at --- newline Chars. The resulting strings do not contain newlines. --- -lines :: ByteString -> [ByteString] -lines (LPS []) = [] -lines (LPS (x:xs)) = loop0 x xs - where - -- this is a really performance sensitive function but the - -- chunked representation makes the general case a bit expensive - -- however assuming a large chunk size and normalish line lengths - -- we will find line endings much more frequently than chunk - -- endings so it makes sense to optimise for that common case. - -- So we partition into two special cases depending on whether we - -- are keeping back a list of chunks that will eventually be output - -- once we get to the end of the current line. - - -- the common special case where we have no existing chunks of - -- the current line - loop0 :: B.ByteString -> [B.ByteString] -> [ByteString] - STRICT2(loop0) - loop0 ps pss = - case B.elemIndex (c2w '\n') ps of - Nothing -> case pss of - [] | B.null ps -> [] - | otherwise -> LPS [ps] : [] - (ps':pss') - | B.null ps -> loop0 ps' pss' - | otherwise -> loop ps' [ps] pss' - - Just n | n /= 0 -> LPS [B.unsafeTake n ps] - : loop0 (B.unsafeDrop (n+1) ps) pss - | otherwise -> loop0 (B.unsafeTail ps) pss - - -- the general case when we are building a list of chunks that are - -- part of the same line - loop :: B.ByteString -> [B.ByteString] -> [B.ByteString] -> [ByteString] - STRICT3(loop) - loop ps line pss = - case B.elemIndex (c2w '\n') ps of - Nothing -> - case pss of - [] -> let ps' | B.null ps = P.reverse line - | otherwise = P.reverse (ps : line) - in ps' `seq` (LPS ps' : []) - - (ps':pss') - | B.null ps -> loop ps' line pss' - | otherwise -> loop ps' (ps : line) pss' - - Just n -> - let ps' | n == 0 = P.reverse line - | otherwise = P.reverse (B.unsafeTake n ps : line) - in ps' `seq` (LPS ps' : loop0 (B.unsafeDrop (n+1) ps) pss) - --- | 'unlines' is an inverse operation to 'lines'. It joins lines, --- after appending a terminating newline to each. -unlines :: [ByteString] -> ByteString -unlines [] = empty -unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space - where nl = singleton '\n' - --- | 'words' breaks a ByteString up into a list of words, which --- were delimited by Chars representing white space. And --- --- > tokens isSpace = words --- -words :: ByteString -> [ByteString] -words = P.filter (not . L.null) . L.splitWith isSpaceWord8 -{-# INLINE words #-} - --- | The 'unwords' function is analogous to the 'unlines' function, on words. -unwords :: [ByteString] -> ByteString -unwords = join (singleton ' ') -{-# INLINE unwords #-} - --- | readInt reads an Int from the beginning of the ByteString. If --- there is no integer at the beginning of the string, it returns --- Nothing, otherwise it just returns the int read, and the rest of the --- string. -readInt :: ByteString -> Maybe (Int, ByteString) -readInt (LPS []) = Nothing -readInt (LPS (x:xs)) = - case w2c (B.unsafeHead x) of - '-' -> loop True 0 0 (B.unsafeTail x) xs - '+' -> loop False 0 0 (B.unsafeTail x) xs - _ -> loop False 0 0 x xs - - where loop :: Bool -> Int -> Int -> B.ByteString -> [B.ByteString] -> Maybe (Int, ByteString) - STRICT5(loop) - loop neg i n ps pss - | B.null ps = case pss of - [] -> end neg i n ps pss - (ps':pss') -> loop neg i n ps' pss' - | otherwise = - case B.unsafeHead ps of - w | w >= 0x30 - && w <= 0x39 -> loop neg (i+1) - (n * 10 + (fromIntegral w - 0x30)) - (B.unsafeTail ps) pss - | otherwise -> end neg i n ps pss - - end _ 0 _ _ _ = Nothing - end neg _ n ps pss = let n' | neg = negate n - | otherwise = n - ps' | B.null ps = pss - | otherwise = ps:pss - in n' `seq` ps' `seq` Just $! (n', LPS ps') - - --- | readInteger reads an Integer from the beginning of the ByteString. If --- there is no integer at the beginning of the string, it returns Nothing, --- otherwise it just returns the int read, and the rest of the string. -readInteger :: ByteString -> Maybe (Integer, ByteString) -readInteger (LPS []) = Nothing -readInteger (LPS (x:xs)) = - case w2c (B.unsafeHead x) of - '-' -> first (B.unsafeTail x) xs >>= \(n, bs) -> return (-n, bs) - '+' -> first (B.unsafeTail x) xs - _ -> first x xs - - where first ps pss - | B.null ps = case pss of - [] -> Nothing - (ps':pss') -> first' ps' pss' - | otherwise = first' ps pss - - first' ps pss = case B.unsafeHead ps of - w | w >= 0x30 && w <= 0x39 -> Just $ - loop 1 (fromIntegral w - 0x30) [] (B.unsafeTail ps) pss - | otherwise -> Nothing - - loop :: Int -> Int -> [Integer] - -> B.ByteString -> [B.ByteString] -> (Integer, ByteString) - STRICT5(loop) - loop d acc ns ps pss - | B.null ps = case pss of - [] -> combine d acc ns ps pss - (ps':pss') -> loop d acc ns ps' pss' - | otherwise = - case B.unsafeHead ps of - w | w >= 0x30 && w <= 0x39 -> - if d < 9 then loop (d+1) - (10*acc + (fromIntegral w - 0x30)) - ns (B.unsafeTail ps) pss - else loop 1 (fromIntegral w - 0x30) - (fromIntegral acc : ns) - (B.unsafeTail ps) pss - | otherwise -> combine d acc ns ps pss - - combine _ acc [] ps pss = end (fromIntegral acc) ps pss - combine d acc ns ps pss = - end (10^d * combine1 1000000000 ns + fromIntegral acc) ps pss - - combine1 _ [n] = n - combine1 b ns = combine1 (b*b) $ combine2 b ns - - combine2 b (n:m:ns) = let t = n+m*b in t `seq` (t : combine2 b ns) - combine2 _ ns = ns - - end n ps pss = let ps' | B.null ps = pss - | otherwise = ps:pss - in ps' `seq` (n, LPS ps') - --- | Read an entire file /lazily/ into a 'ByteString'. Use 'text mode' --- on Windows to interpret newlines -readFile :: FilePath -> IO ByteString -readFile f = openFile f ReadMode >>= hGetContents - --- | Write a 'ByteString' to a file. -writeFile :: FilePath -> ByteString -> IO () -writeFile f txt = bracket (openFile f WriteMode) hClose - (\hdl -> hPut hdl txt) - --- | Append a 'ByteString' to a file. -appendFile :: FilePath -> ByteString -> IO () -appendFile f txt = bracket (openFile f AppendMode) hClose - (\hdl -> hPut hdl txt) diff --git a/base.cabal b/base.cabal index e08510a..c918a4f 100644 --- a/base.cabal +++ b/base.cabal @@ -95,12 +95,6 @@ Library { Data.Array.Unboxed, Data.Bits, Data.Bool, - Data.ByteString, - Data.ByteString.Char8, - Data.ByteString.Lazy - Data.ByteString.Lazy.Char8 - Data.ByteString.Base - Data.ByteString.Fusion Data.Char, Data.Complex, Data.Dynamic, @@ -184,7 +178,6 @@ Library { cbits/Win32Utils.c cbits/consUtils.c cbits/dirUtils.c - cbits/fpstring.c cbits/inputReady.c cbits/lockFile.c cbits/longlong.c diff --git a/cbits/fpstring.c b/cbits/fpstring.c deleted file mode 100644 index 9e0b809..0000000 --- a/cbits/fpstring.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2003 David Roundy - * Copyright (c) 2005-6 Don Stewart - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the authors or the names of any contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "fpstring.h" - -/* copy a string in reverse */ -void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) { - p += n-1; - while (n-- != 0) - *q++ = *p--; -} - -/* duplicate a string, interspersing the character through the elements - of the duplicated string */ -void fps_intersperse(unsigned char *q, - unsigned char *p, - unsigned long n, - unsigned char c) { - - while (n > 1) { - *q++ = *p++; - *q++ = c; - n--; - } - if (n == 1) - *q = *p; -} - -/* find maximum char in a packed string */ -unsigned char fps_maximum(unsigned char *p, unsigned long len) { - unsigned char *q, c = *p; - for (q = p; q < p + len; q++) - if (*q > c) - c = *q; - return c; -} - -/* find minimum char in a packed string */ -unsigned char fps_minimum(unsigned char *p, unsigned long len) { - unsigned char *q, c = *p; - for (q = p; q < p + len; q++) - if (*q < c) - c = *q; - return c; -} - -/* count the number of occurences of a char in a string */ -unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) { - unsigned long c; - for (c = 0; len-- != 0; ++p) - if (*p == w) - ++c; - return c; -} -- 1.7.10.4