--- /dev/null
+{-# OPTIONS_GHC -cpp -fffi #-}
+--
+-- Module : ByteString
+-- Copyright : (c) The University of Glasgow 2001,
+-- (c) David Roundy 2003-2005,
+-- (c) Simon Marlow 2005
+-- (c) Don Stewart 2005-2006
+-- (c) Bjorn Bringert 2006
+-- License : BSD-style
+--
+-- Maintainer : dons@cse.unsw.edu.au
+-- Stability : experimental
+-- Portability : portable, requires ffi and cpp
+-- Tested with : GHC 6.4.1 and Hugs March 2005
+--
+
+--
+-- | A time and space-efficient implementation of byte vectors using
+-- packed Word8 arrays, suitable for high performance use, both in terms
+-- of large data quantities, or high speed requirements. Byte vectors
+-- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
+-- be passed between C and Haskell with little effort.
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with Prelude functions. eg.
+--
+-- > import qualified Data.ByteString as B
+--
+-- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
+-- UArray by Simon Marlow. Rewritten to support slices and use
+-- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
+--
+
+module Data.ByteString (
+
+ -- * The @ByteString@ type
+ ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+
+ -- * Introducing and eliminating 'ByteString's
+ empty, -- :: ByteString
+ packByte, -- :: Word8 -> ByteString
+ pack, -- :: [Word8] -> ByteString
+ unpack, -- :: ByteString -> [Word8]
+ packWith, -- :: (a -> Word8) -> [a] -> ByteString
+ unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
+
+ -- * Basic interface
+ cons, -- :: Word8 -> ByteString -> ByteString
+ snoc, -- :: Word8 -> ByteString -> ByteString
+ null, -- :: ByteString -> Bool
+ length, -- :: ByteString -> Int
+ head, -- :: ByteString -> Word8
+ tail, -- :: ByteString -> ByteString
+ last, -- :: ByteString -> Word8
+ init, -- :: ByteString -> ByteString
+ append, -- :: ByteString -> ByteString -> ByteString
+
+ -- * Special ByteStrings
+ inits, -- :: ByteString -> [ByteString]
+ tails, -- :: ByteString -> [ByteString]
+ elems, -- :: ByteString -> [ByteString]
+
+ -- * Transformating ByteStrings
+ map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
+ reverse, -- :: ByteString -> ByteString
+ intersperse, -- :: Word8 -> ByteString -> ByteString
+ transpose, -- :: [ByteString] -> [ByteString]
+
+ -- * Reducing 'ByteString's
+ foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
+ foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
+ foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+ foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+
+ -- ** Special folds
+ concat, -- :: [ByteString] -> ByteString
+ concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
+ any, -- :: (Word8 -> Bool) -> ByteString -> Bool
+ all, -- :: (Word8 -> Bool) -> ByteString -> Bool
+ maximum, -- :: ByteString -> Word8
+ minimum, -- :: ByteString -> Word8
+ mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
+ hash, -- :: ByteString -> Int32
+
+ -- * Generating and unfolding ByteStrings
+ replicate, -- :: Int -> Word8 -> ByteString
+ unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
+
+ -- * Substrings
+
+ -- ** Breaking strings
+ take, -- :: Int -> ByteString -> ByteString
+ drop, -- :: Int -> ByteString -> ByteString
+ splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
+ takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
+ dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
+ break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+ span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+ spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+
+ -- ** Breaking and dropping on specific bytes
+ breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
+ breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+ breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+
+ -- ** Breaking into many substrings
+ split, -- :: Word8 -> ByteString -> [ByteString]
+ splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
+ tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
+
+ -- ** Joining strings
+ join, -- :: ByteString -> [ByteString] -> ByteString
+ joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
+
+ -- * Indexing ByteStrings
+ index, -- :: ByteString -> Int -> Word8
+ elemIndex, -- :: Word8 -> ByteString -> Maybe Int
+ elemIndices, -- :: Word8 -> ByteString -> [Int]
+ elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
+ findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
+ findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
+ count, -- :: Word8 -> ByteString -> Int
+
+ -- * Ordered ByteStrings
+ sort, -- :: ByteString -> ByteString
+
+ -- * Searching ByteStrings
+
+ -- ** Searching by equality
+ -- | These functions use memchr(3) to efficiently search the ByteString
+
+ elem, -- :: Word8 -> ByteString -> Bool
+ notElem, -- :: Word8 -> ByteString -> Bool
+ filterByte, -- :: Word8 -> ByteString -> ByteString
+ filterNotByte, -- :: Word8 -> ByteString -> ByteString
+
+ -- ** Searching with a predicate
+ filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
+ find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
+
+ -- ** Prefixes and suffixes
+ -- | These functions use memcmp(3) to efficiently compare substrings
+ isPrefixOf, -- :: ByteString -> ByteString -> Bool
+ isSuffixOf, -- :: ByteString -> ByteString -> Bool
+
+ -- ** Search for arbitrary substrings
+ isSubstringOf, -- :: ByteString -> ByteString -> Bool
+ findSubstring, -- :: ByteString -> ByteString -> Maybe Int
+ findSubstrings, -- :: ByteString -> ByteString -> [Int]
+
+ -- * Zipping and unzipping ByteStrings
+ zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
+ zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
+ unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
+
+ -- * Unchecked access
+ unsafeHead, -- :: ByteString -> Word8
+ unsafeTail, -- :: ByteString -> ByteString
+ unsafeIndex, -- :: ByteString -> Int -> Word8
+
+ -- * Low level introduction and elimination
+ generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
+ create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
+ fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
+ toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
+ skipIndex, -- :: ByteString -> Int
+
+ -- ** Packing CStrings and pointers
+ packCString, -- :: CString -> ByteString
+ packCStringLen, -- :: CString -> ByteString
+ packMallocCString, -- :: CString -> ByteString
+
+#if defined(__GLASGOW_HASKELL__)
+ packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
+ packAddress, -- :: Addr# -> ByteString
+ unsafePackAddress, -- :: Int -> Addr# -> ByteString
+ unsafeFinalize, -- :: ByteString -> IO ()
+#endif
+
+ -- ** Using ByteStrings as CStrings
+ useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
+ unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
+ unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
+
+ -- ** Copying ByteStrings
+ -- | These functions perform memcpy(3) operations
+ copy, -- :: ByteString -> ByteString
+ copyCString, -- :: CString -> ByteString
+ copyCStringLen, -- :: CStringLen -> ByteString
+
+ -- * I\/O with @ByteString@s
+
+ -- ** Standard input and output
+
+#if defined(__GLASGOW_HASKELL__)
+ getLine, -- :: IO ByteString
+#endif
+ getContents, -- :: IO ByteString
+ putStr, -- :: ByteString -> IO ()
+ putStrLn, -- :: ByteString -> IO ()
+
+ -- ** Files
+ readFile, -- :: FilePath -> IO ByteString
+ writeFile, -- :: FilePath -> ByteString -> IO ()
+
+ -- ** I\/O with Handles
+#if defined(__GLASGOW_HASKELL__)
+ getArgs, -- :: IO [ByteString]
+ hGetLine, -- :: Handle -> IO ByteString
+ hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
+#endif
+ hGetContents, -- :: Handle -> IO ByteString
+ hGet, -- :: Handle -> Int -> IO ByteString
+ hPut, -- :: Handle -> ByteString -> IO ()
+
+#if defined(__GLASGOW_HASKELL__)
+ -- * Miscellaneous
+ unpackList, -- eek, otherwise it gets thrown away by the simplifier
+#endif
+
+ ) where
+
+import qualified Prelude as P
+import Prelude hiding (reverse,head,tail,last,init,null
+ ,length,map,lines,foldl,foldr,unlines
+ ,concat,any,take,drop,splitAt,takeWhile
+ ,dropWhile,span,break,elem,filter,maximum
+ ,minimum,all,concatMap,foldl1,foldr1
+ ,readFile,writeFile,replicate
+ ,getContents,getLine,putStr,putStrLn
+ ,zip,zipWith,unzip,notElem)
+
+import qualified Data.List as List
+
+import Data.Char
+import Data.Word (Word8)
+import Data.Int (Int32)
+import Data.Bits (rotateL)
+import Data.Maybe (listToMaybe)
+import Data.Array (listArray)
+import qualified Data.Array as Array ((!))
+
+import Control.Exception (bracket)
+
+import Foreign.C.Types (CSize, CInt)
+import Foreign.C.String (CString, CStringLen)
+import Foreign.Storable
+import Foreign.ForeignPtr
+import Foreign.Ptr
+import Foreign.Marshal.Array
+
+import System.IO (stdin,stdout,hClose,hFileSize
+ ,hGetBuf,hPutBuf,openBinaryFile
+ ,Handle,IOMode(..))
+
+#if defined(__GLASGOW_HASKELL__)
+
+import System.IO (hGetBufNonBlocking)
+
+import qualified Foreign.Concurrent as FC (newForeignPtr)
+
+import Data.Generics (Data(..), Typeable(..))
+
+import System.IO.Error (isEOFError)
+import Foreign.Marshal (alloca)
+
+import GHC.Handle
+import GHC.Prim
+import GHC.Base (build, unsafeChr)
+import GHC.Word hiding (Word8)
+import GHC.Ptr (Ptr(..))
+import GHC.ST (ST(..))
+import GHC.IOBase
+
+#else
+
+import System.IO.Unsafe
+
+#endif
+
+-- -----------------------------------------------------------------------------
+--
+-- Useful macros, until we have bang patterns
+--
+
+#define STRICT1(f) f a | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
+#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
+
+-- -----------------------------------------------------------------------------
+
+-- | A space-efficient representation of a Word8 vector, supporting many
+-- efficient operations. A 'ByteString' contains 8-bit characters only.
+--
+-- Instances of Eq, Ord, Read, Show, Data, Typeable
+--
+data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
+ {-# UNPACK #-} !Int
+ {-# UNPACK #-} !Int
+
+#if defined(__GLASGOW_HASKELL__)
+ deriving (Data, Typeable)
+#endif
+
+instance Eq ByteString
+ where (==) = eq
+
+instance Ord ByteString
+ where compare = compareBytes
+
+instance Show ByteString where
+ showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
+
+instance Read ByteString where
+ readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
+
+{-
+instance Arbitrary PackedString where
+ arbitrary = P.pack `fmap` arbitrary
+ coarbitrary s = coarbitrary (P.unpack s)
+-}
+
+-- | /O(n)/ Equality on the 'ByteString' type.
+eq :: ByteString -> ByteString -> Bool
+eq a b = (compareBytes a b) == EQ
+{-# INLINE eq #-}
+
+-- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
+compareBytes :: ByteString -> ByteString -> Ordering
+compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
+compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
+ withForeignPtr x1 $ \p1 ->
+ withForeignPtr x2 $ \p2 -> do
+ i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
+ return $ case i `compare` 0 of
+ EQ -> l1 `compare` l2
+ x -> x
+{-# INLINE compareBytes #-}
+
+{-
+--
+-- About 4x slower over 32M
+--
+compareBytes :: ByteString -> ByteString -> Ordering
+compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
+ withForeignPtr fp1 $ \p1 ->
+ withForeignPtr fp2 $ \p2 ->
+ cmp (p1 `plusPtr` off1)
+ (p2 `plusPtr` off2) 0 len1 len2
+
+cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
+STRICT5(cmp)
+cmp p1 p2 n len1 len2
+ | n == len1 = if n == len2 then return EQ else return LT
+ | n == len2 = return GT
+ | otherwise = do
+ (a :: Word8) <- peekByteOff p1 n
+ (b :: Word8) <- peekByteOff p2 n
+ case a `compare` b of
+ EQ -> cmp p1 p2 (n+1) len1 len2
+ LT -> return LT
+ GT -> return GT
+{-# INLINE compareBytes #-}
+-}
+
+-- -----------------------------------------------------------------------------
+-- Introducing and eliminating 'ByteString's
+
+-- | /O(1)/ The empty 'ByteString'
+empty :: ByteString
+empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
+{-# NOINLINE empty #-}
+
+-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
+packByte :: Word8 -> ByteString
+packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
+ withForeignPtr fp $ \p -> poke p c
+ return $ PS fp 0 1
+{-# NOINLINE packByte #-}
+
+-- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
+--
+-- For applications with large numbers of string literals, pack can be a
+-- bottleneck. In such cases, consider using packAddress (GHC only).
+pack :: [Word8] -> ByteString
+
+#if !defined(__GLASGOW_HASKELL__)
+
+pack str = create (P.length str) $ \p -> go p str
+ where
+ go _ [] = return ()
+ go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
+
+#else /* hack away */
+
+pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
+ where
+ go _ _ [] = return ()
+ go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
+
+ writeByte p i c = ST $ \s# ->
+ case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
+
+#endif
+
+-- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
+unpack :: ByteString -> [Word8]
+
+#if !defined(__GLASGOW_HASKELL__)
+
+unpack (PS _ _ 0) = []
+unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
+ go (p `plusPtr` s) (l - 1) []
+ where
+ STRICT3(go)
+ go p 0 acc = peek p >>= \e -> return (e : acc)
+ go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
+{-# INLINE unpack #-}
+
+#else
+
+unpack ps = build (unpackFoldr ps)
+{-# INLINE unpack #-}
+
+unpackList :: ByteString -> [Word8]
+unpackList (PS fp off len) = withPtr fp $ \p -> do
+ let STRICT3(loop)
+ loop _ (-1) acc = return acc
+ loop q n acc = do
+ a <- peekByteOff q n
+ loop q (n-1) (a : acc)
+ loop (p `plusPtr` off) (len-1) []
+
+{-# RULES
+"unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
+ #-}
+
+unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
+unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
+ let STRICT3(loop)
+ loop _ (-1) acc = return acc
+ loop q n acc = do
+ a <- peekByteOff q n
+ loop q (n-1) (a `f` acc)
+ loop (p `plusPtr` off) (len-1) ch
+{-# INLINE [0] unpackFoldr #-}
+
+#endif
+
+------------------------------------------------------------------------
+
+-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
+-- conversion function
+packWith :: (a -> Word8) -> [a] -> ByteString
+packWith k str = create (P.length str) $ \p -> go p str
+ where
+ STRICT2(go)
+ go _ [] = return ()
+ go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
+{-# INLINE packWith #-}
+{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
+
+-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
+unpackWith :: (Word8 -> a) -> ByteString -> [a]
+unpackWith _ (PS _ _ 0) = []
+unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
+ go (p `plusPtr` s) (l - 1) []
+ where
+ STRICT3(go)
+ go p 0 acc = peek p >>= \e -> return (k e : acc)
+ go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
+{-# INLINE unpackWith #-}
+{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
+
+-- ---------------------------------------------------------------------
+-- Basic interface
+
+-- | /O(1)/ Test whether a ByteString is empty.
+null :: ByteString -> Bool
+null (PS _ _ l) = l == 0
+{-# INLINE null #-}
+
+-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
+length :: ByteString -> Int
+length (PS _ _ l) = l
+{-# INLINE length #-}
+
+-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
+-- complexity, as it requires a memcpy.
+cons :: Word8 -> ByteString -> ByteString
+cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
+ memcpy (p `plusPtr` 1) (f `plusPtr` s) l
+ poke p c
+{-# INLINE cons #-}
+
+-- | /O(n)/ Append a byte to the end of a 'ByteString'
+snoc :: ByteString -> Word8 -> ByteString
+snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
+ memcpy p (f `plusPtr` s) l
+ poke (p `plusPtr` l) c
+{-# INLINE snoc #-}
+
+-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
+head :: ByteString -> Word8
+head ps@(PS x s _)
+ | null ps = errorEmptyList "head"
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
+{-# INLINE head #-}
+
+-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
+tail :: ByteString -> ByteString
+tail (PS p s l)
+ | l <= 0 = errorEmptyList "tail"
+ | otherwise = PS p (s+1) (l-1)
+{-# INLINE tail #-}
+
+-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
+last :: ByteString -> Word8
+last ps@(PS x s l)
+ | null ps = errorEmptyList "last"
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
+{-# INLINE last #-}
+
+-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
+init :: ByteString -> ByteString
+init (PS p s l)
+ | l <= 0 = errorEmptyList "init"
+ | otherwise = PS p s (l-1)
+{-# INLINE init #-}
+
+-- | /O(n)/ Append two ByteStrings
+append :: ByteString -> ByteString -> ByteString
+append xs ys | null xs = ys
+ | null ys = xs
+ | otherwise = concat [xs,ys]
+{-# INLINE append #-}
+
+{-
+--
+-- About 30% faster, but allocating in a big chunk isn't good for memory use
+--
+append :: ByteString -> ByteString -> ByteString
+append xs@(PS ffp s l) ys@(PS fgp t m)
+ | null xs = ys
+ | null ys = xs
+ | otherwise = create len $ \ptr ->
+ withForeignPtr ffp $ \fp ->
+ withForeignPtr fgp $ \gp -> do
+ memcpy ptr (fp `plusPtr` s) l
+ memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
+ where len = length xs + length ys
+-}
+
+-- ---------------------------------------------------------------------
+-- Transformations
+
+-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
+-- element of @xs@
+--
+map :: (Word8 -> Word8) -> ByteString -> ByteString
+map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
+ new_fp <- mallocByteString len
+ withForeignPtr new_fp $ \new_p -> do
+ map_ f (len-1) (p `plusPtr` start) new_p
+ return (PS new_fp 0 len)
+{-# INLINE map #-}
+
+map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
+STRICT4(map_)
+map_ f n p1 p2
+ | n < 0 = return ()
+ | otherwise = do
+ x <- peekByteOff p1 n
+ pokeByteOff p2 n (f x)
+ map_ f (n-1) p1 p2
+{-# INLINE map_ #-}
+
+-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
+reverse :: ByteString -> ByteString
+reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
+ c_reverse p (f `plusPtr` s) l
+
+-- reverse = pack . P.reverse . unpack
+
+-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
+-- 'ByteString' and \`intersperses\' that byte between the elements of
+-- the 'ByteString'. It is analogous to the intersperse function on
+-- Lists.
+intersperse :: Word8 -> ByteString -> ByteString
+intersperse c ps@(PS x s l)
+ | length ps < 2 = ps
+ | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
+ c_intersperse p (f `plusPtr` s) l c
+
+-- intersperse c = pack . List.intersperse c . unpack
+
+-- | The 'transpose' function transposes the rows and columns of its
+-- 'ByteString' argument.
+transpose :: [ByteString] -> [ByteString]
+transpose ps = P.map pack (List.transpose (P.map unpack ps))
+
+-- ---------------------------------------------------------------------
+-- Reducing 'ByteString's
+
+-- | 'foldl', applied to a binary operator, a starting value (typically
+-- the left-identity of the operator), and a ByteString, reduces the
+-- ByteString using the binary operator, from left to right.
+foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
+foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+ lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+ where
+ STRICT3(lgo)
+ lgo z p q | p == q = return z
+ | otherwise = do c <- peek p
+ lgo (f z c) (p `plusPtr` 1) q
+
+-- | 'foldr', applied to a binary operator, a starting value
+-- (typically the right-identity of the operator), and a ByteString,
+-- reduces the ByteString using the binary operator, from right to left.
+foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
+foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+ go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+ where
+ STRICT2(go)
+ go p q | p == q = return z
+ | otherwise = do c <- peek p
+ ws <- go (p `plusPtr` 1) q
+ return $ c `k` ws
+
+-- | 'foldl1' is a variant of 'foldl' that has no starting value
+-- argument, and thus must be applied to non-empty 'ByteStrings'.
+foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+foldl1 f ps
+ | null ps = errorEmptyList "foldl1"
+ | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
+
+-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
+-- and thus must be applied to non-empty 'ByteString's
+foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+foldr1 f ps
+ | null ps = errorEmptyList "foldr1"
+ | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
+
+-- ---------------------------------------------------------------------
+-- Special folds
+
+-- | /O(n)/ Concatenate a list of ByteStrings.
+concat :: [ByteString] -> ByteString
+concat [] = empty
+concat [ps] = ps
+concat xs = inlinePerformIO $ do
+ let start_size = 1024
+ p <- mallocArray start_size
+ f p 0 1024 xs
+
+ where f ptr len _ [] = do
+ ptr' <- reallocArray ptr (len+1)
+ poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
+ fp <- newForeignFreePtr ptr'
+ return $ PS fp 0 len
+
+ f ptr len to_go pss@(PS p s l:pss')
+ | l <= to_go = do withForeignPtr p $ \pf ->
+ memcpy (ptr `plusPtr` len)
+ (pf `plusPtr` s) l
+ f ptr (len + l) (to_go - l) pss'
+
+ | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
+ ptr' <- reallocArray ptr new_total
+ f ptr' len (new_total - len) pss
+
+-- | Map a function over a 'ByteString' and concatenate the results
+concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
+concatMap f = foldr (append . f) empty
+
+-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
+-- any element of the 'ByteString' satisfies the predicate.
+any :: (Word8 -> Bool) -> ByteString -> Bool
+any _ (PS _ _ 0) = False
+any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+ go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+ where
+ STRICT2(go)
+ go p q | p == q = return False
+ | otherwise = do c <- peek p
+ if f c then return True
+ else go (p `plusPtr` 1) q
+
+-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
+-- if all elements of the 'ByteString' satisfy the predicate.
+all :: (Word8 -> Bool) -> ByteString -> Bool
+all _ (PS _ _ 0) = True
+all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+ go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+ where
+ STRICT2(go)
+ go p q | p == q = return True -- end of list
+ | otherwise = do c <- peek p
+ if f c
+ then go (p `plusPtr` 1) q
+ else return False
+
+-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
+maximum :: ByteString -> Word8
+maximum xs@(PS x s l)
+ | null xs = errorEmptyList "maximum"
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
+ return $ c_maximum (p `plusPtr` s) l
+
+-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
+minimum :: ByteString -> Word8
+minimum xs@(PS x s l)
+ | null xs = errorEmptyList "minimum"
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
+ return $ c_minimum (p `plusPtr` s) l
+
+{-
+maximum xs@(PS x s l)
+ | null xs = errorEmptyList "maximum"
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
+ w <- peek p
+ maximum_ (p `plusPtr` s) 0 l w
+{-# INLINE maximum #-}
+
+maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
+STRICT4(maximum_)
+maximum_ ptr n m c
+ | n >= m = return c
+ | otherwise = do w <- peekByteOff ptr n
+ maximum_ ptr (n+1) m (if w > c then w else c)
+
+minimum xs@(PS x s l)
+ | null xs = errorEmptyList "minimum"
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
+ w <- peek p
+ minimum_ (p `plusPtr` s) 0 l w
+{-# INLINE minimum #-}
+
+minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
+STRICT4(minimum_)
+minimum_ ptr n m c
+ | n >= m = return c
+ | otherwise = do w <- peekByteOff ptr n
+ minimum_ ptr (n+1) m (if w < c then w else c)
+-}
+-- | /O(n)/ map Word8 functions, provided with the index at each position
+mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
+mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
+ go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
+ where
+ go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
+ STRICT4(go)
+ go n f t p | f == p = return ()
+ | otherwise = do w <- peek f
+ ((poke t) . k n) w
+ go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
+
+-- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key.
+hash :: ByteString -> Int32
+hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
+ go (0 :: Int32) (p `plusPtr` s) l
+ where
+ go :: Int32 -> Ptr Word8 -> Int -> IO Int32
+ STRICT3(go)
+ go h _ 0 = return h
+ go h p n = do w <- peek p
+ go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1)
+
+-- ---------------------------------------------------------------------
+-- Unfolds and replicates
+
+-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
+-- the value of every element. The following holds:
+--
+-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
+--
+-- This implemenation uses @memset(3)@
+replicate :: Int -> Word8 -> ByteString
+replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
+
+{-
+-- About 5x slower
+replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
+ where
+ STRICT2(go)
+ go _ 0 = return w
+ go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
+-}
+
+-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
+-- 'unfoldrN' builds a ByteString from a seed value. The function takes
+-- the element and returns 'Nothing' if it is done producing the
+-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
+-- prepending to the ByteString and @b@ is used as the next element in a
+-- recursive call.
+--
+-- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
+-- character to a ByteString is /O(n)/, this unfoldr requires a maximum
+-- final size of the ByteString as an argument. 'cons' can then be
+-- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
+-- linear complexity. The depth of the recursion is limited to this
+-- size, but may be less. For lazy, infinite unfoldr, use
+-- 'Data.List.unfoldr' (from 'Data.List').
+--
+-- Examples:
+--
+-- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
+--
+-- The following equation connects the depth-limited unfoldr to the List unfoldr:
+--
+-- > unfoldrN n == take n $ List.unfoldr
+unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
+unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
+ where
+ STRICT3(go)
+ go q c n | n == i = return n -- stop if we reach `i'
+ | otherwise = case f c of
+ Nothing -> return n
+ Just (a,new_c) -> do
+ poke q a
+ go (q `plusPtr` 1) new_c (n+1)
+
+-- ---------------------------------------------------------------------
+-- Substrings
+
+-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
+-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
+take :: Int -> ByteString -> ByteString
+take n ps@(PS x s l)
+ | n < 0 = empty
+ | n >= l = ps
+ | otherwise = PS x s n
+{-# INLINE take #-}
+
+-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
+-- elements, or @[]@ if @n > 'length' xs@.
+drop :: Int -> ByteString -> ByteString
+drop n ps@(PS x s l)
+ | n <= 0 = ps
+ | n > l = empty
+ | otherwise = PS x (s+n) (l-n)
+{-# INLINE drop #-}
+
+-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
+splitAt :: Int -> ByteString -> (ByteString, ByteString)
+splitAt n ps = (take n ps, drop n ps)
+{-# INLINE splitAt #-}
+
+-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
+-- returns the longest prefix (possibly empty) of @xs@ of elements that
+-- satisfy @p@.
+takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
+takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
+{-# INLINE takeWhile #-}
+
+-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
+dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
+dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
+{-# INLINE dropWhile #-}
+
+-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
+break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
+{-# INLINE break #-}
+
+-- | 'breakByte' breaks its ByteString argument at the first occurence
+-- of the specified byte. It is more efficient than 'break' as it is
+-- implemented with @memchr(3)@. I.e.
+--
+-- > break (=='c') "abcd" == breakByte 'c' "abcd"
+--
+breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
+breakByte c p = case elemIndex c p of
+ Nothing -> (p,empty)
+ Just n -> (take n p, drop n p)
+{-# INLINE breakByte #-}
+
+-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
+-- occurence of @w@. It behaves like 'break', except the delimiter is
+-- not returned, and @Nothing@ is returned if the delimiter is not in
+-- the ByteString. I.e.
+--
+-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
+--
+-- > breakFirst c xs ==
+-- > let (x,y) = break (== c) xs
+-- > in if null y then Nothing else Just (x, drop 1 y))
+--
+breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+breakFirst c p = case elemIndex c p of
+ Nothing -> Nothing
+ Just n -> Just (take n p, drop (n+1) p)
+{-# INLINE breakFirst #-}
+
+-- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
+-- ByteString.
+--
+-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
+--
+-- and the following are equivalent:
+--
+-- > breakLast 'c' "abcdef"
+-- > let (x,y) = break (=='c') (reverse "abcdef")
+-- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
+--
+breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+breakLast c p = case elemIndexLast c p of
+ Nothing -> Nothing
+ Just n -> Just (take n p, drop (n+1) p)
+{-# INLINE breakLast #-}
+
+-- | 'span' @p xs@ breaks the ByteString into two segments. It is
+-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
+span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+span p ps = break (not . p) ps
+{-# INLINE span #-}
+
+-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
+-- We have
+--
+-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- > ==
+-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
+--
+spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
+
+-- | /O(n)/ Splits a 'ByteString' into components delimited by
+-- separators, where the predicate returns True for a separator element.
+-- The resulting components do not contain the separators. Two adjacent
+-- separators result in an empty component in the output. eg.
+--
+-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
+-- > splitWith (=='a') [] == []
+--
+splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
+
+#if defined(__GLASGOW_HASKELL__)
+splitWith _pred (PS _ _ 0) = []
+splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
+ where pred# c# = pred_ (W8# c#)
+
+ splitWith' pred' off' len' fp' = withPtr fp $ \p ->
+ splitLoop pred' p 0 off' len' fp'
+
+ splitLoop :: (Word# -> Bool)
+ -> Ptr Word8
+ -> Int -> Int -> Int
+ -> ForeignPtr Word8
+ -> IO [ByteString]
+
+ splitLoop pred' p idx' off' len' fp'
+ | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
+ | idx' >= len' = return [PS fp' off' idx']
+ | otherwise = do
+ w <- peekElemOff p (off'+idx')
+ if pred' (case w of W8# w# -> w#)
+ then return (PS fp' off' idx' :
+ splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
+ else splitLoop pred' p (idx'+1) off' len' fp'
+{-# INLINE splitWith #-}
+
+#else
+splitWith _ (PS _ _ 0) = []
+splitWith p ps = splitWith' p ps
+ where
+ STRICT2(splitWith')
+ splitWith' q qs = if null rest then [chunk]
+ else chunk : splitWith' q (unsafeTail rest)
+ where (chunk,rest) = break q qs
+#endif
+
+-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
+-- argument, consuming the delimiter. I.e.
+--
+-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
+-- > split 'a' "aXaXaXa" == ["","X","X","X"]
+-- > split 'x' "x" == ["",""]
+--
+-- and
+--
+-- > join [c] . split c == id
+-- > split == splitWith . (==)
+--
+-- As for all splitting functions in this library, this function does
+-- not copy the substrings, it just constructs new 'ByteStrings' that
+-- are slices of the original.
+--
+split :: Word8 -> ByteString -> [ByteString]
+split _ (PS _ _ 0) = []
+split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ let ptr = p `plusPtr` s
+
+ STRICT1(loop)
+ loop n = do
+ let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
+ if q == nullPtr
+ then return [PS x (s+n) (l-n)]
+ else do let i = q `minusPtr` ptr
+ ls <- loop (i+1)
+ return $! PS x (s+n) (i-n) : ls
+ loop 0
+{-# INLINE split #-}
+
+{-
+-- slower. but stays inside Haskell.
+split _ (PS _ _ 0) = []
+split (W8# w#) (PS fp off len) = splitWith' off len fp
+ where
+ splitWith' off' len' fp' = withPtr fp $ \p ->
+ splitLoop p 0 off' len' fp'
+
+ splitLoop :: Ptr Word8
+ -> Int -> Int -> Int
+ -> ForeignPtr Word8
+ -> IO [ByteString]
+
+ STRICT5(splitLoop)
+ splitLoop p idx' off' len' fp'
+ | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
+ | idx' >= len' = return [PS fp' off' idx']
+ | otherwise = do
+ (W8# x#) <- peekElemOff p (off'+idx')
+ if word2Int# w# ==# word2Int# x#
+ then return (PS fp' off' idx' :
+ splitWith' (off'+idx'+1) (len'-idx'-1) fp')
+ else splitLoop p (idx'+1) off' len' fp'
+-}
+
+-- | Like 'splitWith', except that sequences of adjacent separators are
+-- treated as a single separator. eg.
+--
+-- > tokens (=='a') "aabbaca" == ["bb","c"]
+--
+tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
+tokens f = P.filter (not.null) . splitWith f
+
+-- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
+-- 'ByteString's and concatenates the list after interspersing the first
+-- argument between each element of the list.
+join :: ByteString -> [ByteString] -> ByteString
+join filler pss = concat (splice pss)
+ where
+ splice [] = []
+ splice [x] = [x]
+ splice (x:y:xs) = x:filler:splice (y:xs)
+
+--
+-- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
+-- with a char. Around 4 times faster than the generalised join.
+--
+joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
+joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
+ withForeignPtr ffp $ \fp ->
+ withForeignPtr fgp $ \gp -> do
+ memcpy ptr (fp `plusPtr` s) l
+ poke (ptr `plusPtr` l) c
+ memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
+ where
+ len = length f + length g + 1
+{-# INLINE joinWithByte #-}
+
+-- ---------------------------------------------------------------------
+-- Indexing ByteStrings
+
+-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
+index :: ByteString -> Int -> Word8
+index ps n
+ | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
+ | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
+ ++ ", length = " ++ show (length ps)
+ | otherwise = ps `unsafeIndex` n
+{-# INLINE index #-}
+
+-- | /O(n)/ The 'elemIndex' function returns the index of the first
+-- element in the given 'ByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element.
+-- This implementation uses memchr(3).
+elemIndex :: Word8 -> ByteString -> Maybe Int
+elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ let p' = p `plusPtr` s
+ q = memchr p' c (fromIntegral l)
+ return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
+{-# INLINE elemIndex #-}
+
+-- | /O(n)/ The 'elemIndexLast' function returns the last index of the
+-- element in the given 'ByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element. The following
+-- holds:
+--
+-- > elemIndexLast c xs ==
+-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
+--
+elemIndexLast :: Word8 -> ByteString -> Maybe Int
+elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
+ go (p `plusPtr` s) (l-1)
+ where
+ STRICT2(go)
+ go p i | i < 0 = return Nothing
+ | otherwise = do ch' <- peekByteOff p i
+ if ch == ch'
+ then return $ Just i
+ else go p (i-1)
+{-# INLINE elemIndexLast #-}
+
+-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
+-- the indices of all elements equal to the query element, in ascending order.
+-- This implementation uses memchr(3).
+elemIndices :: Word8 -> ByteString -> [Int]
+elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ let ptr = p `plusPtr` s
+
+ STRICT1(loop)
+ loop n = do
+ let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
+ if q == nullPtr
+ then return []
+ else do let i = q `minusPtr` ptr
+ ls <- loop (i+1)
+ return $! i:ls
+ loop 0
+
+{-
+-- much slower
+elemIndices :: Word8 -> ByteString -> [Int]
+elemIndices c ps = loop 0 ps
+ where STRICT2(loop)
+ loop _ ps' | null ps' = []
+ loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
+ | otherwise = loop (n+1) (unsafeTail ps')
+-}
+
+-- | count returns the number of times its argument appears in the ByteString
+--
+-- > count = length . elemIndices
+--
+-- But more efficiently than using length on the intermediate list.
+count :: Word8 -> ByteString -> Int
+count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
+ go (p `plusPtr` s) (fromIntegral m) 0
+ where
+ go :: Ptr Word8 -> CSize -> Int -> IO Int
+ STRICT3(go)
+ go p l i = do
+ let q = memchr p w l
+ if q == nullPtr
+ then return i
+ else do let k = fromIntegral $ q `minusPtr` p
+ go (q `plusPtr` 1) (l-k-1) (i+1)
+{-# INLINE count #-}
+
+-- | The 'findIndex' function takes a predicate and a 'ByteString' and
+-- returns the index of the first element in the ByteString
+-- satisfying the predicate.
+findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
+findIndex = (listToMaybe .) . findIndices
+
+-- | The 'findIndices' function extends 'findIndex', by returning the
+-- indices of all elements satisfying the predicate, in ascending order.
+findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
+findIndices p ps = loop 0 ps
+ where
+ STRICT2(loop)
+ loop _ qs | null qs = []
+ loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
+ | otherwise = loop (n+1) (unsafeTail qs)
+
+-- ---------------------------------------------------------------------
+-- Searching ByteStrings
+
+-- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
+elem :: Word8 -> ByteString -> Bool
+elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
+{-# INLINE elem #-}
+
+-- | /O(n)/ 'notElem' is the inverse of 'elem'
+notElem :: Word8 -> ByteString -> Bool
+notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
+{-# INLINE notElem #-}
+
+--
+-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
+-- case of filtering a single byte. It is more efficient to use
+-- /filterByte/ in this case.
+--
+-- > filterByte == filter . (==)
+--
+-- filterByte is around 10x faster, and uses much less space, than its
+-- filter equivalent
+filterByte :: Word8 -> ByteString -> ByteString
+filterByte w ps = replicate (count w ps) w
+
+{-
+-- slower than the replicate version
+
+filterByte ch ps@(PS x s l)
+ | null ps = ps
+ | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+ t <- go (f `plusPtr` s) p l
+ return (t `minusPtr` p) -- actual length
+ where
+ STRICT3(go)
+ go _ t 0 = return t
+ go f t e = do w <- peek f
+ if w == ch
+ then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
+ else go (f `plusPtr` 1) t (e-1)
+-}
+
+--
+-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
+-- case of filtering a single byte out of a list. It is more efficient
+-- to use /filterNotByte/ in this case.
+--
+-- > filterNotByte == filter . (/=)
+--
+-- filterNotByte is around 3x faster, and uses much less space, than its
+-- filter equivalent
+filterNotByte :: Word8 -> ByteString -> ByteString
+filterNotByte ch ps@(PS x s l)
+ | null ps = ps
+ | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+ t <- go (f `plusPtr` s) p l
+ return (t `minusPtr` p) -- actual length
+ where
+ STRICT3(go)
+ go _ t 0 = return t
+ go f t e = do w <- peek f
+ if w /= ch
+ then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
+ else go (f `plusPtr` 1) t (e-1)
+
+-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
+-- returns a ByteString containing those characters that satisfy the
+-- predicate.
+filter :: (Word8 -> Bool) -> ByteString -> ByteString
+filter k ps@(PS x s l)
+ | null ps = ps
+ | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+ t <- go (f `plusPtr` s) p l
+ return (t `minusPtr` p) -- actual length
+ where
+ STRICT3(go)
+ go _ t 0 = return t
+ go f t e = do w <- peek f
+ if k w
+ then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
+ else go (f `plusPtr` 1) t (e - 1)
+
+-- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
+
+-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
+-- and returns the first element in matching the predicate, or 'Nothing'
+-- if there is no such element.
+find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
+find p ps = case filter p ps of
+ q | null q -> Nothing
+ | otherwise -> Just (unsafeHead q)
+
+-- ---------------------------------------------------------------------
+-- Searching for substrings
+
+-- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
+-- iff the first is a prefix of the second.
+isPrefixOf :: ByteString -> ByteString -> Bool
+isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
+ | l1 == 0 = True
+ | l2 < l1 = False
+ | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
+ withForeignPtr x2 $ \p2 -> do
+ i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
+ return (i == 0)
+
+-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
+-- iff the first is a suffix of the second.
+--
+-- The following holds:
+--
+-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
+--
+-- However, the real implemenation uses memcmp to compare the end of the
+-- string only, with no reverse required..
+isSuffixOf :: ByteString -> ByteString -> Bool
+isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
+ | l1 == 0 = True
+ | l2 < l1 = False
+ | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
+ withForeignPtr x2 $ \p2 -> do
+ i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
+ return (i == 0)
+
+-- | Check whether one string is a substring of another. @isSubstringOf
+-- p s@ is equivalent to @not (null (findSubstrings p s))@.
+isSubstringOf :: ByteString -- ^ String to search for.
+ -> ByteString -- ^ String to search in.
+ -> Bool
+isSubstringOf p s = not $ P.null $ findSubstrings p s
+
+-- | Get the first index of a substring in another string,
+-- or 'Nothing' if the string is not found.
+-- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
+findSubstring :: ByteString -- ^ String to search for.
+ -> ByteString -- ^ String to seach in.
+ -> Maybe Int
+findSubstring = (listToMaybe .) . findSubstrings
+
+-- | Find the indexes of all (possibly overlapping) occurances of a
+-- substring in a string. This function uses the Knuth-Morris-Pratt
+-- string matching algorithm.
+findSubstrings :: ByteString -- ^ String to search for.
+ -> ByteString -- ^ String to seach in.
+ -> [Int]
+
+findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
+ where
+ patc x = pat `unsafeIndex` x
+ strc x = str `unsafeIndex` x
+
+ -- maybe we should make kmpNext a UArray before using it in search?
+ kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
+ kmpNextL p _ | null p = []
+ kmpNextL p j = let j' = next (unsafeHead p) j + 1
+ ps = unsafeTail p
+ x = if not (null ps) && unsafeHead ps == patc j'
+ then kmpNext Array.! j' else j'
+ in x:kmpNextL ps j'
+ search i j = match ++ rest -- i: position in string, j: position in pattern
+ where match = if j == m then [(i - j)] else []
+ rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
+ next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
+ | otherwise = j
+
+-- ---------------------------------------------------------------------
+-- Zipping
+
+-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
+-- corresponding pairs of bytes. If one input ByteString is short,
+-- excess elements of the longer ByteString are discarded. This is
+-- equivalent to a pair of 'unpack' operations.
+zip :: ByteString -> ByteString -> [(Word8,Word8)]
+zip ps qs
+ | null ps || null qs = []
+ | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
+
+-- | 'zipWith' generalises 'zip' by zipping with the function given as
+-- the first argument, instead of a tupling function. For example,
+-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
+-- corresponding sums.
+zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
+zipWith f ps qs
+ | null ps || null qs = []
+ | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
+
+-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
+-- ByteStrings. Note that this performs two 'pack' operations.
+unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
+unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
+{-# INLINE unzip #-}
+
+-- ---------------------------------------------------------------------
+-- Special lists
+
+-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
+inits :: ByteString -> [ByteString]
+inits (PS x s l) = [PS x s n | n <- [0..l]]
+
+-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
+tails :: ByteString -> [ByteString]
+tails p | null p = [empty]
+ | otherwise = p : tails (unsafeTail p)
+
+-- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
+
+-- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
+elems :: ByteString -> [ByteString]
+elems (PS _ _ 0) = []
+elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
+{-# INLINE elems #-}
+
+-- ---------------------------------------------------------------------
+-- ** Ordered 'ByteString's
+
+-- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
+sort :: ByteString -> ByteString
+sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
+ memcpy p (f `plusPtr` s) l
+ c_qsort p l -- inplace
+
+-- sort = pack . List.sort . unpack
+
+-- ---------------------------------------------------------------------
+--
+-- Extensions to the basic interface
+--
+
+-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
+-- check for the empty case, so there is an obligation on the programmer
+-- to provide a proof that the ByteString is non-empty.
+unsafeHead :: ByteString -> Word8
+unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
+{-# INLINE unsafeHead #-}
+
+-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
+-- check for the empty case. As with 'unsafeHead', the programmer must
+-- provide a separate proof that the ByteString is non-empty.
+unsafeTail :: ByteString -> ByteString
+unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
+{-# INLINE unsafeTail #-}
+
+-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
+-- This omits the bounds check, which means there is an accompanying
+-- obligation on the programmer to ensure the bounds are checked in some
+-- other way.
+unsafeIndex :: ByteString -> Int -> Word8
+unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
+{-# INLINE unsafeIndex #-}
+
+-- ---------------------------------------------------------------------
+-- Low level constructors
+
+#if defined(__GLASGOW_HASKELL__)
+-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
+-- Addr\# (an arbitrary machine address assumed to point outside the
+-- garbage-collected heap) into a @ByteString@. A much faster way to
+-- create an Addr\# is with an unboxed string literal, than to pack a
+-- boxed string. A unboxed string literal is compiled to a static @char
+-- []@ by GHC. Establishing the length of the string requires a call to
+-- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
+-- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
+-- if you know the length of the string statically.
+--
+-- An example:
+--
+-- > literalFS = packAddress "literal"#
+--
+packAddress :: Addr# -> ByteString
+packAddress addr# = inlinePerformIO $ do
+ p <- newForeignPtr_ cstr
+ return $ PS p 0 (fromIntegral $ c_strlen cstr)
+ where
+ cstr = Ptr addr#
+{-# INLINE packAddress #-}
+
+-- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
+-- 'ByteStrings' -- which is ideal for string literals. It packs a
+-- null-terminated sequence of bytes into a 'ByteString', given a raw
+-- 'Addr\#' to the string, and the length of the string. Make sure the
+-- length is correct, otherwise use the safer 'packAddress' (where the
+-- length will be calculated once at runtime).
+unsafePackAddress :: Int -> Addr# -> ByteString
+unsafePackAddress len addr# = inlinePerformIO $ do
+ p <- newForeignPtr_ cstr
+ return $ PS p 0 len
+ where cstr = Ptr addr#
+
+#endif
+
+-- | /O(1)/ Build a ByteString from a ForeignPtr
+fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
+fromForeignPtr fp l = PS fp 0 l
+
+-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
+toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
+toForeignPtr (PS ps s l) = (ps, s, l)
+
+-- | /O(1)/ 'skipIndex' returns the internal skipped index of the
+-- current 'ByteString' from any larger string it was created from, as
+-- an 'Int'.
+skipIndex :: ByteString -> Int
+skipIndex (PS _ s _) = s
+{-# INLINE skipIndex #-}
+
+-- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
+-- finalizer associated to it. The ByteString length is calculated using
+-- /strlen(3)/, and thus the complexity is a /O(n)/.
+packCString :: CString -> ByteString
+packCString cstr = inlinePerformIO $ do
+ fp <- newForeignPtr_ (castPtr cstr)
+ return $ PS fp 0 (fromIntegral $ c_strlen cstr)
+
+-- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
+-- have /no/ finalizer associated with it. This operation has /O(1)/
+-- complexity as we already know the final size, so no /strlen(3)/ is
+-- required.
+packCStringLen :: CStringLen -> ByteString
+packCStringLen (ptr,len) = inlinePerformIO $ do
+ fp <- newForeignPtr_ (castPtr ptr)
+ return $ PS fp 0 (fromIntegral len)
+
+-- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
+-- have a @free(3)@ finalizer associated to it.
+packMallocCString :: CString -> ByteString
+packMallocCString cstr = inlinePerformIO $ do
+ fp <- newForeignFreePtr (castPtr cstr)
+ return $ PS fp 0 (fromIntegral $ c_strlen cstr)
+
+#if defined(__GLASGOW_HASKELL__)
+-- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
+-- length, and an IO action representing a finalizer. This function is
+-- not available on Hugs.
+--
+packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
+packCStringFinalizer p l f = do
+ fp <- FC.newForeignPtr p f
+ return $ PS fp 0 l
+
+-- | Explicitly run the finaliser associated with a 'ByteString'.
+-- Further references to this value may generate invalid memory
+-- references. This operation is unsafe, as there may be other
+-- 'ByteStrings' referring to the same underlying pages. If you use
+-- this, you need to have a proof of some kind that all 'ByteString's
+-- ever generated from the underlying byte array are no longer live.
+unsafeFinalize :: ByteString -> IO ()
+unsafeFinalize (PS p _ _) = finalizeForeignPtr p
+
+#endif
+
+-- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
+-- The @CString@ should not be freed afterwards. This is a memcpy(3).
+useAsCString :: ByteString -> (CString -> IO a) -> IO a
+useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
+ where
+ alloc = withForeignPtr ps $ \p -> do
+ buf <- c_malloc (fromIntegral l+1)
+ memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
+ poke (buf `plusPtr` l) (0::Word8)
+ return $ castPtr buf
+
+-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
+-- Warning: modifying the @CString@ will affect the @ByteString@.
+-- Why is this function unsafe? It relies on the null byte at the end of
+-- the ByteString to be there. This is /not/ the case if your ByteString
+-- has been spliced from a larger string (i.e. with take or drop).
+-- Unless you can guarantee the null byte, you should use the safe
+-- version, which will copy the string first.
+--
+unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
+unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
+
+-- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
+-- This is mainly useful to allow the rest of the data pointed
+-- to by the 'ByteString' to be garbage collected, for example
+-- if a large string has been read in, and only a small part of it
+-- is needed in the rest of the program.
+copy :: ByteString -> ByteString
+copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
+
+-- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
+-- CString is going to be deallocated from C land.
+copyCString :: CString -> ByteString
+copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
+
+-- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
+copyCStringLen :: CStringLen -> ByteString
+copyCStringLen (cstr, len) = inlinePerformIO $ do
+ fp <- mallocForeignPtrArray (len+1)
+ withForeignPtr fp $ \p -> do
+ memcpy p (castPtr cstr) len
+ poke (p `plusPtr` len) (0 :: Word8)
+ return $! PS fp 0 len
+
+-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
+-- Warning: modifying the @CStringLen@ will affect the @ByteString@.
+-- This is analogous to unsafeUseAsCString, and comes with the same
+-- safety requirements.
+--
+unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
+unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
+
+-- | Given the maximum size needed and a function to make the contents
+-- of a ByteString, generate makes the 'ByteString'. The generating
+-- function is required to return the actual final size (<= the maximum
+-- size), and the resulting byte array is realloced to this size. The
+-- string is padded at the end with a null byte.
+--
+-- generate is the main mechanism for creating custom, efficient
+-- ByteString functions, using Haskell or C functions to fill the space.
+--
+generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
+generate i f = do
+ p <- mallocArray i
+ i' <- f p
+ p' <- reallocArray p (i'+1)
+ poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
+ fp <- newForeignFreePtr p'
+ return $ PS fp 0 i'
+
+-- ---------------------------------------------------------------------
+-- line IO
+
+#if defined(__GLASGOW_HASKELL__)
+
+-- | getLine, read a line from stdin.
+getLine :: IO ByteString
+getLine = hGetLine stdin
+
+-- | hGetLine. read a ByteString from a handle
+hGetLine :: Handle -> IO ByteString
+hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
+ case haBufferMode handle_ of
+ NoBuffering -> error "no buffering"
+ _other -> hGetLineBuffered handle_
+
+ where
+ hGetLineBuffered handle_ = do
+ let ref = haBuffer handle_
+ buf <- readIORef ref
+ hGetLineBufferedLoop handle_ ref buf 0 []
+
+ hGetLineBufferedLoop handle_ ref
+ buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
+ len `seq` do
+ off <- findEOL r w raw
+ let new_len = len + off - r
+ xs <- mkPS raw r off
+
+ -- if eol == True, then off is the offset of the '\n'
+ -- otherwise off == w and the buffer is now empty.
+ if off /= w
+ then do if (w == off + 1)
+ then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
+ else writeIORef ref buf{ bufRPtr = off + 1 }
+ mkBigPS new_len (xs:xss)
+ else do
+ maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
+ buf{ bufWPtr=0, bufRPtr=0 }
+ case maybe_buf of
+ -- Nothing indicates we caught an EOF, and we may have a
+ -- partial line to return.
+ Nothing -> do
+ writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
+ if new_len > 0
+ then mkBigPS new_len (xs:xss)
+ else ioe_EOF
+ Just new_buf ->
+ hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
+
+ -- find the end-of-line character, if there is one
+ findEOL r w raw
+ | r == w = return w
+ | otherwise = do
+ (c,r') <- readCharFromBuffer raw r
+ if c == '\n'
+ then return r -- NB. not r': don't include the '\n'
+ else findEOL r' w raw
+
+ maybeFillReadBuffer fd is_line is_stream buf = catch
+ (do buf' <- fillReadBuffer fd is_line is_stream buf
+ return (Just buf'))
+ (\e -> if isEOFError e then return Nothing else ioError e)
+
+-- TODO, rewrite to use normal memcpy
+mkPS :: RawBuffer -> Int -> Int -> IO ByteString
+mkPS buf start end = do
+ let len = end - start
+ fp <- mallocByteString (len `quot` 8)
+ withForeignPtr fp $ \p -> do
+ memcpy_ptr_baoff p buf start (fromIntegral len)
+ return (PS fp 0 len)
+
+mkBigPS :: Int -> [ByteString] -> IO ByteString
+mkBigPS _ [ps] = return ps
+mkBigPS _ pss = return $! concat (P.reverse pss)
+
+#endif
+
+-- ---------------------------------------------------------------------
+-- Block IO
+
+-- | Outputs a 'ByteString' to the specified 'Handle'.
+hPut :: Handle -> ByteString -> IO ()
+hPut _ (PS _ _ 0) = return ()
+hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
+hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
+
+-- | Write a ByteString to stdout
+putStr :: ByteString -> IO ()
+putStr = hPut stdout
+
+-- | Write a ByteString to stdout, appending a newline byte
+putStrLn :: ByteString -> IO ()
+putStrLn ps = hPut stdout ps >> hPut stdout nl
+ where nl = packByte 0x0a
+
+-- | Read a 'ByteString' directly from the specified 'Handle'. This
+-- is far more efficient than reading the characters into a 'String'
+-- and then using 'pack'.
+hGet :: Handle -> Int -> IO ByteString
+hGet _ 0 = return empty
+hGet h i = do fp <- mallocByteString i
+ l <- withForeignPtr fp $ \p-> hGetBuf h p i
+ return $ PS fp 0 l
+
+#if defined(__GLASGOW_HASKELL__)
+-- | hGetNonBlocking is identical to 'hGet', except that it will never block
+-- waiting for data to become available, instead it returns only whatever data
+-- is available.
+hGetNonBlocking :: Handle -> Int -> IO ByteString
+hGetNonBlocking _ 0 = return empty
+hGetNonBlocking h i = do
+ fp <- mallocByteString i
+ l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
+ return $ PS fp 0 l
+#endif
+
+-- | Read entire handle contents into a 'ByteString'.
+--
+-- As with 'hGet', the string representation in the file is assumed to
+-- be ISO-8859-1.
+--
+hGetContents :: Handle -> IO ByteString
+hGetContents h = do
+ let start_size = 1024
+ p <- mallocArray start_size
+ i <- hGetBuf h p start_size
+ if i < start_size
+ then do p' <- reallocArray p i
+ fp <- newForeignFreePtr p'
+ return $ PS fp 0 i
+ else f p start_size
+ where
+ f p s = do
+ let s' = 2 * s
+ p' <- reallocArray p s'
+ i <- hGetBuf h (p' `plusPtr` s) s
+ if i < s
+ then do let i' = s + i
+ p'' <- reallocArray p' i'
+ fp <- newForeignFreePtr p''
+ return $ PS fp 0 i'
+ else f p' s'
+
+-- | getContents. Equivalent to hGetContents stdin
+getContents :: IO ByteString
+getContents = hGetContents stdin
+
+-- | Read an entire file directly into a 'ByteString'. This is far more
+-- efficient than reading the characters into a 'String' and then using
+-- 'pack'. It also may be more efficient than opening the file and
+-- reading it using hGet.
+readFile :: FilePath -> IO ByteString
+readFile f = do
+ h <- openBinaryFile f ReadMode
+ l <- hFileSize h
+ s <- hGet h $ fromIntegral l
+ hClose h
+ return s
+
+-- | Write a 'ByteString' to a file.
+writeFile :: FilePath -> ByteString -> IO ()
+writeFile f ps = do
+ h <- openBinaryFile f WriteMode
+ hPut h ps
+ hClose h
+
+#if defined(__GLASGOW_HASKELL__)
+--
+-- | A ByteString equivalent for getArgs. More efficient for large argument lists
+--
+getArgs :: IO [ByteString]
+getArgs =
+ alloca $ \ p_argc ->
+ alloca $ \ p_argv -> do
+ getProgArgv p_argc p_argv
+ p <- fromIntegral `fmap` peek p_argc
+ argv <- peek p_argv
+ P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
+#endif
+
+-- ---------------------------------------------------------------------
+-- Internal utilities
+
+-- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
+-- silently truncate to 8 bits Chars > '\255'. They are provided as
+-- convenience for ByteString construction.
+w2c :: Word8 -> Char
+#if !defined(__GLASGOW_HASKELL__)
+w2c = chr . fromIntegral
+#else
+w2c = unsafeChr . fromIntegral
+#endif
+{-# INLINE w2c #-}
+
+c2w :: Char -> Word8
+c2w = fromIntegral . ord
+{-# INLINE c2w #-}
+
+-- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
+-- is padded with a null byte.
+mallocByteString :: Int -> IO (ForeignPtr Word8)
+mallocByteString l = do
+ fp <- mallocForeignPtrArray (l+1)
+ withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
+ return fp
+
+-- | A way of creating ForeignPtrs outside the IO monad. The @Int@
+-- argument gives the final size of the ByteString. Unlike 'generate'
+-- the ByteString is no reallocated if the final size is less than the
+-- estimated size.
+create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
+create l write_ptr = inlinePerformIO $ do
+ fp <- mallocByteString (l+1)
+ withForeignPtr fp $ \p -> write_ptr p
+ return $ PS fp 0 l
+{-# INLINE create #-}
+
+-- | Perform an operation with a temporary ByteString
+withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
+withPtr fp io = inlinePerformIO (withForeignPtr fp io)
+{-# INLINE withPtr #-}
+
+-- Common up near identical calls to `error' to reduce the number
+-- constant strings created when compiled:
+errorEmptyList :: String -> a
+errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
+{-# INLINE errorEmptyList #-}
+
+-- 'findIndexOrEnd' is a variant of findIndex, that returns the length
+-- of the string if no element is found, rather than Nothing.
+findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
+STRICT2(findIndexOrEnd)
+findIndexOrEnd f ps
+ | null ps = 0
+ | f (unsafeHead ps) = 0
+ | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
+{-# INLINE findIndexOrEnd #-}
+
+-- Find from the end of the string using predicate
+findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
+STRICT2(findFromEndUntil)
+findFromEndUntil f ps@(PS x s l) =
+ if null ps then 0
+ else if f (last ps) then l
+ else findFromEndUntil f (PS x s (l-1))
+
+-- Just like inlinePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining
+--
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif
+
+{-# INLINE newForeignFreePtr #-}
+newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
+#if defined(__GLASGOW_HASKELL__)
+newForeignFreePtr p = FC.newForeignPtr p (c_free p)
+#else
+newForeignFreePtr p = newForeignPtr c_free_finalizer p
+#endif
+
+-- ---------------------------------------------------------------------
+--
+-- Standard C functions
+--
+
+foreign import ccall unsafe "string.h strlen" c_strlen
+ :: CString -> CInt
+
+foreign import ccall unsafe "stdlib.h malloc" c_malloc
+ :: CInt -> IO (Ptr Word8)
+
+foreign import ccall unsafe "static stdlib.h free" c_free
+ :: Ptr Word8 -> IO ()
+
+#if !defined(__GLASGOW_HASKELL__)
+foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
+ :: FunPtr (Ptr Word8 -> IO ())
+#endif
+
+foreign import ccall unsafe "string.h memset" memset
+ :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
+
+foreign import ccall unsafe "string.h memchr" memchr
+ :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
+
+foreign import ccall unsafe "string.h memcmp" memcmp
+ :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
+
+foreign import ccall unsafe "string.h memcpy" memcpy
+ :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+
+-- ---------------------------------------------------------------------
+--
+-- Uses our C code
+--
+
+foreign import ccall unsafe "static fpstring.h reverse" c_reverse
+ :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+
+foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
+ :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
+
+foreign import ccall unsafe "static fpstring.h maximum" c_maximum
+ :: Ptr Word8 -> Int -> Word8
+
+foreign import ccall unsafe "static fpstring.h minimum" c_minimum
+ :: Ptr Word8 -> Int -> Word8
+
+foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
+ :: Ptr Word8 -> Int -> IO ()
+
+-- ---------------------------------------------------------------------
+-- Internal GHC Haskell magic
+
+#if defined(__GLASGOW_HASKELL__)
+foreign import ccall unsafe "RtsAPI.h getProgArgv"
+ getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
+
+foreign import ccall unsafe "__hscore_memcpy_src_off"
+ memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
+#endif
--- /dev/null
+{-# OPTIONS_GHC -cpp -fffi #-}
+--
+-- Module : Data.ByteString.Char8
+-- Copyright : (c) Don Stewart 2006
+-- License : BSD-style
+--
+-- Maintainer : dons@cse.unsw.edu.au
+-- Stability : experimental
+-- Portability : portable (tested with GHC>=6.4.1 and Hugs 2005)
+--
+
+--
+-- | Manipulate ByteStrings using Char operations. All Chars will be
+-- truncated to 8 bits. It can be expected that these functions will run
+-- at identical speeds to their Word8 equivalents in @Data.ByteString@.
+--
+-- More specifically these byte strings are taken to be in the
+-- subset of Unicode covered by code points 0-255. This covers
+-- Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls.
+--
+-- See:
+--
+-- * <http://www.unicode.org/charts/>
+--
+-- * <http://www.unicode.org/charts/PDF/U0000.pdf>
+--
+-- * <http://www.unicode.org/charts/PDF/U0080.pdf>
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with Prelude functions. eg.
+--
+-- > import qualified Data.ByteString.Char8 as B
+--
+
+module Data.ByteString.Char8 (
+
+ -- * The @ByteString@ type
+ ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+
+ -- * Introducing and eliminating 'ByteString's
+ empty, -- :: ByteString
+ packChar, -- :: Char -> ByteString
+ pack, -- :: String -> ByteString
+ unpack, -- :: ByteString -> String
+
+ -- * Basic interface
+ cons, -- :: Char -> ByteString -> ByteString
+ snoc, -- :: Char -> ByteString -> ByteString
+ null, -- :: ByteString -> Bool
+ length, -- :: ByteString -> Int
+ head, -- :: ByteString -> Char
+ tail, -- :: ByteString -> ByteString
+ last, -- :: ByteString -> Char
+ init, -- :: ByteString -> ByteString
+ append, -- :: ByteString -> ByteString -> ByteString
+
+ -- * Special ByteStrings
+ inits, -- :: ByteString -> [ByteString]
+ tails, -- :: ByteString -> [ByteString]
+ elems, -- :: ByteString -> [ByteString]
+
+ -- * Transformating ByteStrings
+ map, -- :: (Char -> Char) -> ByteString -> ByteString
+ reverse, -- :: ByteString -> ByteString
+ intersperse, -- :: Char -> ByteString -> ByteString
+ transpose, -- :: [ByteString] -> [ByteString]
+
+ -- * Reducing 'ByteString's
+ foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a
+ foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a
+ foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char
+ foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char
+
+ -- ** Special folds
+ concat, -- :: [ByteString] -> ByteString
+ concatMap, -- :: (Char -> ByteString) -> ByteString -> ByteString
+ any, -- :: (Char -> Bool) -> ByteString -> Bool
+ all, -- :: (Char -> Bool) -> ByteString -> Bool
+ maximum, -- :: ByteString -> Char
+ minimum, -- :: ByteString -> Char
+ mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString
+ hash, -- :: ByteString -> Int32
+
+ -- * Generating and unfolding ByteStrings
+ replicate, -- :: Int -> Char -> ByteString
+ unfoldrN, -- :: (Char -> Maybe (Char, Char)) -> Char -> ByteString
+
+ -- * Substrings
+
+ -- ** Breaking strings
+ take, -- :: Int -> ByteString -> ByteString
+ drop, -- :: Int -> ByteString -> ByteString
+ splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
+ takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString
+ dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString
+ break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+ span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+ spanEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+
+ -- ** Breaking and dropping on specific Chars
+ breakChar, -- :: Char -> ByteString -> (ByteString, ByteString)
+ breakFirst, -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
+ breakLast, -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
+ breakSpace, -- :: ByteString -> Maybe (ByteString,ByteString)
+ dropSpace, -- :: ByteString -> ByteString
+ dropSpaceEnd, -- :: ByteString -> ByteString
+
+ -- ** Breaking into many substrings
+ split, -- :: Char -> ByteString -> [ByteString]
+ splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString]
+ tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString]
+
+ -- ** Breaking into lines and words
+ lines, -- :: ByteString -> [ByteString]
+ words, -- :: ByteString -> [ByteString]
+ unlines, -- :: [ByteString] -> ByteString
+ unwords, -- :: ByteString -> [ByteString]
+
+ lines', -- :: ByteString -> [ByteString]
+ unlines', -- :: [ByteString] -> ByteString
+ linesCRLF', -- :: ByteString -> [ByteString]
+ unlinesCRLF', -- :: [ByteString] -> ByteString
+ words', -- :: ByteString -> [ByteString]
+ unwords', -- :: ByteString -> [ByteString]
+
+ lineIndices, -- :: ByteString -> [Int]
+ betweenLines, -- :: ByteString -> ByteString -> ByteString -> Maybe (ByteString)
+
+ -- ** Joining strings
+ join, -- :: ByteString -> [ByteString] -> ByteString
+ joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString
+
+ -- * Indexing ByteStrings
+ index, -- :: ByteString -> Int -> Char
+ elemIndex, -- :: Char -> ByteString -> Maybe Int
+ elemIndexLast, -- :: Char -> ByteString -> Maybe Int
+ elemIndices, -- :: Char -> ByteString -> [Int]
+ findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int
+ findIndices, -- :: (Char -> Bool) -> ByteString -> [Int]
+ count, -- :: Char -> ByteString -> Int
+
+ -- * Ordered ByteStrings
+ sort, -- :: ByteString -> ByteString
+
+ -- * Searching ByteStrings
+
+ -- ** Searching by equality
+ elem, -- :: Char -> ByteString -> Bool
+ notElem, -- :: Char -> ByteString -> Bool
+ filterChar, -- :: Char -> ByteString -> ByteString
+ filterNotChar, -- :: Char -> ByteString -> ByteString
+
+ -- ** Searching with a predicate
+ filter, -- :: (Char -> Bool) -> ByteString -> ByteString
+ find, -- :: (Char -> Bool) -> ByteString -> Maybe Char
+
+ -- ** Searching for substrings
+ isPrefixOf, -- :: ByteString -> ByteString -> Bool
+ isSuffixOf, -- :: ByteString -> ByteString -> Bool
+ isSubstringOf, -- :: ByteString -> ByteString -> Bool
+ findSubstring, -- :: ByteString -> ByteString -> Maybe Int
+ findSubstrings, -- :: ByteString -> ByteString -> [Int]
+
+ -- * Zipping and unzipping ByteString
+ zip, -- :: ByteString -> ByteString -> [(Char,Char)]
+ zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c]
+ unzip, -- :: [(Char,Char)] -> (ByteString,ByteString)
+
+ -- * Unchecked access
+ unsafeHead, -- :: ByteString -> Char
+ unsafeTail, -- :: ByteString -> ByteString
+ unsafeIndex, -- :: ByteString -> Int -> Char
+ w2c, -- :: Word8 -> Char
+ c2w, -- :: Char -> Word8
+
+ -- * Reading from ByteStrings
+ readInt, -- :: ByteString -> Maybe Int
+ unsafeReadInt, -- :: ByteString -> Maybe Int
+
+ -- * Copying ByteStrings
+ copy, -- :: ByteString -> ByteString
+
+ -- * I\/O with @ByteString@s
+
+ -- ** Standard input and output
+
+#if defined(__GLASGOW_HASKELL__)
+ getLine, -- :: IO ByteString
+#endif
+ getContents, -- :: IO ByteString
+ putStr, -- :: ByteString -> IO ()
+ putStrLn, -- :: ByteString -> IO ()
+
+ -- ** Files
+ readFile, -- :: FilePath -> IO ByteString
+ writeFile, -- :: FilePath -> ByteString -> IO ()
+
+ -- ** I\/O with Handles
+#if defined(__GLASGOW_HASKELL__)
+ getArgs, -- :: IO [ByteString]
+ hGetLine, -- :: Handle -> IO ByteString
+ hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
+#endif
+ hGetContents, -- :: Handle -> IO ByteString
+ hGet, -- :: Handle -> Int -> IO ByteString
+ hPut, -- :: Handle -> ByteString -> IO ()
+
+#if defined(__GLASGOW_HASKELL__)
+ -- * Low level construction
+ -- | For constructors from foreign language types see /Data.ByteString/
+ packAddress, -- :: Addr# -> ByteString
+ unsafePackAddress, -- :: Int -> Addr# -> ByteString
+#endif
+
+ ) where
+
+import qualified Prelude as P
+import Prelude hiding (reverse,head,tail,last,init,null
+ ,length,map,lines,foldl,foldr,unlines
+ ,concat,any,take,drop,splitAt,takeWhile
+ ,dropWhile,span,break,elem,filter,unwords
+ ,words,maximum,minimum,all,concatMap
+ ,foldl1,foldr1,readFile,writeFile,replicate
+ ,getContents,getLine,putStr,putStrLn
+ ,zip,zipWith,unzip,notElem)
+
+import qualified Data.ByteString as B
+
+-- Listy functions transparently exported
+import Data.ByteString (ByteString(..)
+ ,empty,null,length,tail,init,append
+ ,inits,tails,elems,reverse,transpose
+ ,concat,hash,take,drop,splitAt,join
+ ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring
+ ,findSubstrings,unsafeTail,copy
+
+ ,getContents, putStr, putStrLn
+ ,readFile, writeFile
+ ,hGetContents, hGet, hPut
+#if defined(__GLASGOW_HASKELL__)
+ ,getLine, getArgs, hGetLine, hGetNonBlocking
+ ,packAddress, unsafePackAddress
+#endif
+ ,useAsCString, unsafeUseAsCString
+ )
+
+import Data.Char
+
+import qualified Data.List as List (intersperse)
+
+import Foreign
+import Foreign.C.Types (CLong)
+import Foreign.Marshal.Utils (with)
+
+#if defined(__GLASGOW_HASKELL__)
+import GHC.Base (Char(..),unsafeChr,unpackCString#,unsafeCoerce#)
+import GHC.IOBase (IO(..),stToIO)
+import GHC.Prim (Addr#,writeWord8OffAddr#,realWorld#,plusAddr#)
+import GHC.Ptr (Ptr(..))
+import GHC.ST (ST(..))
+#endif
+
+#define STRICT1(f) f a | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+
+------------------------------------------------------------------------
+
+-- | /O(1)/ Convert a 'Char' into a 'ByteString'
+packChar :: Char -> ByteString
+packChar = B.packByte . c2w
+{-# INLINE packChar #-}
+
+-- | /O(n)/ Convert a 'String' into a 'ByteString'
+--
+-- For applications with large numbers of string literals, pack can be a
+-- bottleneck. In such cases, consider using packAddress (GHC only).
+pack :: String -> ByteString
+#if !defined(__GLASGOW_HASKELL__)
+
+pack str = B.create (P.length str) $ \p -> go p str
+ where go _ [] = return ()
+ go p (x:xs) = poke p (c2w x) >> go (p `plusPtr` 1) xs
+
+#else /* hack away */
+
+pack str = B.create (P.length str) $ \(Ptr p) -> stToIO (go p str)
+ where
+ go :: Addr# -> [Char] -> ST a ()
+ go _ [] = return ()
+ go p (C# c:cs) = writeByte p (unsafeCoerce# c) >> go (p `plusAddr#` 1#) cs
+
+ writeByte p c = ST $ \s# ->
+ case writeWord8OffAddr# p 0# c s# of s2# -> (# s2#, () #)
+ {-# INLINE writeByte #-}
+
+{-# RULES
+"pack/packAddress" forall s# .
+ pack (unpackCString# s#) = B.packAddress s#
+ #-}
+
+#endif
+
+{-# INLINE pack #-}
+
+-- | /O(n)/ Converts a 'ByteString' to a 'String'.
+unpack :: ByteString -> [Char]
+unpack = B.unpackWith w2c
+{-# INLINE unpack #-}
+
+-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
+-- complexity, as it requires a memcpy.
+cons :: Char -> ByteString -> ByteString
+cons = B.cons . c2w
+{-# INLINE cons #-}
+
+-- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to
+-- 'cons', this function performs a memcpy.
+snoc :: ByteString -> Char -> ByteString
+snoc p = B.snoc p . c2w
+{-# INLINE snoc #-}
+
+-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
+head :: ByteString -> Char
+head = w2c . B.head
+{-# INLINE head #-}
+
+-- | /O(1)/ Extract the last element of a packed string, which must be non-empty.
+last :: ByteString -> Char
+last = w2c . B.last
+{-# INLINE last #-}
+
+-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@
+map :: (Char -> Char) -> ByteString -> ByteString
+map f = B.map (c2w . f . w2c)
+{-# INLINE map #-}
+
+-- | /O(n)/ The 'intersperse' function takes a Char and a 'ByteString'
+-- and \`intersperses\' that Char between the elements of the
+-- 'ByteString'. It is analogous to the intersperse function on Lists.
+intersperse :: Char -> ByteString -> ByteString
+intersperse = B.intersperse . c2w
+{-# INLINE intersperse #-}
+
+-- | 'foldl', applied to a binary operator, a starting value (typically
+-- the left-identity of the operator), and a ByteString, reduces the
+-- ByteString using the binary operator, from left to right.
+foldl :: (a -> Char -> a) -> a -> ByteString -> a
+foldl f = B.foldl (\a c -> f a (w2c c))
+{-# INLINE foldl #-}
+
+-- | 'foldr', applied to a binary operator, a starting value
+-- (typically the right-identity of the operator), and a packed string,
+-- reduces the packed string using the binary operator, from right to left.
+foldr :: (Char -> a -> a) -> a -> ByteString -> a
+foldr f = B.foldr (\c a -> f (w2c c) a)
+{-# INLINE foldr #-}
+
+-- | 'foldl1' is a variant of 'foldl' that has no starting value
+-- argument, and thus must be applied to non-empty 'ByteStrings'.
+foldl1 :: (Char -> Char -> Char) -> ByteString -> Char
+foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
+{-# INLINE foldl1 #-}
+
+-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
+-- and thus must be applied to non-empty 'ByteString's
+foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
+foldr1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
+{-# INLINE foldr1 #-}
+
+-- | Map a function over a 'ByteString' and concatenate the results
+concatMap :: (Char -> ByteString) -> ByteString -> ByteString
+concatMap f = B.concatMap (f . w2c)
+{-# INLINE concatMap #-}
+
+-- | Applied to a predicate and a ByteString, 'any' determines if
+-- any element of the 'ByteString' satisfies the predicate.
+any :: (Char -> Bool) -> ByteString -> Bool
+any f = B.any (f . w2c)
+{-# INLINE any #-}
+
+-- | Applied to a predicate and a 'ByteString', 'all' determines if
+-- all elements of the 'ByteString' satisfy the predicate.
+all :: (Char -> Bool) -> ByteString -> Bool
+all f = B.all (f . w2c)
+{-# INLINE all #-}
+
+-- | 'maximum' returns the maximum value from a 'ByteString'
+maximum :: ByteString -> Char
+maximum = w2c . B.maximum
+{-# INLINE maximum #-}
+
+-- | 'minimum' returns the minimum value from a 'ByteString'
+minimum :: ByteString -> Char
+minimum = w2c . B.minimum
+{-# INLINE minimum #-}
+
+-- | /O(n)/ map Char functions, provided with the index at each position
+mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString
+mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c)))
+{-# INLINE mapIndexed #-}
+
+-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
+-- the value of every element. The following holds:
+--
+-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
+--
+-- This implemenation uses @memset(3)@
+replicate :: Int -> Char -> ByteString
+replicate w = B.replicate w . c2w
+{-# INLINE replicate #-}
+
+-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
+-- 'unfoldrN' builds a ByteString from a seed value. The function takes
+-- the element and returns 'Nothing' if it is done producing the
+-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
+-- prepending to the ByteString and @b@ is used as the next element in a
+-- recursive call.
+--
+-- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
+-- character to a ByteString is /O(n)/, this unfoldr requires a maximum
+-- final size of the ByteString as an argument. 'cons' can then be
+-- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
+-- linear complexity. The depth of the recursion is limited to this
+-- size, but may be less. For lazy, infinite unfoldr, use
+-- 'Data.List.unfoldr' (from 'Data.List').
+--
+-- Examples:
+--
+-- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
+--
+-- The following equation connects the depth-limited unfoldr to the List unfoldr:
+--
+-- > unfoldrN n == take n $ List.unfoldr
+--
+unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString
+unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f . w2c) (c2w w)
+ where k (i,j) = (c2w i, c2w j) -- (c2w *** c2w)
+{-# INLINE unfoldrN #-}
+
+-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
+-- returns the longest prefix (possibly empty) of @xs@ of elements that
+-- satisfy @p@.
+takeWhile :: (Char -> Bool) -> ByteString -> ByteString
+takeWhile f = B.takeWhile (f . w2c)
+{-# INLINE takeWhile #-}
+
+-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
+dropWhile :: (Char -> Bool) -> ByteString -> ByteString
+dropWhile f = B.dropWhile (f . w2c)
+{-# INLINE dropWhile #-}
+
+-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
+break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+break f = B.break (f . w2c)
+{-# INLINE break #-}
+
+-- | 'span' @p xs@ breaks the ByteString into two segments. It is
+-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
+span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+span f = B.span (f . w2c)
+{-# INLINE span #-}
+
+-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
+-- We have
+--
+-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- > ==
+-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
+--
+spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd f = B.spanEnd (f . w2c)
+{-# INLINE spanEnd #-}
+
+-- | 'breakChar' breaks its ByteString argument at the first occurence
+-- of the specified Char. It is more efficient than 'break' as it is
+-- implemented with @memchr(3)@. I.e.
+--
+-- > break (=='c') "abcd" == breakChar 'c' "abcd"
+--
+breakChar :: Char -> ByteString -> (ByteString, ByteString)
+breakChar = B.breakByte . c2w
+{-# INLINE breakChar #-}
+
+-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
+-- occurence of @w@. It behaves like 'break', except the delimiter is
+-- not returned, and @Nothing@ is returned if the delimiter is not in
+-- the ByteString. I.e.
+--
+-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
+--
+-- > breakFirst c xs ==
+-- > let (x,y) = break (== c) xs
+-- > in if null y then Nothing else Just (x, drop 1 y))
+--
+breakFirst :: Char -> ByteString -> Maybe (ByteString,ByteString)
+breakFirst = B.breakFirst . c2w
+{-# INLINE breakFirst #-}
+
+-- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
+-- ByteString.
+--
+-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
+--
+-- and the following are equivalent:
+--
+-- > breakLast 'c' "abcdef"
+-- > let (x,y) = break (=='c') (reverse "abcdef")
+-- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
+--
+breakLast :: Char -> ByteString -> Maybe (ByteString,ByteString)
+breakLast = B.breakLast . c2w
+{-# INLINE breakLast #-}
+
+-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
+-- argument, consuming the delimiter. I.e.
+--
+-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
+-- > split 'a' "aXaXaXa" == ["","X","X","X"]
+-- > split 'x' "x" == ["",""]
+--
+-- and
+--
+-- > join [c] . split c == id
+-- > split == splitWith . (==)
+--
+-- As for all splitting functions in this library, this function does
+-- not copy the substrings, it just constructs new 'ByteStrings' that
+-- are slices of the original.
+--
+split :: Char -> ByteString -> [ByteString]
+split = B.split . c2w
+{-# INLINE split #-}
+
+-- | /O(n)/ Splits a 'ByteString' into components delimited by
+-- separators, where the predicate returns True for a separator element.
+-- The resulting components do not contain the separators. Two adjacent
+-- separators result in an empty component in the output. eg.
+--
+-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
+--
+splitWith :: (Char -> Bool) -> ByteString -> [ByteString]
+splitWith f = B.splitWith (f . w2c)
+{-# INLINE splitWith #-}
+-- the inline makes a big difference here.
+
+-- | Like 'splitWith', except that sequences of adjacent separators are
+-- treated as a single separator. eg.
+--
+-- > tokens (=='a') "aabbaca" == ["bb","c"]
+--
+tokens :: (Char -> Bool) -> ByteString -> [ByteString]
+tokens f = B.tokens (f . w2c)
+{-# INLINE tokens #-}
+
+-- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a
+-- char. Around 4 times faster than the generalised join.
+--
+joinWithChar :: Char -> ByteString -> ByteString -> ByteString
+joinWithChar = B.joinWithByte . c2w
+{-# INLINE joinWithChar #-}
+
+-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
+index :: ByteString -> Int -> Char
+index = (w2c .) . B.index
+{-# INLINE index #-}
+
+-- | /O(n)/ The 'elemIndex' function returns the index of the first
+-- element in the given 'ByteString' which is equal (by memchr) to the
+-- query element, or 'Nothing' if there is no such element.
+elemIndex :: Char -> ByteString -> Maybe Int
+elemIndex = B.elemIndex . c2w
+{-# INLINE elemIndex #-}
+
+-- | /O(n)/ The 'elemIndexLast' function returns the last index of the
+-- element in the given 'ByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element. The following
+-- holds:
+--
+-- > elemIndexLast c xs ==
+-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
+--
+elemIndexLast :: Char -> ByteString -> Maybe Int
+elemIndexLast = B.elemIndexLast . c2w
+{-# INLINE elemIndexLast #-}
+
+-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
+-- the indices of all elements equal to the query element, in ascending order.
+elemIndices :: Char -> ByteString -> [Int]
+elemIndices = B.elemIndices . c2w
+{-# INLINE elemIndices #-}
+
+-- | The 'findIndex' function takes a predicate and a 'ByteString' and
+-- returns the index of the first element in the ByteString satisfying the predicate.
+findIndex :: (Char -> Bool) -> ByteString -> Maybe Int
+findIndex f = B.findIndex (f . w2c)
+{-# INLINE findIndex #-}
+
+-- | The 'findIndices' function extends 'findIndex', by returning the
+-- indices of all elements satisfying the predicate, in ascending order.
+findIndices :: (Char -> Bool) -> ByteString -> [Int]
+findIndices f = B.findIndices (f . w2c)
+
+-- | count returns the number of times its argument appears in the ByteString
+--
+-- > count = length . elemIndices
+--
+-- But more efficiently than using length on the intermediate list.
+count :: Char -> ByteString -> Int
+count c = B.count (c2w c)
+
+-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This
+-- implementation uses @memchr(3)@.
+elem :: Char -> ByteString -> Bool
+elem c = B.elem (c2w c)
+{-# INLINE elem #-}
+
+-- | /O(n)/ 'notElem' is the inverse of 'elem'
+notElem :: Char -> ByteString -> Bool
+notElem c = B.notElem (c2w c)
+{-# INLINE notElem #-}
+
+-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
+-- returns a ByteString containing those characters that satisfy the
+-- predicate.
+filter :: (Char -> Bool) -> ByteString -> ByteString
+filter f = B.filter (f . w2c)
+{-# INLINE filter #-}
+
+-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
+-- and returns the first element in matching the predicate, or 'Nothing'
+-- if there is no such element.
+find :: (Char -> Bool) -> ByteString -> Maybe Char
+find f ps = w2c `fmap` B.find (f . w2c) ps
+{-# INLINE find #-}
+
+-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
+-- case of filtering a single Char. It is more efficient to use
+-- filterChar in this case.
+--
+-- > filterChar == filter . (==)
+--
+-- filterChar is around 10x faster, and uses much less space, than its
+-- filter equivalent
+--
+filterChar :: Char -> ByteString -> ByteString
+filterChar c = B.filterByte (c2w c)
+{-# INLINE filterChar #-}
+
+-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
+-- case of filtering a single Char out of a list. It is more efficient
+-- to use /filterNotChar/ in this case.
+--
+-- > filterNotChar == filter . (/=)
+--
+-- filterNotChar is around 3x faster, and uses much less space, than its
+-- filter equivalent
+--
+filterNotChar :: Char -> ByteString -> ByteString
+filterNotChar c = B.filterNotByte (c2w c)
+{-# INLINE filterNotChar #-}
+
+-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
+-- corresponding pairs of Chars. If one input ByteString is short,
+-- excess elements of the longer ByteString are discarded. This is
+-- equivalent to a pair of 'unpack' operations, and so space
+-- usage may be large for multi-megabyte ByteStrings
+zip :: ByteString -> ByteString -> [(Char,Char)]
+zip ps qs
+ | B.null ps || B.null qs = []
+ | otherwise = (unsafeHead ps, unsafeHead qs) : zip (B.unsafeTail ps) (B.unsafeTail qs)
+
+-- | 'zipWith' generalises 'zip' by zipping with the function given as
+-- the first argument, instead of a tupling function. For example,
+-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list
+-- of corresponding sums.
+zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
+zipWith f = B.zipWith ((. w2c) . f . w2c)
+
+-- | 'unzip' transforms a list of pairs of Chars into a pair of
+-- ByteStrings. Note that this performs two 'pack' operations.
+unzip :: [(Char,Char)] -> (ByteString,ByteString)
+unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
+{-# INLINE unzip #-}
+
+-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits
+-- the check for the empty case, which is good for performance, but
+-- there is an obligation on the programmer to provide a proof that the
+-- ByteString is non-empty.
+unsafeHead :: ByteString -> Char
+unsafeHead = w2c . B.unsafeHead
+{-# INLINE unsafeHead #-}
+
+-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a Char.
+-- This omits the bounds check, which means there is an accompanying
+-- obligation on the programmer to ensure the bounds are checked in some
+-- other way.
+unsafeIndex :: ByteString -> Int -> Char
+unsafeIndex = (w2c .) . B.unsafeIndex
+{-# INLINE unsafeIndex #-}
+
+-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
+w2c :: Word8 -> Char
+#if !defined(__GLASGOW_HASKELL__)
+w2c = chr . fromIntegral
+#else
+w2c = unsafeChr . fromIntegral
+#endif
+{-# INLINE w2c #-}
+
+-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
+-- silently truncates to 8 bits Chars > '\255'. It is provided as
+-- convenience for ByteString construction.
+c2w :: Char -> Word8
+c2w = fromIntegral . ord
+{-# INLINE c2w #-}
+
+-- ---------------------------------------------------------------------
+-- Things that depend on the encoding
+
+-- | 'breakSpace' returns the pair of ByteStrings when the argument is
+-- broken at the first whitespace byte. I.e.
+--
+-- > break isSpace == breakSpace
+--
+breakSpace :: ByteString -> (ByteString,ByteString)
+breakSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ i <- firstspace (p `plusPtr` s) 0 l
+ return $ case () of {_
+ | i == 0 -> (empty, PS x s l)
+ | i == l -> (PS x s l, empty)
+ | otherwise -> (PS x s i, PS x (s+i) (l-i))
+ }
+{-# INLINE breakSpace #-}
+
+firstspace :: Ptr Word8 -> Int -> Int -> IO Int
+STRICT3(firstspace)
+firstspace ptr n m
+ | n >= m = return n
+ | otherwise = do w <- peekByteOff ptr n
+ if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n
+
+-- | 'dropSpace' efficiently returns the 'ByteString' argument with
+-- white space Chars removed from the front. It is more efficient than
+-- calling dropWhile for removing whitespace. I.e.
+--
+-- > dropWhile isSpace == dropSpace
+--
+dropSpace :: ByteString -> ByteString
+dropSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ i <- firstnonspace (p `plusPtr` s) 0 l
+ return $ if i == l then empty else PS x (s+i) (l-i)
+{-# INLINE dropSpace #-}
+
+firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int
+STRICT3(firstnonspace)
+firstnonspace ptr n m
+ | n >= m = return n
+ | otherwise = do w <- peekElemOff ptr n
+ if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n
+
+-- | 'dropSpaceEnd' efficiently returns the 'ByteString' argument with
+-- white space removed from the end. I.e.
+--
+-- > reverse . (dropWhile isSpace) . reverse == dropSpaceEnd
+--
+-- but it is more efficient than using multiple reverses.
+--
+dropSpaceEnd :: ByteString -> ByteString
+dropSpaceEnd (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ i <- lastnonspace (p `plusPtr` s) (l-1)
+ return $ if i == (-1) then empty else PS x s (i+1)
+{-# INLINE dropSpaceEnd #-}
+
+lastnonspace :: Ptr Word8 -> Int -> IO Int
+STRICT2(lastnonspace)
+lastnonspace ptr n
+ | n < 0 = return n
+ | otherwise = do w <- peekElemOff ptr n
+ if isSpaceWord8 w then lastnonspace ptr (n-1) else return n
+
+-- | 'lines' breaks a ByteString up into a list of ByteStrings at
+-- newline Chars. The resulting strings do not contain newlines.
+lines :: ByteString -> [ByteString]
+lines ps
+ | null ps = []
+ | otherwise = case search ps of
+ Nothing -> [ps]
+ Just n -> take n ps : lines (drop (n+1) ps)
+ where search = elemIndex '\n'
+{-# INLINE lines #-}
+
+{-
+-- Just as fast, but more complex. Should be much faster, I thought.
+lines :: ByteString -> [ByteString]
+lines (PS _ _ 0) = []
+lines (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+ let ptr = p `plusPtr` s
+
+ STRICT1(loop)
+ loop n = do
+ let q = memchr (ptr `plusPtr` n) 0x0a (fromIntegral (l-n))
+ if q == nullPtr
+ then return [PS x (s+n) (l-n)]
+ else do let i = q `minusPtr` ptr
+ ls <- loop (i+1)
+ return $! PS x (s+n) (i-n) : ls
+ loop 0
+-}
+
+-- | 'unlines' is an inverse operation to 'lines'. It joins lines,
+-- after appending a terminating newline to each.
+unlines :: [ByteString] -> ByteString
+unlines [] = empty
+unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space
+ where nl = packChar '\n'
+
+-- | 'words' breaks a ByteString up into a list of words, which
+-- were delimited by Chars representing white space. And
+--
+-- > tokens isSpace = words
+--
+words :: ByteString -> [ByteString]
+words = B.tokens isSpaceWord8
+
+-- | The 'unwords' function is analogous to the 'unlines' function, on words.
+unwords :: [ByteString] -> ByteString
+unwords = join (packChar ' ')
+
+-- | /O(n)/ Indicies of newlines. Shorthand for
+--
+-- > elemIndices '\n'
+--
+lineIndices :: ByteString -> [Int]
+lineIndices = elemIndices '\n'
+
+-- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on
+-- newline Chars. However, unlike the Prelude functions, 'lines\'' and
+-- 'unlines\'' correctly reconstruct lines that are missing terminating
+-- newlines characters. I.e.
+--
+-- > unlines (lines "a\nb\nc") == "a\nb\nc\n"
+-- > unlines' (lines' "a\nb\nc") == "a\nb\nc"
+--
+-- Note that this means:
+--
+-- > lines "a\nb\nc\n" == ["a","b","c"]
+-- > lines' "a\nb\nc\n" == ["a","b","c",""]
+--
+lines' :: ByteString -> [ByteString]
+lines' ps = ps `seq` case elemIndex '\n' ps of
+ Nothing -> [ps]
+ Just n -> take n ps : lines' (drop (n+1) ps)
+
+-- | 'linesCRLF\'' behaves like 'lines\'', but breaks on (\\cr?\\lf)
+linesCRLF' :: ByteString -> [ByteString]
+linesCRLF' ps = ps `seq` case elemIndex '\n' ps of
+ Nothing -> [ps]
+ Just 0 -> empty : linesCRLF' (drop 1 ps)
+ Just n -> let k = if ps `unsafeIndex` (n-1) == '\r' then n-1 else n
+ in take k ps : linesCRLF' (drop (n+1) ps)
+
+-- | 'unlines\'' behaves like 'unlines', except that it also correctly
+-- retores lines that do not have terminating newlines (see the
+-- description for 'lines\'').
+--
+unlines' :: [ByteString] -> ByteString
+unlines' ss = concat $ intersperse_newlines ss
+ where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
+ intersperse_newlines s = s
+ newline = packChar '\n'
+
+-- | 'unlines\'' behaves like 'unlines', except that it also correctly
+-- retores lines that do not have terminating newlines (see the
+-- description for 'lines\''). Uses CRLF instead of LF.
+--
+unlinesCRLF' :: [ByteString] -> ByteString
+unlinesCRLF' ss = concat $ intersperse_newlines ss
+ where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
+ intersperse_newlines s = s
+ newline = pack "\r\n"
+
+-- | 'words\'' behaves like 'words', with the exception that it produces
+-- output on ByteStrings with trailing whitespace that can be
+-- correctly inverted by 'unwords'. I.e.
+--
+-- > words "a b c " == ["a","b","c"]
+-- > words' "a b c " == ["a","b","c",""]
+--
+-- > unwords $ words "a b c " == "a b c"
+-- > unwords $ words' "a b c " == "a b c "
+--
+words' :: ByteString -> [ByteString]
+words' = B.splitWith isSpaceWord8
+
+-- | 'unwords\'' behaves like 'unwords'. It is provided for consistency
+-- with the other invertable words and lines functions.
+unwords' :: [ByteString] -> ByteString
+unwords' = unwords
+
+-- | 'betweenLines' returns the ByteString between the two lines given,
+-- or Nothing if they do not appear. The returned string is the first
+-- and shortest string such that the line before it is the given first
+-- line, and the line after it is the given second line.
+betweenLines :: ByteString -- ^ First line to look for
+ -> ByteString -- ^ Second line to look for
+ -> ByteString -- ^ 'ByteString' to look in
+ -> Maybe (ByteString)
+
+betweenLines start end ps =
+ case P.break (start ==) (lines ps) of
+ (_, _:rest@(PS ps1 s1 _:_)) ->
+ case P.break (end ==) rest of
+ (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1)
+ _ -> Nothing
+ _ -> Nothing
+
+-- ---------------------------------------------------------------------
+-- Reading from ByteStrings
+
+-- | readInt skips any whitespace at the beginning of its argument, and
+-- reads an Int from the beginning of the ByteString. If there is no
+-- integer at the beginning of the string, it returns Nothing, otherwise
+-- it just returns the int read, and the rest of the string.
+readInt :: ByteString -> Maybe (Int, ByteString)
+readInt p@(PS x s l) = inlinePerformIO $ useAsCString p $ \cstr ->
+ with (castPtr cstr) $ \endpp -> do
+ val <- c_strtol (castPtr cstr) endpp 0
+ skipped <- (`minusPtr` cstr) `fmap` peek endpp
+ return $ if skipped == 0
+ then Nothing
+ else Just (fromIntegral val, PS x (s+skipped) (l-skipped))
+
+-- | unsafeReadInt is like readInt, but requires a null terminated
+-- ByteString. It avoids a copy if this is the case. It returns the Int
+-- read, if any, and the rest of the string.
+unsafeReadInt :: ByteString -> Maybe (Int, ByteString)
+unsafeReadInt p@(PS x s l) = inlinePerformIO $ unsafeUseAsCString p $ \cstr ->
+ with (castPtr cstr) $ \endpp -> do
+ val <- c_strtol (castPtr cstr) endpp 0
+ skipped <- (`minusPtr` cstr) `fmap` peek endpp
+ return $ if skipped == 0
+ then Nothing
+ else Just (fromIntegral val, PS x (s+skipped) (l-skipped))
+
+foreign import ccall unsafe "stdlib.h strtol" c_strtol
+ :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong
+
+{-
+--
+-- not quite there yet
+--
+readInt :: ByteString -> Maybe (Int, ByteString)
+readInt = go 0
+ where
+ STRICT2(go)
+ go i ps
+ | B.null ps = Nothing
+ | x == '-' = neg 0 xs
+ | otherwise = pos (parse x) xs
+ where (x, xs) = (ps `unsafeIndex` 0, unsafeTail ps)
+
+ STRICT2(neg)
+ neg n qs | isSpace x = return $ Just ((i-n),xs)
+ | otherwise = neg (parse x + (10 * n)) xs
+ where (x, xs) = (qs `unsafeIndex` 0, unsafeTail qs)
+
+ STRICT2(pos)
+ pos n qs | isSpace x = go (i+n) xs
+ | otherwise = pos (parse x + (10 * n)) xs
+ where (x, xs) = (qs `unsafeIndexWord8` 0, unsafeTail qs)
+
+ parse w = fromIntegral (w - 48) :: Int
+ {-# INLINE parse #-}
+-}
+
+-- ---------------------------------------------------------------------
+-- Internals
+
+-- Just like inlinePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining
+--
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif
+
+-- ordered by frequency
+-- Idea from Ketil
+isSpaceWord8 :: Word8 -> Bool
+isSpaceWord8 w = case w of
+ 0x20 -> True -- SPACE
+ 0x0A -> True -- LF, \n
+ 0x09 -> True -- HT, \t
+ 0x0C -> True -- FF, \f
+ 0x0D -> True -- CR, \r
+ 0x0B -> True -- VT, \v
+ _ -> False
+{-# INLINE isSpaceWord8 #-}
+