Data/ByteString/Lazy.hs

   1 {-# OPTIONS_GHC -cpp -optc-O1 -fffi -fglasgow-exts -fno-warn-incomplete-patterns #-}
   2 --
   3 -- -optc-O2 breaks with 4.0.4 gcc on debian
   4 --
   5 -- Module      : ByteString.Lazy
   6 -- Copyright   : (c) Don Stewart 2006
   7 --               (c) Duncan Coutts 2006
   8 -- License     : BSD-style
   9 --
  10 -- Maintainer  : dons@cse.unsw.edu.au
  11 -- Stability   : experimental
  12 -- Portability : portable, requires ffi and cpp
  13 -- Tested with : GHC 6.4.1 and Hugs March 2005
  14 --
  15
  16 --
  17 -- | A time and space-efficient implementation of lazy byte vectors
  18 -- using lists of packed 'Word8' arrays, suitable for high performance
  19 -- use, both in terms of large data quantities, or high speed
  20 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
  21 -- arrays of bytes. They provide a means to manipulate large byte vectors
  22 -- without requiring the entire vector be resident in memory.
  23 --
  24 -- Some operations, such as concat, append, reverse and cons, have
  25 -- better complexity than their "Data.ByteString" equivalents, as due to
  26 -- optimisations resulting from the list spine structure. And for other
  27 -- operations Lazy ByteStrings are usually within a few percent of
  28 -- strict ones, but with better heap usage. For data larger than the
  29 -- available memory, or if you have tight memory constraints, this
  30 -- module will be the only option. The default chunk size is 64k, which
  31 -- should be good in most circumstances. For people with large L2
  32 -- caches, you may want to increase this to fit your cache.
  33 --
  34 -- This module is intended to be imported @qualified@, to avoid name
  35 -- clashes with "Prelude" functions.  eg.
  36 --
  37 -- > import qualified Data.ByteString.Lazy as B
  38 --
  39 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  40 -- UArray by Simon Marlow. Rewritten to support slices and use
  41 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  42 -- Lazy variant by Duncan Coutts and Don Stewart.
  43 --
  44
  45 module Data.ByteString.Lazy (
  46
  47         -- * The @ByteString@ type
  48         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  49
  50         -- * Introducing and eliminating 'ByteString's
  51         empty,                  -- :: ByteString
  52         singleton,               -- :: Word8   -> ByteString
  53         pack,                   -- :: [Word8] -> ByteString
  54         unpack,                 -- :: ByteString -> [Word8]
  55         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  56         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  57
  58         -- * Basic interface
  59         cons,                   -- :: Word8 -> ByteString -> ByteString
  60         snoc,                   -- :: ByteString -> Word8 -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62         head,                   -- :: ByteString -> Word8
  63         last,                   -- :: ByteString -> Word8
  64         tail,                   -- :: ByteString -> ByteString
  65         init,                   -- :: ByteString -> ByteString
  66         null,                   -- :: ByteString -> Bool
  67         length,                 -- :: ByteString -> Int64
  68
  69         -- * Transformating ByteStrings
  70         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  71         reverse,                -- :: ByteString -> ByteString
  72 --      intersperse,            -- :: Word8 -> ByteString -> ByteString
  73         transpose,              -- :: [ByteString] -> [ByteString]
  74
  75         -- * Reducing 'ByteString's (folds)
  76         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  77         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  78         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  80         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  81         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  82
  83         -- ** Special folds
  84         concat,                 -- :: [ByteString] -> ByteString
  85         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  86         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  87         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  88         maximum,                -- :: ByteString -> Word8
  89         minimum,                -- :: ByteString -> Word8
  90
  91         -- * Building ByteStrings
  92         -- ** Scans
  93         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  94 --      scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  95 --      scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  96 --      scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  97
  98         -- ** Accumulating maps
  99         mapAccumL,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 100         mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
 101
 102         -- ** Infinite ByteStrings
 103         repeat,                 -- :: Word8 -> ByteString
 104         replicate,              -- :: Int64 -> Word8 -> ByteString
 105         cycle,                  -- :: ByteString -> ByteString
 106         iterate,                -- :: (Word8 -> Word8) -> Word8 -> ByteString
 107
 108         -- ** Unfolding
 109         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 110
 111         -- * Substrings
 112
 113         -- ** Breaking strings
 114         take,                   -- :: Int64 -> ByteString -> ByteString
 115         drop,                   -- :: Int64 -> ByteString -> ByteString
 116         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
 117         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 118         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 119         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 120         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 121         group,                  -- :: ByteString -> [ByteString]
 122         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 123         inits,                  -- :: ByteString -> [ByteString]
 124         tails,                  -- :: ByteString -> [ByteString]
 125
 126         -- ** Breaking and dropping on specific bytes
 127         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 128         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 129
 130         -- ** Breaking into many substrings
 131         split,                  -- :: Word8 -> ByteString -> [ByteString]
 132         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 133         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 134
 135         -- ** Joining strings
 136         join,                   -- :: ByteString -> [ByteString] -> ByteString
 137         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 138
 139         -- * Predicates
 140         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 141 --      isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 142
 143         -- * Searching ByteStrings
 144
 145         -- ** Searching by equality
 146         elem,                   -- :: Word8 -> ByteString -> Bool
 147         notElem,                -- :: Word8 -> ByteString -> Bool
 148         filterByte,             -- :: Word8 -> ByteString -> ByteString
 149         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 150
 151         -- ** Searching with a predicate
 152         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 153         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 154 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 155
 156         -- * Indexing ByteStrings
 157         index,                  -- :: ByteString -> Int64 -> Word8
 158         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int64
 159         elemIndices,            -- :: Word8 -> ByteString -> [Int64]
 160         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 161         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int64]
 162         count,                  -- :: Word8 -> ByteString -> Int64
 163
 164         -- * Zipping and unzipping ByteStrings
 165         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 166         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 167 --      unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 168
 169         -- * Ordered ByteStrings
 170 --        sort,                   -- :: ByteString -> ByteString
 171
 172         -- * I\/O with 'ByteString's
 173
 174         -- ** Standard input and output
 175         getContents,            -- :: IO ByteString
 176         putStr,                 -- :: ByteString -> IO ()
 177         putStrLn,               -- :: ByteString -> IO ()
 178         interact,               -- :: (ByteString -> ByteString) -> IO ()
 179
 180         -- ** Files
 181         readFile,               -- :: FilePath -> IO ByteString
 182         writeFile,              -- :: FilePath -> ByteString -> IO ()
 183         appendFile,             -- :: FilePath -> ByteString -> IO ()
 184
 185         -- ** I\/O with Handles
 186         hGetContents,           -- :: Handle -> IO ByteString
 187         hGetContentsN,          -- :: Int -> Handle -> IO ByteString
 188         hGet,                   -- :: Handle -> Int -> IO ByteString
 189         hGetN,                  -- :: Int -> Handle -> Int -> IO ByteString
 190         hPut,                   -- :: Handle -> ByteString -> IO ()
 191 #if defined(__GLASGOW_HASKELL__)
 192         hGetNonBlocking,        -- :: Handle -> IO ByteString
 193         hGetNonBlockingN,       -- :: Int -> Handle -> IO ByteString
 194 #endif
 195
 196   ) where
 197
 198 import qualified Prelude
 199 import Prelude hiding
 200     (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
 201     ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
 202     ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
 203     ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
 204     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
 205
 206 import qualified Data.List              as L  -- L for list/lazy
 207 import qualified Data.ByteString        as P  -- P for packed
 208 import qualified Data.ByteString.Base   as P
 209 import qualified Data.ByteString.Fusion as P
 210 import Data.ByteString.Fusion (PairS(..),loopL)
 211
 212 import Data.Monoid              (Monoid(..))
 213
 214 import Data.Word                (Word8)
 215 import Data.Int                 (Int64)
 216 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..),hClose)
 217 import System.IO.Unsafe
 218 import Control.Exception        (bracket)
 219
 220 #if defined(__GLASGOW_HASKELL__)
 221 import Data.Generics            (Data(..), Typeable(..))
 222 #endif
 223
 224 -- -----------------------------------------------------------------------------
 225 --
 226 -- Useful macros, until we have bang patterns
 227 --
 228
 229 #define STRICT1(f) f a | a `seq` False = undefined
 230 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 231 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 232 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 233 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 234
 235 -- -----------------------------------------------------------------------------
 236
 237 -- | A space-efficient representation of a Word8 vector, supporting many
 238 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 239 --
 240 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 241 --
 242 newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string
 243     deriving (Show,Read
 244 #if defined(__GLASGOW_HASKELL__)
 245                         ,Data, Typeable
 246 #endif
 247              )
 248
 249 --
 250 -- hmm, what about getting the PS constructor unpacked into the cons cell?
 251 --
 252 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
 253 --
 254 -- Would avoid one indirection per chunk.
 255 --
 256
 257 unLPS :: ByteString -> [P.ByteString]
 258 unLPS (LPS xs) = xs
 259 {-# INLINE unLPS #-}
 260
 261 instance Eq  ByteString
 262     where (==)    = eq
 263
 264 instance Ord ByteString
 265     where compare = compareBytes
 266
 267 instance Monoid ByteString where
 268     mempty  = empty
 269     mappend = append
 270     mconcat = concat
 271
 272 ------------------------------------------------------------------------
 273
 274 -- XXX
 275 -- The data type invariant:
 276 -- Every ByteString is either empty or consists of non-null ByteStrings.
 277 -- All functions must preserve this, and the QC properties must check this.
 278 --
 279 _invariant :: ByteString -> Bool
 280 _invariant (LPS []) = True
 281 _invariant (LPS xs) = L.all (not . P.null) xs
 282
 283 -- In a form useful for QC testing
 284 _checkInvariant :: ByteString -> ByteString
 285 _checkInvariant lps
 286     | _invariant lps = lps
 287     | otherwise      = moduleError "invariant" ("violation: " ++ show lps)
 288
 289 -- The Data abstraction function
 290 --
 291 _abstr :: ByteString -> P.ByteString
 292 _abstr (LPS []) = P.empty
 293 _abstr (LPS xs) = P.concat xs
 294
 295 -- The representation uses lists of packed chunks. When we have to convert from
 296 -- a lazy list to the chunked representation, then by default we'll use this
 297 -- chunk size. Some functions give you more control over the chunk size.
 298 --
 299 -- Measurements here:
 300 --  http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
 301 --
 302 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
 303 -- The following value assumes people have something greater than 128k,
 304 -- and need to share the cache with other programs.
 305 --
 306 defaultChunkSize :: Int
 307 defaultChunkSize = 64 * k
 308    where k = 1024
 309
 310 smallChunkSize :: Int
 311 smallChunkSize = 4 * k
 312    where k = 1024
 313
 314 -- defaultChunkSize = 1
 315
 316 ------------------------------------------------------------------------
 317
 318 eq :: ByteString -> ByteString -> Bool
 319 eq (LPS xs) (LPS ys) = eq' xs ys
 320   where eq' [] [] = True
 321         eq' [] _  = False
 322         eq' _  [] = False
 323         eq' (a:as) (b:bs) =
 324           case compare (P.length a) (P.length b) of
 325             LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
 326             EQ -> a == b                       && eq' as bs
 327             GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
 328
 329 compareBytes :: ByteString -> ByteString -> Ordering
 330 compareBytes (LPS xs) (LPS ys) = cmp xs ys
 331   where cmp [] [] = EQ
 332         cmp [] _  = LT
 333         cmp _  [] = GT
 334         cmp (a:as) (b:bs) =
 335           case compare (P.length a) (P.length b) of
 336             LT -> case compare a (P.take (P.length a) b) of
 337                     EQ     -> cmp as (P.drop (P.length a) b : bs)
 338                     result -> result
 339             EQ -> case compare a b of
 340                     EQ     -> cmp as bs
 341                     result -> result
 342             GT -> case compare (P.take (P.length b) a) b of
 343                     EQ     -> cmp (P.drop (P.length b) a : as) bs
 344                     result -> result
 345
 346 -- -----------------------------------------------------------------------------
 347 -- Introducing and eliminating 'ByteString's
 348
 349 -- | /O(1)/ The empty 'ByteString'
 350 empty :: ByteString
 351 empty = LPS []
 352 {-# NOINLINE empty #-}
 353
 354 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 355 singleton :: Word8 -> ByteString
 356 singleton c = LPS [P.singleton c]
 357 {-# NOINLINE singleton #-}
 358
 359 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 360 pack :: [Word8] -> ByteString
 361 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
 362
 363 -- ?
 364 chunk :: Int -> [a] -> [[a]]
 365 chunk _    [] = []
 366 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
 367
 368 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 369 unpack :: ByteString -> [Word8]
 370 unpack (LPS ss) = L.concatMap P.unpack ss
 371 {-# INLINE unpack #-}
 372
 373 ------------------------------------------------------------------------
 374
 375 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 376 -- conversion function
 377 packWith :: (a -> Word8) -> [a] -> ByteString
 378 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
 379 {-# INLINE packWith #-}
 380 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 381
 382 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 383 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 384 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
 385 {-# INLINE unpackWith #-}
 386 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 387
 388 -- ---------------------------------------------------------------------
 389 -- Basic interface
 390
 391 -- | /O(1)/ Test whether a ByteString is empty.
 392 null :: ByteString -> Bool
 393 null (LPS []) = True
 394 null (_)      = False  -- TODO: guarantee this invariant is maintained
 395 {-# INLINE null #-}
 396
 397 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
 398 length :: ByteString -> Int64
 399 length (LPS ss) = L.sum (L.map (fromIntegral.P.length) ss)
 400
 401 -- avoid the intermediate list?
 402 -- length (LPS ss) = L.foldl lengthF 0 ss
 403 --     where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
 404 {-# INLINE length #-}
 405
 406 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
 407 -- strict in the ByteString that we are consing onto. More precisely, it forces
 408 -- the head and the first chunk. It does this because, for space efficiency, it
 409 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
 410 -- new \'chunk\'.
 411 --
 412 -- So that means you can't use a lazy recursive contruction like this:
 413 --
 414 -- > let xs = cons c xs in xs
 415 --
 416 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
 417 --
 418 cons :: Word8 -> ByteString -> ByteString
 419 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
 420 cons c (LPS ss)                        = LPS (P.singleton c : ss)
 421 {-# INLINE cons #-}
 422
 423 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
 424 snoc :: ByteString -> Word8 -> ByteString
 425 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
 426 {-# INLINE snoc #-}
 427
 428 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 429 head :: ByteString -> Word8
 430 head (LPS [])    = errorEmptyList "head"
 431 head (LPS (x:_)) = P.unsafeHead x
 432 {-# INLINE head #-}
 433
 434 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 435 tail :: ByteString -> ByteString
 436 tail (LPS [])     = errorEmptyList "tail"
 437 tail (LPS (x:xs))
 438   | P.length x == 1 = LPS xs
 439   | otherwise       = LPS (P.unsafeTail x : xs)
 440 {-# INLINE tail #-}
 441
 442 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
 443 last :: ByteString -> Word8
 444 last (LPS []) = errorEmptyList "last"
 445 last (LPS xs) = P.last (L.last xs)
 446 {-# INLINE last #-}
 447
 448 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 449 init :: ByteString -> ByteString
 450 init (LPS []) = errorEmptyList "init"
 451 init (LPS xs)
 452     | P.length y == 1 = LPS ys
 453     | otherwise       = LPS (ys ++ [P.init y])
 454     where (y,ys) = (L.last xs, L.init xs)
 455 {-# INLINE init #-}
 456
 457 -- | /O(n)/ Append two ByteStrings
 458 append :: ByteString -> ByteString -> ByteString
 459 append (LPS []) (LPS ys) = LPS ys
 460 append (LPS xs) (LPS []) = LPS xs
 461 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
 462 {-# INLINE append #-}
 463
 464 -- ---------------------------------------------------------------------
 465 -- Transformations
 466
 467 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 468 -- element of @xs@.
 469 map :: (Word8 -> Word8) -> ByteString -> ByteString
 470 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
 471 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
 472 {-# INLINE map #-}
 473
 474 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 475 reverse :: ByteString -> ByteString
 476 reverse (LPS xs) = LPS (L.reverse . L.map P.reverse $ xs)
 477 {-# INLINE reverse #-}
 478
 479 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
 480 -- \`intersperses\' that byte between the elements of the 'ByteString'.
 481 -- It is analogous to the intersperse function on Lists.
 482 -- intersperse :: Word8 -> ByteString -> ByteString
 483 -- intersperse = error "FIXME: not yet implemented"
 484
 485 {-
 486 intersperse c (LPS [])     = LPS []
 487 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
 488   where intersperse' c ps@(PS x s l) =
 489           P.create (2*l) $ \p -> withForeignPtr x $ \f ->
 490                 poke p c
 491                 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
 492 -}
 493
 494 -- | The 'transpose' function transposes the rows and columns of its
 495 -- 'ByteString' argument.
 496 transpose :: [ByteString] -> [ByteString]
 497 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
 498
 499 -- ---------------------------------------------------------------------
 500 -- Reducing 'ByteString's
 501
 502 -- | 'foldl', applied to a binary operator, a starting value (typically
 503 -- the left-identity of the operator), and a ByteString, reduces the
 504 -- ByteString using the binary operator, from left to right.
 505 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 506 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
 507 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
 508 {-# INLINE foldl #-}
 509
 510 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 511 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 512 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
 513 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
 514 {-# INLINE foldl' #-}
 515
 516 -- | 'foldr', applied to a binary operator, a starting value
 517 -- (typically the right-identity of the operator), and a ByteString,
 518 -- reduces the ByteString using the binary operator, from right to left.
 519 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 520 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
 521 {-# INLINE foldr #-}
 522
 523 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 524 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 525 -- This function is subject to array fusion.
 526 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 527 foldl1 _ (LPS []) = errorEmptyList "foldl1"
 528 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 529
 530 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 531 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 532 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
 533 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 534
 535 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 536 -- and thus must be applied to non-empty 'ByteString's
 537 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 538 foldr1 _ (LPS []) = errorEmptyList "foldr1"
 539 foldr1 f (LPS ps) = foldr1' ps
 540   where foldr1' (x:[]) = P.foldr1 f x
 541         foldr1' (x:xs) = P.foldr  f (foldr1' xs) x
 542
 543 -- ---------------------------------------------------------------------
 544 -- Special folds
 545
 546 -- | /O(n)/ Concatenate a list of ByteStrings.
 547 concat :: [ByteString] -> ByteString
 548 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
 549
 550 -- | Map a function over a 'ByteString' and concatenate the results
 551 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 552 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
 553     where
 554       k w = case f w of LPS xs -> P.concat xs
 555
 556 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 557 -- any element of the 'ByteString' satisfies the predicate.
 558 any :: (Word8 -> Bool) -> ByteString -> Bool
 559 any f (LPS xs) = L.or (L.map (P.any f) xs)
 560 -- todo fuse
 561
 562 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 563 -- if all elements of the 'ByteString' satisfy the predicate.
 564 all :: (Word8 -> Bool) -> ByteString -> Bool
 565 all f (LPS xs) = L.and (L.map (P.all f) xs)
 566 -- todo fuse
 567
 568 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 569 maximum :: ByteString -> Word8
 570 maximum (LPS []) = errorEmptyList "maximum"
 571 maximum (LPS xs) = L.maximum (L.map P.maximum xs)
 572 {-# INLINE maximum #-}
 573
 574 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 575 minimum :: ByteString -> Word8
 576 minimum (LPS []) = errorEmptyList "minimum"
 577 minimum (LPS xs) = L.minimum (L.map P.minimum xs)
 578 {-# INLINE minimum #-}
 579
 580 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 581 -- 'foldl'; it applies a function to each element of a ByteString,
 582 -- passing an accumulating parameter from left to right, and returning a
 583 -- final value of this accumulator together with the new ByteString.
 584 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 585 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
 586
 587 -- | /O(n)/ map Word8 functions, provided with the index at each position
 588 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 589 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
 590
 591 -- ---------------------------------------------------------------------
 592 -- Building ByteStrings
 593
 594 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 595 -- reduced values from the left. This function will fuse.
 596 --
 597 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 598 --
 599 -- Note that
 600 --
 601 -- > last (scanl f z xs) == foldl f z xs.
 602 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 603 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
 604 {-# INLINE scanl #-}
 605
 606 -- ---------------------------------------------------------------------
 607 -- Unfolds and replicates
 608
 609 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
 610 -- of @f@ to @x@:
 611 --
 612 -- > iterate f x == [x, f x, f (f x), ...]
 613 --
 614 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
 615 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
 616
 617 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
 618 -- element.
 619 --
 620 repeat :: Word8 -> ByteString
 621 repeat c = LPS (L.repeat block)
 622     where block =  P.replicate smallChunkSize c
 623
 624 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
 625 -- the value of every element.
 626 --
 627 replicate :: Int64 -> Word8 -> ByteString
 628 replicate w c
 629     | w <= 0             = empty
 630     | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
 631     | r == 0             = LPS (L.genericReplicate q s) -- preserve invariant
 632     | otherwise          = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
 633  where
 634     s      = P.replicate smallChunkSize c
 635     (q, r) = quotRem w (fromIntegral smallChunkSize)
 636
 637 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
 638 -- the infinite repetition of the original ByteString.
 639 --
 640 cycle :: ByteString -> ByteString
 641 cycle (LPS []) = errorEmptyList "cycle"
 642 cycle (LPS xs) = LPS (L.cycle xs)
 643
 644 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
 645 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
 646 -- the element and returns 'Nothing' if it is done producing the
 647 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 648 -- prepending to the ByteString and @b@ is used as the next element in a
 649 -- recursive call.
 650 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 651 unfoldr f = LPS . unfoldChunk 32
 652   where unfoldChunk n x =
 653           case P.unfoldrN n f x of
 654             (s, Nothing)
 655               | P.null s  -> []
 656               | otherwise -> s : []
 657             (s, Just x')  -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
 658
 659 -- ---------------------------------------------------------------------
 660 -- Substrings
 661
 662 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 663 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 664 take :: Int64 -> ByteString -> ByteString
 665 take n _ | n < 0 = empty
 666 take i (LPS ps)  = LPS (take' i ps)
 667   where take' _ []     = []
 668         take' 0 _      = []
 669         take' n (x:xs) =
 670           if n < fromIntegral (P.length x)
 671             then P.take (fromIntegral n) x : []
 672             else x : take' (n - fromIntegral (P.length x)) xs
 673
 674 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 675 -- elements, or @[]@ if @n > 'length' xs@.
 676 drop  :: Int64 -> ByteString -> ByteString
 677 drop i p | i <= 0 = p
 678 drop i (LPS ps) = LPS (drop' i ps)
 679   where drop' _ []     = []
 680         drop' 0 xs     = xs
 681         drop' n (x:xs) =
 682           if n < fromIntegral (P.length x)
 683             then P.drop (fromIntegral n) x : xs
 684             else drop' (n - fromIntegral (P.length x)) xs
 685
 686 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 687 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
 688 splitAt i p        | i <= 0 = (empty, p)
 689 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
 690   where splitAt' _ []     = ([], [])
 691         splitAt' 0 xs     = ([], xs)
 692         splitAt' n (x:xs) =
 693           if n < fromIntegral (P.length x)
 694             then (P.take (fromIntegral n) x : [],
 695                   P.drop (fromIntegral n) x : xs)
 696             else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
 697                    in (x:xs', xs'')
 698
 699
 700 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 701 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 702 -- satisfy @p@.
 703 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 704 takeWhile f (LPS ps) = LPS (takeWhile' ps)
 705   where takeWhile' []     = []
 706         takeWhile' (x:xs) =
 707           case P.findIndexOrEnd (not . f) x of
 708             0                  -> []
 709             n | n < P.length x -> P.take n x : []
 710               | otherwise      -> x : takeWhile' xs
 711
 712 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 713 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 714 dropWhile f (LPS ps) = LPS (dropWhile' ps)
 715   where dropWhile' []     = []
 716         dropWhile' (x:xs) =
 717           case P.findIndexOrEnd (not . f) x of
 718             n | n < P.length x -> P.drop n x : xs
 719               | otherwise      -> dropWhile' xs
 720
 721 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 722 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 723 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
 724   where break' []     = ([], [])
 725         break' (x:xs) =
 726           case P.findIndexOrEnd f x of
 727             0                  -> ([], x : xs)
 728             n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
 729               | otherwise      -> let (xs', xs'') = break' xs
 730                                    in (x : xs', xs'')
 731
 732 -- | 'breakByte' breaks its ByteString argument at the first occurence
 733 -- of the specified byte. It is more efficient than 'break' as it is
 734 -- implemented with @memchr(3)@. I.e.
 735 --
 736 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 737 --
 738 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 739 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
 740   where breakByte' []     = ([], [])
 741         breakByte' (x:xs) =
 742           case P.elemIndex c x of
 743             Just 0  -> ([], x : xs)
 744             Just n  -> (P.take n x : [], P.drop n x : xs)
 745             Nothing -> let (xs', xs'') = breakByte' xs
 746                         in (x : xs', xs'')
 747
 748 -- | 'spanByte' breaks its ByteString argument at the first
 749 -- occurence of a byte other than its argument. It is more efficient
 750 -- than 'span (==)'
 751 --
 752 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 753 --
 754 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 755 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
 756   where spanByte' []     = ([], [])
 757         spanByte' (x:xs) =
 758           case P.spanByte c x of
 759             (x', x'') | P.null x'  -> ([], x : xs)
 760                       | P.null x'' -> let (xs', xs'') = spanByte' xs
 761                                        in (x : xs', xs'')
 762                       | otherwise  -> (x' : [], x'' : xs)
 763
 764 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 765 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 766 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 767 span p = break (not . p)
 768
 769 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 770 -- separators, where the predicate returns True for a separator element.
 771 -- The resulting components do not contain the separators.  Two adjacent
 772 -- separators result in an empty component in the output.  eg.
 773 --
 774 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 775 -- > splitWith (=='a') []        == []
 776 --
 777 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 778 splitWith _ (LPS [])     = []
 779 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
 780
 781   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 782         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 783         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
 784         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 785
 786         cons' x xs | P.null x  = xs
 787                    | otherwise = x:xs
 788         {-# INLINE cons' #-}
 789 {-# INLINE splitWith #-}
 790
 791 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 792 -- argument, consuming the delimiter. I.e.
 793 --
 794 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 795 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
 796 -- > split 'x'  "x"          == ["",""]
 797 --
 798 -- and
 799 --
 800 -- > join [c] . split c == id
 801 -- > split == splitWith . (==)
 802 --
 803 -- As for all splitting functions in this library, this function does
 804 -- not copy the substrings, it just constructs new 'ByteStrings' that
 805 -- are slices of the original.
 806 --
 807 split :: Word8 -> ByteString -> [ByteString]
 808 split _ (LPS [])     = []
 809 split c (LPS (a:as)) = comb [] (P.split c a) as
 810
 811   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 812         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 813         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
 814         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 815
 816         cons' x xs | P.null x  = xs
 817                    | otherwise = x:xs
 818         {-# INLINE cons' #-}
 819 {-# INLINE split #-}
 820
 821 -- | Like 'splitWith', except that sequences of adjacent separators are
 822 -- treated as a single separator. eg.
 823 --
 824 -- > tokens (=='a') "aabbaca" == ["bb","c"]
 825 --
 826 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
 827 tokens f = L.filter (not.null) . splitWith f
 828
 829 -- | The 'group' function takes a ByteString and returns a list of
 830 -- ByteStrings such that the concatenation of the result is equal to the
 831 -- argument.  Moreover, each sublist in the result contains only equal
 832 -- elements.  For example,
 833 --
 834 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 835 --
 836 -- It is a special case of 'groupBy', which allows the programmer to
 837 -- supply their own equality test.
 838 group :: ByteString -> [ByteString]
 839 group (LPS [])     = []
 840 group (LPS (a:as)) = group' [] (P.group a) as
 841   where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 842         group' acc@(s':_) ss@(s:_) xs
 843           | P.unsafeHead s'
 844          /= P.unsafeHead s       = LPS (L.reverse acc) : group' [] ss xs
 845         group' acc (s:[]) []     = LPS (L.reverse (s : acc)) : []
 846         group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
 847         group' acc (s:ss) xs     = LPS (L.reverse (s : acc)) : group' [] ss xs
 848
 849 {-
 850 TODO: check if something like this might be faster
 851
 852 group :: ByteString -> [ByteString]
 853 group xs
 854     | null xs   = []
 855     | otherwise = ys : group zs
 856     where
 857         (ys, zs) = spanByte (unsafeHead xs) xs
 858 -}
 859
 860 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 861 --
 862 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 863 groupBy _ (LPS [])     = []
 864 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
 865   where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 866         groupBy' acc@(_:_) c ss@(s:_) xs
 867           | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
 868         groupBy' acc _ (s:[]) []       = LPS (L.reverse (s : acc)) : []
 869         groupBy' []  _ (s:[]) (x:xs)   = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
 870         groupBy' acc c (s:[]) (x:xs)   = groupBy' (s:acc) c (P.groupBy k x) xs
 871         groupBy' acc _ (s:ss) xs       = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
 872
 873 {-
 874 TODO: check if something like this might be faster
 875
 876 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 877 groupBy k xs
 878     | null xs   = []
 879     | otherwise = take n xs : groupBy k (drop n xs)
 880     where
 881         n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
 882 -}
 883
 884 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
 885 -- 'ByteString's and concatenates the list after interspersing the first
 886 -- argument between each element of the list.
 887 join :: ByteString -> [ByteString] -> ByteString
 888 join s = concat . (L.intersperse s)
 889
 890 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
 891 -- with a char.
 892 --
 893 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
 894 joinWithByte c x y = append x (cons c y)
 895
 896 -- ---------------------------------------------------------------------
 897 -- Indexing ByteStrings
 898
 899 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
 900 index :: ByteString -> Int64 -> Word8
 901 index _        i | i < 0 = moduleError "index" ("negative index: " ++ show i)
 902 index (LPS ps) i         = index' ps i
 903   where index' []     n = moduleError "index" ("index too large: " ++ show n)
 904         index' (x:xs) n
 905           | n >= fromIntegral (P.length x) =
 906               index' xs (n - fromIntegral (P.length x))
 907           | otherwise       = P.unsafeIndex x (fromIntegral n)
 908
 909 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 910 -- element in the given 'ByteString' which is equal to the query
 911 -- element, or 'Nothing' if there is no such element.
 912 -- This implementation uses memchr(3).
 913 elemIndex :: Word8 -> ByteString -> Maybe Int64
 914 elemIndex c (LPS ps) = elemIndex' 0 ps
 915   where elemIndex' _ []     = Nothing
 916         elemIndex' n (x:xs) =
 917           case P.elemIndex c x of
 918             Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
 919             Just i  -> Just (n + fromIntegral i)
 920
 921 {-
 922 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 923 -- element in the given 'ByteString' which is equal to the query
 924 -- element, or 'Nothing' if there is no such element. The following
 925 -- holds:
 926 --
 927 -- > elemIndexEnd c xs ==
 928 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 929 --
 930 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 931 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 932     go (p `plusPtr` s) (l-1)
 933   where
 934     STRICT2(go)
 935     go p i | i < 0     = return Nothing
 936            | otherwise = do ch' <- peekByteOff p i
 937                             if ch == ch'
 938                                 then return $ Just i
 939                                 else go p (i-1)
 940 -}
 941 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 942 -- the indices of all elements equal to the query element, in ascending order.
 943 -- This implementation uses memchr(3).
 944 elemIndices :: Word8 -> ByteString -> [Int64]
 945 elemIndices c (LPS ps) = elemIndices' 0 ps
 946   where elemIndices' _ []     = []
 947         elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
 948                              ++ elemIndices' (n + fromIntegral (P.length x)) xs
 949
 950 -- | count returns the number of times its argument appears in the ByteString
 951 --
 952 -- > count = length . elemIndices
 953 --
 954 -- But more efficiently than using length on the intermediate list.
 955 count :: Word8 -> ByteString -> Int64
 956 count w (LPS xs) = L.sum (L.map (fromIntegral . P.count w) xs)
 957
 958 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 959 -- returns the index of the first element in the ByteString
 960 -- satisfying the predicate.
 961 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 962 findIndex k (LPS ps) = findIndex' 0 ps
 963   where findIndex' _ []     = Nothing
 964         findIndex' n (x:xs) =
 965           case P.findIndex k x of
 966             Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
 967             Just i  -> Just (n + fromIntegral i)
 968 {-# INLINE findIndex #-}
 969
 970 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 971 -- and returns the first element in matching the predicate, or 'Nothing'
 972 -- if there is no such element.
 973 --
 974 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 975 --
 976 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 977 find f (LPS ps) = find' ps
 978   where find' []     = Nothing
 979         find' (x:xs) = case P.find f x of
 980             Nothing -> find' xs
 981             Just w  -> Just w
 982 {-# INLINE find #-}
 983
 984 -- | The 'findIndices' function extends 'findIndex', by returning the
 985 -- indices of all elements satisfying the predicate, in ascending order.
 986 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
 987 findIndices k (LPS ps) = findIndices' 0 ps
 988   where findIndices' _ []     = []
 989         findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
 990                              ++ findIndices' (n + fromIntegral (P.length x)) xs
 991
 992 -- ---------------------------------------------------------------------
 993 -- Searching ByteStrings
 994
 995 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
 996 elem :: Word8 -> ByteString -> Bool
 997 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
 998
 999 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1000 notElem :: Word8 -> ByteString -> Bool
1001 notElem c ps = not (elem c ps)
1002
1003 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1004 -- returns a ByteString containing those characters that satisfy the
1005 -- predicate.
1006 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1007 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1008 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1009 {-# INLINE filter #-}
1010
1011 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1012 -- (==)/, for the common case of filtering a single byte. It is more
1013 -- efficient to use /filterByte/ in this case.
1014 --
1015 -- > filterByte == filter . (==)
1016 --
1017 -- filterByte is around 10x faster, and uses much less space, than its
1018 -- filter equivalent
1019 filterByte :: Word8 -> ByteString -> ByteString
1020 filterByte w ps = replicate (count w ps) w
1021 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1022
1023 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1024 -- case of filtering a single byte out of a list. It is more efficient
1025 -- to use /filterNotByte/ in this case.
1026 --
1027 -- > filterNotByte == filter . (/=)
1028 --
1029 -- filterNotByte is around 2x faster than its filter equivalent.
1030 filterNotByte :: Word8 -> ByteString -> ByteString
1031 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1032
1033 -- ---------------------------------------------------------------------
1034 -- Searching for substrings
1035
1036 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1037 -- iff the first is a prefix of the second.
1038 isPrefixOf :: ByteString -> ByteString -> Bool
1039 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1040   where isPrefixL [] _  = True
1041         isPrefixL _ []  = False
1042         isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y  && isPrefixL xs ys
1043                                 | P.length x <  P.length y = x == yh && isPrefixL xs (yt:ys)
1044                                 | otherwise                = xh == y && isPrefixL (xt:xs) ys
1045           where (xh,xt) = P.splitAt (P.length y) x
1046                 (yh,yt) = P.splitAt (P.length x) y
1047
1048 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1049 -- iff the first is a suffix of the second.
1050 --
1051 -- The following holds:
1052 --
1053 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1054 --
1055 -- However, the real implemenation uses memcmp to compare the end of the
1056 -- string only, with no reverse required..
1057 --
1058 --isSuffixOf :: ByteString -> ByteString -> Bool
1059 --isSuffixOf = error "not yet implemented"
1060
1061 -- ---------------------------------------------------------------------
1062 -- Zipping
1063
1064 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1065 -- corresponding pairs of bytes. If one input ByteString is short,
1066 -- excess elements of the longer ByteString are discarded. This is
1067 -- equivalent to a pair of 'unpack' operations.
1068 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1069 zip = zipWith (,)
1070
1071 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1072 -- the first argument, instead of a tupling function.  For example,
1073 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1074 -- corresponding sums.
1075 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1076 zipWith _ (LPS [])     (LPS _)  = []
1077 zipWith _ (LPS _)      (LPS []) = []
1078 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1079   where zipWith' x xs y ys =
1080           (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1081
1082         zipWith'' x []      _ _       | P.null x       = []
1083         zipWith'' _ _       y []      | P.null y       = []
1084         zipWith'' x xs      y ys      | not (P.null x)
1085                                      && not (P.null y) = zipWith' x  xs y  ys
1086         zipWith'' x xs      _ (y':ys) | not (P.null x) = zipWith' x  xs y' ys
1087         zipWith'' _ (x':xs) y ys      | not (P.null y) = zipWith' x' xs y  ys
1088         zipWith'' _ (x':xs) _ (y':ys)                  = zipWith' x' xs y' ys
1089
1090 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1091 -- ByteStrings. Note that this performs two 'pack' operations.
1092 {-
1093 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1094 unzip _ls = error "not yet implemented"
1095 {-# INLINE unzip #-}
1096 -}
1097
1098 -- ---------------------------------------------------------------------
1099 -- Special lists
1100
1101 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1102 inits :: ByteString -> [ByteString]
1103 inits = (LPS [] :) . inits' . unLPS
1104   where inits' []     = []
1105         inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1106                      ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1107
1108 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1109 tails :: ByteString -> [ByteString]
1110 tails = tails' . unLPS
1111   where tails' []           = LPS [] : []
1112         tails' xs@(x:xs')
1113           | P.length x == 1 = LPS xs : tails' xs'
1114           | otherwise       = LPS xs : tails' (P.unsafeTail x : xs')
1115
1116 -- ---------------------------------------------------------------------
1117
1118 -- TODO defrag func that concatenates block together that are below a threshold
1119 -- defrag :: Int -> ByteString -> ByteString
1120
1121 -- ---------------------------------------------------------------------
1122 -- Lazy ByteString IO
1123
1124 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1125 -- are read on demand, in @k@-sized chunks.
1126 hGetContentsN :: Int -> Handle -> IO ByteString
1127 hGetContentsN k h = lazyRead >>= return . LPS
1128   where
1129     lazyRead = unsafeInterleaveIO $ do
1130         ps <- P.hGet h k
1131         case P.length ps of
1132             0         -> return []
1133             n | n < k -> return [ps]
1134             _         -> do pss <- lazyRead
1135                             return (ps : pss)
1136
1137 -- | Read @n@ bytes into a 'ByteString', directly from the
1138 -- specified 'Handle', in chunks of size @k@.
1139 hGetN :: Int -> Handle -> Int -> IO ByteString
1140 hGetN _ _ 0 = return empty
1141 hGetN k h n = readChunks n >>= return . LPS
1142   where
1143     STRICT1(readChunks)
1144     readChunks i = do
1145         ps <- P.hGet h (min k i)
1146         case P.length ps of
1147             0          -> return []
1148             m | m == i -> return [ps]
1149             m          -> do pss <- readChunks (i - m)
1150                              return (ps : pss)
1151
1152 #if defined(__GLASGOW_HASKELL__)
1153 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1154 -- waiting for data to become available, instead it returns only whatever data
1155 -- is available. Chunks are read on demand, in @k@-sized chunks.
1156 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1157 hGetNonBlockingN _ _ 0 = return empty
1158 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1159   where
1160     readChunks i = do
1161         ps <- P.hGetNonBlocking h (min k i)
1162         case P.length ps of
1163             0         -> return []
1164             m | fromIntegral m < i -> return [ps]
1165             m         -> do pss <- readChunks (i - m)
1166                             return (ps : pss)
1167 #endif
1168
1169 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1170 -- are read on demand, using the default chunk size.
1171 hGetContents :: Handle -> IO ByteString
1172 hGetContents = hGetContentsN defaultChunkSize
1173
1174 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1175 hGet :: Handle -> Int -> IO ByteString
1176 hGet = hGetN defaultChunkSize
1177
1178 #if defined(__GLASGOW_HASKELL__)
1179 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1180 -- waiting for data to become available, instead it returns only whatever data
1181 -- is available.
1182 hGetNonBlocking :: Handle -> Int -> IO ByteString
1183 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1184 #endif
1185
1186
1187 -- | Read an entire file /lazily/ into a 'ByteString'.
1188 readFile :: FilePath -> IO ByteString
1189 readFile f = openBinaryFile f ReadMode >>= hGetContents
1190
1191 -- | Write a 'ByteString' to a file.
1192 writeFile :: FilePath -> ByteString -> IO ()
1193 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1194     (\hdl -> hPut hdl txt)
1195
1196 -- | Append a 'ByteString' to a file.
1197 appendFile :: FilePath -> ByteString -> IO ()
1198 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1199     (\hdl -> hPut hdl txt)
1200
1201 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1202 getContents :: IO ByteString
1203 getContents = hGetContents stdin
1204
1205 -- | Outputs a 'ByteString' to the specified 'Handle'.
1206 hPut :: Handle -> ByteString -> IO ()
1207 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1208
1209 -- | Write a ByteString to stdout
1210 putStr :: ByteString -> IO ()
1211 putStr = hPut stdout
1212
1213 -- | Write a ByteString to stdout, appending a newline byte
1214 putStrLn :: ByteString -> IO ()
1215 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1216
1217 -- | The interact function takes a function of type @ByteString -> ByteString@
1218 -- as its argument. The entire input from the standard input device is passed
1219 -- to this function as its argument, and the resulting string is output on the
1220 -- standard output device. It's great for writing one line programs!
1221 interact :: (ByteString -> ByteString) -> IO ()
1222 interact transformer = putStr . transformer =<< getContents
1223
1224 -- ---------------------------------------------------------------------
1225 -- Internal utilities
1226
1227 -- Common up near identical calls to `error' to reduce the number
1228 -- constant strings created when compiled:
1229 errorEmptyList :: String -> a
1230 errorEmptyList fun = moduleError fun "empty ByteString"
1231
1232 moduleError :: String -> String -> a
1233 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1234
1235 -- A manually fused version of "filter (not.null) . map f", since they
1236 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1237 --
1238 -- TODO fuse.
1239 --
1240 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1241 filterMap _ []     = []
1242 filterMap f (x:xs) = case f x of
1243                     y | P.null y  ->     filterMap f xs      -- manually fuse the invariant filter
1244                       | otherwise -> y : filterMap f xs
1245 {-# INLINE filterMap #-}
1246