Data/ByteString/Lazy.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
   2 --
   3 -- Module      : ByteString.Lazy
   4 -- Copyright   : (c) Don Stewart 2006
   5 --               (c) Duncan Coutts 2006
   6 -- License     : BSD-style
   7 --
   8 -- Maintainer  : dons@cse.unsw.edu.au
   9 -- Stability   : experimental
  10 -- Portability : portable, requires ffi and cpp
  11 -- Tested with : GHC 6.4.1 and Hugs March 2005
  12 --
  13
  14 --
  15 -- | A time and space-efficient implementation of lazy byte vectors
  16 -- using lists of packed 'Word8' arrays, suitable for high performance
  17 -- use, both in terms of large data quantities, or high speed
  18 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
  19 -- arrays of bytes. They provide a means to manipulate large byte vectors
  20 -- without requiring the entire vector be resident in memory.
  21 --
  22 -- Some operations, such as concat, append, reverse and cons, have
  23 -- better complexity than their "Data.ByteString" equivalents, due to
  24 -- optimisations resulting from the list spine structure. And for other
  25 -- operations Lazy ByteStrings are usually within a few percent of
  26 -- strict ones, but with better heap usage. For data larger than the
  27 -- available memory, or if you have tight memory constraints, this
  28 -- module will be the only option. The default chunk size is 64k, which
  29 -- should be good in most circumstances. For people with large L2
  30 -- caches, you may want to increase this to fit your cache.
  31 --
  32 -- This module is intended to be imported @qualified@, to avoid name
  33 -- clashes with "Prelude" functions.  eg.
  34 --
  35 -- > import qualified Data.ByteString.Lazy as B
  36 --
  37 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  38 -- UArray by Simon Marlow. Rewritten to support slices and use
  39 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  40 -- Lazy variant by Duncan Coutts and Don Stewart.
  41 --
  42
  43 module Data.ByteString.Lazy (
  44
  45         -- * The @ByteString@ type
  46         ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable
  47
  48         -- * Introducing and eliminating 'ByteString's
  49         empty,                  -- :: ByteString
  50         singleton,              -- :: Word8   -> ByteString
  51         pack,                   -- :: [Word8] -> ByteString
  52         unpack,                 -- :: ByteString -> [Word8]
  53         fromChunks,             -- :: [Strict.ByteString] -> ByteString
  54         toChunks,               -- :: ByteString -> [Strict.ByteString]
  55
  56         -- * Basic interface
  57         cons,                   -- :: Word8 -> ByteString -> ByteString
  58         snoc,                   -- :: ByteString -> Word8 -> ByteString
  59         append,                 -- :: ByteString -> ByteString -> ByteString
  60         head,                   -- :: ByteString -> Word8
  61         last,                   -- :: ByteString -> Word8
  62         tail,                   -- :: ByteString -> ByteString
  63         init,                   -- :: ByteString -> ByteString
  64         null,                   -- :: ByteString -> Bool
  65         length,                 -- :: ByteString -> Int64
  66
  67         -- * Transformating ByteStrings
  68         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  69         reverse,                -- :: ByteString -> ByteString
  70 --      intersperse,            -- :: Word8 -> ByteString -> ByteString
  71         transpose,              -- :: [ByteString] -> [ByteString]
  72
  73         -- * Reducing 'ByteString's (folds)
  74         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  75         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  76         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  78         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  79         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  80
  81         -- ** Special folds
  82         concat,                 -- :: [ByteString] -> ByteString
  83         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  84         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  85         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  86         maximum,                -- :: ByteString -> Word8
  87         minimum,                -- :: ByteString -> Word8
  88
  89         -- * Building ByteStrings
  90         -- ** Scans
  91         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  92 --      scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  93 --      scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  94 --      scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  95
  96         -- ** Accumulating maps
  97         mapAccumL,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  98         mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
  99
 100         -- ** Infinite ByteStrings
 101         repeat,                 -- :: Word8 -> ByteString
 102         replicate,              -- :: Int64 -> Word8 -> ByteString
 103         cycle,                  -- :: ByteString -> ByteString
 104         iterate,                -- :: (Word8 -> Word8) -> Word8 -> ByteString
 105
 106         -- ** Unfolding
 107         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 108
 109         -- * Substrings
 110
 111         -- ** Breaking strings
 112         take,                   -- :: Int64 -> ByteString -> ByteString
 113         drop,                   -- :: Int64 -> ByteString -> ByteString
 114         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
 115         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 116         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 117         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 118         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 119         group,                  -- :: ByteString -> [ByteString]
 120         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 121         inits,                  -- :: ByteString -> [ByteString]
 122         tails,                  -- :: ByteString -> [ByteString]
 123
 124         -- ** Breaking into many substrings
 125         split,                  -- :: Word8 -> ByteString -> [ByteString]
 126         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 127
 128         -- ** Joining strings
 129         join,                   -- :: ByteString -> [ByteString] -> ByteString
 130
 131         -- * Predicates
 132         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 133 --      isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 134
 135         -- * Searching ByteStrings
 136
 137         -- ** Searching by equality
 138         elem,                   -- :: Word8 -> ByteString -> Bool
 139         notElem,                -- :: Word8 -> ByteString -> Bool
 140
 141         -- ** Searching with a predicate
 142         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 143         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 144 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 145
 146         -- * Indexing ByteStrings
 147         index,                  -- :: ByteString -> Int64 -> Word8
 148         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int64
 149         elemIndices,            -- :: Word8 -> ByteString -> [Int64]
 150         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 151         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int64]
 152         count,                  -- :: Word8 -> ByteString -> Int64
 153
 154         -- * Zipping and unzipping ByteStrings
 155         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 156         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 157 --      unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 158
 159         -- * Ordered ByteStrings
 160 --        sort,                   -- :: ByteString -> ByteString
 161
 162         copy,                   -- :: ByteString -> ByteString
 163
 164         -- * I\/O with 'ByteString's
 165
 166         -- ** Standard input and output
 167         getContents,            -- :: IO ByteString
 168         putStr,                 -- :: ByteString -> IO ()
 169         putStrLn,               -- :: ByteString -> IO ()
 170         interact,               -- :: (ByteString -> ByteString) -> IO ()
 171
 172         -- ** Files
 173         readFile,               -- :: FilePath -> IO ByteString
 174         writeFile,              -- :: FilePath -> ByteString -> IO ()
 175         appendFile,             -- :: FilePath -> ByteString -> IO ()
 176
 177         -- ** I\/O with Handles
 178         hGetContents,           -- :: Handle -> IO ByteString
 179         hGet,                   -- :: Handle -> Int -> IO ByteString
 180         hPut,                   -- :: Handle -> ByteString -> IO ()
 181         hGetNonBlocking,        -- :: Handle -> IO ByteString
 182
 183 --      hGetN,                  -- :: Int -> Handle -> Int -> IO ByteString
 184 --      hGetContentsN,          -- :: Int -> Handle -> IO ByteString
 185 --      hGetNonBlockingN,       -- :: Int -> Handle -> IO ByteString
 186
 187   ) where
 188
 189 import qualified Prelude
 190 import Prelude hiding
 191     (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
 192     ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
 193     ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
 194     ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
 195     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
 196
 197 import qualified Data.List              as L  -- L for list/lazy
 198 import qualified Data.ByteString        as P  -- P for packed
 199 import qualified Data.ByteString.Base   as P
 200 import Data.ByteString.Base (LazyByteString(..))
 201 import qualified Data.ByteString.Fusion as P
 202 import Data.ByteString.Fusion (PairS(..),loopL)
 203
 204 import Data.Monoid              (Monoid(..))
 205
 206 import Data.Word                (Word8)
 207 import Data.Int                 (Int64)
 208 import System.IO                (Handle,stdin,stdout,openBinaryFile,IOMode(..)
 209                                 ,hClose,hWaitForInput,hIsEOF)
 210 import System.IO.Unsafe
 211 import Control.Exception        (bracket)
 212
 213 import Foreign.ForeignPtr       (withForeignPtr)
 214 import Foreign.Ptr
 215 import Foreign.Storable
 216
 217 -- -----------------------------------------------------------------------------
 218 --
 219 -- Useful macros, until we have bang patterns
 220 --
 221
 222 #define STRICT1(f) f a | a `seq` False = undefined
 223 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 224 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 225 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 226 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 227
 228 -- -----------------------------------------------------------------------------
 229
 230 type ByteString = LazyByteString
 231
 232 --
 233 -- hmm, what about getting the PS constructor unpacked into the cons cell?
 234 --
 235 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
 236 --
 237 -- Would avoid one indirection per chunk.
 238 --
 239
 240 unLPS :: ByteString -> [P.ByteString]
 241 unLPS (LPS xs) = xs
 242 {-# INLINE unLPS #-}
 243
 244 instance Eq  ByteString
 245     where (==)    = eq
 246
 247 instance Ord ByteString
 248     where compare = compareBytes
 249
 250 instance Monoid ByteString where
 251     mempty  = empty
 252     mappend = append
 253     mconcat = concat
 254
 255 ------------------------------------------------------------------------
 256
 257 -- XXX
 258 -- The data type invariant:
 259 -- Every ByteString is either empty or consists of non-null ByteStrings.
 260 -- All functions must preserve this, and the QC properties must check this.
 261 --
 262 _invariant :: ByteString -> Bool
 263 _invariant (LPS []) = True
 264 _invariant (LPS xs) = L.all (not . P.null) xs
 265
 266 -- In a form useful for QC testing
 267 _checkInvariant :: ByteString -> ByteString
 268 _checkInvariant lps
 269     | _invariant lps = lps
 270     | otherwise      = moduleError "invariant" ("violation: " ++ show lps)
 271
 272 -- The Data abstraction function
 273 --
 274 _abstr :: ByteString -> P.ByteString
 275 _abstr (LPS []) = P.empty
 276 _abstr (LPS xs) = P.concat xs
 277
 278 -- The representation uses lists of packed chunks. When we have to convert from
 279 -- a lazy list to the chunked representation, then by default we'll use this
 280 -- chunk size. Some functions give you more control over the chunk size.
 281 --
 282 -- Measurements here:
 283 --  http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
 284 --
 285 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
 286 -- The following value assumes people have something greater than 128k,
 287 -- and need to share the cache with other programs.
 288 --
 289 defaultChunkSize :: Int
 290 defaultChunkSize = 32 * k - overhead
 291    where k = 1024
 292          overhead = 2 * sizeOf (undefined :: Int)
 293
 294 smallChunkSize :: Int
 295 smallChunkSize = 4 * k - overhead
 296    where k = 1024
 297          overhead = 2 * sizeOf (undefined :: Int)
 298
 299 -- defaultChunkSize = 1
 300
 301 ------------------------------------------------------------------------
 302
 303 eq :: ByteString -> ByteString -> Bool
 304 eq (LPS xs) (LPS ys) = eq' xs ys
 305   where eq' [] [] = True
 306         eq' [] _  = False
 307         eq' _  [] = False
 308         eq' (a:as) (b:bs) =
 309           case compare (P.length a) (P.length b) of
 310             LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
 311             EQ -> a == b                       && eq' as bs
 312             GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
 313
 314 compareBytes :: ByteString -> ByteString -> Ordering
 315 compareBytes (LPS xs) (LPS ys) = cmp xs ys
 316   where cmp [] [] = EQ
 317         cmp [] _  = LT
 318         cmp _  [] = GT
 319         cmp (a:as) (b:bs) =
 320           case compare (P.length a) (P.length b) of
 321             LT -> case compare a (P.take (P.length a) b) of
 322                     EQ     -> cmp as (P.drop (P.length a) b : bs)
 323                     result -> result
 324             EQ -> case compare a b of
 325                     EQ     -> cmp as bs
 326                     result -> result
 327             GT -> case compare (P.take (P.length b) a) b of
 328                     EQ     -> cmp (P.drop (P.length b) a : as) bs
 329                     result -> result
 330
 331 -- -----------------------------------------------------------------------------
 332 -- Introducing and eliminating 'ByteString's
 333
 334 -- | /O(1)/ The empty 'ByteString'
 335 empty :: ByteString
 336 empty = LPS []
 337 {-# NOINLINE empty #-}
 338
 339 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 340 singleton :: Word8 -> ByteString
 341 singleton c = LPS [P.singleton c]
 342 {-# NOINLINE singleton #-}
 343
 344 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 345 pack :: [Word8] -> ByteString
 346 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
 347
 348 -- ?
 349 chunk :: Int -> [a] -> [[a]]
 350 chunk _    [] = []
 351 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
 352
 353 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 354 unpack :: ByteString -> [Word8]
 355 unpack (LPS ss) = L.concatMap P.unpack ss
 356 {-# INLINE unpack #-}
 357
 358 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
 359 fromChunks :: [P.ByteString] -> ByteString
 360 fromChunks ls = LPS $ L.filter (not . P.null) ls
 361
 362 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
 363 toChunks :: ByteString -> [P.ByteString]
 364 toChunks (LPS s) = s
 365
 366 ------------------------------------------------------------------------
 367
 368 {-
 369 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 370 -- conversion function
 371 packWith :: (a -> Word8) -> [a] -> ByteString
 372 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
 373 {-# INLINE packWith #-}
 374 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 375
 376 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 377 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 378 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
 379 {-# INLINE unpackWith #-}
 380 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 381 -}
 382
 383 -- ---------------------------------------------------------------------
 384 -- Basic interface
 385
 386 -- | /O(1)/ Test whether a ByteString is empty.
 387 null :: ByteString -> Bool
 388 null (LPS []) = True
 389 null (_)      = False
 390 {-# INLINE null #-}
 391
 392 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
 393 length :: ByteString -> Int64
 394 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
 395
 396 -- avoid the intermediate list?
 397 -- length (LPS ss) = L.foldl lengthF 0 ss
 398 --     where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
 399 {-# INLINE length #-}
 400
 401 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
 402 -- strict in the ByteString that we are consing onto. More precisely, it forces
 403 -- the head and the first chunk. It does this because, for space efficiency, it
 404 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
 405 -- new \'chunk\'.
 406 --
 407 -- So that means you can't use a lazy recursive contruction like this:
 408 --
 409 -- > let xs = cons c xs in xs
 410 --
 411 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
 412 --
 413 cons :: Word8 -> ByteString -> ByteString
 414 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
 415 cons c (LPS ss)                        = LPS (P.singleton c : ss)
 416 {-# INLINE cons #-}
 417
 418 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
 419 snoc :: ByteString -> Word8 -> ByteString
 420 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
 421 {-# INLINE snoc #-}
 422
 423 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 424 head :: ByteString -> Word8
 425 head (LPS [])    = errorEmptyList "head"
 426 head (LPS (x:_)) = P.unsafeHead x
 427 {-# INLINE head #-}
 428
 429 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 430 tail :: ByteString -> ByteString
 431 tail (LPS [])     = errorEmptyList "tail"
 432 tail (LPS (x:xs))
 433   | P.length x == 1 = LPS xs
 434   | otherwise       = LPS (P.unsafeTail x : xs)
 435 {-# INLINE tail #-}
 436
 437 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
 438 last :: ByteString -> Word8
 439 last (LPS []) = errorEmptyList "last"
 440 last (LPS xs) = P.last (L.last xs)
 441 {-# INLINE last #-}
 442
 443 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
 444 init :: ByteString -> ByteString
 445 init (LPS []) = errorEmptyList "init"
 446 init (LPS xs)
 447     | P.length y == 1 = LPS ys
 448     | otherwise       = LPS (ys ++ [P.init y])
 449     where (y,ys) = (L.last xs, L.init xs)
 450 {-# INLINE init #-}
 451
 452 -- | /O(n)/ Append two ByteStrings
 453 append :: ByteString -> ByteString -> ByteString
 454 append (LPS []) (LPS ys) = LPS ys
 455 append (LPS xs) (LPS []) = LPS xs
 456 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
 457 {-# INLINE append #-}
 458
 459 -- ---------------------------------------------------------------------
 460 -- Transformations
 461
 462 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 463 -- element of @xs@.
 464 map :: (Word8 -> Word8) -> ByteString -> ByteString
 465 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
 466 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
 467 {-# INLINE map #-}
 468
 469 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 470 reverse :: ByteString -> ByteString
 471 reverse (LPS ps) = LPS (rev [] ps)
 472   where rev a []     = a
 473         rev a (x:xs) = rev (P.reverse x:a) xs
 474 -- note, here is one example where the extra element lazyness is an advantage.
 475 -- we can reerse the list of chunks strictly but reverse each chunk lazily
 476 -- so while we may force the whole lot into memory we do not need to copy
 477 -- each chunk until it is used.
 478 {-# INLINE reverse #-}
 479
 480 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
 481 -- \`intersperses\' that byte between the elements of the 'ByteString'.
 482 -- It is analogous to the intersperse function on Lists.
 483 -- intersperse :: Word8 -> ByteString -> ByteString
 484 -- intersperse = error "FIXME: not yet implemented"
 485
 486 {-
 487 intersperse c (LPS [])     = LPS []
 488 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
 489   where intersperse' c ps@(PS x s l) =
 490           P.create (2*l) $ \p -> withForeignPtr x $ \f ->
 491                 poke p c
 492                 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
 493 -}
 494
 495 -- | The 'transpose' function transposes the rows and columns of its
 496 -- 'ByteString' argument.
 497 transpose :: [ByteString] -> [ByteString]
 498 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
 499
 500 -- ---------------------------------------------------------------------
 501 -- Reducing 'ByteString's
 502
 503 -- | 'foldl', applied to a binary operator, a starting value (typically
 504 -- the left-identity of the operator), and a ByteString, reduces the
 505 -- ByteString using the binary operator, from left to right.
 506 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 507 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
 508 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
 509 {-# INLINE foldl #-}
 510
 511 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 512 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 513 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
 514 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
 515 {-# INLINE foldl' #-}
 516
 517 -- | 'foldr', applied to a binary operator, a starting value
 518 -- (typically the right-identity of the operator), and a ByteString,
 519 -- reduces the ByteString using the binary operator, from right to left.
 520 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 521 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
 522 {-# INLINE foldr #-}
 523
 524 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 525 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 526 -- This function is subject to array fusion.
 527 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 528 foldl1 _ (LPS []) = errorEmptyList "foldl1"
 529 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 530
 531 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 532 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 533 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
 534 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 535
 536 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 537 -- and thus must be applied to non-empty 'ByteString's
 538 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 539 foldr1 _ (LPS []) = errorEmptyList "foldr1"
 540 foldr1 f (LPS ps) = foldr1' ps
 541   where foldr1' (x:[]) = P.foldr1 f x
 542         foldr1' (x:xs) = P.foldr  f (foldr1' xs) x
 543
 544 -- ---------------------------------------------------------------------
 545 -- Special folds
 546
 547 -- | /O(n)/ Concatenate a list of ByteStrings.
 548 concat :: [ByteString] -> ByteString
 549 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
 550
 551 -- | Map a function over a 'ByteString' and concatenate the results
 552 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 553 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
 554     where
 555       k w = case f w of LPS xs -> P.concat xs
 556
 557 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 558 -- any element of the 'ByteString' satisfies the predicate.
 559 any :: (Word8 -> Bool) -> ByteString -> Bool
 560 any f (LPS xs) = L.or (L.map (P.any f) xs)
 561 -- todo fuse
 562
 563 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 564 -- if all elements of the 'ByteString' satisfy the predicate.
 565 all :: (Word8 -> Bool) -> ByteString -> Bool
 566 all f (LPS xs) = L.and (L.map (P.all f) xs)
 567 -- todo fuse
 568
 569 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 570 maximum :: ByteString -> Word8
 571 maximum (LPS [])     = errorEmptyList "maximum"
 572 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
 573 {-# INLINE maximum #-}
 574
 575 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 576 minimum :: ByteString -> Word8
 577 minimum (LPS [])     = errorEmptyList "minimum"
 578 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
 579 {-# INLINE minimum #-}
 580
 581 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 582 -- 'foldl'; it applies a function to each element of a ByteString,
 583 -- passing an accumulating parameter from left to right, and returning a
 584 -- final value of this accumulator together with the new ByteString.
 585 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 586 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
 587
 588 -- | /O(n)/ map Word8 functions, provided with the index at each position
 589 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 590 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
 591
 592 -- ---------------------------------------------------------------------
 593 -- Building ByteStrings
 594
 595 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 596 -- reduced values from the left. This function will fuse.
 597 --
 598 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 599 --
 600 -- Note that
 601 --
 602 -- > last (scanl f z xs) == foldl f z xs.
 603 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 604 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
 605 {-# INLINE scanl #-}
 606
 607 -- ---------------------------------------------------------------------
 608 -- Unfolds and replicates
 609
 610 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
 611 -- of @f@ to @x@:
 612 --
 613 -- > iterate f x == [x, f x, f (f x), ...]
 614 --
 615 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
 616 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
 617
 618 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
 619 -- element.
 620 --
 621 repeat :: Word8 -> ByteString
 622 repeat c = LPS (L.repeat block)
 623     where block =  P.replicate smallChunkSize c
 624
 625 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
 626 -- the value of every element.
 627 --
 628 replicate :: Int64 -> Word8 -> ByteString
 629 replicate w c
 630     | w <= 0             = empty
 631     | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
 632     | r == 0             = LPS (L.genericReplicate q s) -- preserve invariant
 633     | otherwise          = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
 634  where
 635     s      = P.replicate smallChunkSize c
 636     (q, r) = quotRem w (fromIntegral smallChunkSize)
 637
 638 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
 639 -- the infinite repetition of the original ByteString.
 640 --
 641 cycle :: ByteString -> ByteString
 642 cycle (LPS []) = errorEmptyList "cycle"
 643 cycle (LPS xs) = LPS (L.cycle xs)
 644
 645 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
 646 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
 647 -- the element and returns 'Nothing' if it is done producing the
 648 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 649 -- prepending to the ByteString and @b@ is used as the next element in a
 650 -- recursive call.
 651 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 652 unfoldr f = LPS . unfoldChunk 32
 653   where unfoldChunk n x =
 654           case P.unfoldrN n f x of
 655             (s, Nothing)
 656               | P.null s  -> []
 657               | otherwise -> s : []
 658             (s, Just x')  -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
 659
 660 -- ---------------------------------------------------------------------
 661 -- Substrings
 662
 663 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 664 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 665 take :: Int64 -> ByteString -> ByteString
 666 take i _ | i <= 0 = empty
 667 take i (LPS ps)   = LPS (take' i ps)
 668   where take' 0 _      = []
 669         take' _ []     = []
 670         take' n (x:xs) =
 671           if n < fromIntegral (P.length x)
 672             then P.take (fromIntegral n) x : []
 673             else x : take' (n - fromIntegral (P.length x)) xs
 674
 675 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 676 -- elements, or @[]@ if @n > 'length' xs@.
 677 drop  :: Int64 -> ByteString -> ByteString
 678 drop i p | i <= 0 = p
 679 drop i (LPS ps) = LPS (drop' i ps)
 680   where drop' 0 xs     = xs
 681         drop' _ []     = []
 682         drop' n (x:xs) =
 683           if n < fromIntegral (P.length x)
 684             then P.drop (fromIntegral n) x : xs
 685             else drop' (n - fromIntegral (P.length x)) xs
 686
 687 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 688 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
 689 splitAt i p        | i <= 0 = (empty, p)
 690 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
 691   where splitAt' 0 xs     = ([], xs)
 692         splitAt' _ []     = ([], [])
 693         splitAt' n (x:xs) =
 694           if n < fromIntegral (P.length x)
 695             then (P.take (fromIntegral n) x : [],
 696                   P.drop (fromIntegral n) x : xs)
 697             else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
 698                    in (x:xs', xs'')
 699
 700
 701 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 702 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 703 -- satisfy @p@.
 704 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 705 takeWhile f (LPS ps) = LPS (takeWhile' ps)
 706   where takeWhile' []     = []
 707         takeWhile' (x:xs) =
 708           case findIndexOrEnd (not . f) x of
 709             0                  -> []
 710             n | n < P.length x -> P.take n x : []
 711               | otherwise      -> x : takeWhile' xs
 712
 713 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 714 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 715 dropWhile f (LPS ps) = LPS (dropWhile' ps)
 716   where dropWhile' []     = []
 717         dropWhile' (x:xs) =
 718           case findIndexOrEnd (not . f) x of
 719             n | n < P.length x -> P.drop n x : xs
 720               | otherwise      -> dropWhile' xs
 721
 722 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 723 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 724 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
 725   where break' []     = ([], [])
 726         break' (x:xs) =
 727           case findIndexOrEnd f x of
 728             0                  -> ([], x : xs)
 729             n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
 730               | otherwise      -> let (xs', xs'') = break' xs
 731                                    in (x : xs', xs'')
 732
 733 --
 734 -- TODO
 735 --
 736 -- Add rules
 737 --
 738
 739 {-
 740 -- | 'breakByte' breaks its ByteString argument at the first occurence
 741 -- of the specified byte. It is more efficient than 'break' as it is
 742 -- implemented with @memchr(3)@. I.e.
 743 --
 744 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 745 --
 746 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 747 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
 748   where breakByte' []     = ([], [])
 749         breakByte' (x:xs) =
 750           case P.elemIndex c x of
 751             Just 0  -> ([], x : xs)
 752             Just n  -> (P.take n x : [], P.drop n x : xs)
 753             Nothing -> let (xs', xs'') = breakByte' xs
 754                         in (x : xs', xs'')
 755
 756 -- | 'spanByte' breaks its ByteString argument at the first
 757 -- occurence of a byte other than its argument. It is more efficient
 758 -- than 'span (==)'
 759 --
 760 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 761 --
 762 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 763 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
 764   where spanByte' []     = ([], [])
 765         spanByte' (x:xs) =
 766           case P.spanByte c x of
 767             (x', x'') | P.null x'  -> ([], x : xs)
 768                       | P.null x'' -> let (xs', xs'') = spanByte' xs
 769                                        in (x : xs', xs'')
 770                       | otherwise  -> (x' : [], x'' : xs)
 771 -}
 772
 773 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 774 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 775 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 776 span p = break (not . p)
 777
 778 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 779 -- separators, where the predicate returns True for a separator element.
 780 -- The resulting components do not contain the separators.  Two adjacent
 781 -- separators result in an empty component in the output.  eg.
 782 --
 783 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 784 -- > splitWith (=='a') []        == []
 785 --
 786 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 787 splitWith _ (LPS [])     = []
 788 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
 789
 790   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 791         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 792         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
 793         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 794
 795         cons' x xs | P.null x  = xs
 796                    | otherwise = x:xs
 797         {-# INLINE cons' #-}
 798 {-# INLINE splitWith #-}
 799
 800 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 801 -- argument, consuming the delimiter. I.e.
 802 --
 803 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 804 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 805 -- > split 'x'  "x"          == ["",""]
 806 --
 807 -- and
 808 --
 809 -- > join [c] . split c == id
 810 -- > split == splitWith . (==)
 811 --
 812 -- As for all splitting functions in this library, this function does
 813 -- not copy the substrings, it just constructs new 'ByteStrings' that
 814 -- are slices of the original.
 815 --
 816 split :: Word8 -> ByteString -> [ByteString]
 817 split _ (LPS [])     = []
 818 split c (LPS (a:as)) = comb [] (P.split c a) as
 819
 820   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 821         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 822         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
 823         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 824
 825         cons' x xs | P.null x  = xs
 826                    | otherwise = x:xs
 827         {-# INLINE cons' #-}
 828 {-# INLINE split #-}
 829
 830 {-
 831 -- | Like 'splitWith', except that sequences of adjacent separators are
 832 -- treated as a single separator. eg.
 833 --
 834 -- > tokens (=='a') "aabbaca" == ["bb","c"]
 835 --
 836 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
 837 tokens f = L.filter (not.null) . splitWith f
 838 -}
 839
 840 -- | The 'group' function takes a ByteString and returns a list of
 841 -- ByteStrings such that the concatenation of the result is equal to the
 842 -- argument.  Moreover, each sublist in the result contains only equal
 843 -- elements.  For example,
 844 --
 845 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 846 --
 847 -- It is a special case of 'groupBy', which allows the programmer to
 848 -- supply their own equality test.
 849 group :: ByteString -> [ByteString]
 850 group (LPS [])     = []
 851 group (LPS (a:as)) = group' [] (P.group a) as
 852   where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 853         group' acc@(s':_) ss@(s:_) xs
 854           | P.unsafeHead s'
 855          /= P.unsafeHead s       = LPS (L.reverse acc) : group' [] ss xs
 856         group' acc (s:[]) []     = LPS (L.reverse (s : acc)) : []
 857         group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
 858         group' acc (s:ss) xs     = LPS (L.reverse (s : acc)) : group' [] ss xs
 859
 860 {-
 861 TODO: check if something like this might be faster
 862
 863 group :: ByteString -> [ByteString]
 864 group xs
 865     | null xs   = []
 866     | otherwise = ys : group zs
 867     where
 868         (ys, zs) = spanByte (unsafeHead xs) xs
 869 -}
 870
 871 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 872 --
 873 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 874 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
 875 {-
 876 groupBy _ (LPS [])     = []
 877 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
 878   where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 879         groupBy' acc@(_:_) c ss@(s:_) xs
 880           | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
 881         groupBy' acc _ (s:[]) []       = LPS (L.reverse (s : acc)) : []
 882         groupBy' []  _ (s:[]) (x:xs)   = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
 883         groupBy' acc c (s:[]) (x:xs)   = groupBy' (s:acc) c (P.groupBy k x) xs
 884         groupBy' acc _ (s:ss) xs       = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
 885 -}
 886
 887 {-
 888 TODO: check if something like this might be faster
 889
 890 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 891 groupBy k xs
 892     | null xs   = []
 893     | otherwise = take n xs : groupBy k (drop n xs)
 894     where
 895         n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
 896 -}
 897
 898 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
 899 -- 'ByteString's and concatenates the list after interspersing the first
 900 -- argument between each element of the list.
 901 join :: ByteString -> [ByteString] -> ByteString
 902 join s = concat . (L.intersperse s)
 903
 904 -- ---------------------------------------------------------------------
 905 -- Indexing ByteStrings
 906
 907 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
 908 index :: ByteString -> Int64 -> Word8
 909 index _        i | i < 0 = moduleError "index" ("negative index: " ++ show i)
 910 index (LPS ps) i         = index' ps i
 911   where index' []     n = moduleError "index" ("index too large: " ++ show n)
 912         index' (x:xs) n
 913           | n >= fromIntegral (P.length x) =
 914               index' xs (n - fromIntegral (P.length x))
 915           | otherwise       = P.unsafeIndex x (fromIntegral n)
 916
 917 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 918 -- element in the given 'ByteString' which is equal to the query
 919 -- element, or 'Nothing' if there is no such element.
 920 -- This implementation uses memchr(3).
 921 elemIndex :: Word8 -> ByteString -> Maybe Int64
 922 elemIndex c (LPS ps) = elemIndex' 0 ps
 923   where elemIndex' _ []     = Nothing
 924         elemIndex' n (x:xs) =
 925           case P.elemIndex c x of
 926             Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
 927             Just i  -> Just (n + fromIntegral i)
 928
 929 {-
 930 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 931 -- element in the given 'ByteString' which is equal to the query
 932 -- element, or 'Nothing' if there is no such element. The following
 933 -- holds:
 934 --
 935 -- > elemIndexEnd c xs ==
 936 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 937 --
 938 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 939 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 940     go (p `plusPtr` s) (l-1)
 941   where
 942     STRICT2(go)
 943     go p i | i < 0     = return Nothing
 944            | otherwise = do ch' <- peekByteOff p i
 945                             if ch == ch'
 946                                 then return $ Just i
 947                                 else go p (i-1)
 948 -}
 949 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 950 -- the indices of all elements equal to the query element, in ascending order.
 951 -- This implementation uses memchr(3).
 952 elemIndices :: Word8 -> ByteString -> [Int64]
 953 elemIndices c (LPS ps) = elemIndices' 0 ps
 954   where elemIndices' _ []     = []
 955         elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
 956                              ++ elemIndices' (n + fromIntegral (P.length x)) xs
 957
 958 -- | count returns the number of times its argument appears in the ByteString
 959 --
 960 -- > count = length . elemIndices
 961 --
 962 -- But more efficiently than using length on the intermediate list.
 963 count :: Word8 -> ByteString -> Int64
 964 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
 965
 966 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 967 -- returns the index of the first element in the ByteString
 968 -- satisfying the predicate.
 969 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 970 findIndex k (LPS ps) = findIndex' 0 ps
 971   where findIndex' _ []     = Nothing
 972         findIndex' n (x:xs) =
 973           case P.findIndex k x of
 974             Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
 975             Just i  -> Just (n + fromIntegral i)
 976 {-# INLINE findIndex #-}
 977
 978 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 979 -- and returns the first element in matching the predicate, or 'Nothing'
 980 -- if there is no such element.
 981 --
 982 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 983 --
 984 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 985 find f (LPS ps) = find' ps
 986   where find' []     = Nothing
 987         find' (x:xs) = case P.find f x of
 988             Nothing -> find' xs
 989             Just w  -> Just w
 990 {-# INLINE find #-}
 991
 992 -- | The 'findIndices' function extends 'findIndex', by returning the
 993 -- indices of all elements satisfying the predicate, in ascending order.
 994 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
 995 findIndices k (LPS ps) = findIndices' 0 ps
 996   where findIndices' _ []     = []
 997         findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
 998                              ++ findIndices' (n + fromIntegral (P.length x)) xs
 999
1000 -- ---------------------------------------------------------------------
1001 -- Searching ByteStrings
1002
1003 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1004 elem :: Word8 -> ByteString -> Bool
1005 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1006
1007 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1008 notElem :: Word8 -> ByteString -> Bool
1009 notElem c ps = not (elem c ps)
1010
1011 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1012 -- returns a ByteString containing those characters that satisfy the
1013 -- predicate.
1014 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1015 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1016 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1017 {-# INLINE filter #-}
1018
1019 {-
1020 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1021 -- (==)/, for the common case of filtering a single byte. It is more
1022 -- efficient to use /filterByte/ in this case.
1023 --
1024 -- > filterByte == filter . (==)
1025 --
1026 -- filterByte is around 10x faster, and uses much less space, than its
1027 -- filter equivalent
1028 filterByte :: Word8 -> ByteString -> ByteString
1029 filterByte w ps = replicate (count w ps) w
1030 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1031
1032 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1033 -- case of filtering a single byte out of a list. It is more efficient
1034 -- to use /filterNotByte/ in this case.
1035 --
1036 -- > filterNotByte == filter . (/=)
1037 --
1038 -- filterNotByte is around 2x faster than its filter equivalent.
1039 filterNotByte :: Word8 -> ByteString -> ByteString
1040 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1041 -}
1042
1043 -- ---------------------------------------------------------------------
1044 -- Searching for substrings
1045
1046 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1047 -- iff the first is a prefix of the second.
1048 isPrefixOf :: ByteString -> ByteString -> Bool
1049 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1050   where isPrefixL [] _  = True
1051         isPrefixL _ []  = False
1052         isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y  && isPrefixL xs ys
1053                                 | P.length x <  P.length y = x == yh && isPrefixL xs (yt:ys)
1054                                 | otherwise                = xh == y && isPrefixL (xt:xs) ys
1055           where (xh,xt) = P.splitAt (P.length y) x
1056                 (yh,yt) = P.splitAt (P.length x) y
1057
1058 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1059 -- iff the first is a suffix of the second.
1060 --
1061 -- The following holds:
1062 --
1063 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1064 --
1065 -- However, the real implemenation uses memcmp to compare the end of the
1066 -- string only, with no reverse required..
1067 --
1068 --isSuffixOf :: ByteString -> ByteString -> Bool
1069 --isSuffixOf = error "not yet implemented"
1070
1071 -- ---------------------------------------------------------------------
1072 -- Zipping
1073
1074 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1075 -- corresponding pairs of bytes. If one input ByteString is short,
1076 -- excess elements of the longer ByteString are discarded. This is
1077 -- equivalent to a pair of 'unpack' operations.
1078 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1079 zip = zipWith (,)
1080
1081 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1082 -- the first argument, instead of a tupling function.  For example,
1083 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1084 -- corresponding sums.
1085 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1086 zipWith _ (LPS [])     (LPS _)  = []
1087 zipWith _ (LPS _)      (LPS []) = []
1088 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1089   where zipWith' x xs y ys =
1090           (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1091
1092         zipWith'' x []      _ _       | P.null x       = []
1093         zipWith'' _ _       y []      | P.null y       = []
1094         zipWith'' x xs      y ys      | not (P.null x)
1095                                      && not (P.null y) = zipWith' x  xs y  ys
1096         zipWith'' x xs      _ (y':ys) | not (P.null x) = zipWith' x  xs y' ys
1097         zipWith'' _ (x':xs) y ys      | not (P.null y) = zipWith' x' xs y  ys
1098         zipWith'' _ (x':xs) _ (y':ys)                  = zipWith' x' xs y' ys
1099
1100 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1101 -- ByteStrings. Note that this performs two 'pack' operations.
1102 {-
1103 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1104 unzip _ls = error "not yet implemented"
1105 {-# INLINE unzip #-}
1106 -}
1107
1108 -- ---------------------------------------------------------------------
1109 -- Special lists
1110
1111 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1112 inits :: ByteString -> [ByteString]
1113 inits = (LPS [] :) . inits' . unLPS
1114   where inits' []     = []
1115         inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1116                      ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1117
1118 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1119 tails :: ByteString -> [ByteString]
1120 tails = tails' . unLPS
1121   where tails' []           = LPS [] : []
1122         tails' xs@(x:xs')
1123           | P.length x == 1 = LPS xs : tails' xs'
1124           | otherwise       = LPS xs : tails' (P.unsafeTail x : xs')
1125
1126 -- ---------------------------------------------------------------------
1127 -- Low level constructors
1128
1129 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1130 --   This is mainly useful to allow the rest of the data pointed
1131 --   to by the 'ByteString' to be garbage collected, for example
1132 --   if a large string has been read in, and only a small part of it
1133 --   is needed in the rest of the program.
1134 copy :: ByteString -> ByteString
1135 copy (LPS lps) = LPS (L.map P.copy lps)
1136 --TODO, we could coalese small blocks here
1137 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1138 -- the parent blocks then we'd better copy strictly.
1139
1140 -- ---------------------------------------------------------------------
1141
1142 -- TODO defrag func that concatenates block together that are below a threshold
1143 -- defrag :: Int -> ByteString -> ByteString
1144
1145 -- ---------------------------------------------------------------------
1146 -- Lazy ByteString IO
1147
1148 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1149 -- are read on demand, in at most @k@-sized chunks. It does not block
1150 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1151 -- available then they will be returned immediately as a smaller chunk.
1152 hGetContentsN :: Int -> Handle -> IO ByteString
1153 hGetContentsN k h = lazyRead >>= return . LPS
1154   where
1155     lazyRead = unsafeInterleaveIO loop
1156
1157     loop = do
1158         ps <- P.hGetNonBlocking h k
1159         --TODO: I think this should distinguish EOF from no data available
1160         -- the otherlying POSIX call makes this distincion, returning either
1161         -- 0 or EAGAIN
1162         if P.null ps
1163           then do eof <- hIsEOF h
1164                   if eof then return []
1165                          else hWaitForInput h (-1)
1166                            >> loop
1167           else do pss <- lazyRead
1168                   return (ps : pss)
1169
1170 -- | Read @n@ bytes into a 'ByteString', directly from the
1171 -- specified 'Handle', in chunks of size @k@.
1172 hGetN :: Int -> Handle -> Int -> IO ByteString
1173 hGetN _ _ 0 = return empty
1174 hGetN k h n = readChunks n >>= return . LPS
1175   where
1176     STRICT1(readChunks)
1177     readChunks i = do
1178         ps <- P.hGet h (min k i)
1179         case P.length ps of
1180             0 -> return []
1181             m -> do pss <- readChunks (i - m)
1182                     return (ps : pss)
1183
1184 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1185 -- waiting for data to become available, instead it returns only whatever data
1186 -- is available. Chunks are read on demand, in @k@-sized chunks.
1187 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1188 #if defined(__GLASGOW_HASKELL__)
1189 hGetNonBlockingN _ _ 0 = return empty
1190 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1191   where
1192     STRICT1(readChunks)
1193     readChunks i = do
1194         ps <- P.hGetNonBlocking h (min k i)
1195         case P.length ps of
1196             0 -> return []
1197             m -> do pss <- readChunks (i - m)
1198                     return (ps : pss)
1199 #else
1200 hGetNonBlockingN = hGetN
1201 #endif
1202
1203 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1204 -- are read on demand, using the default chunk size.
1205 hGetContents :: Handle -> IO ByteString
1206 hGetContents = hGetContentsN defaultChunkSize
1207
1208 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1209 hGet :: Handle -> Int -> IO ByteString
1210 hGet = hGetN defaultChunkSize
1211
1212 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1213 -- waiting for data to become available, instead it returns only whatever data
1214 -- is available.
1215 #if defined(__GLASGOW_HASKELL__)
1216 hGetNonBlocking :: Handle -> Int -> IO ByteString
1217 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1218 #else
1219 hGetNonBlocking = hGet
1220 #endif
1221
1222 -- | Read an entire file /lazily/ into a 'ByteString'.
1223 readFile :: FilePath -> IO ByteString
1224 readFile f = openBinaryFile f ReadMode >>= hGetContents
1225
1226 -- | Write a 'ByteString' to a file.
1227 writeFile :: FilePath -> ByteString -> IO ()
1228 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1229     (\hdl -> hPut hdl txt)
1230
1231 -- | Append a 'ByteString' to a file.
1232 appendFile :: FilePath -> ByteString -> IO ()
1233 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1234     (\hdl -> hPut hdl txt)
1235
1236 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1237 getContents :: IO ByteString
1238 getContents = hGetContents stdin
1239
1240 -- | Outputs a 'ByteString' to the specified 'Handle'.
1241 hPut :: Handle -> ByteString -> IO ()
1242 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1243
1244 -- | Write a ByteString to stdout
1245 putStr :: ByteString -> IO ()
1246 putStr = hPut stdout
1247
1248 -- | Write a ByteString to stdout, appending a newline byte
1249 putStrLn :: ByteString -> IO ()
1250 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1251
1252 -- | The interact function takes a function of type @ByteString -> ByteString@
1253 -- as its argument. The entire input from the standard input device is passed
1254 -- to this function as its argument, and the resulting string is output on the
1255 -- standard output device. It's great for writing one line programs!
1256 interact :: (ByteString -> ByteString) -> IO ()
1257 interact transformer = putStr . transformer =<< getContents
1258
1259 -- ---------------------------------------------------------------------
1260 -- Internal utilities
1261
1262 -- Common up near identical calls to `error' to reduce the number
1263 -- constant strings created when compiled:
1264 errorEmptyList :: String -> a
1265 errorEmptyList fun = moduleError fun "empty ByteString"
1266
1267 moduleError :: String -> String -> a
1268 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1269
1270 -- A manually fused version of "filter (not.null) . map f", since they
1271 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1272 --
1273 -- TODO fuse.
1274 --
1275 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1276 filterMap _ []     = []
1277 filterMap f (x:xs) = case f x of
1278                     y | P.null y  ->     filterMap f xs      -- manually fuse the invariant filter
1279                       | otherwise -> y : filterMap f xs
1280 {-# INLINE filterMap #-}
1281
1282
1283 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1284 -- of the string if no element is found, rather than Nothing.
1285 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1286 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1287   where
1288     STRICT2(go)
1289     go ptr n | n >= l    = return l
1290              | otherwise = do w <- peek ptr
1291                               if k w
1292                                 then return n
1293                                 else go (ptr `plusPtr` 1) (n+1)
1294 {-# INLINE findIndexOrEnd #-}