Data/ByteString/Lazy.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
   2 -- |
   3 -- Module      : Data.ByteString.Lazy
   4 -- Copyright   : (c) Don Stewart 2006
   5 --               (c) Duncan Coutts 2006
   6 -- License     : BSD-style
   7 --
   8 -- Maintainer  : dons@cse.unsw.edu.au
   9 -- Stability   : experimental
  10 -- Portability : non-portable (instance of type synonym)
  11 --
  12 -- A time and space-efficient implementation of lazy byte vectors
  13 -- using lists of packed 'Word8' arrays, suitable for high performance
  14 -- use, both in terms of large data quantities, or high speed
  15 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
  16 -- arrays of bytes. They provide a means to manipulate large byte vectors
  17 -- without requiring the entire vector be resident in memory.
  18 --
  19 -- Some operations, such as concat, append, reverse and cons, have
  20 -- better complexity than their "Data.ByteString" equivalents, due to
  21 -- optimisations resulting from the list spine structure. And for other
  22 -- operations lazy ByteStrings are usually within a few percent of
  23 -- strict ones, but with better heap usage. For data larger than the
  24 -- available memory, or if you have tight memory constraints, this
  25 -- module will be the only option. The default chunk size is 64k, which
  26 -- should be good in most circumstances. For people with large L2
  27 -- caches, you may want to increase this to fit your cache.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with "Prelude" functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString.Lazy as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan.
  35 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
  36 -- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr'
  37 -- by David Roundy.
  38 -- Polished and extended by Don Stewart.
  39 -- Lazy variant by Duncan Coutts and Don Stewart.
  40 --
  41
  42 module Data.ByteString.Lazy (
  43
  44         -- * The @ByteString@ type
  45         ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable
  46
  47         -- * Introducing and eliminating 'ByteString's
  48         empty,                  -- :: ByteString
  49         singleton,              -- :: Word8   -> ByteString
  50         pack,                   -- :: [Word8] -> ByteString
  51         unpack,                 -- :: ByteString -> [Word8]
  52         fromChunks,             -- :: [Strict.ByteString] -> ByteString
  53         toChunks,               -- :: ByteString -> [Strict.ByteString]
  54
  55         -- * Basic interface
  56         cons,                   -- :: Word8 -> ByteString -> ByteString
  57         snoc,                   -- :: ByteString -> Word8 -> ByteString
  58         append,                 -- :: ByteString -> ByteString -> ByteString
  59         head,                   -- :: ByteString -> Word8
  60         last,                   -- :: ByteString -> Word8
  61         tail,                   -- :: ByteString -> ByteString
  62         init,                   -- :: ByteString -> ByteString
  63         null,                   -- :: ByteString -> Bool
  64         length,                 -- :: ByteString -> Int64
  65
  66         -- * Transformating ByteStrings
  67         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  68         reverse,                -- :: ByteString -> ByteString
  69 --      intersperse,            -- :: Word8 -> ByteString -> ByteString
  70         transpose,              -- :: [ByteString] -> [ByteString]
  71
  72         -- * Reducing 'ByteString's (folds)
  73         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  74         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  75         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  76         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  78         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79
  80         -- ** Special folds
  81         concat,                 -- :: [ByteString] -> ByteString
  82         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  83         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  84         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  85         maximum,                -- :: ByteString -> Word8
  86         minimum,                -- :: ByteString -> Word8
  87
  88         -- * Building ByteStrings
  89         -- ** Scans
  90         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  91 --      scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  92 --      scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  93 --      scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  94
  95         -- ** Accumulating maps
  96         mapAccumL,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  97         mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
  98
  99         -- ** Infinite ByteStrings
 100         repeat,                 -- :: Word8 -> ByteString
 101         replicate,              -- :: Int64 -> Word8 -> ByteString
 102         cycle,                  -- :: ByteString -> ByteString
 103         iterate,                -- :: (Word8 -> Word8) -> Word8 -> ByteString
 104
 105         -- ** Unfolding
 106         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 107
 108         -- * Substrings
 109
 110         -- ** Breaking strings
 111         take,                   -- :: Int64 -> ByteString -> ByteString
 112         drop,                   -- :: Int64 -> ByteString -> ByteString
 113         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
 114         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 115         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 116         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 117         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 118         group,                  -- :: ByteString -> [ByteString]
 119         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 120         inits,                  -- :: ByteString -> [ByteString]
 121         tails,                  -- :: ByteString -> [ByteString]
 122
 123         -- ** Breaking into many substrings
 124         split,                  -- :: Word8 -> ByteString -> [ByteString]
 125         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 126
 127         -- ** Joining strings
 128         join,                   -- :: ByteString -> [ByteString] -> ByteString
 129
 130         -- * Predicates
 131         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 132 --      isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 133
 134         -- * Searching ByteStrings
 135
 136         -- ** Searching by equality
 137         elem,                   -- :: Word8 -> ByteString -> Bool
 138         notElem,                -- :: Word8 -> ByteString -> Bool
 139
 140         -- ** Searching with a predicate
 141         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 142         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 143 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 144
 145         -- * Indexing ByteStrings
 146         index,                  -- :: ByteString -> Int64 -> Word8
 147         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int64
 148         elemIndices,            -- :: Word8 -> ByteString -> [Int64]
 149         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 150         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int64]
 151         count,                  -- :: Word8 -> ByteString -> Int64
 152
 153         -- * Zipping and unzipping ByteStrings
 154         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 155         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 156 --      unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 157
 158         -- * Ordered ByteStrings
 159 --        sort,                   -- :: ByteString -> ByteString
 160
 161         copy,                   -- :: ByteString -> ByteString
 162
 163         -- * I\/O with 'ByteString's
 164
 165         -- ** Standard input and output
 166         getContents,            -- :: IO ByteString
 167         putStr,                 -- :: ByteString -> IO ()
 168         putStrLn,               -- :: ByteString -> IO ()
 169         interact,               -- :: (ByteString -> ByteString) -> IO ()
 170
 171         -- ** Files
 172         readFile,               -- :: FilePath -> IO ByteString
 173         writeFile,              -- :: FilePath -> ByteString -> IO ()
 174         appendFile,             -- :: FilePath -> ByteString -> IO ()
 175
 176         -- ** I\/O with Handles
 177         hGetContents,           -- :: Handle -> IO ByteString
 178         hGet,                   -- :: Handle -> Int -> IO ByteString
 179         hPut,                   -- :: Handle -> ByteString -> IO ()
 180         hGetNonBlocking,        -- :: Handle -> IO ByteString
 181
 182 --      hGetN,                  -- :: Int -> Handle -> Int -> IO ByteString
 183 --      hGetContentsN,          -- :: Int -> Handle -> IO ByteString
 184 --      hGetNonBlockingN,       -- :: Int -> Handle -> IO ByteString
 185
 186   ) where
 187
 188 import qualified Prelude
 189 import Prelude hiding
 190     (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
 191     ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
 192     ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
 193     ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
 194     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
 195
 196 import qualified Data.List              as L  -- L for list/lazy
 197 import qualified Data.ByteString        as P  -- P for packed
 198 import qualified Data.ByteString.Base   as P
 199 import Data.ByteString.Base (LazyByteString(..))
 200 import qualified Data.ByteString.Fusion as P
 201 import Data.ByteString.Fusion (PairS(..),loopL)
 202
 203 import Data.Monoid              (Monoid(..))
 204
 205 import Data.Word                (Word8)
 206 import Data.Int                 (Int64)
 207 import System.IO                (Handle,stdin,stdout,openBinaryFile,IOMode(..)
 208                                 ,hClose,hWaitForInput,hIsEOF)
 209 import System.IO.Unsafe
 210 import Control.Exception        (bracket)
 211
 212 import Foreign.ForeignPtr       (withForeignPtr)
 213 import Foreign.Ptr
 214 import Foreign.Storable
 215
 216 -- -----------------------------------------------------------------------------
 217 --
 218 -- Useful macros, until we have bang patterns
 219 --
 220
 221 #define STRICT1(f) f a | a `seq` False = undefined
 222 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 223 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 224 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 225 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 226
 227 -- -----------------------------------------------------------------------------
 228
 229 type ByteString = LazyByteString
 230
 231 --
 232 -- hmm, what about getting the PS constructor unpacked into the cons cell?
 233 --
 234 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
 235 --
 236 -- Would avoid one indirection per chunk.
 237 --
 238
 239 unLPS :: ByteString -> [P.ByteString]
 240 unLPS (LPS xs) = xs
 241 {-# INLINE unLPS #-}
 242
 243 instance Eq  ByteString
 244     where (==)    = eq
 245
 246 instance Ord ByteString
 247     where compare = compareBytes
 248
 249 instance Monoid ByteString where
 250     mempty  = empty
 251     mappend = append
 252     mconcat = concat
 253
 254 ------------------------------------------------------------------------
 255
 256 -- XXX
 257 -- The data type invariant:
 258 -- Every ByteString is either empty or consists of non-null ByteStrings.
 259 -- All functions must preserve this, and the QC properties must check this.
 260 --
 261 _invariant :: ByteString -> Bool
 262 _invariant (LPS []) = True
 263 _invariant (LPS xs) = L.all (not . P.null) xs
 264
 265 -- In a form useful for QC testing
 266 _checkInvariant :: ByteString -> ByteString
 267 _checkInvariant lps
 268     | _invariant lps = lps
 269     | otherwise      = moduleError "invariant" ("violation: " ++ show lps)
 270
 271 -- The Data abstraction function
 272 --
 273 _abstr :: ByteString -> P.ByteString
 274 _abstr (LPS []) = P.empty
 275 _abstr (LPS xs) = P.concat xs
 276
 277 -- The representation uses lists of packed chunks. When we have to convert from
 278 -- a lazy list to the chunked representation, then by default we'll use this
 279 -- chunk size. Some functions give you more control over the chunk size.
 280 --
 281 -- Measurements here:
 282 --  http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
 283 --
 284 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
 285 -- The following value assumes people have something greater than 128k,
 286 -- and need to share the cache with other programs.
 287 --
 288 defaultChunkSize :: Int
 289 defaultChunkSize = 32 * k - overhead
 290    where k = 1024
 291          overhead = 2 * sizeOf (undefined :: Int)
 292
 293 smallChunkSize :: Int
 294 smallChunkSize = 4 * k - overhead
 295    where k = 1024
 296          overhead = 2 * sizeOf (undefined :: Int)
 297
 298 -- defaultChunkSize = 1
 299
 300 ------------------------------------------------------------------------
 301
 302 eq :: ByteString -> ByteString -> Bool
 303 eq (LPS xs) (LPS ys) = eq' xs ys
 304   where eq' [] [] = True
 305         eq' [] _  = False
 306         eq' _  [] = False
 307         eq' (a:as) (b:bs) =
 308           case compare (P.length a) (P.length b) of
 309             LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
 310             EQ -> a == b                       && eq' as bs
 311             GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
 312
 313 compareBytes :: ByteString -> ByteString -> Ordering
 314 compareBytes (LPS xs) (LPS ys) = cmp xs ys
 315   where cmp [] [] = EQ
 316         cmp [] _  = LT
 317         cmp _  [] = GT
 318         cmp (a:as) (b:bs) =
 319           case compare (P.length a) (P.length b) of
 320             LT -> case compare a (P.take (P.length a) b) of
 321                     EQ     -> cmp as (P.drop (P.length a) b : bs)
 322                     result -> result
 323             EQ -> case compare a b of
 324                     EQ     -> cmp as bs
 325                     result -> result
 326             GT -> case compare (P.take (P.length b) a) b of
 327                     EQ     -> cmp (P.drop (P.length b) a : as) bs
 328                     result -> result
 329
 330 -- -----------------------------------------------------------------------------
 331 -- Introducing and eliminating 'ByteString's
 332
 333 -- | /O(1)/ The empty 'ByteString'
 334 empty :: ByteString
 335 empty = LPS []
 336 {-# NOINLINE empty #-}
 337
 338 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 339 singleton :: Word8 -> ByteString
 340 singleton c = LPS [P.singleton c]
 341 {-# NOINLINE singleton #-}
 342
 343 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 344 pack :: [Word8] -> ByteString
 345 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
 346
 347 -- ?
 348 chunk :: Int -> [a] -> [[a]]
 349 chunk _    [] = []
 350 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
 351
 352 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 353 unpack :: ByteString -> [Word8]
 354 unpack (LPS ss) = L.concatMap P.unpack ss
 355 {-# INLINE unpack #-}
 356
 357 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
 358 fromChunks :: [P.ByteString] -> ByteString
 359 fromChunks ls = LPS $ L.filter (not . P.null) ls
 360
 361 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
 362 toChunks :: ByteString -> [P.ByteString]
 363 toChunks (LPS s) = s
 364
 365 ------------------------------------------------------------------------
 366
 367 {-
 368 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 369 -- conversion function
 370 packWith :: (a -> Word8) -> [a] -> ByteString
 371 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
 372 {-# INLINE packWith #-}
 373 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 374
 375 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 376 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 377 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
 378 {-# INLINE unpackWith #-}
 379 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 380 -}
 381
 382 -- ---------------------------------------------------------------------
 383 -- Basic interface
 384
 385 -- | /O(1)/ Test whether a ByteString is empty.
 386 null :: ByteString -> Bool
 387 null (LPS []) = True
 388 null (_)      = False
 389 {-# INLINE null #-}
 390
 391 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
 392 length :: ByteString -> Int64
 393 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
 394
 395 -- avoid the intermediate list?
 396 -- length (LPS ss) = L.foldl lengthF 0 ss
 397 --     where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
 398 {-# INLINE length #-}
 399
 400 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
 401 -- strict in the ByteString that we are consing onto. More precisely, it forces
 402 -- the head and the first chunk. It does this because, for space efficiency, it
 403 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
 404 -- new \'chunk\'.
 405 --
 406 -- So that means you can't use a lazy recursive contruction like this:
 407 --
 408 -- > let xs = cons c xs in xs
 409 --
 410 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
 411 --
 412 cons :: Word8 -> ByteString -> ByteString
 413 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
 414 cons c (LPS ss)                        = LPS (P.singleton c : ss)
 415 {-# INLINE cons #-}
 416
 417 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
 418 snoc :: ByteString -> Word8 -> ByteString
 419 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
 420 {-# INLINE snoc #-}
 421
 422 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 423 head :: ByteString -> Word8
 424 head (LPS [])    = errorEmptyList "head"
 425 head (LPS (x:_)) = P.unsafeHead x
 426 {-# INLINE head #-}
 427
 428 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 429 tail :: ByteString -> ByteString
 430 tail (LPS [])     = errorEmptyList "tail"
 431 tail (LPS (x:xs))
 432   | P.length x == 1 = LPS xs
 433   | otherwise       = LPS (P.unsafeTail x : xs)
 434 {-# INLINE tail #-}
 435
 436 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
 437 last :: ByteString -> Word8
 438 last (LPS []) = errorEmptyList "last"
 439 last (LPS xs) = P.last (L.last xs)
 440 {-# INLINE last #-}
 441
 442 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
 443 init :: ByteString -> ByteString
 444 init (LPS []) = errorEmptyList "init"
 445 init (LPS xs)
 446     | P.length y == 1 = LPS ys
 447     | otherwise       = LPS (ys ++ [P.init y])
 448     where (y,ys) = (L.last xs, L.init xs)
 449 {-# INLINE init #-}
 450
 451 -- | /O(n)/ Append two ByteStrings
 452 append :: ByteString -> ByteString -> ByteString
 453 append (LPS []) (LPS ys) = LPS ys
 454 append (LPS xs) (LPS []) = LPS xs
 455 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
 456 {-# INLINE append #-}
 457
 458 -- ---------------------------------------------------------------------
 459 -- Transformations
 460
 461 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 462 -- element of @xs@.
 463 map :: (Word8 -> Word8) -> ByteString -> ByteString
 464 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
 465 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
 466 {-# INLINE map #-}
 467
 468 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 469 reverse :: ByteString -> ByteString
 470 reverse (LPS ps) = LPS (rev [] ps)
 471   where rev a []     = a
 472         rev a (x:xs) = rev (P.reverse x:a) xs
 473 -- note, here is one example where the extra element lazyness is an advantage.
 474 -- we can reerse the list of chunks strictly but reverse each chunk lazily
 475 -- so while we may force the whole lot into memory we do not need to copy
 476 -- each chunk until it is used.
 477 {-# INLINE reverse #-}
 478
 479 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
 480 -- \`intersperses\' that byte between the elements of the 'ByteString'.
 481 -- It is analogous to the intersperse function on Lists.
 482 -- intersperse :: Word8 -> ByteString -> ByteString
 483 -- intersperse = error "FIXME: not yet implemented"
 484
 485 {-
 486 intersperse c (LPS [])     = LPS []
 487 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
 488   where intersperse' c ps@(PS x s l) =
 489           P.create (2*l) $ \p -> withForeignPtr x $ \f ->
 490                 poke p c
 491                 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
 492 -}
 493
 494 -- | The 'transpose' function transposes the rows and columns of its
 495 -- 'ByteString' argument.
 496 transpose :: [ByteString] -> [ByteString]
 497 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
 498
 499 -- ---------------------------------------------------------------------
 500 -- Reducing 'ByteString's
 501
 502 -- | 'foldl', applied to a binary operator, a starting value (typically
 503 -- the left-identity of the operator), and a ByteString, reduces the
 504 -- ByteString using the binary operator, from left to right.
 505 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 506 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
 507 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
 508 {-# INLINE foldl #-}
 509
 510 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 511 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 512 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
 513 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
 514 {-# INLINE foldl' #-}
 515
 516 -- | 'foldr', applied to a binary operator, a starting value
 517 -- (typically the right-identity of the operator), and a ByteString,
 518 -- reduces the ByteString using the binary operator, from right to left.
 519 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 520 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
 521 {-# INLINE foldr #-}
 522
 523 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 524 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 525 -- This function is subject to array fusion.
 526 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 527 foldl1 _ (LPS []) = errorEmptyList "foldl1"
 528 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 529
 530 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 531 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 532 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
 533 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 534
 535 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 536 -- and thus must be applied to non-empty 'ByteString's
 537 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 538 foldr1 _ (LPS []) = errorEmptyList "foldr1"
 539 foldr1 f (LPS ps) = foldr1' ps
 540   where foldr1' (x:[]) = P.foldr1 f x
 541         foldr1' (x:xs) = P.foldr  f (foldr1' xs) x
 542
 543 -- ---------------------------------------------------------------------
 544 -- Special folds
 545
 546 -- | /O(n)/ Concatenate a list of ByteStrings.
 547 concat :: [ByteString] -> ByteString
 548 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
 549
 550 -- | Map a function over a 'ByteString' and concatenate the results
 551 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 552 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
 553     where
 554       k w = case f w of LPS xs -> P.concat xs
 555
 556 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 557 -- any element of the 'ByteString' satisfies the predicate.
 558 any :: (Word8 -> Bool) -> ByteString -> Bool
 559 any f (LPS xs) = L.or (L.map (P.any f) xs)
 560 -- todo fuse
 561
 562 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 563 -- if all elements of the 'ByteString' satisfy the predicate.
 564 all :: (Word8 -> Bool) -> ByteString -> Bool
 565 all f (LPS xs) = L.and (L.map (P.all f) xs)
 566 -- todo fuse
 567
 568 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 569 maximum :: ByteString -> Word8
 570 maximum (LPS [])     = errorEmptyList "maximum"
 571 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
 572 {-# INLINE maximum #-}
 573
 574 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 575 minimum :: ByteString -> Word8
 576 minimum (LPS [])     = errorEmptyList "minimum"
 577 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
 578 {-# INLINE minimum #-}
 579
 580 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 581 -- 'foldl'; it applies a function to each element of a ByteString,
 582 -- passing an accumulating parameter from left to right, and returning a
 583 -- final value of this accumulator together with the new ByteString.
 584 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 585 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
 586
 587 -- | /O(n)/ map Word8 functions, provided with the index at each position
 588 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 589 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
 590
 591 -- ---------------------------------------------------------------------
 592 -- Building ByteStrings
 593
 594 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 595 -- reduced values from the left. This function will fuse.
 596 --
 597 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 598 --
 599 -- Note that
 600 --
 601 -- > last (scanl f z xs) == foldl f z xs.
 602 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 603 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
 604 {-# INLINE scanl #-}
 605
 606 -- ---------------------------------------------------------------------
 607 -- Unfolds and replicates
 608
 609 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
 610 -- of @f@ to @x@:
 611 --
 612 -- > iterate f x == [x, f x, f (f x), ...]
 613 --
 614 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
 615 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
 616
 617 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
 618 -- element.
 619 --
 620 repeat :: Word8 -> ByteString
 621 repeat c = LPS (L.repeat block)
 622     where block =  P.replicate smallChunkSize c
 623
 624 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
 625 -- the value of every element.
 626 --
 627 replicate :: Int64 -> Word8 -> ByteString
 628 replicate w c
 629     | w <= 0             = empty
 630     | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
 631     | r == 0             = LPS (L.genericReplicate q s) -- preserve invariant
 632     | otherwise          = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
 633  where
 634     s      = P.replicate smallChunkSize c
 635     (q, r) = quotRem w (fromIntegral smallChunkSize)
 636
 637 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
 638 -- the infinite repetition of the original ByteString.
 639 --
 640 cycle :: ByteString -> ByteString
 641 cycle (LPS []) = errorEmptyList "cycle"
 642 cycle (LPS xs) = LPS (L.cycle xs)
 643
 644 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
 645 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
 646 -- the element and returns 'Nothing' if it is done producing the
 647 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 648 -- prepending to the ByteString and @b@ is used as the next element in a
 649 -- recursive call.
 650 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 651 unfoldr f = LPS . unfoldChunk 32
 652   where unfoldChunk n x =
 653           case P.unfoldrN n f x of
 654             (s, Nothing)
 655               | P.null s  -> []
 656               | otherwise -> s : []
 657             (s, Just x')  -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
 658
 659 -- ---------------------------------------------------------------------
 660 -- Substrings
 661
 662 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 663 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 664 take :: Int64 -> ByteString -> ByteString
 665 take i _ | i <= 0 = empty
 666 take i (LPS ps)   = LPS (take' i ps)
 667   where take' 0 _      = []
 668         take' _ []     = []
 669         take' n (x:xs) =
 670           if n < fromIntegral (P.length x)
 671             then P.take (fromIntegral n) x : []
 672             else x : take' (n - fromIntegral (P.length x)) xs
 673
 674 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 675 -- elements, or @[]@ if @n > 'length' xs@.
 676 drop  :: Int64 -> ByteString -> ByteString
 677 drop i p | i <= 0 = p
 678 drop i (LPS ps) = LPS (drop' i ps)
 679   where drop' 0 xs     = xs
 680         drop' _ []     = []
 681         drop' n (x:xs) =
 682           if n < fromIntegral (P.length x)
 683             then P.drop (fromIntegral n) x : xs
 684             else drop' (n - fromIntegral (P.length x)) xs
 685
 686 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 687 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
 688 splitAt i p        | i <= 0 = (empty, p)
 689 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
 690   where splitAt' 0 xs     = ([], xs)
 691         splitAt' _ []     = ([], [])
 692         splitAt' n (x:xs) =
 693           if n < fromIntegral (P.length x)
 694             then (P.take (fromIntegral n) x : [],
 695                   P.drop (fromIntegral n) x : xs)
 696             else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
 697                    in (x:xs', xs'')
 698
 699
 700 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 701 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 702 -- satisfy @p@.
 703 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 704 takeWhile f (LPS ps) = LPS (takeWhile' ps)
 705   where takeWhile' []     = []
 706         takeWhile' (x:xs) =
 707           case findIndexOrEnd (not . f) x of
 708             0                  -> []
 709             n | n < P.length x -> P.take n x : []
 710               | otherwise      -> x : takeWhile' xs
 711
 712 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 713 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 714 dropWhile f (LPS ps) = LPS (dropWhile' ps)
 715   where dropWhile' []     = []
 716         dropWhile' (x:xs) =
 717           case findIndexOrEnd (not . f) x of
 718             n | n < P.length x -> P.drop n x : xs
 719               | otherwise      -> dropWhile' xs
 720
 721 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 722 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 723 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
 724   where break' []     = ([], [])
 725         break' (x:xs) =
 726           case findIndexOrEnd f x of
 727             0                  -> ([], x : xs)
 728             n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
 729               | otherwise      -> let (xs', xs'') = break' xs
 730                                    in (x : xs', xs'')
 731
 732 --
 733 -- TODO
 734 --
 735 -- Add rules
 736 --
 737
 738 {-
 739 -- | 'breakByte' breaks its ByteString argument at the first occurence
 740 -- of the specified byte. It is more efficient than 'break' as it is
 741 -- implemented with @memchr(3)@. I.e.
 742 --
 743 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 744 --
 745 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 746 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
 747   where breakByte' []     = ([], [])
 748         breakByte' (x:xs) =
 749           case P.elemIndex c x of
 750             Just 0  -> ([], x : xs)
 751             Just n  -> (P.take n x : [], P.drop n x : xs)
 752             Nothing -> let (xs', xs'') = breakByte' xs
 753                         in (x : xs', xs'')
 754
 755 -- | 'spanByte' breaks its ByteString argument at the first
 756 -- occurence of a byte other than its argument. It is more efficient
 757 -- than 'span (==)'
 758 --
 759 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 760 --
 761 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 762 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
 763   where spanByte' []     = ([], [])
 764         spanByte' (x:xs) =
 765           case P.spanByte c x of
 766             (x', x'') | P.null x'  -> ([], x : xs)
 767                       | P.null x'' -> let (xs', xs'') = spanByte' xs
 768                                        in (x : xs', xs'')
 769                       | otherwise  -> (x' : [], x'' : xs)
 770 -}
 771
 772 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 773 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 774 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 775 span p = break (not . p)
 776
 777 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 778 -- separators, where the predicate returns True for a separator element.
 779 -- The resulting components do not contain the separators.  Two adjacent
 780 -- separators result in an empty component in the output.  eg.
 781 --
 782 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 783 -- > splitWith (=='a') []        == []
 784 --
 785 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 786 splitWith _ (LPS [])     = []
 787 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
 788
 789   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 790         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 791         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
 792         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 793
 794         cons' x xs | P.null x  = xs
 795                    | otherwise = x:xs
 796         {-# INLINE cons' #-}
 797 {-# INLINE splitWith #-}
 798
 799 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 800 -- argument, consuming the delimiter. I.e.
 801 --
 802 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 803 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 804 -- > split 'x'  "x"          == ["",""]
 805 --
 806 -- and
 807 --
 808 -- > join [c] . split c == id
 809 -- > split == splitWith . (==)
 810 --
 811 -- As for all splitting functions in this library, this function does
 812 -- not copy the substrings, it just constructs new 'ByteStrings' that
 813 -- are slices of the original.
 814 --
 815 split :: Word8 -> ByteString -> [ByteString]
 816 split _ (LPS [])     = []
 817 split c (LPS (a:as)) = comb [] (P.split c a) as
 818
 819   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 820         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 821         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
 822         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 823
 824         cons' x xs | P.null x  = xs
 825                    | otherwise = x:xs
 826         {-# INLINE cons' #-}
 827 {-# INLINE split #-}
 828
 829 {-
 830 -- | Like 'splitWith', except that sequences of adjacent separators are
 831 -- treated as a single separator. eg.
 832 --
 833 -- > tokens (=='a') "aabbaca" == ["bb","c"]
 834 --
 835 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
 836 tokens f = L.filter (not.null) . splitWith f
 837 -}
 838
 839 -- | The 'group' function takes a ByteString and returns a list of
 840 -- ByteStrings such that the concatenation of the result is equal to the
 841 -- argument.  Moreover, each sublist in the result contains only equal
 842 -- elements.  For example,
 843 --
 844 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 845 --
 846 -- It is a special case of 'groupBy', which allows the programmer to
 847 -- supply their own equality test.
 848 group :: ByteString -> [ByteString]
 849 group (LPS [])     = []
 850 group (LPS (a:as)) = group' [] (P.group a) as
 851   where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 852         group' acc@(s':_) ss@(s:_) xs
 853           | P.unsafeHead s'
 854          /= P.unsafeHead s       = LPS (L.reverse acc) : group' [] ss xs
 855         group' acc (s:[]) []     = LPS (L.reverse (s : acc)) : []
 856         group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
 857         group' acc (s:ss) xs     = LPS (L.reverse (s : acc)) : group' [] ss xs
 858
 859 {-
 860 TODO: check if something like this might be faster
 861
 862 group :: ByteString -> [ByteString]
 863 group xs
 864     | null xs   = []
 865     | otherwise = ys : group zs
 866     where
 867         (ys, zs) = spanByte (unsafeHead xs) xs
 868 -}
 869
 870 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 871 --
 872 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 873 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
 874 {-
 875 groupBy _ (LPS [])     = []
 876 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
 877   where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 878         groupBy' acc@(_:_) c ss@(s:_) xs
 879           | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
 880         groupBy' acc _ (s:[]) []       = LPS (L.reverse (s : acc)) : []
 881         groupBy' []  _ (s:[]) (x:xs)   = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
 882         groupBy' acc c (s:[]) (x:xs)   = groupBy' (s:acc) c (P.groupBy k x) xs
 883         groupBy' acc _ (s:ss) xs       = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
 884 -}
 885
 886 {-
 887 TODO: check if something like this might be faster
 888
 889 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 890 groupBy k xs
 891     | null xs   = []
 892     | otherwise = take n xs : groupBy k (drop n xs)
 893     where
 894         n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
 895 -}
 896
 897 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
 898 -- 'ByteString's and concatenates the list after interspersing the first
 899 -- argument between each element of the list.
 900 join :: ByteString -> [ByteString] -> ByteString
 901 join s = concat . (L.intersperse s)
 902
 903 -- ---------------------------------------------------------------------
 904 -- Indexing ByteStrings
 905
 906 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
 907 index :: ByteString -> Int64 -> Word8
 908 index _        i | i < 0 = moduleError "index" ("negative index: " ++ show i)
 909 index (LPS ps) i         = index' ps i
 910   where index' []     n = moduleError "index" ("index too large: " ++ show n)
 911         index' (x:xs) n
 912           | n >= fromIntegral (P.length x) =
 913               index' xs (n - fromIntegral (P.length x))
 914           | otherwise       = P.unsafeIndex x (fromIntegral n)
 915
 916 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 917 -- element in the given 'ByteString' which is equal to the query
 918 -- element, or 'Nothing' if there is no such element.
 919 -- This implementation uses memchr(3).
 920 elemIndex :: Word8 -> ByteString -> Maybe Int64
 921 elemIndex c (LPS ps) = elemIndex' 0 ps
 922   where elemIndex' _ []     = Nothing
 923         elemIndex' n (x:xs) =
 924           case P.elemIndex c x of
 925             Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
 926             Just i  -> Just (n + fromIntegral i)
 927
 928 {-
 929 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 930 -- element in the given 'ByteString' which is equal to the query
 931 -- element, or 'Nothing' if there is no such element. The following
 932 -- holds:
 933 --
 934 -- > elemIndexEnd c xs ==
 935 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 936 --
 937 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 938 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 939     go (p `plusPtr` s) (l-1)
 940   where
 941     STRICT2(go)
 942     go p i | i < 0     = return Nothing
 943            | otherwise = do ch' <- peekByteOff p i
 944                             if ch == ch'
 945                                 then return $ Just i
 946                                 else go p (i-1)
 947 -}
 948 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 949 -- the indices of all elements equal to the query element, in ascending order.
 950 -- This implementation uses memchr(3).
 951 elemIndices :: Word8 -> ByteString -> [Int64]
 952 elemIndices c (LPS ps) = elemIndices' 0 ps
 953   where elemIndices' _ []     = []
 954         elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
 955                              ++ elemIndices' (n + fromIntegral (P.length x)) xs
 956
 957 -- | count returns the number of times its argument appears in the ByteString
 958 --
 959 -- > count = length . elemIndices
 960 --
 961 -- But more efficiently than using length on the intermediate list.
 962 count :: Word8 -> ByteString -> Int64
 963 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
 964
 965 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 966 -- returns the index of the first element in the ByteString
 967 -- satisfying the predicate.
 968 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 969 findIndex k (LPS ps) = findIndex' 0 ps
 970   where findIndex' _ []     = Nothing
 971         findIndex' n (x:xs) =
 972           case P.findIndex k x of
 973             Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
 974             Just i  -> Just (n + fromIntegral i)
 975 {-# INLINE findIndex #-}
 976
 977 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 978 -- and returns the first element in matching the predicate, or 'Nothing'
 979 -- if there is no such element.
 980 --
 981 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 982 --
 983 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 984 find f (LPS ps) = find' ps
 985   where find' []     = Nothing
 986         find' (x:xs) = case P.find f x of
 987             Nothing -> find' xs
 988             Just w  -> Just w
 989 {-# INLINE find #-}
 990
 991 -- | The 'findIndices' function extends 'findIndex', by returning the
 992 -- indices of all elements satisfying the predicate, in ascending order.
 993 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
 994 findIndices k (LPS ps) = findIndices' 0 ps
 995   where findIndices' _ []     = []
 996         findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
 997                              ++ findIndices' (n + fromIntegral (P.length x)) xs
 998
 999 -- ---------------------------------------------------------------------
1000 -- Searching ByteStrings
1001
1002 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1003 elem :: Word8 -> ByteString -> Bool
1004 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1005
1006 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1007 notElem :: Word8 -> ByteString -> Bool
1008 notElem c ps = not (elem c ps)
1009
1010 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1011 -- returns a ByteString containing those characters that satisfy the
1012 -- predicate.
1013 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1014 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1015 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1016 {-# INLINE filter #-}
1017
1018 {-
1019 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1020 -- (==)/, for the common case of filtering a single byte. It is more
1021 -- efficient to use /filterByte/ in this case.
1022 --
1023 -- > filterByte == filter . (==)
1024 --
1025 -- filterByte is around 10x faster, and uses much less space, than its
1026 -- filter equivalent
1027 filterByte :: Word8 -> ByteString -> ByteString
1028 filterByte w ps = replicate (count w ps) w
1029 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1030
1031 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1032 -- case of filtering a single byte out of a list. It is more efficient
1033 -- to use /filterNotByte/ in this case.
1034 --
1035 -- > filterNotByte == filter . (/=)
1036 --
1037 -- filterNotByte is around 2x faster than its filter equivalent.
1038 filterNotByte :: Word8 -> ByteString -> ByteString
1039 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1040 -}
1041
1042 -- ---------------------------------------------------------------------
1043 -- Searching for substrings
1044
1045 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1046 -- iff the first is a prefix of the second.
1047 isPrefixOf :: ByteString -> ByteString -> Bool
1048 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1049   where isPrefixL [] _  = True
1050         isPrefixL _ []  = False
1051         isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y  && isPrefixL xs ys
1052                                 | P.length x <  P.length y = x == yh && isPrefixL xs (yt:ys)
1053                                 | otherwise                = xh == y && isPrefixL (xt:xs) ys
1054           where (xh,xt) = P.splitAt (P.length y) x
1055                 (yh,yt) = P.splitAt (P.length x) y
1056
1057 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1058 -- iff the first is a suffix of the second.
1059 --
1060 -- The following holds:
1061 --
1062 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1063 --
1064 -- However, the real implemenation uses memcmp to compare the end of the
1065 -- string only, with no reverse required..
1066 --
1067 --isSuffixOf :: ByteString -> ByteString -> Bool
1068 --isSuffixOf = error "not yet implemented"
1069
1070 -- ---------------------------------------------------------------------
1071 -- Zipping
1072
1073 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1074 -- corresponding pairs of bytes. If one input ByteString is short,
1075 -- excess elements of the longer ByteString are discarded. This is
1076 -- equivalent to a pair of 'unpack' operations.
1077 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1078 zip = zipWith (,)
1079
1080 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1081 -- the first argument, instead of a tupling function.  For example,
1082 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1083 -- corresponding sums.
1084 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1085 zipWith _ (LPS [])     (LPS _)  = []
1086 zipWith _ (LPS _)      (LPS []) = []
1087 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1088   where zipWith' x xs y ys =
1089           (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1090
1091         zipWith'' x []      _ _       | P.null x       = []
1092         zipWith'' _ _       y []      | P.null y       = []
1093         zipWith'' x xs      y ys      | not (P.null x)
1094                                      && not (P.null y) = zipWith' x  xs y  ys
1095         zipWith'' x xs      _ (y':ys) | not (P.null x) = zipWith' x  xs y' ys
1096         zipWith'' _ (x':xs) y ys      | not (P.null y) = zipWith' x' xs y  ys
1097         zipWith'' _ (x':xs) _ (y':ys)                  = zipWith' x' xs y' ys
1098
1099 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1100 -- ByteStrings. Note that this performs two 'pack' operations.
1101 {-
1102 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1103 unzip _ls = error "not yet implemented"
1104 {-# INLINE unzip #-}
1105 -}
1106
1107 -- ---------------------------------------------------------------------
1108 -- Special lists
1109
1110 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1111 inits :: ByteString -> [ByteString]
1112 inits = (LPS [] :) . inits' . unLPS
1113   where inits' []     = []
1114         inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1115                      ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1116
1117 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1118 tails :: ByteString -> [ByteString]
1119 tails = tails' . unLPS
1120   where tails' []           = LPS [] : []
1121         tails' xs@(x:xs')
1122           | P.length x == 1 = LPS xs : tails' xs'
1123           | otherwise       = LPS xs : tails' (P.unsafeTail x : xs')
1124
1125 -- ---------------------------------------------------------------------
1126 -- Low level constructors
1127
1128 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1129 --   This is mainly useful to allow the rest of the data pointed
1130 --   to by the 'ByteString' to be garbage collected, for example
1131 --   if a large string has been read in, and only a small part of it
1132 --   is needed in the rest of the program.
1133 copy :: ByteString -> ByteString
1134 copy (LPS lps) = LPS (L.map P.copy lps)
1135 --TODO, we could coalese small blocks here
1136 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1137 -- the parent blocks then we'd better copy strictly.
1138
1139 -- ---------------------------------------------------------------------
1140
1141 -- TODO defrag func that concatenates block together that are below a threshold
1142 -- defrag :: Int -> ByteString -> ByteString
1143
1144 -- ---------------------------------------------------------------------
1145 -- Lazy ByteString IO
1146
1147 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1148 -- are read on demand, in at most @k@-sized chunks. It does not block
1149 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1150 -- available then they will be returned immediately as a smaller chunk.
1151 hGetContentsN :: Int -> Handle -> IO ByteString
1152 hGetContentsN k h = lazyRead >>= return . LPS
1153   where
1154     lazyRead = unsafeInterleaveIO loop
1155
1156     loop = do
1157         ps <- P.hGetNonBlocking h k
1158         --TODO: I think this should distinguish EOF from no data available
1159         -- the otherlying POSIX call makes this distincion, returning either
1160         -- 0 or EAGAIN
1161         if P.null ps
1162           then do eof <- hIsEOF h
1163                   if eof then return []
1164                          else hWaitForInput h (-1)
1165                            >> loop
1166           else do pss <- lazyRead
1167                   return (ps : pss)
1168
1169 -- | Read @n@ bytes into a 'ByteString', directly from the
1170 -- specified 'Handle', in chunks of size @k@.
1171 hGetN :: Int -> Handle -> Int -> IO ByteString
1172 hGetN _ _ 0 = return empty
1173 hGetN k h n = readChunks n >>= return . LPS
1174   where
1175     STRICT1(readChunks)
1176     readChunks i = do
1177         ps <- P.hGet h (min k i)
1178         case P.length ps of
1179             0 -> return []
1180             m -> do pss <- readChunks (i - m)
1181                     return (ps : pss)
1182
1183 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1184 -- waiting for data to become available, instead it returns only whatever data
1185 -- is available. Chunks are read on demand, in @k@-sized chunks.
1186 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1187 #if defined(__GLASGOW_HASKELL__)
1188 hGetNonBlockingN _ _ 0 = return empty
1189 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1190   where
1191     STRICT1(readChunks)
1192     readChunks i = do
1193         ps <- P.hGetNonBlocking h (min k i)
1194         case P.length ps of
1195             0 -> return []
1196             m -> do pss <- readChunks (i - m)
1197                     return (ps : pss)
1198 #else
1199 hGetNonBlockingN = hGetN
1200 #endif
1201
1202 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1203 -- are read on demand, using the default chunk size.
1204 hGetContents :: Handle -> IO ByteString
1205 hGetContents = hGetContentsN defaultChunkSize
1206
1207 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1208 hGet :: Handle -> Int -> IO ByteString
1209 hGet = hGetN defaultChunkSize
1210
1211 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1212 -- waiting for data to become available, instead it returns only whatever data
1213 -- is available.
1214 #if defined(__GLASGOW_HASKELL__)
1215 hGetNonBlocking :: Handle -> Int -> IO ByteString
1216 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1217 #else
1218 hGetNonBlocking = hGet
1219 #endif
1220
1221 -- | Read an entire file /lazily/ into a 'ByteString'.
1222 readFile :: FilePath -> IO ByteString
1223 readFile f = openBinaryFile f ReadMode >>= hGetContents
1224
1225 -- | Write a 'ByteString' to a file.
1226 writeFile :: FilePath -> ByteString -> IO ()
1227 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1228     (\hdl -> hPut hdl txt)
1229
1230 -- | Append a 'ByteString' to a file.
1231 appendFile :: FilePath -> ByteString -> IO ()
1232 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1233     (\hdl -> hPut hdl txt)
1234
1235 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1236 getContents :: IO ByteString
1237 getContents = hGetContents stdin
1238
1239 -- | Outputs a 'ByteString' to the specified 'Handle'.
1240 hPut :: Handle -> ByteString -> IO ()
1241 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1242
1243 -- | Write a ByteString to stdout
1244 putStr :: ByteString -> IO ()
1245 putStr = hPut stdout
1246
1247 -- | Write a ByteString to stdout, appending a newline byte
1248 putStrLn :: ByteString -> IO ()
1249 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1250
1251 -- | The interact function takes a function of type @ByteString -> ByteString@
1252 -- as its argument. The entire input from the standard input device is passed
1253 -- to this function as its argument, and the resulting string is output on the
1254 -- standard output device. It's great for writing one line programs!
1255 interact :: (ByteString -> ByteString) -> IO ()
1256 interact transformer = putStr . transformer =<< getContents
1257
1258 -- ---------------------------------------------------------------------
1259 -- Internal utilities
1260
1261 -- Common up near identical calls to `error' to reduce the number
1262 -- constant strings created when compiled:
1263 errorEmptyList :: String -> a
1264 errorEmptyList fun = moduleError fun "empty ByteString"
1265
1266 moduleError :: String -> String -> a
1267 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1268
1269 -- A manually fused version of "filter (not.null) . map f", since they
1270 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1271 --
1272 -- TODO fuse.
1273 --
1274 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1275 filterMap _ []     = []
1276 filterMap f (x:xs) = case f x of
1277                     y | P.null y  ->     filterMap f xs      -- manually fuse the invariant filter
1278                       | otherwise -> y : filterMap f xs
1279 {-# INLINE filterMap #-}
1280
1281
1282 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1283 -- of the string if no element is found, rather than Nothing.
1284 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1285 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1286   where
1287     STRICT2(go)
1288     go ptr n | n >= l    = return l
1289              | otherwise = do w <- peek ptr
1290                               if k w
1291                                 then return n
1292                                 else go (ptr `plusPtr` 1) (n+1)
1293 {-# INLINE findIndexOrEnd #-}