Data/ByteString/Lazy.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
   2 --
   3 -- Module      : ByteString.Lazy
   4 -- Copyright   : (c) Don Stewart 2006
   5 --               (c) Duncan Coutts 2006
   6 -- License     : BSD-style
   7 --
   8 -- Maintainer  : dons@cse.unsw.edu.au
   9 -- Stability   : experimental
  10 -- Portability : portable, requires ffi and cpp
  11 -- Tested with : GHC 6.4.1 and Hugs March 2005
  12 --
  13
  14 --
  15 -- | A time and space-efficient implementation of lazy byte vectors
  16 -- using lists of packed 'Word8' arrays, suitable for high performance
  17 -- use, both in terms of large data quantities, or high speed
  18 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
  19 -- arrays of bytes. They provide a means to manipulate large byte vectors
  20 -- without requiring the entire vector be resident in memory.
  21 --
  22 -- Some operations, such as concat, append, reverse and cons, have
  23 -- better complexity than their "Data.ByteString" equivalents, as due to
  24 -- optimisations resulting from the list spine structure. And for other
  25 -- operations Lazy ByteStrings are usually within a few percent of
  26 -- strict ones, but with better heap usage. For data larger than the
  27 -- available memory, or if you have tight memory constraints, this
  28 -- module will be the only option. The default chunk size is 64k, which
  29 -- should be good in most circumstances. For people with large L2
  30 -- caches, you may want to increase this to fit your cache.
  31 --
  32 -- This module is intended to be imported @qualified@, to avoid name
  33 -- clashes with "Prelude" functions.  eg.
  34 --
  35 -- > import qualified Data.ByteString.Lazy as B
  36 --
  37 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  38 -- UArray by Simon Marlow. Rewritten to support slices and use
  39 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  40 -- Lazy variant by Duncan Coutts and Don Stewart.
  41 --
  42
  43 module Data.ByteString.Lazy (
  44
  45         -- * The @ByteString@ type
  46         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  47
  48         -- * Introducing and eliminating 'ByteString's
  49         empty,                  -- :: ByteString
  50         singleton,               -- :: Word8   -> ByteString
  51         pack,                   -- :: [Word8] -> ByteString
  52         unpack,                 -- :: ByteString -> [Word8]
  53
  54         -- * Basic interface
  55         cons,                   -- :: Word8 -> ByteString -> ByteString
  56         snoc,                   -- :: ByteString -> Word8 -> ByteString
  57         append,                 -- :: ByteString -> ByteString -> ByteString
  58         head,                   -- :: ByteString -> Word8
  59         last,                   -- :: ByteString -> Word8
  60         tail,                   -- :: ByteString -> ByteString
  61         init,                   -- :: ByteString -> ByteString
  62         null,                   -- :: ByteString -> Bool
  63         length,                 -- :: ByteString -> Int64
  64
  65         -- * Transformating ByteStrings
  66         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  67         reverse,                -- :: ByteString -> ByteString
  68 --      intersperse,            -- :: Word8 -> ByteString -> ByteString
  69         transpose,              -- :: [ByteString] -> [ByteString]
  70
  71         -- * Reducing 'ByteString's (folds)
  72         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  73         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  74         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  75         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  76         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  77         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  78
  79         -- ** Special folds
  80         concat,                 -- :: [ByteString] -> ByteString
  81         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  82         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  83         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  84         maximum,                -- :: ByteString -> Word8
  85         minimum,                -- :: ByteString -> Word8
  86
  87         -- * Building ByteStrings
  88         -- ** Scans
  89         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  90 --      scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  91 --      scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  92 --      scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  93
  94         -- ** Accumulating maps
  95         mapAccumL,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  96         mapAccumR,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  97         mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
  98
  99         -- ** Infinite ByteStrings
 100         repeat,                 -- :: Word8 -> ByteString
 101         replicate,              -- :: Int64 -> Word8 -> ByteString
 102         cycle,                  -- :: ByteString -> ByteString
 103         iterate,                -- :: (Word8 -> Word8) -> Word8 -> ByteString
 104
 105         -- ** Unfolding
 106         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 107
 108         -- * Substrings
 109
 110         -- ** Breaking strings
 111         take,                   -- :: Int64 -> ByteString -> ByteString
 112         drop,                   -- :: Int64 -> ByteString -> ByteString
 113         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
 114         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 115         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 116         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 117         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 118         group,                  -- :: ByteString -> [ByteString]
 119         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 120         inits,                  -- :: ByteString -> [ByteString]
 121         tails,                  -- :: ByteString -> [ByteString]
 122
 123         -- ** Breaking into many substrings
 124         split,                  -- :: Word8 -> ByteString -> [ByteString]
 125         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 126         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 127
 128         -- ** Joining strings
 129         join,                   -- :: ByteString -> [ByteString] -> ByteString
 130
 131         -- * Predicates
 132         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 133 --      isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 134
 135         -- * Searching ByteStrings
 136
 137         -- ** Searching by equality
 138         elem,                   -- :: Word8 -> ByteString -> Bool
 139         notElem,                -- :: Word8 -> ByteString -> Bool
 140
 141         -- ** Searching with a predicate
 142         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 143         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 144 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 145
 146         -- * Indexing ByteStrings
 147         index,                  -- :: ByteString -> Int64 -> Word8
 148         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int64
 149         elemIndices,            -- :: Word8 -> ByteString -> [Int64]
 150         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 151         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int64]
 152         count,                  -- :: Word8 -> ByteString -> Int64
 153
 154         -- * Zipping and unzipping ByteStrings
 155         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 156         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 157 --      unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 158
 159         -- * Ordered ByteStrings
 160 --        sort,                   -- :: ByteString -> ByteString
 161
 162         copy,                   -- :: ByteString -> ByteString
 163
 164         -- * I\/O with 'ByteString's
 165
 166         -- ** Standard input and output
 167         getContents,            -- :: IO ByteString
 168         putStr,                 -- :: ByteString -> IO ()
 169         putStrLn,               -- :: ByteString -> IO ()
 170         interact,               -- :: (ByteString -> ByteString) -> IO ()
 171
 172         -- ** Files
 173         readFile,               -- :: FilePath -> IO ByteString
 174         writeFile,              -- :: FilePath -> ByteString -> IO ()
 175         appendFile,             -- :: FilePath -> ByteString -> IO ()
 176
 177         -- ** I\/O with Handles
 178         hGetContents,           -- :: Handle -> IO ByteString
 179         hGet,                   -- :: Handle -> Int -> IO ByteString
 180         hPut,                   -- :: Handle -> ByteString -> IO ()
 181         hGetNonBlocking,        -- :: Handle -> IO ByteString
 182 --      hGetN,                  -- :: Int -> Handle -> Int -> IO ByteString
 183 --      hGetContentsN,          -- :: Int -> Handle -> IO ByteString
 184 --      hGetNonBlockingN,       -- :: Int -> Handle -> IO ByteString
 185
 186   ) where
 187
 188 import qualified Prelude
 189 import Prelude hiding
 190     (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
 191     ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
 192     ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
 193     ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
 194     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
 195
 196 import qualified Data.List              as L  -- L for list/lazy
 197 import qualified Data.ByteString        as P  -- P for packed
 198 import qualified Data.ByteString.Base   as P
 199 import qualified Data.ByteString.Fusion as P
 200 import Data.ByteString.Fusion (PairS(..),loopL)
 201
 202 import Data.Monoid              (Monoid(..))
 203
 204 import Data.Word                (Word8)
 205 import Data.Int                 (Int64)
 206 import System.IO                (Handle,stdin,stdout,openBinaryFile,IOMode(..)
 207                                 ,hClose,hWaitForInput,hIsEOF)
 208 import System.IO.Unsafe
 209 import Control.Exception        (bracket)
 210
 211 import Foreign.ForeignPtr       (withForeignPtr)
 212 import Foreign.Ptr
 213 import Foreign.Storable
 214
 215 #if defined(__GLASGOW_HASKELL__)
 216 import Data.Generics            (Data(..), Typeable(..))
 217 #endif
 218
 219 -- -----------------------------------------------------------------------------
 220 --
 221 -- Useful macros, until we have bang patterns
 222 --
 223
 224 #define STRICT1(f) f a | a `seq` False = undefined
 225 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 226 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 227 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 228 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 229
 230 -- -----------------------------------------------------------------------------
 231
 232 -- | A space-efficient representation of a Word8 vector, supporting many
 233 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 234 --
 235 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 236 --
 237 newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string
 238     deriving (Show,Read
 239 #if defined(__GLASGOW_HASKELL__)
 240                         ,Data, Typeable
 241 #endif
 242              )
 243
 244 --
 245 -- hmm, what about getting the PS constructor unpacked into the cons cell?
 246 --
 247 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
 248 --
 249 -- Would avoid one indirection per chunk.
 250 --
 251
 252 unLPS :: ByteString -> [P.ByteString]
 253 unLPS (LPS xs) = xs
 254 {-# INLINE unLPS #-}
 255
 256 instance Eq  ByteString
 257     where (==)    = eq
 258
 259 instance Ord ByteString
 260     where compare = compareBytes
 261
 262 instance Monoid ByteString where
 263     mempty  = empty
 264     mappend = append
 265     mconcat = concat
 266
 267 ------------------------------------------------------------------------
 268
 269 -- XXX
 270 -- The data type invariant:
 271 -- Every ByteString is either empty or consists of non-null ByteStrings.
 272 -- All functions must preserve this, and the QC properties must check this.
 273 --
 274 _invariant :: ByteString -> Bool
 275 _invariant (LPS []) = True
 276 _invariant (LPS xs) = L.all (not . P.null) xs
 277
 278 -- In a form useful for QC testing
 279 _checkInvariant :: ByteString -> ByteString
 280 _checkInvariant lps
 281     | _invariant lps = lps
 282     | otherwise      = moduleError "invariant" ("violation: " ++ show lps)
 283
 284 -- The Data abstraction function
 285 --
 286 _abstr :: ByteString -> P.ByteString
 287 _abstr (LPS []) = P.empty
 288 _abstr (LPS xs) = P.concat xs
 289
 290 -- The representation uses lists of packed chunks. When we have to convert from
 291 -- a lazy list to the chunked representation, then by default we'll use this
 292 -- chunk size. Some functions give you more control over the chunk size.
 293 --
 294 -- Measurements here:
 295 --  http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
 296 --
 297 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
 298 -- The following value assumes people have something greater than 128k,
 299 -- and need to share the cache with other programs.
 300 --
 301 defaultChunkSize :: Int
 302 defaultChunkSize = 32 * k - overhead
 303    where k = 1024
 304          overhead = 2 * sizeOf (undefined :: Int)
 305
 306 smallChunkSize :: Int
 307 smallChunkSize = 4 * k - overhead
 308    where k = 1024
 309          overhead = 2 * sizeOf (undefined :: Int)
 310
 311 -- defaultChunkSize = 1
 312
 313 ------------------------------------------------------------------------
 314
 315 eq :: ByteString -> ByteString -> Bool
 316 eq (LPS xs) (LPS ys) = eq' xs ys
 317   where eq' [] [] = True
 318         eq' [] _  = False
 319         eq' _  [] = False
 320         eq' (a:as) (b:bs) =
 321           case compare (P.length a) (P.length b) of
 322             LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
 323             EQ -> a == b                       && eq' as bs
 324             GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
 325
 326 compareBytes :: ByteString -> ByteString -> Ordering
 327 compareBytes (LPS xs) (LPS ys) = cmp xs ys
 328   where cmp [] [] = EQ
 329         cmp [] _  = LT
 330         cmp _  [] = GT
 331         cmp (a:as) (b:bs) =
 332           case compare (P.length a) (P.length b) of
 333             LT -> case compare a (P.take (P.length a) b) of
 334                     EQ     -> cmp as (P.drop (P.length a) b : bs)
 335                     result -> result
 336             EQ -> case compare a b of
 337                     EQ     -> cmp as bs
 338                     result -> result
 339             GT -> case compare (P.take (P.length b) a) b of
 340                     EQ     -> cmp (P.drop (P.length b) a : as) bs
 341                     result -> result
 342
 343 -- -----------------------------------------------------------------------------
 344 -- Introducing and eliminating 'ByteString's
 345
 346 -- | /O(1)/ The empty 'ByteString'
 347 empty :: ByteString
 348 empty = LPS []
 349 {-# NOINLINE empty #-}
 350
 351 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 352 singleton :: Word8 -> ByteString
 353 singleton c = LPS [P.singleton c]
 354 {-# NOINLINE singleton #-}
 355
 356 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 357 pack :: [Word8] -> ByteString
 358 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
 359
 360 -- ?
 361 chunk :: Int -> [a] -> [[a]]
 362 chunk _    [] = []
 363 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
 364
 365 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 366 unpack :: ByteString -> [Word8]
 367 unpack (LPS ss) = L.concatMap P.unpack ss
 368 {-# INLINE unpack #-}
 369
 370 ------------------------------------------------------------------------
 371
 372 {-
 373 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 374 -- conversion function
 375 packWith :: (a -> Word8) -> [a] -> ByteString
 376 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
 377 {-# INLINE packWith #-}
 378 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 379
 380 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 381 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 382 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
 383 {-# INLINE unpackWith #-}
 384 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 385 -}
 386
 387 -- ---------------------------------------------------------------------
 388 -- Basic interface
 389
 390 -- | /O(1)/ Test whether a ByteString is empty.
 391 null :: ByteString -> Bool
 392 null (LPS []) = True
 393 null (_)      = False
 394 {-# INLINE null #-}
 395
 396 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
 397 length :: ByteString -> Int64
 398 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
 399
 400 -- avoid the intermediate list?
 401 -- length (LPS ss) = L.foldl lengthF 0 ss
 402 --     where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
 403 {-# INLINE length #-}
 404
 405 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
 406 -- strict in the ByteString that we are consing onto. More precisely, it forces
 407 -- the head and the first chunk. It does this because, for space efficiency, it
 408 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
 409 -- new \'chunk\'.
 410 --
 411 -- So that means you can't use a lazy recursive contruction like this:
 412 --
 413 -- > let xs = cons c xs in xs
 414 --
 415 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
 416 --
 417 cons :: Word8 -> ByteString -> ByteString
 418 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
 419 cons c (LPS ss)                        = LPS (P.singleton c : ss)
 420 {-# INLINE cons #-}
 421
 422 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
 423 snoc :: ByteString -> Word8 -> ByteString
 424 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
 425 {-# INLINE snoc #-}
 426
 427 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 428 head :: ByteString -> Word8
 429 head (LPS [])    = errorEmptyList "head"
 430 head (LPS (x:_)) = P.unsafeHead x
 431 {-# INLINE head #-}
 432
 433 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 434 tail :: ByteString -> ByteString
 435 tail (LPS [])     = errorEmptyList "tail"
 436 tail (LPS (x:xs))
 437   | P.length x == 1 = LPS xs
 438   | otherwise       = LPS (P.unsafeTail x : xs)
 439 {-# INLINE tail #-}
 440
 441 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
 442 last :: ByteString -> Word8
 443 last (LPS []) = errorEmptyList "last"
 444 last (LPS xs) = P.last (L.last xs)
 445 {-# INLINE last #-}
 446
 447 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
 448 init :: ByteString -> ByteString
 449 init (LPS []) = errorEmptyList "init"
 450 init (LPS xs)
 451     | P.length y == 1 = LPS ys
 452     | otherwise       = LPS (ys ++ [P.init y])
 453     where (y,ys) = (L.last xs, L.init xs)
 454 {-# INLINE init #-}
 455
 456 -- | /O(n)/ Append two ByteStrings
 457 append :: ByteString -> ByteString -> ByteString
 458 append (LPS []) (LPS ys) = LPS ys
 459 append (LPS xs) (LPS []) = LPS xs
 460 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
 461 {-# INLINE append #-}
 462
 463 -- ---------------------------------------------------------------------
 464 -- Transformations
 465
 466 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 467 -- element of @xs@.
 468 map :: (Word8 -> Word8) -> ByteString -> ByteString
 469 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
 470 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
 471 {-# INLINE map #-}
 472
 473 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 474 reverse :: ByteString -> ByteString
 475 reverse (LPS ps) = LPS (rev [] ps)
 476   where rev a []     = a
 477         rev a (x:xs) = rev (P.reverse x:a) xs
 478 -- note, here is one example where the extra element lazyness is an advantage.
 479 -- we can reerse the list of chunks strictly but reverse each chunk lazily
 480 -- so while we may force the whole lot into memory we do not need to copy
 481 -- each chunk until it is used.
 482 {-# INLINE reverse #-}
 483
 484 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
 485 -- \`intersperses\' that byte between the elements of the 'ByteString'.
 486 -- It is analogous to the intersperse function on Lists.
 487 -- intersperse :: Word8 -> ByteString -> ByteString
 488 -- intersperse = error "FIXME: not yet implemented"
 489
 490 {-
 491 intersperse c (LPS [])     = LPS []
 492 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
 493   where intersperse' c ps@(PS x s l) =
 494           P.create (2*l) $ \p -> withForeignPtr x $ \f ->
 495                 poke p c
 496                 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
 497 -}
 498
 499 -- | The 'transpose' function transposes the rows and columns of its
 500 -- 'ByteString' argument.
 501 transpose :: [ByteString] -> [ByteString]
 502 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
 503
 504 -- ---------------------------------------------------------------------
 505 -- Reducing 'ByteString's
 506
 507 -- | 'foldl', applied to a binary operator, a starting value (typically
 508 -- the left-identity of the operator), and a ByteString, reduces the
 509 -- ByteString using the binary operator, from left to right.
 510 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 511 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
 512 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
 513 {-# INLINE foldl #-}
 514
 515 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 516 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 517 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
 518 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
 519 {-# INLINE foldl' #-}
 520
 521 -- | 'foldr', applied to a binary operator, a starting value
 522 -- (typically the right-identity of the operator), and a ByteString,
 523 -- reduces the ByteString using the binary operator, from right to left.
 524 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 525 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
 526 {-# INLINE foldr #-}
 527
 528 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 529 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 530 -- This function is subject to array fusion.
 531 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 532 foldl1 _ (LPS []) = errorEmptyList "foldl1"
 533 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 534
 535 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 536 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 537 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
 538 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
 539
 540 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 541 -- and thus must be applied to non-empty 'ByteString's
 542 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 543 foldr1 _ (LPS []) = errorEmptyList "foldr1"
 544 foldr1 f (LPS ps) = foldr1' ps
 545   where foldr1' (x:[]) = P.foldr1 f x
 546         foldr1' (x:xs) = P.foldr  f (foldr1' xs) x
 547
 548 -- ---------------------------------------------------------------------
 549 -- Special folds
 550
 551 -- | /O(n)/ Concatenate a list of ByteStrings.
 552 concat :: [ByteString] -> ByteString
 553 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
 554
 555 -- | Map a function over a 'ByteString' and concatenate the results
 556 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 557 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
 558     where
 559       k w = case f w of LPS xs -> P.concat xs
 560
 561 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 562 -- any element of the 'ByteString' satisfies the predicate.
 563 any :: (Word8 -> Bool) -> ByteString -> Bool
 564 any f (LPS xs) = L.or (L.map (P.any f) xs)
 565 -- todo fuse
 566
 567 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 568 -- if all elements of the 'ByteString' satisfy the predicate.
 569 all :: (Word8 -> Bool) -> ByteString -> Bool
 570 all f (LPS xs) = L.and (L.map (P.all f) xs)
 571 -- todo fuse
 572
 573 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 574 maximum :: ByteString -> Word8
 575 maximum (LPS [])     = errorEmptyList "maximum"
 576 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
 577 {-# INLINE maximum #-}
 578
 579 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 580 minimum :: ByteString -> Word8
 581 minimum (LPS [])     = errorEmptyList "minimum"
 582 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
 583 {-# INLINE minimum #-}
 584
 585 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 586 -- 'foldl'; it applies a function to each element of a ByteString,
 587 -- passing an accumulating parameter from left to right, and returning a
 588 -- final value of this accumulator together with the new ByteString.
 589 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 590 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
 591
 592 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 593 mapAccumR = error "mapAccumR unimplemented"
 594
 595 -- | /O(n)/ map Word8 functions, provided with the index at each position
 596 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 597 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
 598
 599 -- ---------------------------------------------------------------------
 600 -- Building ByteStrings
 601
 602 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 603 -- reduced values from the left. This function will fuse.
 604 --
 605 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 606 --
 607 -- Note that
 608 --
 609 -- > last (scanl f z xs) == foldl f z xs.
 610 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 611 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
 612 {-# INLINE scanl #-}
 613
 614 -- ---------------------------------------------------------------------
 615 -- Unfolds and replicates
 616
 617 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
 618 -- of @f@ to @x@:
 619 --
 620 -- > iterate f x == [x, f x, f (f x), ...]
 621 --
 622 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
 623 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
 624
 625 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
 626 -- element.
 627 --
 628 repeat :: Word8 -> ByteString
 629 repeat c = LPS (L.repeat block)
 630     where block =  P.replicate smallChunkSize c
 631
 632 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
 633 -- the value of every element.
 634 --
 635 replicate :: Int64 -> Word8 -> ByteString
 636 replicate w c
 637     | w <= 0             = empty
 638     | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
 639     | r == 0             = LPS (L.genericReplicate q s) -- preserve invariant
 640     | otherwise          = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
 641  where
 642     s      = P.replicate smallChunkSize c
 643     (q, r) = quotRem w (fromIntegral smallChunkSize)
 644
 645 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
 646 -- the infinite repetition of the original ByteString.
 647 --
 648 cycle :: ByteString -> ByteString
 649 cycle (LPS []) = errorEmptyList "cycle"
 650 cycle (LPS xs) = LPS (L.cycle xs)
 651
 652 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
 653 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
 654 -- the element and returns 'Nothing' if it is done producing the
 655 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 656 -- prepending to the ByteString and @b@ is used as the next element in a
 657 -- recursive call.
 658 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 659 unfoldr f = LPS . unfoldChunk 32
 660   where unfoldChunk n x =
 661           case P.unfoldrN n f x of
 662             (s, Nothing)
 663               | P.null s  -> []
 664               | otherwise -> s : []
 665             (s, Just x')  -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
 666
 667 -- ---------------------------------------------------------------------
 668 -- Substrings
 669
 670 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 671 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 672 take :: Int64 -> ByteString -> ByteString
 673 take n _ | n < 0 = empty
 674 take i (LPS ps)  = LPS (take' i ps)
 675   where take' _ []     = []
 676         take' 0 _      = []
 677         take' n (x:xs) =
 678           if n < fromIntegral (P.length x)
 679             then P.take (fromIntegral n) x : []
 680             else x : take' (n - fromIntegral (P.length x)) xs
 681
 682 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 683 -- elements, or @[]@ if @n > 'length' xs@.
 684 drop  :: Int64 -> ByteString -> ByteString
 685 drop i p | i <= 0 = p
 686 drop i (LPS ps) = LPS (drop' i ps)
 687   where drop' _ []     = []
 688         drop' 0 xs     = xs
 689         drop' n (x:xs) =
 690           if n < fromIntegral (P.length x)
 691             then P.drop (fromIntegral n) x : xs
 692             else drop' (n - fromIntegral (P.length x)) xs
 693
 694 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 695 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
 696 splitAt i p        | i <= 0 = (empty, p)
 697 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
 698   where splitAt' _ []     = ([], [])
 699         splitAt' 0 xs     = ([], xs)
 700         splitAt' n (x:xs) =
 701           if n < fromIntegral (P.length x)
 702             then (P.take (fromIntegral n) x : [],
 703                   P.drop (fromIntegral n) x : xs)
 704             else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
 705                    in (x:xs', xs'')
 706
 707
 708 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 709 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 710 -- satisfy @p@.
 711 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 712 takeWhile f (LPS ps) = LPS (takeWhile' ps)
 713   where takeWhile' []     = []
 714         takeWhile' (x:xs) =
 715           case findIndexOrEnd (not . f) x of
 716             0                  -> []
 717             n | n < P.length x -> P.take n x : []
 718               | otherwise      -> x : takeWhile' xs
 719
 720 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 721 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 722 dropWhile f (LPS ps) = LPS (dropWhile' ps)
 723   where dropWhile' []     = []
 724         dropWhile' (x:xs) =
 725           case findIndexOrEnd (not . f) x of
 726             n | n < P.length x -> P.drop n x : xs
 727               | otherwise      -> dropWhile' xs
 728
 729 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 730 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 731 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
 732   where break' []     = ([], [])
 733         break' (x:xs) =
 734           case findIndexOrEnd f x of
 735             0                  -> ([], x : xs)
 736             n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
 737               | otherwise      -> let (xs', xs'') = break' xs
 738                                    in (x : xs', xs'')
 739
 740 --
 741 -- TODO
 742 --
 743 -- Add rules
 744 --
 745
 746 {-
 747 -- | 'breakByte' breaks its ByteString argument at the first occurence
 748 -- of the specified byte. It is more efficient than 'break' as it is
 749 -- implemented with @memchr(3)@. I.e.
 750 --
 751 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 752 --
 753 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 754 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
 755   where breakByte' []     = ([], [])
 756         breakByte' (x:xs) =
 757           case P.elemIndex c x of
 758             Just 0  -> ([], x : xs)
 759             Just n  -> (P.take n x : [], P.drop n x : xs)
 760             Nothing -> let (xs', xs'') = breakByte' xs
 761                         in (x : xs', xs'')
 762
 763 -- | 'spanByte' breaks its ByteString argument at the first
 764 -- occurence of a byte other than its argument. It is more efficient
 765 -- than 'span (==)'
 766 --
 767 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 768 --
 769 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 770 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
 771   where spanByte' []     = ([], [])
 772         spanByte' (x:xs) =
 773           case P.spanByte c x of
 774             (x', x'') | P.null x'  -> ([], x : xs)
 775                       | P.null x'' -> let (xs', xs'') = spanByte' xs
 776                                        in (x : xs', xs'')
 777                       | otherwise  -> (x' : [], x'' : xs)
 778 -}
 779
 780 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 781 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 782 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 783 span p = break (not . p)
 784
 785 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 786 -- separators, where the predicate returns True for a separator element.
 787 -- The resulting components do not contain the separators.  Two adjacent
 788 -- separators result in an empty component in the output.  eg.
 789 --
 790 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 791 -- > splitWith (=='a') []        == []
 792 --
 793 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 794 splitWith _ (LPS [])     = []
 795 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
 796
 797   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 798         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 799         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
 800         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 801
 802         cons' x xs | P.null x  = xs
 803                    | otherwise = x:xs
 804         {-# INLINE cons' #-}
 805 {-# INLINE splitWith #-}
 806
 807 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 808 -- argument, consuming the delimiter. I.e.
 809 --
 810 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 811 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
 812 -- > split 'x'  "x"          == ["",""]
 813 --
 814 -- and
 815 --
 816 -- > join [c] . split c == id
 817 -- > split == splitWith . (==)
 818 --
 819 -- As for all splitting functions in this library, this function does
 820 -- not copy the substrings, it just constructs new 'ByteStrings' that
 821 -- are slices of the original.
 822 --
 823 split :: Word8 -> ByteString -> [ByteString]
 824 split _ (LPS [])     = []
 825 split c (LPS (a:as)) = comb [] (P.split c a) as
 826
 827   where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 828         comb acc (s:[]) []     = LPS (L.reverse (cons' s acc)) : []
 829         comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
 830         comb acc (s:ss) xs     = LPS (L.reverse (cons' s acc)) : comb [] ss xs
 831
 832         cons' x xs | P.null x  = xs
 833                    | otherwise = x:xs
 834         {-# INLINE cons' #-}
 835 {-# INLINE split #-}
 836
 837 -- | Like 'splitWith', except that sequences of adjacent separators are
 838 -- treated as a single separator. eg.
 839 --
 840 -- > tokens (=='a') "aabbaca" == ["bb","c"]
 841 --
 842 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
 843 tokens f = L.filter (not.null) . splitWith f
 844
 845 -- | The 'group' function takes a ByteString and returns a list of
 846 -- ByteStrings such that the concatenation of the result is equal to the
 847 -- argument.  Moreover, each sublist in the result contains only equal
 848 -- elements.  For example,
 849 --
 850 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 851 --
 852 -- It is a special case of 'groupBy', which allows the programmer to
 853 -- supply their own equality test.
 854 group :: ByteString -> [ByteString]
 855 group (LPS [])     = []
 856 group (LPS (a:as)) = group' [] (P.group a) as
 857   where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 858         group' acc@(s':_) ss@(s:_) xs
 859           | P.unsafeHead s'
 860          /= P.unsafeHead s       = LPS (L.reverse acc) : group' [] ss xs
 861         group' acc (s:[]) []     = LPS (L.reverse (s : acc)) : []
 862         group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
 863         group' acc (s:ss) xs     = LPS (L.reverse (s : acc)) : group' [] ss xs
 864
 865 {-
 866 TODO: check if something like this might be faster
 867
 868 group :: ByteString -> [ByteString]
 869 group xs
 870     | null xs   = []
 871     | otherwise = ys : group zs
 872     where
 873         (ys, zs) = spanByte (unsafeHead xs) xs
 874 -}
 875
 876 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 877 --
 878 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 879 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
 880 {-
 881 groupBy _ (LPS [])     = []
 882 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
 883   where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
 884         groupBy' acc@(_:_) c ss@(s:_) xs
 885           | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
 886         groupBy' acc _ (s:[]) []       = LPS (L.reverse (s : acc)) : []
 887         groupBy' []  _ (s:[]) (x:xs)   = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
 888         groupBy' acc c (s:[]) (x:xs)   = groupBy' (s:acc) c (P.groupBy k x) xs
 889         groupBy' acc _ (s:ss) xs       = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
 890 -}
 891
 892 {-
 893 TODO: check if something like this might be faster
 894
 895 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 896 groupBy k xs
 897     | null xs   = []
 898     | otherwise = take n xs : groupBy k (drop n xs)
 899     where
 900         n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
 901 -}
 902
 903 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
 904 -- 'ByteString's and concatenates the list after interspersing the first
 905 -- argument between each element of the list.
 906 join :: ByteString -> [ByteString] -> ByteString
 907 join s = concat . (L.intersperse s)
 908
 909 -- ---------------------------------------------------------------------
 910 -- Indexing ByteStrings
 911
 912 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
 913 index :: ByteString -> Int64 -> Word8
 914 index _        i | i < 0 = moduleError "index" ("negative index: " ++ show i)
 915 index (LPS ps) i         = index' ps i
 916   where index' []     n = moduleError "index" ("index too large: " ++ show n)
 917         index' (x:xs) n
 918           | n >= fromIntegral (P.length x) =
 919               index' xs (n - fromIntegral (P.length x))
 920           | otherwise       = P.unsafeIndex x (fromIntegral n)
 921
 922 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 923 -- element in the given 'ByteString' which is equal to the query
 924 -- element, or 'Nothing' if there is no such element.
 925 -- This implementation uses memchr(3).
 926 elemIndex :: Word8 -> ByteString -> Maybe Int64
 927 elemIndex c (LPS ps) = elemIndex' 0 ps
 928   where elemIndex' _ []     = Nothing
 929         elemIndex' n (x:xs) =
 930           case P.elemIndex c x of
 931             Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
 932             Just i  -> Just (n + fromIntegral i)
 933
 934 {-
 935 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 936 -- element in the given 'ByteString' which is equal to the query
 937 -- element, or 'Nothing' if there is no such element. The following
 938 -- holds:
 939 --
 940 -- > elemIndexEnd c xs ==
 941 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 942 --
 943 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 944 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 945     go (p `plusPtr` s) (l-1)
 946   where
 947     STRICT2(go)
 948     go p i | i < 0     = return Nothing
 949            | otherwise = do ch' <- peekByteOff p i
 950                             if ch == ch'
 951                                 then return $ Just i
 952                                 else go p (i-1)
 953 -}
 954 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 955 -- the indices of all elements equal to the query element, in ascending order.
 956 -- This implementation uses memchr(3).
 957 elemIndices :: Word8 -> ByteString -> [Int64]
 958 elemIndices c (LPS ps) = elemIndices' 0 ps
 959   where elemIndices' _ []     = []
 960         elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
 961                              ++ elemIndices' (n + fromIntegral (P.length x)) xs
 962
 963 -- | count returns the number of times its argument appears in the ByteString
 964 --
 965 -- > count = length . elemIndices
 966 --
 967 -- But more efficiently than using length on the intermediate list.
 968 count :: Word8 -> ByteString -> Int64
 969 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
 970
 971 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 972 -- returns the index of the first element in the ByteString
 973 -- satisfying the predicate.
 974 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
 975 findIndex k (LPS ps) = findIndex' 0 ps
 976   where findIndex' _ []     = Nothing
 977         findIndex' n (x:xs) =
 978           case P.findIndex k x of
 979             Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
 980             Just i  -> Just (n + fromIntegral i)
 981 {-# INLINE findIndex #-}
 982
 983 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 984 -- and returns the first element in matching the predicate, or 'Nothing'
 985 -- if there is no such element.
 986 --
 987 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 988 --
 989 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 990 find f (LPS ps) = find' ps
 991   where find' []     = Nothing
 992         find' (x:xs) = case P.find f x of
 993             Nothing -> find' xs
 994             Just w  -> Just w
 995 {-# INLINE find #-}
 996
 997 -- | The 'findIndices' function extends 'findIndex', by returning the
 998 -- indices of all elements satisfying the predicate, in ascending order.
 999 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
1000 findIndices k (LPS ps) = findIndices' 0 ps
1001   where findIndices' _ []     = []
1002         findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
1003                              ++ findIndices' (n + fromIntegral (P.length x)) xs
1004
1005 -- ---------------------------------------------------------------------
1006 -- Searching ByteStrings
1007
1008 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1009 elem :: Word8 -> ByteString -> Bool
1010 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1011
1012 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1013 notElem :: Word8 -> ByteString -> Bool
1014 notElem c ps = not (elem c ps)
1015
1016 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1017 -- returns a ByteString containing those characters that satisfy the
1018 -- predicate.
1019 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1020 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1021 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1022 {-# INLINE filter #-}
1023
1024 {-
1025 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1026 -- (==)/, for the common case of filtering a single byte. It is more
1027 -- efficient to use /filterByte/ in this case.
1028 --
1029 -- > filterByte == filter . (==)
1030 --
1031 -- filterByte is around 10x faster, and uses much less space, than its
1032 -- filter equivalent
1033 filterByte :: Word8 -> ByteString -> ByteString
1034 filterByte w ps = replicate (count w ps) w
1035 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1036
1037 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1038 -- case of filtering a single byte out of a list. It is more efficient
1039 -- to use /filterNotByte/ in this case.
1040 --
1041 -- > filterNotByte == filter . (/=)
1042 --
1043 -- filterNotByte is around 2x faster than its filter equivalent.
1044 filterNotByte :: Word8 -> ByteString -> ByteString
1045 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1046 -}
1047
1048 -- ---------------------------------------------------------------------
1049 -- Searching for substrings
1050
1051 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1052 -- iff the first is a prefix of the second.
1053 isPrefixOf :: ByteString -> ByteString -> Bool
1054 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1055   where isPrefixL [] _  = True
1056         isPrefixL _ []  = False
1057         isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y  && isPrefixL xs ys
1058                                 | P.length x <  P.length y = x == yh && isPrefixL xs (yt:ys)
1059                                 | otherwise                = xh == y && isPrefixL (xt:xs) ys
1060           where (xh,xt) = P.splitAt (P.length y) x
1061                 (yh,yt) = P.splitAt (P.length x) y
1062
1063 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1064 -- iff the first is a suffix of the second.
1065 --
1066 -- The following holds:
1067 --
1068 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1069 --
1070 -- However, the real implemenation uses memcmp to compare the end of the
1071 -- string only, with no reverse required..
1072 --
1073 --isSuffixOf :: ByteString -> ByteString -> Bool
1074 --isSuffixOf = error "not yet implemented"
1075
1076 -- ---------------------------------------------------------------------
1077 -- Zipping
1078
1079 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1080 -- corresponding pairs of bytes. If one input ByteString is short,
1081 -- excess elements of the longer ByteString are discarded. This is
1082 -- equivalent to a pair of 'unpack' operations.
1083 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1084 zip = zipWith (,)
1085
1086 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1087 -- the first argument, instead of a tupling function.  For example,
1088 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1089 -- corresponding sums.
1090 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1091 zipWith _ (LPS [])     (LPS _)  = []
1092 zipWith _ (LPS _)      (LPS []) = []
1093 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1094   where zipWith' x xs y ys =
1095           (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1096
1097         zipWith'' x []      _ _       | P.null x       = []
1098         zipWith'' _ _       y []      | P.null y       = []
1099         zipWith'' x xs      y ys      | not (P.null x)
1100                                      && not (P.null y) = zipWith' x  xs y  ys
1101         zipWith'' x xs      _ (y':ys) | not (P.null x) = zipWith' x  xs y' ys
1102         zipWith'' _ (x':xs) y ys      | not (P.null y) = zipWith' x' xs y  ys
1103         zipWith'' _ (x':xs) _ (y':ys)                  = zipWith' x' xs y' ys
1104
1105 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1106 -- ByteStrings. Note that this performs two 'pack' operations.
1107 {-
1108 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1109 unzip _ls = error "not yet implemented"
1110 {-# INLINE unzip #-}
1111 -}
1112
1113 -- ---------------------------------------------------------------------
1114 -- Special lists
1115
1116 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1117 inits :: ByteString -> [ByteString]
1118 inits = (LPS [] :) . inits' . unLPS
1119   where inits' []     = []
1120         inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1121                      ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1122
1123 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1124 tails :: ByteString -> [ByteString]
1125 tails = tails' . unLPS
1126   where tails' []           = LPS [] : []
1127         tails' xs@(x:xs')
1128           | P.length x == 1 = LPS xs : tails' xs'
1129           | otherwise       = LPS xs : tails' (P.unsafeTail x : xs')
1130
1131 -- ---------------------------------------------------------------------
1132 -- Low level constructors
1133
1134 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1135 --   This is mainly useful to allow the rest of the data pointed
1136 --   to by the 'ByteString' to be garbage collected, for example
1137 --   if a large string has been read in, and only a small part of it
1138 --   is needed in the rest of the program.
1139 copy :: ByteString -> ByteString
1140 copy (LPS lps) = LPS (L.map P.copy lps)
1141 --TODO, we could coalese small blocks here
1142 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1143 -- the parent blocks then we'd better copy strictly.
1144
1145 -- ---------------------------------------------------------------------
1146
1147 -- TODO defrag func that concatenates block together that are below a threshold
1148 -- defrag :: Int -> ByteString -> ByteString
1149
1150 -- ---------------------------------------------------------------------
1151 -- Lazy ByteString IO
1152
1153 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1154 -- are read on demand, in at most @k@-sized chunks. It does not block
1155 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1156 -- available then they will be returned immediately as a smaller chunk.
1157 hGetContentsN :: Int -> Handle -> IO ByteString
1158 hGetContentsN k h = lazyRead >>= return . LPS
1159   where
1160     lazyRead = unsafeInterleaveIO loop
1161
1162     loop = do
1163         ps <- P.hGetNonBlocking h k
1164         --TODO: I think this should distinguish EOF from no data available
1165         -- the otherlying POSIX call makes this distincion, returning either
1166         -- 0 or EAGAIN
1167         if P.null ps
1168           then do eof <- hIsEOF h
1169                   if eof then return []
1170                          else hWaitForInput h (-1)
1171                            >> loop
1172           else do pss <- lazyRead
1173                   return (ps : pss)
1174
1175 -- | Read @n@ bytes into a 'ByteString', directly from the
1176 -- specified 'Handle', in chunks of size @k@.
1177 hGetN :: Int -> Handle -> Int -> IO ByteString
1178 hGetN _ _ 0 = return empty
1179 hGetN k h n = readChunks n >>= return . LPS
1180   where
1181     STRICT1(readChunks)
1182     readChunks i = do
1183         ps <- P.hGet h (min k i)
1184         case P.length ps of
1185             0 -> return []
1186             m -> do pss <- readChunks (i - m)
1187                     return (ps : pss)
1188
1189 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1190 -- waiting for data to become available, instead it returns only whatever data
1191 -- is available. Chunks are read on demand, in @k@-sized chunks.
1192 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1193 #if defined(__GLASGOW_HASKELL__)
1194 hGetNonBlockingN _ _ 0 = return empty
1195 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1196   where
1197     STRICT1(readChunks)
1198     readChunks i = do
1199         ps <- P.hGetNonBlocking h (min k i)
1200         case P.length ps of
1201             0 -> return []
1202             m -> do pss <- readChunks (i - m)
1203                     return (ps : pss)
1204 #else
1205 hGetNonBlockingN = hGetN
1206 #endif
1207
1208 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1209 -- are read on demand, using the default chunk size.
1210 hGetContents :: Handle -> IO ByteString
1211 hGetContents = hGetContentsN defaultChunkSize
1212
1213 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1214 hGet :: Handle -> Int -> IO ByteString
1215 hGet = hGetN defaultChunkSize
1216
1217 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1218 -- waiting for data to become available, instead it returns only whatever data
1219 -- is available.
1220 #if defined(__GLASGOW_HASKELL__)
1221 hGetNonBlocking :: Handle -> Int -> IO ByteString
1222 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1223 #else
1224 hGetNonBlocking = hGet
1225 #endif
1226
1227 -- | Read an entire file /lazily/ into a 'ByteString'.
1228 readFile :: FilePath -> IO ByteString
1229 readFile f = openBinaryFile f ReadMode >>= hGetContents
1230
1231 -- | Write a 'ByteString' to a file.
1232 writeFile :: FilePath -> ByteString -> IO ()
1233 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1234     (\hdl -> hPut hdl txt)
1235
1236 -- | Append a 'ByteString' to a file.
1237 appendFile :: FilePath -> ByteString -> IO ()
1238 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1239     (\hdl -> hPut hdl txt)
1240
1241 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1242 getContents :: IO ByteString
1243 getContents = hGetContents stdin
1244
1245 -- | Outputs a 'ByteString' to the specified 'Handle'.
1246 hPut :: Handle -> ByteString -> IO ()
1247 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1248
1249 -- | Write a ByteString to stdout
1250 putStr :: ByteString -> IO ()
1251 putStr = hPut stdout
1252
1253 -- | Write a ByteString to stdout, appending a newline byte
1254 putStrLn :: ByteString -> IO ()
1255 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1256
1257 -- | The interact function takes a function of type @ByteString -> ByteString@
1258 -- as its argument. The entire input from the standard input device is passed
1259 -- to this function as its argument, and the resulting string is output on the
1260 -- standard output device. It's great for writing one line programs!
1261 interact :: (ByteString -> ByteString) -> IO ()
1262 interact transformer = putStr . transformer =<< getContents
1263
1264 -- ---------------------------------------------------------------------
1265 -- Internal utilities
1266
1267 -- Common up near identical calls to `error' to reduce the number
1268 -- constant strings created when compiled:
1269 errorEmptyList :: String -> a
1270 errorEmptyList fun = moduleError fun "empty ByteString"
1271
1272 moduleError :: String -> String -> a
1273 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1274
1275 -- A manually fused version of "filter (not.null) . map f", since they
1276 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1277 --
1278 -- TODO fuse.
1279 --
1280 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1281 filterMap _ []     = []
1282 filterMap f (x:xs) = case f x of
1283                     y | P.null y  ->     filterMap f xs      -- manually fuse the invariant filter
1284                       | otherwise -> y : filterMap f xs
1285 {-# INLINE filterMap #-}
1286
1287
1288 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1289 -- of the string if no element is found, rather than Nothing.
1290 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1291 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1292   where
1293     STRICT2(go)
1294     go ptr n | n >= l    = return l
1295              | otherwise = do w <- peek ptr
1296                               if k w
1297                                 then return n
1298                                 else go (ptr `plusPtr` 1) (n+1)
1299 {-# INLINE findIndexOrEnd #-}