1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
3 -- Module : ByteString.Lazy
4 -- Copyright : (c) Don Stewart 2006
5 -- (c) Duncan Coutts 2006
8 -- Maintainer : dons@cse.unsw.edu.au
9 -- Stability : experimental
10 -- Portability : portable, requires ffi and cpp
11 -- Tested with : GHC 6.4.1 and Hugs March 2005
15 -- | A time and space-efficient implementation of lazy byte vectors
16 -- using lists of packed 'Word8' arrays, suitable for high performance
17 -- use, both in terms of large data quantities, or high speed
18 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
19 -- arrays of bytes. They provide a means to manipulate large byte vectors
20 -- without requiring the entire vector be resident in memory.
22 -- Some operations, such as concat, append, reverse and cons, have
23 -- better complexity than their "Data.ByteString" equivalents, due to
24 -- optimisations resulting from the list spine structure. And for other
25 -- operations Lazy ByteStrings are usually within a few percent of
26 -- strict ones, but with better heap usage. For data larger than the
27 -- available memory, or if you have tight memory constraints, this
28 -- module will be the only option. The default chunk size is 64k, which
29 -- should be good in most circumstances. For people with large L2
30 -- caches, you may want to increase this to fit your cache.
32 -- This module is intended to be imported @qualified@, to avoid name
33 -- clashes with "Prelude" functions. eg.
35 -- > import qualified Data.ByteString.Lazy as B
37 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
38 -- UArray by Simon Marlow. Rewritten to support slices and use
39 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
40 -- Lazy variant by Duncan Coutts and Don Stewart.
43 module Data.ByteString.Lazy (
45 -- * The @ByteString@ type
46 ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable
48 -- * Introducing and eliminating 'ByteString's
49 empty, -- :: ByteString
50 singleton, -- :: Word8 -> ByteString
51 pack, -- :: [Word8] -> ByteString
52 unpack, -- :: ByteString -> [Word8]
53 fromChunks, -- :: [Strict.ByteString] -> ByteString
54 toChunks, -- :: ByteString -> [Strict.ByteString]
57 cons, -- :: Word8 -> ByteString -> ByteString
58 snoc, -- :: ByteString -> Word8 -> ByteString
59 append, -- :: ByteString -> ByteString -> ByteString
60 head, -- :: ByteString -> Word8
61 last, -- :: ByteString -> Word8
62 tail, -- :: ByteString -> ByteString
63 init, -- :: ByteString -> ByteString
64 null, -- :: ByteString -> Bool
65 length, -- :: ByteString -> Int64
67 -- * Transformating ByteStrings
68 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
69 reverse, -- :: ByteString -> ByteString
70 -- intersperse, -- :: Word8 -> ByteString -> ByteString
71 transpose, -- :: [ByteString] -> [ByteString]
73 -- * Reducing 'ByteString's (folds)
74 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
75 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
77 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
79 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
82 concat, -- :: [ByteString] -> ByteString
83 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
84 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
86 maximum, -- :: ByteString -> Word8
87 minimum, -- :: ByteString -> Word8
89 -- * Building ByteStrings
91 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
92 -- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
93 -- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
94 -- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
96 -- ** Accumulating maps
97 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
98 mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
100 -- ** Infinite ByteStrings
101 repeat, -- :: Word8 -> ByteString
102 replicate, -- :: Int64 -> Word8 -> ByteString
103 cycle, -- :: ByteString -> ByteString
104 iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString
107 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
111 -- ** Breaking strings
112 take, -- :: Int64 -> ByteString -> ByteString
113 drop, -- :: Int64 -> ByteString -> ByteString
114 splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString)
115 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
116 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
117 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
118 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
119 group, -- :: ByteString -> [ByteString]
120 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
121 inits, -- :: ByteString -> [ByteString]
122 tails, -- :: ByteString -> [ByteString]
124 -- ** Breaking into many substrings
125 split, -- :: Word8 -> ByteString -> [ByteString]
126 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
128 -- ** Joining strings
129 join, -- :: ByteString -> [ByteString] -> ByteString
132 isPrefixOf, -- :: ByteString -> ByteString -> Bool
133 -- isSuffixOf, -- :: ByteString -> ByteString -> Bool
135 -- * Searching ByteStrings
137 -- ** Searching by equality
138 elem, -- :: Word8 -> ByteString -> Bool
139 notElem, -- :: Word8 -> ByteString -> Bool
141 -- ** Searching with a predicate
142 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
143 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
144 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
146 -- * Indexing ByteStrings
147 index, -- :: ByteString -> Int64 -> Word8
148 elemIndex, -- :: Word8 -> ByteString -> Maybe Int64
149 elemIndices, -- :: Word8 -> ByteString -> [Int64]
150 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
151 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64]
152 count, -- :: Word8 -> ByteString -> Int64
154 -- * Zipping and unzipping ByteStrings
155 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
156 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
157 -- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
159 -- * Ordered ByteStrings
160 -- sort, -- :: ByteString -> ByteString
162 copy, -- :: ByteString -> ByteString
164 -- * I\/O with 'ByteString's
166 -- ** Standard input and output
167 getContents, -- :: IO ByteString
168 putStr, -- :: ByteString -> IO ()
169 putStrLn, -- :: ByteString -> IO ()
170 interact, -- :: (ByteString -> ByteString) -> IO ()
173 readFile, -- :: FilePath -> IO ByteString
174 writeFile, -- :: FilePath -> ByteString -> IO ()
175 appendFile, -- :: FilePath -> ByteString -> IO ()
177 -- ** I\/O with Handles
178 hGetContents, -- :: Handle -> IO ByteString
179 hGet, -- :: Handle -> Int -> IO ByteString
180 hPut, -- :: Handle -> ByteString -> IO ()
181 hGetNonBlocking, -- :: Handle -> IO ByteString
183 -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
184 -- hGetContentsN, -- :: Int -> Handle -> IO ByteString
185 -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
189 import qualified Prelude
190 import Prelude hiding
191 (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
192 ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
193 ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
194 ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
195 ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
197 import qualified Data.List as L -- L for list/lazy
198 import qualified Data.ByteString as P -- P for packed
199 import qualified Data.ByteString.Base as P
200 import Data.ByteString.Base (LazyByteString(..))
201 import qualified Data.ByteString.Fusion as P
202 import Data.ByteString.Fusion (PairS(..),loopL)
204 import Data.Monoid (Monoid(..))
206 import Data.Word (Word8)
207 import Data.Int (Int64)
208 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..)
209 ,hClose,hWaitForInput,hIsEOF)
210 import System.IO.Unsafe
211 import Control.Exception (bracket)
213 import Foreign.ForeignPtr (withForeignPtr)
215 import Foreign.Storable
217 -- -----------------------------------------------------------------------------
219 -- Useful macros, until we have bang patterns
222 #define STRICT1(f) f a | a `seq` False = undefined
223 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
224 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
225 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
226 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
228 -- -----------------------------------------------------------------------------
230 type ByteString = LazyByteString
233 -- hmm, what about getting the PS constructor unpacked into the cons cell?
235 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
237 -- Would avoid one indirection per chunk.
240 unLPS :: ByteString -> [P.ByteString]
244 instance Eq ByteString
247 instance Ord ByteString
248 where compare = compareBytes
250 instance Monoid ByteString where
255 ------------------------------------------------------------------------
258 -- The data type invariant:
259 -- Every ByteString is either empty or consists of non-null ByteStrings.
260 -- All functions must preserve this, and the QC properties must check this.
262 _invariant :: ByteString -> Bool
263 _invariant (LPS []) = True
264 _invariant (LPS xs) = L.all (not . P.null) xs
266 -- In a form useful for QC testing
267 _checkInvariant :: ByteString -> ByteString
269 | _invariant lps = lps
270 | otherwise = moduleError "invariant" ("violation: " ++ show lps)
272 -- The Data abstraction function
274 _abstr :: ByteString -> P.ByteString
275 _abstr (LPS []) = P.empty
276 _abstr (LPS xs) = P.concat xs
278 -- The representation uses lists of packed chunks. When we have to convert from
279 -- a lazy list to the chunked representation, then by default we'll use this
280 -- chunk size. Some functions give you more control over the chunk size.
282 -- Measurements here:
283 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
285 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
286 -- The following value assumes people have something greater than 128k,
287 -- and need to share the cache with other programs.
289 defaultChunkSize :: Int
290 defaultChunkSize = 32 * k - overhead
292 overhead = 2 * sizeOf (undefined :: Int)
294 smallChunkSize :: Int
295 smallChunkSize = 4 * k - overhead
297 overhead = 2 * sizeOf (undefined :: Int)
299 -- defaultChunkSize = 1
301 ------------------------------------------------------------------------
303 eq :: ByteString -> ByteString -> Bool
304 eq (LPS xs) (LPS ys) = eq' xs ys
305 where eq' [] [] = True
309 case compare (P.length a) (P.length b) of
310 LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
311 EQ -> a == b && eq' as bs
312 GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
314 compareBytes :: ByteString -> ByteString -> Ordering
315 compareBytes (LPS xs) (LPS ys) = cmp xs ys
320 case compare (P.length a) (P.length b) of
321 LT -> case compare a (P.take (P.length a) b) of
322 EQ -> cmp as (P.drop (P.length a) b : bs)
324 EQ -> case compare a b of
327 GT -> case compare (P.take (P.length b) a) b of
328 EQ -> cmp (P.drop (P.length b) a : as) bs
331 -- -----------------------------------------------------------------------------
332 -- Introducing and eliminating 'ByteString's
334 -- | /O(1)/ The empty 'ByteString'
337 {-# NOINLINE empty #-}
339 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
340 singleton :: Word8 -> ByteString
341 singleton c = LPS [P.singleton c]
342 {-# NOINLINE singleton #-}
344 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
345 pack :: [Word8] -> ByteString
346 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
349 chunk :: Int -> [a] -> [[a]]
351 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
353 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
354 unpack :: ByteString -> [Word8]
355 unpack (LPS ss) = L.concatMap P.unpack ss
356 {-# INLINE unpack #-}
358 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
359 fromChunks :: [P.ByteString] -> ByteString
360 fromChunks ls = LPS $ L.filter (not . P.null) ls
362 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
363 toChunks :: ByteString -> [P.ByteString]
366 ------------------------------------------------------------------------
369 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
370 -- conversion function
371 packWith :: (a -> Word8) -> [a] -> ByteString
372 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
373 {-# INLINE packWith #-}
374 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
376 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
377 unpackWith :: (Word8 -> a) -> ByteString -> [a]
378 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
379 {-# INLINE unpackWith #-}
380 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
383 -- ---------------------------------------------------------------------
386 -- | /O(1)/ Test whether a ByteString is empty.
387 null :: ByteString -> Bool
392 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
393 length :: ByteString -> Int64
394 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
396 -- avoid the intermediate list?
397 -- length (LPS ss) = L.foldl lengthF 0 ss
398 -- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
399 {-# INLINE length #-}
401 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
402 -- strict in the ByteString that we are consing onto. More precisely, it forces
403 -- the head and the first chunk. It does this because, for space efficiency, it
404 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
407 -- So that means you can't use a lazy recursive contruction like this:
409 -- > let xs = cons c xs in xs
411 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
413 cons :: Word8 -> ByteString -> ByteString
414 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
415 cons c (LPS ss) = LPS (P.singleton c : ss)
418 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
419 snoc :: ByteString -> Word8 -> ByteString
420 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
423 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
424 head :: ByteString -> Word8
425 head (LPS []) = errorEmptyList "head"
426 head (LPS (x:_)) = P.unsafeHead x
429 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
430 tail :: ByteString -> ByteString
431 tail (LPS []) = errorEmptyList "tail"
433 | P.length x == 1 = LPS xs
434 | otherwise = LPS (P.unsafeTail x : xs)
437 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
438 last :: ByteString -> Word8
439 last (LPS []) = errorEmptyList "last"
440 last (LPS xs) = P.last (L.last xs)
443 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
444 init :: ByteString -> ByteString
445 init (LPS []) = errorEmptyList "init"
447 | P.length y == 1 = LPS ys
448 | otherwise = LPS (ys ++ [P.init y])
449 where (y,ys) = (L.last xs, L.init xs)
452 -- | /O(n)/ Append two ByteStrings
453 append :: ByteString -> ByteString -> ByteString
454 append (LPS []) (LPS ys) = LPS ys
455 append (LPS xs) (LPS []) = LPS xs
456 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
457 {-# INLINE append #-}
459 -- ---------------------------------------------------------------------
462 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
464 map :: (Word8 -> Word8) -> ByteString -> ByteString
465 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
466 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
469 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
470 reverse :: ByteString -> ByteString
471 reverse (LPS ps) = LPS (rev [] ps)
473 rev a (x:xs) = rev (P.reverse x:a) xs
474 -- note, here is one example where the extra element lazyness is an advantage.
475 -- we can reerse the list of chunks strictly but reverse each chunk lazily
476 -- so while we may force the whole lot into memory we do not need to copy
477 -- each chunk until it is used.
478 {-# INLINE reverse #-}
480 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
481 -- \`intersperses\' that byte between the elements of the 'ByteString'.
482 -- It is analogous to the intersperse function on Lists.
483 -- intersperse :: Word8 -> ByteString -> ByteString
484 -- intersperse = error "FIXME: not yet implemented"
487 intersperse c (LPS []) = LPS []
488 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
489 where intersperse' c ps@(PS x s l) =
490 P.create (2*l) $ \p -> withForeignPtr x $ \f ->
492 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
495 -- | The 'transpose' function transposes the rows and columns of its
496 -- 'ByteString' argument.
497 transpose :: [ByteString] -> [ByteString]
498 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
500 -- ---------------------------------------------------------------------
501 -- Reducing 'ByteString's
503 -- | 'foldl', applied to a binary operator, a starting value (typically
504 -- the left-identity of the operator), and a ByteString, reduces the
505 -- ByteString using the binary operator, from left to right.
506 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
507 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
508 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
511 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
512 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
513 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
514 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
515 {-# INLINE foldl' #-}
517 -- | 'foldr', applied to a binary operator, a starting value
518 -- (typically the right-identity of the operator), and a ByteString,
519 -- reduces the ByteString using the binary operator, from right to left.
520 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
521 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
524 -- | 'foldl1' is a variant of 'foldl' that has no starting value
525 -- argument, and thus must be applied to non-empty 'ByteStrings'.
526 -- This function is subject to array fusion.
527 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
528 foldl1 _ (LPS []) = errorEmptyList "foldl1"
529 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
531 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
532 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
533 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
534 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
536 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
537 -- and thus must be applied to non-empty 'ByteString's
538 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
539 foldr1 _ (LPS []) = errorEmptyList "foldr1"
540 foldr1 f (LPS ps) = foldr1' ps
541 where foldr1' (x:[]) = P.foldr1 f x
542 foldr1' (x:xs) = P.foldr f (foldr1' xs) x
544 -- ---------------------------------------------------------------------
547 -- | /O(n)/ Concatenate a list of ByteStrings.
548 concat :: [ByteString] -> ByteString
549 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
551 -- | Map a function over a 'ByteString' and concatenate the results
552 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
553 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
555 k w = case f w of LPS xs -> P.concat xs
557 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
558 -- any element of the 'ByteString' satisfies the predicate.
559 any :: (Word8 -> Bool) -> ByteString -> Bool
560 any f (LPS xs) = L.or (L.map (P.any f) xs)
563 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
564 -- if all elements of the 'ByteString' satisfy the predicate.
565 all :: (Word8 -> Bool) -> ByteString -> Bool
566 all f (LPS xs) = L.and (L.map (P.all f) xs)
569 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
570 maximum :: ByteString -> Word8
571 maximum (LPS []) = errorEmptyList "maximum"
572 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
573 {-# INLINE maximum #-}
575 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
576 minimum :: ByteString -> Word8
577 minimum (LPS []) = errorEmptyList "minimum"
578 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
579 {-# INLINE minimum #-}
581 -- | The 'mapAccumL' function behaves like a combination of 'map' and
582 -- 'foldl'; it applies a function to each element of a ByteString,
583 -- passing an accumulating parameter from left to right, and returning a
584 -- final value of this accumulator together with the new ByteString.
585 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
586 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
588 -- | /O(n)/ map Word8 functions, provided with the index at each position
589 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
590 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
592 -- ---------------------------------------------------------------------
593 -- Building ByteStrings
595 -- | 'scanl' is similar to 'foldl', but returns a list of successive
596 -- reduced values from the left. This function will fuse.
598 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
602 -- > last (scanl f z xs) == foldl f z xs.
603 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
604 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
607 -- ---------------------------------------------------------------------
608 -- Unfolds and replicates
610 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
613 -- > iterate f x == [x, f x, f (f x), ...]
615 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
616 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
618 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
621 repeat :: Word8 -> ByteString
622 repeat c = LPS (L.repeat block)
623 where block = P.replicate smallChunkSize c
625 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
626 -- the value of every element.
628 replicate :: Int64 -> Word8 -> ByteString
631 | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
632 | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant
633 | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
635 s = P.replicate smallChunkSize c
636 (q, r) = quotRem w (fromIntegral smallChunkSize)
638 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
639 -- the infinite repetition of the original ByteString.
641 cycle :: ByteString -> ByteString
642 cycle (LPS []) = errorEmptyList "cycle"
643 cycle (LPS xs) = LPS (L.cycle xs)
645 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
646 -- 'unfoldr' builds a ByteString from a seed value. The function takes
647 -- the element and returns 'Nothing' if it is done producing the
648 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
649 -- prepending to the ByteString and @b@ is used as the next element in a
651 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
652 unfoldr f = LPS . unfoldChunk 32
653 where unfoldChunk n x =
654 case P.unfoldrN n f x of
657 | otherwise -> s : []
658 (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
660 -- ---------------------------------------------------------------------
663 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
664 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
665 take :: Int64 -> ByteString -> ByteString
666 take i _ | i <= 0 = empty
667 take i (LPS ps) = LPS (take' i ps)
671 if n < fromIntegral (P.length x)
672 then P.take (fromIntegral n) x : []
673 else x : take' (n - fromIntegral (P.length x)) xs
675 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
676 -- elements, or @[]@ if @n > 'length' xs@.
677 drop :: Int64 -> ByteString -> ByteString
678 drop i p | i <= 0 = p
679 drop i (LPS ps) = LPS (drop' i ps)
680 where drop' 0 xs = xs
683 if n < fromIntegral (P.length x)
684 then P.drop (fromIntegral n) x : xs
685 else drop' (n - fromIntegral (P.length x)) xs
687 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
688 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
689 splitAt i p | i <= 0 = (empty, p)
690 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
691 where splitAt' 0 xs = ([], xs)
692 splitAt' _ [] = ([], [])
694 if n < fromIntegral (P.length x)
695 then (P.take (fromIntegral n) x : [],
696 P.drop (fromIntegral n) x : xs)
697 else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
701 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
702 -- returns the longest prefix (possibly empty) of @xs@ of elements that
704 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
705 takeWhile f (LPS ps) = LPS (takeWhile' ps)
706 where takeWhile' [] = []
708 case findIndexOrEnd (not . f) x of
710 n | n < P.length x -> P.take n x : []
711 | otherwise -> x : takeWhile' xs
713 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
714 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
715 dropWhile f (LPS ps) = LPS (dropWhile' ps)
716 where dropWhile' [] = []
718 case findIndexOrEnd (not . f) x of
719 n | n < P.length x -> P.drop n x : xs
720 | otherwise -> dropWhile' xs
722 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
723 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
724 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
725 where break' [] = ([], [])
727 case findIndexOrEnd f x of
729 n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
730 | otherwise -> let (xs', xs'') = break' xs
740 -- | 'breakByte' breaks its ByteString argument at the first occurence
741 -- of the specified byte. It is more efficient than 'break' as it is
742 -- implemented with @memchr(3)@. I.e.
744 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
746 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
747 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
748 where breakByte' [] = ([], [])
750 case P.elemIndex c x of
751 Just 0 -> ([], x : xs)
752 Just n -> (P.take n x : [], P.drop n x : xs)
753 Nothing -> let (xs', xs'') = breakByte' xs
756 -- | 'spanByte' breaks its ByteString argument at the first
757 -- occurence of a byte other than its argument. It is more efficient
760 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
762 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
763 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
764 where spanByte' [] = ([], [])
766 case P.spanByte c x of
767 (x', x'') | P.null x' -> ([], x : xs)
768 | P.null x'' -> let (xs', xs'') = spanByte' xs
770 | otherwise -> (x' : [], x'' : xs)
773 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
774 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
775 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
776 span p = break (not . p)
778 -- | /O(n)/ Splits a 'ByteString' into components delimited by
779 -- separators, where the predicate returns True for a separator element.
780 -- The resulting components do not contain the separators. Two adjacent
781 -- separators result in an empty component in the output. eg.
783 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
784 -- > splitWith (=='a') [] == []
786 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
787 splitWith _ (LPS []) = []
788 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
790 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
791 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
792 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
793 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
795 cons' x xs | P.null x = xs
798 {-# INLINE splitWith #-}
800 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
801 -- argument, consuming the delimiter. I.e.
803 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
804 -- > split 'a' "aXaXaXa" == ["","X","X","X",""]
805 -- > split 'x' "x" == ["",""]
809 -- > join [c] . split c == id
810 -- > split == splitWith . (==)
812 -- As for all splitting functions in this library, this function does
813 -- not copy the substrings, it just constructs new 'ByteStrings' that
814 -- are slices of the original.
816 split :: Word8 -> ByteString -> [ByteString]
817 split _ (LPS []) = []
818 split c (LPS (a:as)) = comb [] (P.split c a) as
820 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
821 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
822 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
823 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
825 cons' x xs | P.null x = xs
831 -- | Like 'splitWith', except that sequences of adjacent separators are
832 -- treated as a single separator. eg.
834 -- > tokens (=='a') "aabbaca" == ["bb","c"]
836 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
837 tokens f = L.filter (not.null) . splitWith f
840 -- | The 'group' function takes a ByteString and returns a list of
841 -- ByteStrings such that the concatenation of the result is equal to the
842 -- argument. Moreover, each sublist in the result contains only equal
843 -- elements. For example,
845 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
847 -- It is a special case of 'groupBy', which allows the programmer to
848 -- supply their own equality test.
849 group :: ByteString -> [ByteString]
851 group (LPS (a:as)) = group' [] (P.group a) as
852 where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
853 group' acc@(s':_) ss@(s:_) xs
855 /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs
856 group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : []
857 group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
858 group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs
861 TODO: check if something like this might be faster
863 group :: ByteString -> [ByteString]
866 | otherwise = ys : group zs
868 (ys, zs) = spanByte (unsafeHead xs) xs
871 -- | The 'groupBy' function is the non-overloaded version of 'group'.
873 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
874 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
876 groupBy _ (LPS []) = []
877 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
878 where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
879 groupBy' acc@(_:_) c ss@(s:_) xs
880 | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
881 groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : []
882 groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
883 groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs
884 groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
888 TODO: check if something like this might be faster
890 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
893 | otherwise = take n xs : groupBy k (drop n xs)
895 n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
898 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
899 -- 'ByteString's and concatenates the list after interspersing the first
900 -- argument between each element of the list.
901 join :: ByteString -> [ByteString] -> ByteString
902 join s = concat . (L.intersperse s)
904 -- ---------------------------------------------------------------------
905 -- Indexing ByteStrings
907 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
908 index :: ByteString -> Int64 -> Word8
909 index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i)
910 index (LPS ps) i = index' ps i
911 where index' [] n = moduleError "index" ("index too large: " ++ show n)
913 | n >= fromIntegral (P.length x) =
914 index' xs (n - fromIntegral (P.length x))
915 | otherwise = P.unsafeIndex x (fromIntegral n)
917 -- | /O(n)/ The 'elemIndex' function returns the index of the first
918 -- element in the given 'ByteString' which is equal to the query
919 -- element, or 'Nothing' if there is no such element.
920 -- This implementation uses memchr(3).
921 elemIndex :: Word8 -> ByteString -> Maybe Int64
922 elemIndex c (LPS ps) = elemIndex' 0 ps
923 where elemIndex' _ [] = Nothing
924 elemIndex' n (x:xs) =
925 case P.elemIndex c x of
926 Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
927 Just i -> Just (n + fromIntegral i)
930 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
931 -- element in the given 'ByteString' which is equal to the query
932 -- element, or 'Nothing' if there is no such element. The following
935 -- > elemIndexEnd c xs ==
936 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
938 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
939 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
940 go (p `plusPtr` s) (l-1)
943 go p i | i < 0 = return Nothing
944 | otherwise = do ch' <- peekByteOff p i
949 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
950 -- the indices of all elements equal to the query element, in ascending order.
951 -- This implementation uses memchr(3).
952 elemIndices :: Word8 -> ByteString -> [Int64]
953 elemIndices c (LPS ps) = elemIndices' 0 ps
954 where elemIndices' _ [] = []
955 elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
956 ++ elemIndices' (n + fromIntegral (P.length x)) xs
958 -- | count returns the number of times its argument appears in the ByteString
960 -- > count = length . elemIndices
962 -- But more efficiently than using length on the intermediate list.
963 count :: Word8 -> ByteString -> Int64
964 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
966 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
967 -- returns the index of the first element in the ByteString
968 -- satisfying the predicate.
969 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
970 findIndex k (LPS ps) = findIndex' 0 ps
971 where findIndex' _ [] = Nothing
972 findIndex' n (x:xs) =
973 case P.findIndex k x of
974 Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
975 Just i -> Just (n + fromIntegral i)
976 {-# INLINE findIndex #-}
978 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
979 -- and returns the first element in matching the predicate, or 'Nothing'
980 -- if there is no such element.
982 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
984 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
985 find f (LPS ps) = find' ps
986 where find' [] = Nothing
987 find' (x:xs) = case P.find f x of
992 -- | The 'findIndices' function extends 'findIndex', by returning the
993 -- indices of all elements satisfying the predicate, in ascending order.
994 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
995 findIndices k (LPS ps) = findIndices' 0 ps
996 where findIndices' _ [] = []
997 findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
998 ++ findIndices' (n + fromIntegral (P.length x)) xs
1000 -- ---------------------------------------------------------------------
1001 -- Searching ByteStrings
1003 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1004 elem :: Word8 -> ByteString -> Bool
1005 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1007 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1008 notElem :: Word8 -> ByteString -> Bool
1009 notElem c ps = not (elem c ps)
1011 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1012 -- returns a ByteString containing those characters that satisfy the
1014 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1015 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1016 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1017 {-# INLINE filter #-}
1020 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1021 -- (==)/, for the common case of filtering a single byte. It is more
1022 -- efficient to use /filterByte/ in this case.
1024 -- > filterByte == filter . (==)
1026 -- filterByte is around 10x faster, and uses much less space, than its
1027 -- filter equivalent
1028 filterByte :: Word8 -> ByteString -> ByteString
1029 filterByte w ps = replicate (count w ps) w
1030 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1032 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1033 -- case of filtering a single byte out of a list. It is more efficient
1034 -- to use /filterNotByte/ in this case.
1036 -- > filterNotByte == filter . (/=)
1038 -- filterNotByte is around 2x faster than its filter equivalent.
1039 filterNotByte :: Word8 -> ByteString -> ByteString
1040 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1043 -- ---------------------------------------------------------------------
1044 -- Searching for substrings
1046 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1047 -- iff the first is a prefix of the second.
1048 isPrefixOf :: ByteString -> ByteString -> Bool
1049 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1050 where isPrefixL [] _ = True
1051 isPrefixL _ [] = False
1052 isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys
1053 | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys)
1054 | otherwise = xh == y && isPrefixL (xt:xs) ys
1055 where (xh,xt) = P.splitAt (P.length y) x
1056 (yh,yt) = P.splitAt (P.length x) y
1058 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1059 -- iff the first is a suffix of the second.
1061 -- The following holds:
1063 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1065 -- However, the real implemenation uses memcmp to compare the end of the
1066 -- string only, with no reverse required..
1068 --isSuffixOf :: ByteString -> ByteString -> Bool
1069 --isSuffixOf = error "not yet implemented"
1071 -- ---------------------------------------------------------------------
1074 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1075 -- corresponding pairs of bytes. If one input ByteString is short,
1076 -- excess elements of the longer ByteString are discarded. This is
1077 -- equivalent to a pair of 'unpack' operations.
1078 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1081 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1082 -- the first argument, instead of a tupling function. For example,
1083 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1084 -- corresponding sums.
1085 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1086 zipWith _ (LPS []) (LPS _) = []
1087 zipWith _ (LPS _) (LPS []) = []
1088 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1089 where zipWith' x xs y ys =
1090 (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1092 zipWith'' x [] _ _ | P.null x = []
1093 zipWith'' _ _ y [] | P.null y = []
1094 zipWith'' x xs y ys | not (P.null x)
1095 && not (P.null y) = zipWith' x xs y ys
1096 zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys
1097 zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys
1098 zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys
1100 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1101 -- ByteStrings. Note that this performs two 'pack' operations.
1103 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1104 unzip _ls = error "not yet implemented"
1105 {-# INLINE unzip #-}
1108 -- ---------------------------------------------------------------------
1111 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1112 inits :: ByteString -> [ByteString]
1113 inits = (LPS [] :) . inits' . unLPS
1114 where inits' [] = []
1115 inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1116 ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1118 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1119 tails :: ByteString -> [ByteString]
1120 tails = tails' . unLPS
1121 where tails' [] = LPS [] : []
1123 | P.length x == 1 = LPS xs : tails' xs'
1124 | otherwise = LPS xs : tails' (P.unsafeTail x : xs')
1126 -- ---------------------------------------------------------------------
1127 -- Low level constructors
1129 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1130 -- This is mainly useful to allow the rest of the data pointed
1131 -- to by the 'ByteString' to be garbage collected, for example
1132 -- if a large string has been read in, and only a small part of it
1133 -- is needed in the rest of the program.
1134 copy :: ByteString -> ByteString
1135 copy (LPS lps) = LPS (L.map P.copy lps)
1136 --TODO, we could coalese small blocks here
1137 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1138 -- the parent blocks then we'd better copy strictly.
1140 -- ---------------------------------------------------------------------
1142 -- TODO defrag func that concatenates block together that are below a threshold
1143 -- defrag :: Int -> ByteString -> ByteString
1145 -- ---------------------------------------------------------------------
1146 -- Lazy ByteString IO
1148 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1149 -- are read on demand, in at most @k@-sized chunks. It does not block
1150 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1151 -- available then they will be returned immediately as a smaller chunk.
1152 hGetContentsN :: Int -> Handle -> IO ByteString
1153 hGetContentsN k h = lazyRead >>= return . LPS
1155 lazyRead = unsafeInterleaveIO loop
1158 ps <- P.hGetNonBlocking h k
1159 --TODO: I think this should distinguish EOF from no data available
1160 -- the otherlying POSIX call makes this distincion, returning either
1163 then do eof <- hIsEOF h
1164 if eof then return []
1165 else hWaitForInput h (-1)
1167 else do pss <- lazyRead
1170 -- | Read @n@ bytes into a 'ByteString', directly from the
1171 -- specified 'Handle', in chunks of size @k@.
1172 hGetN :: Int -> Handle -> Int -> IO ByteString
1173 hGetN _ _ 0 = return empty
1174 hGetN k h n = readChunks n >>= return . LPS
1178 ps <- P.hGet h (min k i)
1181 m -> do pss <- readChunks (i - m)
1184 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1185 -- waiting for data to become available, instead it returns only whatever data
1186 -- is available. Chunks are read on demand, in @k@-sized chunks.
1187 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1188 #if defined(__GLASGOW_HASKELL__)
1189 hGetNonBlockingN _ _ 0 = return empty
1190 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1194 ps <- P.hGetNonBlocking h (min k i)
1197 m -> do pss <- readChunks (i - m)
1200 hGetNonBlockingN = hGetN
1203 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1204 -- are read on demand, using the default chunk size.
1205 hGetContents :: Handle -> IO ByteString
1206 hGetContents = hGetContentsN defaultChunkSize
1208 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1209 hGet :: Handle -> Int -> IO ByteString
1210 hGet = hGetN defaultChunkSize
1212 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1213 -- waiting for data to become available, instead it returns only whatever data
1215 #if defined(__GLASGOW_HASKELL__)
1216 hGetNonBlocking :: Handle -> Int -> IO ByteString
1217 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1219 hGetNonBlocking = hGet
1222 -- | Read an entire file /lazily/ into a 'ByteString'.
1223 readFile :: FilePath -> IO ByteString
1224 readFile f = openBinaryFile f ReadMode >>= hGetContents
1226 -- | Write a 'ByteString' to a file.
1227 writeFile :: FilePath -> ByteString -> IO ()
1228 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1229 (\hdl -> hPut hdl txt)
1231 -- | Append a 'ByteString' to a file.
1232 appendFile :: FilePath -> ByteString -> IO ()
1233 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1234 (\hdl -> hPut hdl txt)
1236 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1237 getContents :: IO ByteString
1238 getContents = hGetContents stdin
1240 -- | Outputs a 'ByteString' to the specified 'Handle'.
1241 hPut :: Handle -> ByteString -> IO ()
1242 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1244 -- | Write a ByteString to stdout
1245 putStr :: ByteString -> IO ()
1246 putStr = hPut stdout
1248 -- | Write a ByteString to stdout, appending a newline byte
1249 putStrLn :: ByteString -> IO ()
1250 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1252 -- | The interact function takes a function of type @ByteString -> ByteString@
1253 -- as its argument. The entire input from the standard input device is passed
1254 -- to this function as its argument, and the resulting string is output on the
1255 -- standard output device. It's great for writing one line programs!
1256 interact :: (ByteString -> ByteString) -> IO ()
1257 interact transformer = putStr . transformer =<< getContents
1259 -- ---------------------------------------------------------------------
1260 -- Internal utilities
1262 -- Common up near identical calls to `error' to reduce the number
1263 -- constant strings created when compiled:
1264 errorEmptyList :: String -> a
1265 errorEmptyList fun = moduleError fun "empty ByteString"
1267 moduleError :: String -> String -> a
1268 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1270 -- A manually fused version of "filter (not.null) . map f", since they
1271 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1275 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1277 filterMap f (x:xs) = case f x of
1278 y | P.null y -> filterMap f xs -- manually fuse the invariant filter
1279 | otherwise -> y : filterMap f xs
1280 {-# INLINE filterMap #-}
1283 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1284 -- of the string if no element is found, rather than Nothing.
1285 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1286 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1289 go ptr n | n >= l = return l
1290 | otherwise = do w <- peek ptr
1293 else go (ptr `plusPtr` 1) (n+1)
1294 {-# INLINE findIndexOrEnd #-}