1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
3 -- Module : Data.ByteString.Lazy
4 -- Copyright : (c) Don Stewart 2006
5 -- (c) Duncan Coutts 2006
8 -- Maintainer : dons@cse.unsw.edu.au
9 -- Stability : experimental
10 -- Portability : non-portable (instance of type synonym)
12 -- A time and space-efficient implementation of lazy byte vectors
13 -- using lists of packed 'Word8' arrays, suitable for high performance
14 -- use, both in terms of large data quantities, or high speed
15 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
16 -- arrays of bytes. They provide a means to manipulate large byte vectors
17 -- without requiring the entire vector be resident in memory.
19 -- Some operations, such as concat, append, reverse and cons, have
20 -- better complexity than their "Data.ByteString" equivalents, due to
21 -- optimisations resulting from the list spine structure. And for other
22 -- operations lazy ByteStrings are usually within a few percent of
23 -- strict ones, but with better heap usage. For data larger than the
24 -- available memory, or if you have tight memory constraints, this
25 -- module will be the only option. The default chunk size is 64k, which
26 -- should be good in most circumstances. For people with large L2
27 -- caches, you may want to increase this to fit your cache.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
32 -- > import qualified Data.ByteString.Lazy as B
34 -- Original GHC implementation by Bryan O\'Sullivan.
35 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
36 -- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr'
38 -- Polished and extended by Don Stewart.
39 -- Lazy variant by Duncan Coutts and Don Stewart.
42 module Data.ByteString.Lazy (
44 -- * The @ByteString@ type
45 ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable
47 -- * Introducing and eliminating 'ByteString's
48 empty, -- :: ByteString
49 singleton, -- :: Word8 -> ByteString
50 pack, -- :: [Word8] -> ByteString
51 unpack, -- :: ByteString -> [Word8]
52 fromChunks, -- :: [Strict.ByteString] -> ByteString
53 toChunks, -- :: ByteString -> [Strict.ByteString]
56 cons, -- :: Word8 -> ByteString -> ByteString
57 snoc, -- :: ByteString -> Word8 -> ByteString
58 append, -- :: ByteString -> ByteString -> ByteString
59 head, -- :: ByteString -> Word8
60 last, -- :: ByteString -> Word8
61 tail, -- :: ByteString -> ByteString
62 init, -- :: ByteString -> ByteString
63 null, -- :: ByteString -> Bool
64 length, -- :: ByteString -> Int64
66 -- * Transformating ByteStrings
67 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
68 reverse, -- :: ByteString -> ByteString
69 -- intersperse, -- :: Word8 -> ByteString -> ByteString
70 transpose, -- :: [ByteString] -> [ByteString]
72 -- * Reducing 'ByteString's (folds)
73 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
74 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
75 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
81 concat, -- :: [ByteString] -> ByteString
82 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
83 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 maximum, -- :: ByteString -> Word8
86 minimum, -- :: ByteString -> Word8
88 -- * Building ByteStrings
90 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 -- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
92 -- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
93 -- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
95 -- ** Accumulating maps
96 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
97 mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
99 -- ** Infinite ByteStrings
100 repeat, -- :: Word8 -> ByteString
101 replicate, -- :: Int64 -> Word8 -> ByteString
102 cycle, -- :: ByteString -> ByteString
103 iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString
106 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
110 -- ** Breaking strings
111 take, -- :: Int64 -> ByteString -> ByteString
112 drop, -- :: Int64 -> ByteString -> ByteString
113 splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString)
114 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
115 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
116 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
117 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
118 group, -- :: ByteString -> [ByteString]
119 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
120 inits, -- :: ByteString -> [ByteString]
121 tails, -- :: ByteString -> [ByteString]
123 -- ** Breaking into many substrings
124 split, -- :: Word8 -> ByteString -> [ByteString]
125 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
127 -- ** Joining strings
128 join, -- :: ByteString -> [ByteString] -> ByteString
131 isPrefixOf, -- :: ByteString -> ByteString -> Bool
132 -- isSuffixOf, -- :: ByteString -> ByteString -> Bool
134 -- * Searching ByteStrings
136 -- ** Searching by equality
137 elem, -- :: Word8 -> ByteString -> Bool
138 notElem, -- :: Word8 -> ByteString -> Bool
140 -- ** Searching with a predicate
141 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
142 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
143 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
145 -- * Indexing ByteStrings
146 index, -- :: ByteString -> Int64 -> Word8
147 elemIndex, -- :: Word8 -> ByteString -> Maybe Int64
148 elemIndices, -- :: Word8 -> ByteString -> [Int64]
149 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
150 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64]
151 count, -- :: Word8 -> ByteString -> Int64
153 -- * Zipping and unzipping ByteStrings
154 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
155 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
156 -- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
158 -- * Ordered ByteStrings
159 -- sort, -- :: ByteString -> ByteString
161 copy, -- :: ByteString -> ByteString
163 -- * I\/O with 'ByteString's
165 -- ** Standard input and output
166 getContents, -- :: IO ByteString
167 putStr, -- :: ByteString -> IO ()
168 putStrLn, -- :: ByteString -> IO ()
169 interact, -- :: (ByteString -> ByteString) -> IO ()
172 readFile, -- :: FilePath -> IO ByteString
173 writeFile, -- :: FilePath -> ByteString -> IO ()
174 appendFile, -- :: FilePath -> ByteString -> IO ()
176 -- ** I\/O with Handles
177 hGetContents, -- :: Handle -> IO ByteString
178 hGet, -- :: Handle -> Int -> IO ByteString
179 hPut, -- :: Handle -> ByteString -> IO ()
180 hGetNonBlocking, -- :: Handle -> IO ByteString
182 -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
183 -- hGetContentsN, -- :: Int -> Handle -> IO ByteString
184 -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
188 import qualified Prelude
189 import Prelude hiding
190 (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
191 ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
192 ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
193 ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
194 ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
196 import qualified Data.List as L -- L for list/lazy
197 import qualified Data.ByteString as P -- P for packed
198 import qualified Data.ByteString.Base as P
199 import Data.ByteString.Base (LazyByteString(..))
200 import qualified Data.ByteString.Fusion as P
201 import Data.ByteString.Fusion (PairS(..),loopL)
203 import Data.Monoid (Monoid(..))
205 import Data.Word (Word8)
206 import Data.Int (Int64)
207 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..)
208 ,hClose,hWaitForInput,hIsEOF)
209 import System.IO.Unsafe
210 import Control.Exception (bracket)
212 import Foreign.ForeignPtr (withForeignPtr)
214 import Foreign.Storable
216 -- -----------------------------------------------------------------------------
218 -- Useful macros, until we have bang patterns
221 #define STRICT1(f) f a | a `seq` False = undefined
222 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
223 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
224 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
225 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
227 -- -----------------------------------------------------------------------------
229 type ByteString = LazyByteString
232 -- hmm, what about getting the PS constructor unpacked into the cons cell?
234 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
236 -- Would avoid one indirection per chunk.
239 unLPS :: ByteString -> [P.ByteString]
243 instance Eq ByteString
246 instance Ord ByteString
247 where compare = compareBytes
249 instance Monoid ByteString where
254 ------------------------------------------------------------------------
257 -- The data type invariant:
258 -- Every ByteString is either empty or consists of non-null ByteStrings.
259 -- All functions must preserve this, and the QC properties must check this.
261 _invariant :: ByteString -> Bool
262 _invariant (LPS []) = True
263 _invariant (LPS xs) = L.all (not . P.null) xs
265 -- In a form useful for QC testing
266 _checkInvariant :: ByteString -> ByteString
268 | _invariant lps = lps
269 | otherwise = moduleError "invariant" ("violation: " ++ show lps)
271 -- The Data abstraction function
273 _abstr :: ByteString -> P.ByteString
274 _abstr (LPS []) = P.empty
275 _abstr (LPS xs) = P.concat xs
277 -- The representation uses lists of packed chunks. When we have to convert from
278 -- a lazy list to the chunked representation, then by default we'll use this
279 -- chunk size. Some functions give you more control over the chunk size.
281 -- Measurements here:
282 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
284 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
285 -- The following value assumes people have something greater than 128k,
286 -- and need to share the cache with other programs.
288 defaultChunkSize :: Int
289 defaultChunkSize = 32 * k - overhead
291 overhead = 2 * sizeOf (undefined :: Int)
293 smallChunkSize :: Int
294 smallChunkSize = 4 * k - overhead
296 overhead = 2 * sizeOf (undefined :: Int)
298 -- defaultChunkSize = 1
300 ------------------------------------------------------------------------
302 eq :: ByteString -> ByteString -> Bool
303 eq (LPS xs) (LPS ys) = eq' xs ys
304 where eq' [] [] = True
308 case compare (P.length a) (P.length b) of
309 LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
310 EQ -> a == b && eq' as bs
311 GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
313 compareBytes :: ByteString -> ByteString -> Ordering
314 compareBytes (LPS xs) (LPS ys) = cmp xs ys
319 case compare (P.length a) (P.length b) of
320 LT -> case compare a (P.take (P.length a) b) of
321 EQ -> cmp as (P.drop (P.length a) b : bs)
323 EQ -> case compare a b of
326 GT -> case compare (P.take (P.length b) a) b of
327 EQ -> cmp (P.drop (P.length b) a : as) bs
330 -- -----------------------------------------------------------------------------
331 -- Introducing and eliminating 'ByteString's
333 -- | /O(1)/ The empty 'ByteString'
336 {-# NOINLINE empty #-}
338 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
339 singleton :: Word8 -> ByteString
340 singleton c = LPS [P.singleton c]
341 {-# NOINLINE singleton #-}
343 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
344 pack :: [Word8] -> ByteString
345 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
348 chunk :: Int -> [a] -> [[a]]
350 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
352 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
353 unpack :: ByteString -> [Word8]
354 unpack (LPS ss) = L.concatMap P.unpack ss
355 {-# INLINE unpack #-}
357 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
358 fromChunks :: [P.ByteString] -> ByteString
359 fromChunks ls = LPS $ L.filter (not . P.null) ls
361 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
362 toChunks :: ByteString -> [P.ByteString]
365 ------------------------------------------------------------------------
368 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
369 -- conversion function
370 packWith :: (a -> Word8) -> [a] -> ByteString
371 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
372 {-# INLINE packWith #-}
373 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
375 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
376 unpackWith :: (Word8 -> a) -> ByteString -> [a]
377 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
378 {-# INLINE unpackWith #-}
379 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
382 -- ---------------------------------------------------------------------
385 -- | /O(1)/ Test whether a ByteString is empty.
386 null :: ByteString -> Bool
391 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
392 length :: ByteString -> Int64
393 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
395 -- avoid the intermediate list?
396 -- length (LPS ss) = L.foldl lengthF 0 ss
397 -- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
398 {-# INLINE length #-}
400 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
401 -- strict in the ByteString that we are consing onto. More precisely, it forces
402 -- the head and the first chunk. It does this because, for space efficiency, it
403 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
406 -- So that means you can't use a lazy recursive contruction like this:
408 -- > let xs = cons c xs in xs
410 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
412 cons :: Word8 -> ByteString -> ByteString
413 cons c (LPS (s:ss)) | P.length s < 16 = LPS (P.cons c s : ss)
414 cons c (LPS ss) = LPS (P.singleton c : ss)
417 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
418 snoc :: ByteString -> Word8 -> ByteString
419 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
422 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
423 head :: ByteString -> Word8
424 head (LPS []) = errorEmptyList "head"
425 head (LPS (x:_)) = P.unsafeHead x
428 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
429 tail :: ByteString -> ByteString
430 tail (LPS []) = errorEmptyList "tail"
432 | P.length x == 1 = LPS xs
433 | otherwise = LPS (P.unsafeTail x : xs)
436 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
437 last :: ByteString -> Word8
438 last (LPS []) = errorEmptyList "last"
439 last (LPS xs) = P.last (L.last xs)
442 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
443 init :: ByteString -> ByteString
444 init (LPS []) = errorEmptyList "init"
446 | P.length y == 1 = LPS ys
447 | otherwise = LPS (ys ++ [P.init y])
448 where (y,ys) = (L.last xs, L.init xs)
451 -- | /O(n)/ Append two ByteStrings
452 append :: ByteString -> ByteString -> ByteString
453 append (LPS []) (LPS ys) = LPS ys
454 append (LPS xs) (LPS []) = LPS xs
455 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
456 {-# INLINE append #-}
458 -- ---------------------------------------------------------------------
461 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
463 map :: (Word8 -> Word8) -> ByteString -> ByteString
464 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
465 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
468 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
469 reverse :: ByteString -> ByteString
470 reverse (LPS ps) = LPS (rev [] ps)
472 rev a (x:xs) = rev (P.reverse x:a) xs
473 -- note, here is one example where the extra element lazyness is an advantage.
474 -- we can reerse the list of chunks strictly but reverse each chunk lazily
475 -- so while we may force the whole lot into memory we do not need to copy
476 -- each chunk until it is used.
477 {-# INLINE reverse #-}
479 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
480 -- \`intersperses\' that byte between the elements of the 'ByteString'.
481 -- It is analogous to the intersperse function on Lists.
482 -- intersperse :: Word8 -> ByteString -> ByteString
483 -- intersperse = error "FIXME: not yet implemented"
486 intersperse c (LPS []) = LPS []
487 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
488 where intersperse' c ps@(PS x s l) =
489 P.create (2*l) $ \p -> withForeignPtr x $ \f ->
491 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
494 -- | The 'transpose' function transposes the rows and columns of its
495 -- 'ByteString' argument.
496 transpose :: [ByteString] -> [ByteString]
497 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
499 -- ---------------------------------------------------------------------
500 -- Reducing 'ByteString's
502 -- | 'foldl', applied to a binary operator, a starting value (typically
503 -- the left-identity of the operator), and a ByteString, reduces the
504 -- ByteString using the binary operator, from left to right.
505 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
506 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
507 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
510 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
511 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
512 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
513 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
514 {-# INLINE foldl' #-}
516 -- | 'foldr', applied to a binary operator, a starting value
517 -- (typically the right-identity of the operator), and a ByteString,
518 -- reduces the ByteString using the binary operator, from right to left.
519 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
520 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
523 -- | 'foldl1' is a variant of 'foldl' that has no starting value
524 -- argument, and thus must be applied to non-empty 'ByteStrings'.
525 -- This function is subject to array fusion.
526 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
527 foldl1 _ (LPS []) = errorEmptyList "foldl1"
528 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
530 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
531 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
532 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
533 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
535 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
536 -- and thus must be applied to non-empty 'ByteString's
537 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
538 foldr1 _ (LPS []) = errorEmptyList "foldr1"
539 foldr1 f (LPS ps) = foldr1' ps
540 where foldr1' (x:[]) = P.foldr1 f x
541 foldr1' (x:xs) = P.foldr f (foldr1' xs) x
543 -- ---------------------------------------------------------------------
546 -- | /O(n)/ Concatenate a list of ByteStrings.
547 concat :: [ByteString] -> ByteString
548 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
550 -- | Map a function over a 'ByteString' and concatenate the results
551 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
552 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
554 k w = case f w of LPS xs -> P.concat xs
556 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
557 -- any element of the 'ByteString' satisfies the predicate.
558 any :: (Word8 -> Bool) -> ByteString -> Bool
559 any f (LPS xs) = L.or (L.map (P.any f) xs)
562 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
563 -- if all elements of the 'ByteString' satisfy the predicate.
564 all :: (Word8 -> Bool) -> ByteString -> Bool
565 all f (LPS xs) = L.and (L.map (P.all f) xs)
568 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
569 maximum :: ByteString -> Word8
570 maximum (LPS []) = errorEmptyList "maximum"
571 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
572 {-# INLINE maximum #-}
574 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
575 minimum :: ByteString -> Word8
576 minimum (LPS []) = errorEmptyList "minimum"
577 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
578 {-# INLINE minimum #-}
580 -- | The 'mapAccumL' function behaves like a combination of 'map' and
581 -- 'foldl'; it applies a function to each element of a ByteString,
582 -- passing an accumulating parameter from left to right, and returning a
583 -- final value of this accumulator together with the new ByteString.
584 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
585 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
587 -- | /O(n)/ map Word8 functions, provided with the index at each position
588 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
589 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
591 -- ---------------------------------------------------------------------
592 -- Building ByteStrings
594 -- | 'scanl' is similar to 'foldl', but returns a list of successive
595 -- reduced values from the left. This function will fuse.
597 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
601 -- > last (scanl f z xs) == foldl f z xs.
602 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
603 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
606 -- ---------------------------------------------------------------------
607 -- Unfolds and replicates
609 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
612 -- > iterate f x == [x, f x, f (f x), ...]
614 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
615 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
617 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
620 repeat :: Word8 -> ByteString
621 repeat c = LPS (L.repeat block)
622 where block = P.replicate smallChunkSize c
624 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
625 -- the value of every element.
627 replicate :: Int64 -> Word8 -> ByteString
630 | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
631 | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant
632 | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
634 s = P.replicate smallChunkSize c
635 (q, r) = quotRem w (fromIntegral smallChunkSize)
637 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
638 -- the infinite repetition of the original ByteString.
640 cycle :: ByteString -> ByteString
641 cycle (LPS []) = errorEmptyList "cycle"
642 cycle (LPS xs) = LPS (L.cycle xs)
644 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
645 -- 'unfoldr' builds a ByteString from a seed value. The function takes
646 -- the element and returns 'Nothing' if it is done producing the
647 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
648 -- prepending to the ByteString and @b@ is used as the next element in a
650 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
651 unfoldr f = LPS . unfoldChunk 32
652 where unfoldChunk n x =
653 case P.unfoldrN n f x of
656 | otherwise -> s : []
657 (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
659 -- ---------------------------------------------------------------------
662 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
663 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
664 take :: Int64 -> ByteString -> ByteString
665 take i _ | i <= 0 = empty
666 take i (LPS ps) = LPS (take' i ps)
670 if n < fromIntegral (P.length x)
671 then P.take (fromIntegral n) x : []
672 else x : take' (n - fromIntegral (P.length x)) xs
674 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
675 -- elements, or @[]@ if @n > 'length' xs@.
676 drop :: Int64 -> ByteString -> ByteString
677 drop i p | i <= 0 = p
678 drop i (LPS ps) = LPS (drop' i ps)
679 where drop' 0 xs = xs
682 if n < fromIntegral (P.length x)
683 then P.drop (fromIntegral n) x : xs
684 else drop' (n - fromIntegral (P.length x)) xs
686 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
687 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
688 splitAt i p | i <= 0 = (empty, p)
689 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
690 where splitAt' 0 xs = ([], xs)
691 splitAt' _ [] = ([], [])
693 if n < fromIntegral (P.length x)
694 then (P.take (fromIntegral n) x : [],
695 P.drop (fromIntegral n) x : xs)
696 else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
700 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
701 -- returns the longest prefix (possibly empty) of @xs@ of elements that
703 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
704 takeWhile f (LPS ps) = LPS (takeWhile' ps)
705 where takeWhile' [] = []
707 case findIndexOrEnd (not . f) x of
709 n | n < P.length x -> P.take n x : []
710 | otherwise -> x : takeWhile' xs
712 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
713 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
714 dropWhile f (LPS ps) = LPS (dropWhile' ps)
715 where dropWhile' [] = []
717 case findIndexOrEnd (not . f) x of
718 n | n < P.length x -> P.drop n x : xs
719 | otherwise -> dropWhile' xs
721 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
722 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
723 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
724 where break' [] = ([], [])
726 case findIndexOrEnd f x of
728 n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
729 | otherwise -> let (xs', xs'') = break' xs
739 -- | 'breakByte' breaks its ByteString argument at the first occurence
740 -- of the specified byte. It is more efficient than 'break' as it is
741 -- implemented with @memchr(3)@. I.e.
743 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
745 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
746 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
747 where breakByte' [] = ([], [])
749 case P.elemIndex c x of
750 Just 0 -> ([], x : xs)
751 Just n -> (P.take n x : [], P.drop n x : xs)
752 Nothing -> let (xs', xs'') = breakByte' xs
755 -- | 'spanByte' breaks its ByteString argument at the first
756 -- occurence of a byte other than its argument. It is more efficient
759 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
761 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
762 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
763 where spanByte' [] = ([], [])
765 case P.spanByte c x of
766 (x', x'') | P.null x' -> ([], x : xs)
767 | P.null x'' -> let (xs', xs'') = spanByte' xs
769 | otherwise -> (x' : [], x'' : xs)
772 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
773 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
774 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
775 span p = break (not . p)
777 -- | /O(n)/ Splits a 'ByteString' into components delimited by
778 -- separators, where the predicate returns True for a separator element.
779 -- The resulting components do not contain the separators. Two adjacent
780 -- separators result in an empty component in the output. eg.
782 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
783 -- > splitWith (=='a') [] == []
785 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
786 splitWith _ (LPS []) = []
787 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
789 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
790 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
791 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
792 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
794 cons' x xs | P.null x = xs
797 {-# INLINE splitWith #-}
799 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
800 -- argument, consuming the delimiter. I.e.
802 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
803 -- > split 'a' "aXaXaXa" == ["","X","X","X",""]
804 -- > split 'x' "x" == ["",""]
808 -- > join [c] . split c == id
809 -- > split == splitWith . (==)
811 -- As for all splitting functions in this library, this function does
812 -- not copy the substrings, it just constructs new 'ByteStrings' that
813 -- are slices of the original.
815 split :: Word8 -> ByteString -> [ByteString]
816 split _ (LPS []) = []
817 split c (LPS (a:as)) = comb [] (P.split c a) as
819 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
820 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
821 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
822 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
824 cons' x xs | P.null x = xs
830 -- | Like 'splitWith', except that sequences of adjacent separators are
831 -- treated as a single separator. eg.
833 -- > tokens (=='a') "aabbaca" == ["bb","c"]
835 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
836 tokens f = L.filter (not.null) . splitWith f
839 -- | The 'group' function takes a ByteString and returns a list of
840 -- ByteStrings such that the concatenation of the result is equal to the
841 -- argument. Moreover, each sublist in the result contains only equal
842 -- elements. For example,
844 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
846 -- It is a special case of 'groupBy', which allows the programmer to
847 -- supply their own equality test.
848 group :: ByteString -> [ByteString]
850 group (LPS (a:as)) = group' [] (P.group a) as
851 where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
852 group' acc@(s':_) ss@(s:_) xs
854 /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs
855 group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : []
856 group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
857 group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs
860 TODO: check if something like this might be faster
862 group :: ByteString -> [ByteString]
865 | otherwise = ys : group zs
867 (ys, zs) = spanByte (unsafeHead xs) xs
870 -- | The 'groupBy' function is the non-overloaded version of 'group'.
872 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
873 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
875 groupBy _ (LPS []) = []
876 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
877 where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
878 groupBy' acc@(_:_) c ss@(s:_) xs
879 | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
880 groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : []
881 groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
882 groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs
883 groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
887 TODO: check if something like this might be faster
889 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
892 | otherwise = take n xs : groupBy k (drop n xs)
894 n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
897 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
898 -- 'ByteString's and concatenates the list after interspersing the first
899 -- argument between each element of the list.
900 join :: ByteString -> [ByteString] -> ByteString
901 join s = concat . (L.intersperse s)
903 -- ---------------------------------------------------------------------
904 -- Indexing ByteStrings
906 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
907 index :: ByteString -> Int64 -> Word8
908 index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i)
909 index (LPS ps) i = index' ps i
910 where index' [] n = moduleError "index" ("index too large: " ++ show n)
912 | n >= fromIntegral (P.length x) =
913 index' xs (n - fromIntegral (P.length x))
914 | otherwise = P.unsafeIndex x (fromIntegral n)
916 -- | /O(n)/ The 'elemIndex' function returns the index of the first
917 -- element in the given 'ByteString' which is equal to the query
918 -- element, or 'Nothing' if there is no such element.
919 -- This implementation uses memchr(3).
920 elemIndex :: Word8 -> ByteString -> Maybe Int64
921 elemIndex c (LPS ps) = elemIndex' 0 ps
922 where elemIndex' _ [] = Nothing
923 elemIndex' n (x:xs) =
924 case P.elemIndex c x of
925 Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
926 Just i -> Just (n + fromIntegral i)
929 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
930 -- element in the given 'ByteString' which is equal to the query
931 -- element, or 'Nothing' if there is no such element. The following
934 -- > elemIndexEnd c xs ==
935 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
937 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
938 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
939 go (p `plusPtr` s) (l-1)
942 go p i | i < 0 = return Nothing
943 | otherwise = do ch' <- peekByteOff p i
948 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
949 -- the indices of all elements equal to the query element, in ascending order.
950 -- This implementation uses memchr(3).
951 elemIndices :: Word8 -> ByteString -> [Int64]
952 elemIndices c (LPS ps) = elemIndices' 0 ps
953 where elemIndices' _ [] = []
954 elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
955 ++ elemIndices' (n + fromIntegral (P.length x)) xs
957 -- | count returns the number of times its argument appears in the ByteString
959 -- > count = length . elemIndices
961 -- But more efficiently than using length on the intermediate list.
962 count :: Word8 -> ByteString -> Int64
963 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
965 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
966 -- returns the index of the first element in the ByteString
967 -- satisfying the predicate.
968 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
969 findIndex k (LPS ps) = findIndex' 0 ps
970 where findIndex' _ [] = Nothing
971 findIndex' n (x:xs) =
972 case P.findIndex k x of
973 Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
974 Just i -> Just (n + fromIntegral i)
975 {-# INLINE findIndex #-}
977 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
978 -- and returns the first element in matching the predicate, or 'Nothing'
979 -- if there is no such element.
981 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
983 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
984 find f (LPS ps) = find' ps
985 where find' [] = Nothing
986 find' (x:xs) = case P.find f x of
991 -- | The 'findIndices' function extends 'findIndex', by returning the
992 -- indices of all elements satisfying the predicate, in ascending order.
993 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
994 findIndices k (LPS ps) = findIndices' 0 ps
995 where findIndices' _ [] = []
996 findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
997 ++ findIndices' (n + fromIntegral (P.length x)) xs
999 -- ---------------------------------------------------------------------
1000 -- Searching ByteStrings
1002 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1003 elem :: Word8 -> ByteString -> Bool
1004 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1006 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1007 notElem :: Word8 -> ByteString -> Bool
1008 notElem c ps = not (elem c ps)
1010 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1011 -- returns a ByteString containing those characters that satisfy the
1013 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1014 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1015 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1016 {-# INLINE filter #-}
1019 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1020 -- (==)/, for the common case of filtering a single byte. It is more
1021 -- efficient to use /filterByte/ in this case.
1023 -- > filterByte == filter . (==)
1025 -- filterByte is around 10x faster, and uses much less space, than its
1026 -- filter equivalent
1027 filterByte :: Word8 -> ByteString -> ByteString
1028 filterByte w ps = replicate (count w ps) w
1029 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1031 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1032 -- case of filtering a single byte out of a list. It is more efficient
1033 -- to use /filterNotByte/ in this case.
1035 -- > filterNotByte == filter . (/=)
1037 -- filterNotByte is around 2x faster than its filter equivalent.
1038 filterNotByte :: Word8 -> ByteString -> ByteString
1039 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1042 -- ---------------------------------------------------------------------
1043 -- Searching for substrings
1045 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1046 -- iff the first is a prefix of the second.
1047 isPrefixOf :: ByteString -> ByteString -> Bool
1048 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1049 where isPrefixL [] _ = True
1050 isPrefixL _ [] = False
1051 isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys
1052 | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys)
1053 | otherwise = xh == y && isPrefixL (xt:xs) ys
1054 where (xh,xt) = P.splitAt (P.length y) x
1055 (yh,yt) = P.splitAt (P.length x) y
1057 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1058 -- iff the first is a suffix of the second.
1060 -- The following holds:
1062 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1064 -- However, the real implemenation uses memcmp to compare the end of the
1065 -- string only, with no reverse required..
1067 --isSuffixOf :: ByteString -> ByteString -> Bool
1068 --isSuffixOf = error "not yet implemented"
1070 -- ---------------------------------------------------------------------
1073 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1074 -- corresponding pairs of bytes. If one input ByteString is short,
1075 -- excess elements of the longer ByteString are discarded. This is
1076 -- equivalent to a pair of 'unpack' operations.
1077 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1080 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1081 -- the first argument, instead of a tupling function. For example,
1082 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1083 -- corresponding sums.
1084 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1085 zipWith _ (LPS []) (LPS _) = []
1086 zipWith _ (LPS _) (LPS []) = []
1087 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1088 where zipWith' x xs y ys =
1089 (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1091 zipWith'' x [] _ _ | P.null x = []
1092 zipWith'' _ _ y [] | P.null y = []
1093 zipWith'' x xs y ys | not (P.null x)
1094 && not (P.null y) = zipWith' x xs y ys
1095 zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys
1096 zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys
1097 zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys
1099 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1100 -- ByteStrings. Note that this performs two 'pack' operations.
1102 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1103 unzip _ls = error "not yet implemented"
1104 {-# INLINE unzip #-}
1107 -- ---------------------------------------------------------------------
1110 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1111 inits :: ByteString -> [ByteString]
1112 inits = (LPS [] :) . inits' . unLPS
1113 where inits' [] = []
1114 inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1115 ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1117 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1118 tails :: ByteString -> [ByteString]
1119 tails = tails' . unLPS
1120 where tails' [] = LPS [] : []
1122 | P.length x == 1 = LPS xs : tails' xs'
1123 | otherwise = LPS xs : tails' (P.unsafeTail x : xs')
1125 -- ---------------------------------------------------------------------
1126 -- Low level constructors
1128 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1129 -- This is mainly useful to allow the rest of the data pointed
1130 -- to by the 'ByteString' to be garbage collected, for example
1131 -- if a large string has been read in, and only a small part of it
1132 -- is needed in the rest of the program.
1133 copy :: ByteString -> ByteString
1134 copy (LPS lps) = LPS (L.map P.copy lps)
1135 --TODO, we could coalese small blocks here
1136 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1137 -- the parent blocks then we'd better copy strictly.
1139 -- ---------------------------------------------------------------------
1141 -- TODO defrag func that concatenates block together that are below a threshold
1142 -- defrag :: Int -> ByteString -> ByteString
1144 -- ---------------------------------------------------------------------
1145 -- Lazy ByteString IO
1147 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1148 -- are read on demand, in at most @k@-sized chunks. It does not block
1149 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1150 -- available then they will be returned immediately as a smaller chunk.
1151 hGetContentsN :: Int -> Handle -> IO ByteString
1152 hGetContentsN k h = lazyRead >>= return . LPS
1154 lazyRead = unsafeInterleaveIO loop
1157 ps <- P.hGetNonBlocking h k
1158 --TODO: I think this should distinguish EOF from no data available
1159 -- the otherlying POSIX call makes this distincion, returning either
1162 then do eof <- hIsEOF h
1163 if eof then return []
1164 else hWaitForInput h (-1)
1166 else do pss <- lazyRead
1169 -- | Read @n@ bytes into a 'ByteString', directly from the
1170 -- specified 'Handle', in chunks of size @k@.
1171 hGetN :: Int -> Handle -> Int -> IO ByteString
1172 hGetN _ _ 0 = return empty
1173 hGetN k h n = readChunks n >>= return . LPS
1177 ps <- P.hGet h (min k i)
1180 m -> do pss <- readChunks (i - m)
1183 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1184 -- waiting for data to become available, instead it returns only whatever data
1185 -- is available. Chunks are read on demand, in @k@-sized chunks.
1186 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1187 #if defined(__GLASGOW_HASKELL__)
1188 hGetNonBlockingN _ _ 0 = return empty
1189 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1193 ps <- P.hGetNonBlocking h (min k i)
1196 m -> do pss <- readChunks (i - m)
1199 hGetNonBlockingN = hGetN
1202 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1203 -- are read on demand, using the default chunk size.
1204 hGetContents :: Handle -> IO ByteString
1205 hGetContents = hGetContentsN defaultChunkSize
1207 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1208 hGet :: Handle -> Int -> IO ByteString
1209 hGet = hGetN defaultChunkSize
1211 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1212 -- waiting for data to become available, instead it returns only whatever data
1214 #if defined(__GLASGOW_HASKELL__)
1215 hGetNonBlocking :: Handle -> Int -> IO ByteString
1216 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1218 hGetNonBlocking = hGet
1221 -- | Read an entire file /lazily/ into a 'ByteString'.
1222 readFile :: FilePath -> IO ByteString
1223 readFile f = openBinaryFile f ReadMode >>= hGetContents
1225 -- | Write a 'ByteString' to a file.
1226 writeFile :: FilePath -> ByteString -> IO ()
1227 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1228 (\hdl -> hPut hdl txt)
1230 -- | Append a 'ByteString' to a file.
1231 appendFile :: FilePath -> ByteString -> IO ()
1232 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1233 (\hdl -> hPut hdl txt)
1235 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1236 getContents :: IO ByteString
1237 getContents = hGetContents stdin
1239 -- | Outputs a 'ByteString' to the specified 'Handle'.
1240 hPut :: Handle -> ByteString -> IO ()
1241 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1243 -- | Write a ByteString to stdout
1244 putStr :: ByteString -> IO ()
1245 putStr = hPut stdout
1247 -- | Write a ByteString to stdout, appending a newline byte
1248 putStrLn :: ByteString -> IO ()
1249 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1251 -- | The interact function takes a function of type @ByteString -> ByteString@
1252 -- as its argument. The entire input from the standard input device is passed
1253 -- to this function as its argument, and the resulting string is output on the
1254 -- standard output device. It's great for writing one line programs!
1255 interact :: (ByteString -> ByteString) -> IO ()
1256 interact transformer = putStr . transformer =<< getContents
1258 -- ---------------------------------------------------------------------
1259 -- Internal utilities
1261 -- Common up near identical calls to `error' to reduce the number
1262 -- constant strings created when compiled:
1263 errorEmptyList :: String -> a
1264 errorEmptyList fun = moduleError fun "empty ByteString"
1266 moduleError :: String -> String -> a
1267 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1269 -- A manually fused version of "filter (not.null) . map f", since they
1270 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1274 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1276 filterMap f (x:xs) = case f x of
1277 y | P.null y -> filterMap f xs -- manually fuse the invariant filter
1278 | otherwise -> y : filterMap f xs
1279 {-# INLINE filterMap #-}
1282 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1283 -- of the string if no element is found, rather than Nothing.
1284 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1285 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1288 go ptr n | n >= l = return l
1289 | otherwise = do w <- peek ptr
1292 else go (ptr `plusPtr` 1) (n+1)
1293 {-# INLINE findIndexOrEnd #-}