1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
3 -- Module : ByteString.Lazy
4 -- Copyright : (c) Don Stewart 2006
5 -- (c) Duncan Coutts 2006
8 -- Maintainer : dons@cse.unsw.edu.au
9 -- Stability : experimental
10 -- Portability : portable, requires ffi and cpp
11 -- Tested with : GHC 6.4.1 and Hugs March 2005
15 -- | A time and space-efficient implementation of lazy byte vectors
16 -- using lists of packed 'Word8' arrays, suitable for high performance
17 -- use, both in terms of large data quantities, or high speed
18 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
19 -- arrays of bytes. They provide a means to manipulate large byte vectors
20 -- without requiring the entire vector be resident in memory.
22 -- Some operations, such as concat, append, reverse and cons, have
23 -- better complexity than their "Data.ByteString" equivalents, as due to
24 -- optimisations resulting from the list spine structure. And for other
25 -- operations Lazy ByteStrings are usually within a few percent of
26 -- strict ones, but with better heap usage. For data larger than the
27 -- available memory, or if you have tight memory constraints, this
28 -- module will be the only option. The default chunk size is 64k, which
29 -- should be good in most circumstances. For people with large L2
30 -- caches, you may want to increase this to fit your cache.
32 -- This module is intended to be imported @qualified@, to avoid name
33 -- clashes with "Prelude" functions. eg.
35 -- > import qualified Data.ByteString.Lazy as B
37 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
38 -- UArray by Simon Marlow. Rewritten to support slices and use
39 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
40 -- Lazy variant by Duncan Coutts and Don Stewart.
43 module Data.ByteString.Lazy (
45 -- * The @ByteString@ type
46 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
48 -- * Introducing and eliminating 'ByteString's
49 empty, -- :: ByteString
50 singleton, -- :: Word8 -> ByteString
51 pack, -- :: [Word8] -> ByteString
52 unpack, -- :: ByteString -> [Word8]
55 cons, -- :: Word8 -> ByteString -> ByteString
56 snoc, -- :: ByteString -> Word8 -> ByteString
57 append, -- :: ByteString -> ByteString -> ByteString
58 head, -- :: ByteString -> Word8
59 last, -- :: ByteString -> Word8
60 tail, -- :: ByteString -> ByteString
61 init, -- :: ByteString -> ByteString
62 null, -- :: ByteString -> Bool
63 length, -- :: ByteString -> Int64
65 -- * Transformating ByteStrings
66 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
67 reverse, -- :: ByteString -> ByteString
68 -- intersperse, -- :: Word8 -> ByteString -> ByteString
69 transpose, -- :: [ByteString] -> [ByteString]
71 -- * Reducing 'ByteString's (folds)
72 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
73 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
74 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
75 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
80 concat, -- :: [ByteString] -> ByteString
81 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
82 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 maximum, -- :: ByteString -> Word8
85 minimum, -- :: ByteString -> Word8
87 -- * Building ByteStrings
89 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
90 -- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
91 -- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
92 -- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
94 -- ** Accumulating maps
95 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
97 mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
99 -- ** Infinite ByteStrings
100 repeat, -- :: Word8 -> ByteString
101 replicate, -- :: Int64 -> Word8 -> ByteString
102 cycle, -- :: ByteString -> ByteString
103 iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString
106 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
110 -- ** Breaking strings
111 take, -- :: Int64 -> ByteString -> ByteString
112 drop, -- :: Int64 -> ByteString -> ByteString
113 splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString)
114 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
115 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
116 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
117 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
118 group, -- :: ByteString -> [ByteString]
119 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
120 inits, -- :: ByteString -> [ByteString]
121 tails, -- :: ByteString -> [ByteString]
123 -- ** Breaking into many substrings
124 split, -- :: Word8 -> ByteString -> [ByteString]
125 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
126 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
128 -- ** Joining strings
129 join, -- :: ByteString -> [ByteString] -> ByteString
132 isPrefixOf, -- :: ByteString -> ByteString -> Bool
133 -- isSuffixOf, -- :: ByteString -> ByteString -> Bool
135 -- * Searching ByteStrings
137 -- ** Searching by equality
138 elem, -- :: Word8 -> ByteString -> Bool
139 notElem, -- :: Word8 -> ByteString -> Bool
141 -- ** Searching with a predicate
142 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
143 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
144 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
146 -- * Indexing ByteStrings
147 index, -- :: ByteString -> Int64 -> Word8
148 elemIndex, -- :: Word8 -> ByteString -> Maybe Int64
149 elemIndices, -- :: Word8 -> ByteString -> [Int64]
150 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
151 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64]
152 count, -- :: Word8 -> ByteString -> Int64
154 -- * Zipping and unzipping ByteStrings
155 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
156 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
157 -- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
159 -- * Ordered ByteStrings
160 -- sort, -- :: ByteString -> ByteString
162 copy, -- :: ByteString -> ByteString
164 -- * I\/O with 'ByteString's
166 -- ** Standard input and output
167 getContents, -- :: IO ByteString
168 putStr, -- :: ByteString -> IO ()
169 putStrLn, -- :: ByteString -> IO ()
170 interact, -- :: (ByteString -> ByteString) -> IO ()
173 readFile, -- :: FilePath -> IO ByteString
174 writeFile, -- :: FilePath -> ByteString -> IO ()
175 appendFile, -- :: FilePath -> ByteString -> IO ()
177 -- ** I\/O with Handles
178 hGetContents, -- :: Handle -> IO ByteString
179 hGet, -- :: Handle -> Int -> IO ByteString
180 hPut, -- :: Handle -> ByteString -> IO ()
181 hGetNonBlocking, -- :: Handle -> IO ByteString
182 -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
183 -- hGetContentsN, -- :: Int -> Handle -> IO ByteString
184 -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
188 import qualified Prelude
189 import Prelude hiding
190 (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
191 ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
192 ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
193 ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
194 ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
196 import qualified Data.List as L -- L for list/lazy
197 import qualified Data.ByteString as P -- P for packed
198 import qualified Data.ByteString.Base as P
199 import qualified Data.ByteString.Fusion as P
200 import Data.ByteString.Fusion (PairS(..),loopL)
202 import Data.Monoid (Monoid(..))
204 import Data.Word (Word8)
205 import Data.Int (Int64)
206 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..)
207 ,hClose,hWaitForInput,hIsEOF)
208 import System.IO.Unsafe
209 import Control.Exception (bracket)
211 import Foreign.ForeignPtr (withForeignPtr)
213 import Foreign.Storable
215 #if defined(__GLASGOW_HASKELL__)
216 import Data.Generics (Data(..), Typeable(..))
219 -- -----------------------------------------------------------------------------
221 -- Useful macros, until we have bang patterns
224 #define STRICT1(f) f a | a `seq` False = undefined
225 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
226 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
227 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
228 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
230 -- -----------------------------------------------------------------------------
232 -- | A space-efficient representation of a Word8 vector, supporting many
233 -- efficient operations. A 'ByteString' contains 8-bit characters only.
235 -- Instances of Eq, Ord, Read, Show, Data, Typeable
237 newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string
239 #if defined(__GLASGOW_HASKELL__)
245 -- hmm, what about getting the PS constructor unpacked into the cons cell?
247 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
249 -- Would avoid one indirection per chunk.
252 unLPS :: ByteString -> [P.ByteString]
256 instance Eq ByteString
259 instance Ord ByteString
260 where compare = compareBytes
262 instance Monoid ByteString where
267 ------------------------------------------------------------------------
270 -- The data type invariant:
271 -- Every ByteString is either empty or consists of non-null ByteStrings.
272 -- All functions must preserve this, and the QC properties must check this.
274 _invariant :: ByteString -> Bool
275 _invariant (LPS []) = True
276 _invariant (LPS xs) = L.all (not . P.null) xs
278 -- In a form useful for QC testing
279 _checkInvariant :: ByteString -> ByteString
281 | _invariant lps = lps
282 | otherwise = moduleError "invariant" ("violation: " ++ show lps)
284 -- The Data abstraction function
286 _abstr :: ByteString -> P.ByteString
287 _abstr (LPS []) = P.empty
288 _abstr (LPS xs) = P.concat xs
290 -- The representation uses lists of packed chunks. When we have to convert from
291 -- a lazy list to the chunked representation, then by default we'll use this
292 -- chunk size. Some functions give you more control over the chunk size.
294 -- Measurements here:
295 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
297 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
298 -- The following value assumes people have something greater than 128k,
299 -- and need to share the cache with other programs.
301 defaultChunkSize :: Int
302 defaultChunkSize = 32 * k - overhead
304 overhead = 2 * sizeOf (undefined :: Int)
306 smallChunkSize :: Int
307 smallChunkSize = 4 * k - overhead
309 overhead = 2 * sizeOf (undefined :: Int)
311 -- defaultChunkSize = 1
313 ------------------------------------------------------------------------
315 eq :: ByteString -> ByteString -> Bool
316 eq (LPS xs) (LPS ys) = eq' xs ys
317 where eq' [] [] = True
321 case compare (P.length a) (P.length b) of
322 LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
323 EQ -> a == b && eq' as bs
324 GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
326 compareBytes :: ByteString -> ByteString -> Ordering
327 compareBytes (LPS xs) (LPS ys) = cmp xs ys
332 case compare (P.length a) (P.length b) of
333 LT -> case compare a (P.take (P.length a) b) of
334 EQ -> cmp as (P.drop (P.length a) b : bs)
336 EQ -> case compare a b of
339 GT -> case compare (P.take (P.length b) a) b of
340 EQ -> cmp (P.drop (P.length b) a : as) bs
343 -- -----------------------------------------------------------------------------
344 -- Introducing and eliminating 'ByteString's
346 -- | /O(1)/ The empty 'ByteString'
349 {-# NOINLINE empty #-}
351 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
352 singleton :: Word8 -> ByteString
353 singleton c = LPS [P.singleton c]
354 {-# NOINLINE singleton #-}
356 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
357 pack :: [Word8] -> ByteString
358 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
361 chunk :: Int -> [a] -> [[a]]
363 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
365 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
366 unpack :: ByteString -> [Word8]
367 unpack (LPS ss) = L.concatMap P.unpack ss
368 {-# INLINE unpack #-}
370 ------------------------------------------------------------------------
373 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
374 -- conversion function
375 packWith :: (a -> Word8) -> [a] -> ByteString
376 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
377 {-# INLINE packWith #-}
378 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
380 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
381 unpackWith :: (Word8 -> a) -> ByteString -> [a]
382 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
383 {-# INLINE unpackWith #-}
384 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
387 -- ---------------------------------------------------------------------
390 -- | /O(1)/ Test whether a ByteString is empty.
391 null :: ByteString -> Bool
396 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
397 length :: ByteString -> Int64
398 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
400 -- avoid the intermediate list?
401 -- length (LPS ss) = L.foldl lengthF 0 ss
402 -- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
403 {-# INLINE length #-}
405 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
406 -- strict in the ByteString that we are consing onto. More precisely, it forces
407 -- the head and the first chunk. It does this because, for space efficiency, it
408 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
411 -- So that means you can't use a lazy recursive contruction like this:
413 -- > let xs = cons c xs in xs
415 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
417 cons :: Word8 -> ByteString -> ByteString
418 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
419 cons c (LPS ss) = LPS (P.singleton c : ss)
422 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
423 snoc :: ByteString -> Word8 -> ByteString
424 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
427 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
428 head :: ByteString -> Word8
429 head (LPS []) = errorEmptyList "head"
430 head (LPS (x:_)) = P.unsafeHead x
433 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
434 tail :: ByteString -> ByteString
435 tail (LPS []) = errorEmptyList "tail"
437 | P.length x == 1 = LPS xs
438 | otherwise = LPS (P.unsafeTail x : xs)
441 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
442 last :: ByteString -> Word8
443 last (LPS []) = errorEmptyList "last"
444 last (LPS xs) = P.last (L.last xs)
447 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
448 init :: ByteString -> ByteString
449 init (LPS []) = errorEmptyList "init"
451 | P.length y == 1 = LPS ys
452 | otherwise = LPS (ys ++ [P.init y])
453 where (y,ys) = (L.last xs, L.init xs)
456 -- | /O(n)/ Append two ByteStrings
457 append :: ByteString -> ByteString -> ByteString
458 append (LPS []) (LPS ys) = LPS ys
459 append (LPS xs) (LPS []) = LPS xs
460 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
461 {-# INLINE append #-}
463 -- ---------------------------------------------------------------------
466 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
468 map :: (Word8 -> Word8) -> ByteString -> ByteString
469 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
470 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
473 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
474 reverse :: ByteString -> ByteString
475 reverse (LPS ps) = LPS (rev [] ps)
477 rev a (x:xs) = rev (P.reverse x:a) xs
478 -- note, here is one example where the extra element lazyness is an advantage.
479 -- we can reerse the list of chunks strictly but reverse each chunk lazily
480 -- so while we may force the whole lot into memory we do not need to copy
481 -- each chunk until it is used.
482 {-# INLINE reverse #-}
484 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
485 -- \`intersperses\' that byte between the elements of the 'ByteString'.
486 -- It is analogous to the intersperse function on Lists.
487 -- intersperse :: Word8 -> ByteString -> ByteString
488 -- intersperse = error "FIXME: not yet implemented"
491 intersperse c (LPS []) = LPS []
492 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
493 where intersperse' c ps@(PS x s l) =
494 P.create (2*l) $ \p -> withForeignPtr x $ \f ->
496 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
499 -- | The 'transpose' function transposes the rows and columns of its
500 -- 'ByteString' argument.
501 transpose :: [ByteString] -> [ByteString]
502 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
504 -- ---------------------------------------------------------------------
505 -- Reducing 'ByteString's
507 -- | 'foldl', applied to a binary operator, a starting value (typically
508 -- the left-identity of the operator), and a ByteString, reduces the
509 -- ByteString using the binary operator, from left to right.
510 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
511 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
512 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
515 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
516 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
517 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
518 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
519 {-# INLINE foldl' #-}
521 -- | 'foldr', applied to a binary operator, a starting value
522 -- (typically the right-identity of the operator), and a ByteString,
523 -- reduces the ByteString using the binary operator, from right to left.
524 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
525 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
528 -- | 'foldl1' is a variant of 'foldl' that has no starting value
529 -- argument, and thus must be applied to non-empty 'ByteStrings'.
530 -- This function is subject to array fusion.
531 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
532 foldl1 _ (LPS []) = errorEmptyList "foldl1"
533 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
535 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
536 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
537 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
538 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
540 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
541 -- and thus must be applied to non-empty 'ByteString's
542 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
543 foldr1 _ (LPS []) = errorEmptyList "foldr1"
544 foldr1 f (LPS ps) = foldr1' ps
545 where foldr1' (x:[]) = P.foldr1 f x
546 foldr1' (x:xs) = P.foldr f (foldr1' xs) x
548 -- ---------------------------------------------------------------------
551 -- | /O(n)/ Concatenate a list of ByteStrings.
552 concat :: [ByteString] -> ByteString
553 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
555 -- | Map a function over a 'ByteString' and concatenate the results
556 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
557 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
559 k w = case f w of LPS xs -> P.concat xs
561 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
562 -- any element of the 'ByteString' satisfies the predicate.
563 any :: (Word8 -> Bool) -> ByteString -> Bool
564 any f (LPS xs) = L.or (L.map (P.any f) xs)
567 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
568 -- if all elements of the 'ByteString' satisfy the predicate.
569 all :: (Word8 -> Bool) -> ByteString -> Bool
570 all f (LPS xs) = L.and (L.map (P.all f) xs)
573 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
574 maximum :: ByteString -> Word8
575 maximum (LPS []) = errorEmptyList "maximum"
576 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
577 {-# INLINE maximum #-}
579 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
580 minimum :: ByteString -> Word8
581 minimum (LPS []) = errorEmptyList "minimum"
582 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
583 {-# INLINE minimum #-}
585 -- | The 'mapAccumL' function behaves like a combination of 'map' and
586 -- 'foldl'; it applies a function to each element of a ByteString,
587 -- passing an accumulating parameter from left to right, and returning a
588 -- final value of this accumulator together with the new ByteString.
589 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
590 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
592 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
593 mapAccumR = error "mapAccumR unimplemented"
595 -- | /O(n)/ map Word8 functions, provided with the index at each position
596 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
597 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
599 -- ---------------------------------------------------------------------
600 -- Building ByteStrings
602 -- | 'scanl' is similar to 'foldl', but returns a list of successive
603 -- reduced values from the left. This function will fuse.
605 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
609 -- > last (scanl f z xs) == foldl f z xs.
610 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
611 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
614 -- ---------------------------------------------------------------------
615 -- Unfolds and replicates
617 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
620 -- > iterate f x == [x, f x, f (f x), ...]
622 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
623 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
625 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
628 repeat :: Word8 -> ByteString
629 repeat c = LPS (L.repeat block)
630 where block = P.replicate smallChunkSize c
632 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
633 -- the value of every element.
635 replicate :: Int64 -> Word8 -> ByteString
638 | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
639 | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant
640 | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
642 s = P.replicate smallChunkSize c
643 (q, r) = quotRem w (fromIntegral smallChunkSize)
645 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
646 -- the infinite repetition of the original ByteString.
648 cycle :: ByteString -> ByteString
649 cycle (LPS []) = errorEmptyList "cycle"
650 cycle (LPS xs) = LPS (L.cycle xs)
652 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
653 -- 'unfoldr' builds a ByteString from a seed value. The function takes
654 -- the element and returns 'Nothing' if it is done producing the
655 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
656 -- prepending to the ByteString and @b@ is used as the next element in a
658 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
659 unfoldr f = LPS . unfoldChunk 32
660 where unfoldChunk n x =
661 case P.unfoldrN n f x of
664 | otherwise -> s : []
665 (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
667 -- ---------------------------------------------------------------------
670 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
671 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
672 take :: Int64 -> ByteString -> ByteString
673 take n _ | n < 0 = empty
674 take i (LPS ps) = LPS (take' i ps)
675 where take' _ [] = []
678 if n < fromIntegral (P.length x)
679 then P.take (fromIntegral n) x : []
680 else x : take' (n - fromIntegral (P.length x)) xs
682 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
683 -- elements, or @[]@ if @n > 'length' xs@.
684 drop :: Int64 -> ByteString -> ByteString
685 drop i p | i <= 0 = p
686 drop i (LPS ps) = LPS (drop' i ps)
687 where drop' _ [] = []
690 if n < fromIntegral (P.length x)
691 then P.drop (fromIntegral n) x : xs
692 else drop' (n - fromIntegral (P.length x)) xs
694 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
695 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
696 splitAt i p | i <= 0 = (empty, p)
697 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
698 where splitAt' _ [] = ([], [])
699 splitAt' 0 xs = ([], xs)
701 if n < fromIntegral (P.length x)
702 then (P.take (fromIntegral n) x : [],
703 P.drop (fromIntegral n) x : xs)
704 else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
708 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
709 -- returns the longest prefix (possibly empty) of @xs@ of elements that
711 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
712 takeWhile f (LPS ps) = LPS (takeWhile' ps)
713 where takeWhile' [] = []
715 case findIndexOrEnd (not . f) x of
717 n | n < P.length x -> P.take n x : []
718 | otherwise -> x : takeWhile' xs
720 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
721 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
722 dropWhile f (LPS ps) = LPS (dropWhile' ps)
723 where dropWhile' [] = []
725 case findIndexOrEnd (not . f) x of
726 n | n < P.length x -> P.drop n x : xs
727 | otherwise -> dropWhile' xs
729 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
730 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
731 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
732 where break' [] = ([], [])
734 case findIndexOrEnd f x of
736 n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
737 | otherwise -> let (xs', xs'') = break' xs
747 -- | 'breakByte' breaks its ByteString argument at the first occurence
748 -- of the specified byte. It is more efficient than 'break' as it is
749 -- implemented with @memchr(3)@. I.e.
751 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
753 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
754 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
755 where breakByte' [] = ([], [])
757 case P.elemIndex c x of
758 Just 0 -> ([], x : xs)
759 Just n -> (P.take n x : [], P.drop n x : xs)
760 Nothing -> let (xs', xs'') = breakByte' xs
763 -- | 'spanByte' breaks its ByteString argument at the first
764 -- occurence of a byte other than its argument. It is more efficient
767 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
769 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
770 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
771 where spanByte' [] = ([], [])
773 case P.spanByte c x of
774 (x', x'') | P.null x' -> ([], x : xs)
775 | P.null x'' -> let (xs', xs'') = spanByte' xs
777 | otherwise -> (x' : [], x'' : xs)
780 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
781 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
782 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
783 span p = break (not . p)
785 -- | /O(n)/ Splits a 'ByteString' into components delimited by
786 -- separators, where the predicate returns True for a separator element.
787 -- The resulting components do not contain the separators. Two adjacent
788 -- separators result in an empty component in the output. eg.
790 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
791 -- > splitWith (=='a') [] == []
793 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
794 splitWith _ (LPS []) = []
795 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
797 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
798 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
799 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
800 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
802 cons' x xs | P.null x = xs
805 {-# INLINE splitWith #-}
807 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
808 -- argument, consuming the delimiter. I.e.
810 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
811 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
812 -- > split 'x' "x" == ["",""]
816 -- > join [c] . split c == id
817 -- > split == splitWith . (==)
819 -- As for all splitting functions in this library, this function does
820 -- not copy the substrings, it just constructs new 'ByteStrings' that
821 -- are slices of the original.
823 split :: Word8 -> ByteString -> [ByteString]
824 split _ (LPS []) = []
825 split c (LPS (a:as)) = comb [] (P.split c a) as
827 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
828 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
829 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
830 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
832 cons' x xs | P.null x = xs
837 -- | Like 'splitWith', except that sequences of adjacent separators are
838 -- treated as a single separator. eg.
840 -- > tokens (=='a') "aabbaca" == ["bb","c"]
842 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
843 tokens f = L.filter (not.null) . splitWith f
845 -- | The 'group' function takes a ByteString and returns a list of
846 -- ByteStrings such that the concatenation of the result is equal to the
847 -- argument. Moreover, each sublist in the result contains only equal
848 -- elements. For example,
850 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
852 -- It is a special case of 'groupBy', which allows the programmer to
853 -- supply their own equality test.
854 group :: ByteString -> [ByteString]
856 group (LPS (a:as)) = group' [] (P.group a) as
857 where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
858 group' acc@(s':_) ss@(s:_) xs
860 /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs
861 group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : []
862 group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
863 group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs
866 TODO: check if something like this might be faster
868 group :: ByteString -> [ByteString]
871 | otherwise = ys : group zs
873 (ys, zs) = spanByte (unsafeHead xs) xs
876 -- | The 'groupBy' function is the non-overloaded version of 'group'.
878 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
879 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
881 groupBy _ (LPS []) = []
882 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
883 where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
884 groupBy' acc@(_:_) c ss@(s:_) xs
885 | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
886 groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : []
887 groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
888 groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs
889 groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
893 TODO: check if something like this might be faster
895 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
898 | otherwise = take n xs : groupBy k (drop n xs)
900 n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
903 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
904 -- 'ByteString's and concatenates the list after interspersing the first
905 -- argument between each element of the list.
906 join :: ByteString -> [ByteString] -> ByteString
907 join s = concat . (L.intersperse s)
909 -- ---------------------------------------------------------------------
910 -- Indexing ByteStrings
912 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
913 index :: ByteString -> Int64 -> Word8
914 index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i)
915 index (LPS ps) i = index' ps i
916 where index' [] n = moduleError "index" ("index too large: " ++ show n)
918 | n >= fromIntegral (P.length x) =
919 index' xs (n - fromIntegral (P.length x))
920 | otherwise = P.unsafeIndex x (fromIntegral n)
922 -- | /O(n)/ The 'elemIndex' function returns the index of the first
923 -- element in the given 'ByteString' which is equal to the query
924 -- element, or 'Nothing' if there is no such element.
925 -- This implementation uses memchr(3).
926 elemIndex :: Word8 -> ByteString -> Maybe Int64
927 elemIndex c (LPS ps) = elemIndex' 0 ps
928 where elemIndex' _ [] = Nothing
929 elemIndex' n (x:xs) =
930 case P.elemIndex c x of
931 Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
932 Just i -> Just (n + fromIntegral i)
935 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
936 -- element in the given 'ByteString' which is equal to the query
937 -- element, or 'Nothing' if there is no such element. The following
940 -- > elemIndexEnd c xs ==
941 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
943 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
944 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
945 go (p `plusPtr` s) (l-1)
948 go p i | i < 0 = return Nothing
949 | otherwise = do ch' <- peekByteOff p i
954 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
955 -- the indices of all elements equal to the query element, in ascending order.
956 -- This implementation uses memchr(3).
957 elemIndices :: Word8 -> ByteString -> [Int64]
958 elemIndices c (LPS ps) = elemIndices' 0 ps
959 where elemIndices' _ [] = []
960 elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
961 ++ elemIndices' (n + fromIntegral (P.length x)) xs
963 -- | count returns the number of times its argument appears in the ByteString
965 -- > count = length . elemIndices
967 -- But more efficiently than using length on the intermediate list.
968 count :: Word8 -> ByteString -> Int64
969 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
971 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
972 -- returns the index of the first element in the ByteString
973 -- satisfying the predicate.
974 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
975 findIndex k (LPS ps) = findIndex' 0 ps
976 where findIndex' _ [] = Nothing
977 findIndex' n (x:xs) =
978 case P.findIndex k x of
979 Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
980 Just i -> Just (n + fromIntegral i)
981 {-# INLINE findIndex #-}
983 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
984 -- and returns the first element in matching the predicate, or 'Nothing'
985 -- if there is no such element.
987 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
989 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
990 find f (LPS ps) = find' ps
991 where find' [] = Nothing
992 find' (x:xs) = case P.find f x of
997 -- | The 'findIndices' function extends 'findIndex', by returning the
998 -- indices of all elements satisfying the predicate, in ascending order.
999 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
1000 findIndices k (LPS ps) = findIndices' 0 ps
1001 where findIndices' _ [] = []
1002 findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
1003 ++ findIndices' (n + fromIntegral (P.length x)) xs
1005 -- ---------------------------------------------------------------------
1006 -- Searching ByteStrings
1008 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1009 elem :: Word8 -> ByteString -> Bool
1010 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1012 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1013 notElem :: Word8 -> ByteString -> Bool
1014 notElem c ps = not (elem c ps)
1016 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1017 -- returns a ByteString containing those characters that satisfy the
1019 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1020 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1021 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1022 {-# INLINE filter #-}
1025 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1026 -- (==)/, for the common case of filtering a single byte. It is more
1027 -- efficient to use /filterByte/ in this case.
1029 -- > filterByte == filter . (==)
1031 -- filterByte is around 10x faster, and uses much less space, than its
1032 -- filter equivalent
1033 filterByte :: Word8 -> ByteString -> ByteString
1034 filterByte w ps = replicate (count w ps) w
1035 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1037 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1038 -- case of filtering a single byte out of a list. It is more efficient
1039 -- to use /filterNotByte/ in this case.
1041 -- > filterNotByte == filter . (/=)
1043 -- filterNotByte is around 2x faster than its filter equivalent.
1044 filterNotByte :: Word8 -> ByteString -> ByteString
1045 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1048 -- ---------------------------------------------------------------------
1049 -- Searching for substrings
1051 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1052 -- iff the first is a prefix of the second.
1053 isPrefixOf :: ByteString -> ByteString -> Bool
1054 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1055 where isPrefixL [] _ = True
1056 isPrefixL _ [] = False
1057 isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys
1058 | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys)
1059 | otherwise = xh == y && isPrefixL (xt:xs) ys
1060 where (xh,xt) = P.splitAt (P.length y) x
1061 (yh,yt) = P.splitAt (P.length x) y
1063 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1064 -- iff the first is a suffix of the second.
1066 -- The following holds:
1068 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1070 -- However, the real implemenation uses memcmp to compare the end of the
1071 -- string only, with no reverse required..
1073 --isSuffixOf :: ByteString -> ByteString -> Bool
1074 --isSuffixOf = error "not yet implemented"
1076 -- ---------------------------------------------------------------------
1079 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1080 -- corresponding pairs of bytes. If one input ByteString is short,
1081 -- excess elements of the longer ByteString are discarded. This is
1082 -- equivalent to a pair of 'unpack' operations.
1083 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1086 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1087 -- the first argument, instead of a tupling function. For example,
1088 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1089 -- corresponding sums.
1090 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1091 zipWith _ (LPS []) (LPS _) = []
1092 zipWith _ (LPS _) (LPS []) = []
1093 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1094 where zipWith' x xs y ys =
1095 (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1097 zipWith'' x [] _ _ | P.null x = []
1098 zipWith'' _ _ y [] | P.null y = []
1099 zipWith'' x xs y ys | not (P.null x)
1100 && not (P.null y) = zipWith' x xs y ys
1101 zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys
1102 zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys
1103 zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys
1105 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1106 -- ByteStrings. Note that this performs two 'pack' operations.
1108 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1109 unzip _ls = error "not yet implemented"
1110 {-# INLINE unzip #-}
1113 -- ---------------------------------------------------------------------
1116 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1117 inits :: ByteString -> [ByteString]
1118 inits = (LPS [] :) . inits' . unLPS
1119 where inits' [] = []
1120 inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1121 ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1123 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1124 tails :: ByteString -> [ByteString]
1125 tails = tails' . unLPS
1126 where tails' [] = LPS [] : []
1128 | P.length x == 1 = LPS xs : tails' xs'
1129 | otherwise = LPS xs : tails' (P.unsafeTail x : xs')
1131 -- ---------------------------------------------------------------------
1132 -- Low level constructors
1134 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1135 -- This is mainly useful to allow the rest of the data pointed
1136 -- to by the 'ByteString' to be garbage collected, for example
1137 -- if a large string has been read in, and only a small part of it
1138 -- is needed in the rest of the program.
1139 copy :: ByteString -> ByteString
1140 copy (LPS lps) = LPS (L.map P.copy lps)
1141 --TODO, we could coalese small blocks here
1142 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1143 -- the parent blocks then we'd better copy strictly.
1145 -- ---------------------------------------------------------------------
1147 -- TODO defrag func that concatenates block together that are below a threshold
1148 -- defrag :: Int -> ByteString -> ByteString
1150 -- ---------------------------------------------------------------------
1151 -- Lazy ByteString IO
1153 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1154 -- are read on demand, in at most @k@-sized chunks. It does not block
1155 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1156 -- available then they will be returned immediately as a smaller chunk.
1157 hGetContentsN :: Int -> Handle -> IO ByteString
1158 hGetContentsN k h = lazyRead >>= return . LPS
1160 lazyRead = unsafeInterleaveIO loop
1163 ps <- P.hGetNonBlocking h k
1164 --TODO: I think this should distinguish EOF from no data available
1165 -- the otherlying POSIX call makes this distincion, returning either
1168 then do eof <- hIsEOF h
1169 if eof then return []
1170 else hWaitForInput h (-1)
1172 else do pss <- lazyRead
1175 -- | Read @n@ bytes into a 'ByteString', directly from the
1176 -- specified 'Handle', in chunks of size @k@.
1177 hGetN :: Int -> Handle -> Int -> IO ByteString
1178 hGetN _ _ 0 = return empty
1179 hGetN k h n = readChunks n >>= return . LPS
1183 ps <- P.hGet h (min k i)
1186 m -> do pss <- readChunks (i - m)
1189 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1190 -- waiting for data to become available, instead it returns only whatever data
1191 -- is available. Chunks are read on demand, in @k@-sized chunks.
1192 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1193 #if defined(__GLASGOW_HASKELL__)
1194 hGetNonBlockingN _ _ 0 = return empty
1195 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1199 ps <- P.hGetNonBlocking h (min k i)
1202 m -> do pss <- readChunks (i - m)
1205 hGetNonBlockingN = hGetN
1208 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1209 -- are read on demand, using the default chunk size.
1210 hGetContents :: Handle -> IO ByteString
1211 hGetContents = hGetContentsN defaultChunkSize
1213 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1214 hGet :: Handle -> Int -> IO ByteString
1215 hGet = hGetN defaultChunkSize
1217 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1218 -- waiting for data to become available, instead it returns only whatever data
1220 #if defined(__GLASGOW_HASKELL__)
1221 hGetNonBlocking :: Handle -> Int -> IO ByteString
1222 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1224 hGetNonBlocking = hGet
1227 -- | Read an entire file /lazily/ into a 'ByteString'.
1228 readFile :: FilePath -> IO ByteString
1229 readFile f = openBinaryFile f ReadMode >>= hGetContents
1231 -- | Write a 'ByteString' to a file.
1232 writeFile :: FilePath -> ByteString -> IO ()
1233 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1234 (\hdl -> hPut hdl txt)
1236 -- | Append a 'ByteString' to a file.
1237 appendFile :: FilePath -> ByteString -> IO ()
1238 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1239 (\hdl -> hPut hdl txt)
1241 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1242 getContents :: IO ByteString
1243 getContents = hGetContents stdin
1245 -- | Outputs a 'ByteString' to the specified 'Handle'.
1246 hPut :: Handle -> ByteString -> IO ()
1247 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1249 -- | Write a ByteString to stdout
1250 putStr :: ByteString -> IO ()
1251 putStr = hPut stdout
1253 -- | Write a ByteString to stdout, appending a newline byte
1254 putStrLn :: ByteString -> IO ()
1255 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1257 -- | The interact function takes a function of type @ByteString -> ByteString@
1258 -- as its argument. The entire input from the standard input device is passed
1259 -- to this function as its argument, and the resulting string is output on the
1260 -- standard output device. It's great for writing one line programs!
1261 interact :: (ByteString -> ByteString) -> IO ()
1262 interact transformer = putStr . transformer =<< getContents
1264 -- ---------------------------------------------------------------------
1265 -- Internal utilities
1267 -- Common up near identical calls to `error' to reduce the number
1268 -- constant strings created when compiled:
1269 errorEmptyList :: String -> a
1270 errorEmptyList fun = moduleError fun "empty ByteString"
1272 moduleError :: String -> String -> a
1273 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1275 -- A manually fused version of "filter (not.null) . map f", since they
1276 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1280 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1282 filterMap f (x:xs) = case f x of
1283 y | P.null y -> filterMap f xs -- manually fuse the invariant filter
1284 | otherwise -> y : filterMap f xs
1285 {-# INLINE filterMap #-}
1288 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1289 -- of the string if no element is found, rather than Nothing.
1290 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1291 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1294 go ptr n | n >= l = return l
1295 | otherwise = do w <- peek ptr
1298 else go (ptr `plusPtr` 1) (n+1)
1299 {-# INLINE findIndexOrEnd #-}