1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
3 -- Module : Data.ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
10 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
11 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
13 -- License : BSD-style
15 -- Maintainer : dons@cse.unsw.edu.au
16 -- Stability : experimental
17 -- Portability : portable
19 -- A time and space-efficient implementation of byte vectors using
20 -- packed Word8 arrays, suitable for high performance use, both in terms
21 -- of large data quantities, or high speed requirements. Byte vectors
22 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
23 -- and can be passed between C and Haskell with little effort.
25 -- This module is intended to be imported @qualified@, to avoid name
26 -- clashes with "Prelude" functions. eg.
28 -- > import qualified Data.ByteString as B
30 -- Original GHC implementation by Bryan O\'Sullivan.
31 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
32 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
33 -- Polished and extended by Don Stewart.
36 module Data.ByteString (
38 -- * The @ByteString@ type
39 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
41 -- * Introducing and eliminating 'ByteString's
42 empty, -- :: ByteString
43 singleton, -- :: Word8 -> ByteString
44 pack, -- :: [Word8] -> ByteString
45 unpack, -- :: ByteString -> [Word8]
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: ByteString -> Word8 -> ByteString
50 append, -- :: ByteString -> ByteString -> ByteString
51 head, -- :: ByteString -> Word8
52 last, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 init, -- :: ByteString -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
58 -- * Transformating ByteStrings
59 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
60 reverse, -- :: ByteString -> ByteString
61 intersperse, -- :: Word8 -> ByteString -> ByteString
62 transpose, -- :: [ByteString] -> [ByteString]
64 -- * Reducing 'ByteString's (folds)
65 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
66 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
67 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
68 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
70 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
71 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
83 -- * Building ByteStrings
85 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
86 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
87 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
88 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
90 -- ** Accumulating maps
91 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
92 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
93 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
95 -- ** Unfolding ByteStrings
96 replicate, -- :: Int -> Word8 -> ByteString
97 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
98 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
102 -- ** Breaking strings
103 take, -- :: Int -> ByteString -> ByteString
104 drop, -- :: Int -> ByteString -> ByteString
105 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
106 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
107 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
108 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
109 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
110 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
111 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 group, -- :: ByteString -> [ByteString]
113 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
114 inits, -- :: ByteString -> [ByteString]
115 tails, -- :: ByteString -> [ByteString]
117 -- ** Breaking into many substrings
118 split, -- :: Word8 -> ByteString -> [ByteString]
119 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
121 -- ** Joining strings
122 join, -- :: ByteString -> [ByteString] -> ByteString
125 isPrefixOf, -- :: ByteString -> ByteString -> Bool
126 isSuffixOf, -- :: ByteString -> ByteString -> Bool
128 -- ** Search for arbitrary substrings
129 isSubstringOf, -- :: ByteString -> ByteString -> Bool
130 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
131 findSubstrings, -- :: ByteString -> ByteString -> [Int]
133 -- * Searching ByteStrings
135 -- ** Searching by equality
136 -- | These functions use memchr(3) to efficiently search the ByteString
137 elem, -- :: Word8 -> ByteString -> Bool
138 notElem, -- :: Word8 -> ByteString -> Bool
140 -- ** Searching with a predicate
141 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
142 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
143 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
145 -- * Indexing ByteStrings
146 index, -- :: ByteString -> Int -> Word8
147 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
148 elemIndices, -- :: Word8 -> ByteString -> [Int]
149 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
150 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
151 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
152 count, -- :: Word8 -> ByteString -> Int
154 -- * Zipping and unzipping ByteStrings
155 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
156 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
157 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
159 -- * Ordered ByteStrings
160 sort, -- :: ByteString -> ByteString
162 -- * Low level CString conversions
164 -- ** Packing CStrings and pointers
165 packCString, -- :: CString -> ByteString
166 packCStringLen, -- :: CString -> ByteString
167 packMallocCString, -- :: CString -> ByteString
169 -- ** Using ByteStrings as CStrings
170 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
171 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
173 -- ** Copying ByteStrings
174 -- | These functions perform memcpy(3) operations
175 copy, -- :: ByteString -> ByteString
176 copyCString, -- :: CString -> IO ByteString
177 copyCStringLen, -- :: CStringLen -> IO ByteString
179 -- * I\/O with 'ByteString's
181 -- ** Standard input and output
182 getLine, -- :: IO ByteString
183 getContents, -- :: IO ByteString
184 putStr, -- :: ByteString -> IO ()
185 putStrLn, -- :: ByteString -> IO ()
186 interact, -- :: (ByteString -> ByteString) -> IO ()
189 readFile, -- :: FilePath -> IO ByteString
190 writeFile, -- :: FilePath -> ByteString -> IO ()
191 appendFile, -- :: FilePath -> ByteString -> IO ()
192 -- mmapFile, -- :: FilePath -> IO ByteString
194 -- ** I\/O with Handles
195 hGetLine, -- :: Handle -> IO ByteString
196 hGetContents, -- :: Handle -> IO ByteString
197 hGet, -- :: Handle -> Int -> IO ByteString
198 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
199 hPut, -- :: Handle -> ByteString -> IO ()
200 hPutStr, -- :: Handle -> ByteString -> IO ()
201 hPutStrLn, -- :: Handle -> ByteString -> IO ()
203 #if defined(__GLASGOW_HASKELL__)
204 -- * Fusion utilities
205 unpackList, -- eek, otherwise it gets thrown away by the simplifier
206 lengthU, maximumU, minimumU
211 import qualified Prelude as P
212 import Prelude hiding (reverse,head,tail,last,init,null
213 ,length,map,lines,foldl,foldr,unlines
214 ,concat,any,take,drop,splitAt,takeWhile
215 ,dropWhile,span,break,elem,filter,maximum
216 ,minimum,all,concatMap,foldl1,foldr1
217 ,scanl,scanl1,scanr,scanr1
218 ,readFile,writeFile,appendFile,replicate
219 ,getContents,getLine,putStr,putStrLn,interact
220 ,zip,zipWith,unzip,notElem)
222 import Data.ByteString.Base
223 import Data.ByteString.Fusion
225 import qualified Data.List as List
227 import Data.Word (Word8)
228 import Data.Maybe (listToMaybe)
229 import Data.Array (listArray)
230 import qualified Data.Array as Array ((!))
232 -- Control.Exception.bracket not available in yhc or nhc
233 import Control.Exception (bracket, assert)
234 import qualified Control.Exception as Exception
235 import Control.Monad (when)
237 import Foreign.C.String (CString, CStringLen)
238 import Foreign.C.Types (CSize)
239 import Foreign.ForeignPtr
240 import Foreign.Marshal.Array
242 import Foreign.Storable (Storable(..))
244 -- hGetBuf and hPutBuf not available in yhc or nhc
245 import System.IO (stdin,stdout,hClose,hFileSize
246 ,hGetBuf,hPutBuf,openBinaryFile
249 import Data.Monoid (Monoid, mempty, mappend, mconcat)
251 #if !defined(__GLASGOW_HASKELL__)
252 import System.IO.Unsafe
253 import qualified System.Environment
254 import qualified System.IO (hGetLine)
257 #if defined(__GLASGOW_HASKELL__)
259 import System.IO (hGetBufNonBlocking)
260 import System.IO.Error (isEOFError)
263 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
264 import GHC.Base (build)
265 import GHC.Word hiding (Word8)
266 import GHC.Ptr (Ptr(..))
267 import GHC.ST (ST(..))
272 -- -----------------------------------------------------------------------------
274 -- Useful macros, until we have bang patterns
277 #define STRICT1(f) f a | a `seq` False = undefined
278 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
279 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
280 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
281 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
283 -- -----------------------------------------------------------------------------
285 instance Eq ByteString
288 instance Ord ByteString
289 where compare = compareBytes
291 instance Monoid ByteString where
297 instance Arbitrary PackedString where
298 arbitrary = P.pack `fmap` arbitrary
299 coarbitrary s = coarbitrary (P.unpack s)
302 -- | /O(n)/ Equality on the 'ByteString' type.
303 eq :: ByteString -> ByteString -> Bool
304 eq a@(PS p s l) b@(PS p' s' l')
305 | l /= l' = False -- short cut on length
306 | p == p' && s == s' = True -- short cut for the same string
307 | otherwise = compareBytes a b == EQ
310 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
311 compareBytes :: ByteString -> ByteString -> Ordering
312 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
313 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
314 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
315 | otherwise = inlinePerformIO $
316 withForeignPtr x1 $ \p1 ->
317 withForeignPtr x2 $ \p2 -> do
318 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
319 return $! case i `compare` 0 of
320 EQ -> l1 `compare` l2
322 {-# INLINE compareBytes #-}
326 -- About 4x slower over 32M
328 compareBytes :: ByteString -> ByteString -> Ordering
329 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
330 withForeignPtr fp1 $ \p1 ->
331 withForeignPtr fp2 $ \p2 ->
332 cmp (p1 `plusPtr` off1)
333 (p2 `plusPtr` off2) 0 len1 len2
335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
337 cmp p1 p2 n len1 len2
338 | n == len1 = if n == len2 then return EQ else return LT
339 | n == len2 = return GT
341 (a :: Word8) <- peekByteOff p1 n
342 (b :: Word8) <- peekByteOff p2 n
343 case a `compare` b of
344 EQ -> cmp p1 p2 (n+1) len1 len2
347 {-# INLINE compareBytes #-}
350 -- -----------------------------------------------------------------------------
351 -- Introducing and eliminating 'ByteString's
353 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
354 singleton :: Word8 -> ByteString
355 singleton c = unsafeCreate 1 $ \p -> poke p c
356 {-# INLINE [1] singleton #-}
359 -- XXX The unsafePerformIO is critical!
363 -- singleton 255 `compare` singleton 127
367 -- case mallocByteString 2 of
368 -- ForeignPtr f internals ->
369 -- case writeWord8OffAddr# f 0 255 of _ ->
370 -- case writeWord8OffAddr# f 0 127 of _ ->
371 -- case eqAddr# f f of
372 -- False -> case compare (GHC.Prim.plusAddr# f 0)
373 -- (GHC.Prim.plusAddr# f 0)
377 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
379 -- For applications with large numbers of string literals, pack can be a
380 -- bottleneck. In such cases, consider using packAddress (GHC only).
381 pack :: [Word8] -> ByteString
383 #if !defined(__GLASGOW_HASKELL__)
385 pack str = unsafeCreate (P.length str) $ \p -> go p str
388 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
390 #else /* hack away */
392 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
394 go _ _ [] = return ()
395 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
397 writeByte p i c = ST $ \s# ->
398 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
402 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
403 unpack :: ByteString -> [Word8]
405 #if !defined(__GLASGOW_HASKELL__)
407 unpack (PS _ _ 0) = []
408 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
409 go (p `plusPtr` s) (l - 1) []
412 go p 0 acc = peek p >>= \e -> return (e : acc)
413 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
414 {-# INLINE unpack #-}
418 unpack ps = build (unpackFoldr ps)
419 {-# INLINE unpack #-}
422 -- critical this isn't strict in the acc
423 -- as it will break in the presence of list fusion. this is a known
424 -- issue with seq and build/foldr rewrite rules, which rely on lazy
425 -- demanding to avoid bottoms in the list.
427 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
428 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
429 let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
430 loop _ (-1) acc = return acc
433 loop q (n-1) (a `f` acc)
434 loop (p `plusPtr` off) (len-1) ch
435 {-# INLINE [0] unpackFoldr #-}
437 unpackList :: ByteString -> [Word8]
438 unpackList (PS fp off len) = withPtr fp $ \p -> do
440 loop _ (-1) acc = return acc
443 loop q (n-1) (a : acc)
444 loop (p `plusPtr` off) (len-1) []
447 "FPS unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
452 -- ---------------------------------------------------------------------
455 -- | /O(1)/ Test whether a ByteString is empty.
456 null :: ByteString -> Bool
457 null (PS _ _ l) = assert (l >= 0) $ l <= 0
460 -- ---------------------------------------------------------------------
461 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
462 length :: ByteString -> Int
463 length (PS _ _ l) = assert (l >= 0) $ l
466 -- length/loop fusion. When taking the length of any fuseable loop,
467 -- rewrite it as a foldl', and thus avoid allocating the result buffer
468 -- worth around 10% in speed testing.
471 #if defined(__GLASGOW_HASKELL__)
472 {-# INLINE [1] length #-}
475 lengthU :: ByteString -> Int
476 lengthU = foldl' (const . (+1)) (0::Int)
477 {-# INLINE lengthU #-}
482 "FPS length/loop" forall loop s .
483 length (loopArr (loopWrapper loop s)) =
484 lengthU (loopArr (loopWrapper loop s))
488 ------------------------------------------------------------------------
490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
491 -- complexity, as it requires a memcpy.
492 cons :: Word8 -> ByteString -> ByteString
493 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
495 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
499 snoc :: ByteString -> Word8 -> ByteString
500 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
501 memcpy p (f `plusPtr` s) (fromIntegral l)
502 poke (p `plusPtr` l) c
507 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
508 -- An exception will be thrown in the case of an empty ByteString.
509 head :: ByteString -> Word8
511 | l <= 0 = errorEmptyList "head"
512 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
515 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
516 -- An exception will be thrown in the case of an empty ByteString.
517 tail :: ByteString -> ByteString
519 | l <= 0 = errorEmptyList "tail"
520 | otherwise = PS p (s+1) (l-1)
523 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
524 -- An exception will be thrown in the case of an empty ByteString.
525 last :: ByteString -> Word8
527 | null ps = errorEmptyList "last"
528 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
531 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
532 -- An exception will be thrown in the case of an empty ByteString.
533 init :: ByteString -> ByteString
535 | null ps = errorEmptyList "init"
536 | otherwise = PS p s (l-1)
539 -- | /O(n)/ Append two ByteStrings
540 append :: ByteString -> ByteString -> ByteString
541 append xs ys | null xs = ys
543 | otherwise = concat [xs,ys]
544 {-# INLINE append #-}
546 -- ---------------------------------------------------------------------
549 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
550 -- element of @xs@. This function is subject to array fusion.
551 map :: (Word8 -> Word8) -> ByteString -> ByteString
552 #if defined(LOOPU_FUSION)
553 map f = loopArr . loopU (mapEFL f) NoAcc
554 #elif defined(LOOPUP_FUSION)
555 map f = loopArr . loopUp (mapEFL f) NoAcc
556 #elif defined(LOOPNOACC_FUSION)
557 map f = loopArr . loopNoAcc (mapEFL f)
559 map f = loopArr . loopMap f
564 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
565 -- slightly faster for one-shot cases.
566 map' :: (Word8 -> Word8) -> ByteString -> ByteString
567 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
568 create len $ map_ 0 (a `plusPtr` s)
570 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
573 | n >= len = return ()
575 x <- peekByteOff p1 n
576 pokeByteOff p2 n (f x)
581 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
582 reverse :: ByteString -> ByteString
583 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
584 c_reverse p (f `plusPtr` s) (fromIntegral l)
586 -- todo, fuseable version
588 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
589 -- 'ByteString' and \`intersperses\' that byte between the elements of
590 -- the 'ByteString'. It is analogous to the intersperse function on
592 intersperse :: Word8 -> ByteString -> ByteString
593 intersperse c ps@(PS x s l)
595 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
596 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
599 intersperse c = pack . List.intersperse c . unpack
602 -- | The 'transpose' function transposes the rows and columns of its
603 -- 'ByteString' argument.
604 transpose :: [ByteString] -> [ByteString]
605 transpose ps = P.map pack (List.transpose (P.map unpack ps))
607 -- ---------------------------------------------------------------------
608 -- Reducing 'ByteString's
610 -- | 'foldl', applied to a binary operator, a starting value (typically
611 -- the left-identity of the operator), and a ByteString, reduces the
612 -- ByteString using the binary operator, from left to right.
613 -- This function is subject to array fusion.
614 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
615 #if !defined(LOOPU_FUSION)
616 foldl f z = loopAcc . loopUp (foldEFL f) z
618 foldl f z = loopAcc . loopU (foldEFL f) z
624 -- About twice as fast with 6.4.1, but not fuseable
625 -- A simple fold . map is enough to make it worth while.
627 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
628 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
631 lgo z p q | p == q = return z
632 | otherwise = do c <- peek p
633 lgo (f z c) (p `plusPtr` 1) q
636 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
637 -- Though actually foldl is also strict in the accumulator.
638 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
640 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
641 {-# INLINE foldl' #-}
643 -- | 'foldr', applied to a binary operator, a starting value
644 -- (typically the right-identity of the operator), and a ByteString,
645 -- reduces the ByteString using the binary operator, from right to left.
646 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
647 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
650 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
651 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
652 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
653 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
656 go z p q | p == q = return z
657 | otherwise = do c <- peek p
658 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
659 {-# INLINE [1] foldr' #-}
661 -- | 'foldl1' is a variant of 'foldl' that has no starting value
662 -- argument, and thus must be applied to non-empty 'ByteStrings'.
663 -- This function is subject to array fusion.
664 -- An exception will be thrown in the case of an empty ByteString.
665 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
667 | null ps = errorEmptyList "foldl1"
668 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
669 {-# INLINE foldl1 #-}
671 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
672 -- An exception will be thrown in the case of an empty ByteString.
673 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
675 | null ps = errorEmptyList "foldl1'"
676 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
677 {-# INLINE foldl1' #-}
679 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
680 -- and thus must be applied to non-empty 'ByteString's
681 -- An exception will be thrown in the case of an empty ByteString.
682 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
684 | null ps = errorEmptyList "foldr1"
685 | otherwise = foldr f (last ps) (init ps)
686 {-# INLINE foldr1 #-}
688 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
690 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
692 | null ps = errorEmptyList "foldr1"
693 | otherwise = foldr' f (last ps) (init ps)
694 {-# INLINE [1] foldr1' #-}
696 -- ---------------------------------------------------------------------
699 -- | /O(n)/ Concatenate a list of ByteStrings.
700 concat :: [ByteString] -> ByteString
703 concat xs = unsafeCreate len $ \ptr -> go xs ptr
704 where len = P.sum . P.map length $ xs
707 go (PS p s l:ps) ptr = do
708 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
709 go ps (ptr `plusPtr` l)
711 -- | Map a function over a 'ByteString' and concatenate the results
712 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
713 concatMap f = concat . foldr ((:) . f) []
715 -- foldr (append . f) empty
717 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
718 -- any element of the 'ByteString' satisfies the predicate.
719 any :: (Word8 -> Bool) -> ByteString -> Bool
720 any _ (PS _ _ 0) = False
721 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
722 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
725 go p q | p == q = return False
726 | otherwise = do c <- peek p
727 if f c then return True
728 else go (p `plusPtr` 1) q
732 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
733 -- if all elements of the 'ByteString' satisfy the predicate.
734 all :: (Word8 -> Bool) -> ByteString -> Bool
735 all _ (PS _ _ 0) = True
736 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
737 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
740 go p q | p == q = return True -- end of list
741 | otherwise = do c <- peek p
743 then go (p `plusPtr` 1) q
746 ------------------------------------------------------------------------
748 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
749 -- This function will fuse.
750 -- An exception will be thrown in the case of an empty ByteString.
751 maximum :: ByteString -> Word8
752 maximum xs@(PS x s l)
753 | null xs = errorEmptyList "maximum"
754 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
755 c_maximum (p `plusPtr` s) (fromIntegral l)
757 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
758 -- This function will fuse.
759 -- An exception will be thrown in the case of an empty ByteString.
760 minimum :: ByteString -> Word8
761 minimum xs@(PS x s l)
762 | null xs = errorEmptyList "minimum"
763 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
764 c_minimum (p `plusPtr` s) (fromIntegral l)
767 -- minimum/maximum/loop fusion. As for length (and other folds), when we
768 -- see we're applied after a fuseable op, switch from using the C
769 -- version, to the fuseable version. The result should then avoid
770 -- allocating a buffer.
773 #if defined(__GLASGOW_HASKELL__)
774 {-# INLINE [1] minimum #-}
775 {-# INLINE [1] maximum #-}
778 maximumU :: ByteString -> Word8
779 maximumU = foldl1' max
780 {-# INLINE maximumU #-}
782 minimumU :: ByteString -> Word8
783 minimumU = foldl1' min
784 {-# INLINE minimumU #-}
788 "FPS minimum/loop" forall loop s .
789 minimum (loopArr (loopWrapper loop s)) =
790 minimumU (loopArr (loopWrapper loop s))
792 "FPS maximum/loop" forall loop s .
793 maximum (loopArr (loopWrapper loop s)) =
794 maximumU (loopArr (loopWrapper loop s))
798 ------------------------------------------------------------------------
800 -- | The 'mapAccumL' function behaves like a combination of 'map' and
801 -- 'foldl'; it applies a function to each element of a ByteString,
802 -- passing an accumulating parameter from left to right, and returning a
803 -- final value of this accumulator together with the new list.
804 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
805 #if !defined(LOOPU_FUSION)
806 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
808 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
810 {-# INLINE mapAccumL #-}
812 -- | The 'mapAccumR' function behaves like a combination of 'map' and
813 -- 'foldr'; it applies a function to each element of a ByteString,
814 -- passing an accumulating parameter from right to left, and returning a
815 -- final value of this accumulator together with the new ByteString.
816 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
817 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
818 {-# INLINE mapAccumR #-}
820 -- | /O(n)/ map Word8 functions, provided with the index at each position
821 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
822 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
823 {-# INLINE mapIndexed #-}
825 -- ---------------------------------------------------------------------
826 -- Building ByteStrings
828 -- | 'scanl' is similar to 'foldl', but returns a list of successive
829 -- reduced values from the left. This function will fuse.
831 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
835 -- > last (scanl f z xs) == foldl f z xs.
836 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
837 #if !defined(LOOPU_FUSION)
838 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
840 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
843 -- n.b. haskell's List scan returns a list one bigger than the
844 -- input, so we need to snoc here to get some extra space, however,
845 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
848 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
849 -- This function will fuse.
851 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
852 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
855 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
856 {-# INLINE scanl1 #-}
858 -- | scanr is the right-to-left dual of scanl.
859 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
860 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
863 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
864 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
867 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
868 {-# INLINE scanr1 #-}
870 -- ---------------------------------------------------------------------
871 -- Unfolds and replicates
873 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
874 -- the value of every element. The following holds:
876 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
878 -- This implemenation uses @memset(3)@
879 replicate :: Int -> Word8 -> ByteString
882 | otherwise = unsafeCreate w $ \ptr ->
883 memset ptr c (fromIntegral w) >> return ()
885 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
886 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
887 -- ByteString from a seed value. The function takes the element and
888 -- returns 'Nothing' if it is done producing the ByteString or returns
889 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
890 -- and @b@ is the seed value for further production.
894 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
895 -- > == pack [0, 1, 2, 3, 4, 5]
897 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
898 unfoldr f = concat . unfoldChunk 32 64
899 where unfoldChunk n n' x =
900 case unfoldrN n f x of
901 (s, Nothing) -> s : []
902 (s, Just x') -> s : unfoldChunk n' (n+n') x'
904 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
905 -- value. However, the length of the result is limited by the first
906 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
907 -- when the maximum length of the result is known.
909 -- The following equation relates 'unfoldrN' and 'unfoldr':
911 -- > unfoldrN n f s == take n (unfoldr f s)
913 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
915 | i < 0 = (empty, Just x0)
916 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
920 Nothing -> return (0, n, Nothing)
922 | n == i -> return (0, n, Just x)
923 | otherwise -> do poke p w
924 go (p `plusPtr` 1) x' (n+1)
926 -- ---------------------------------------------------------------------
929 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
930 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
931 take :: Int -> ByteString -> ByteString
935 | otherwise = PS x s n
938 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
939 -- elements, or @[]@ if @n > 'length' xs@.
940 drop :: Int -> ByteString -> ByteString
944 | otherwise = PS x (s+n) (l-n)
947 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
948 splitAt :: Int -> ByteString -> (ByteString, ByteString)
949 splitAt n ps@(PS x s l)
950 | n <= 0 = (empty, ps)
951 | n >= l = (ps, empty)
952 | otherwise = (PS x s n, PS x (s+n) (l-n))
953 {-# INLINE splitAt #-}
955 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
956 -- returns the longest prefix (possibly empty) of @xs@ of elements that
958 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
959 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
960 {-# INLINE takeWhile #-}
962 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
963 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
964 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
965 {-# INLINE dropWhile #-}
967 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
968 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
969 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
970 {-# INLINE [1] break #-}
973 "FPS specialise break (x==)" forall x.
974 break ((==) x) = breakByte x
977 #if __GLASGOW_HASKELL__ >= 605
979 "FPS specialise break (==x)" forall x.
980 break (==x) = breakByte x
984 -- | 'breakByte' breaks its ByteString argument at the first occurence
985 -- of the specified byte. It is more efficient than 'break' as it is
986 -- implemented with @memchr(3)@. I.e.
988 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
990 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
991 breakByte c p = case elemIndex c p of
993 Just n -> (unsafeTake n p, unsafeDrop n p)
994 {-# INLINE breakByte #-}
996 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
998 -- breakEnd p == spanEnd (not.p)
999 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1000 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1002 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1003 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1004 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1005 span p ps = break (not . p) ps
1006 {-# INLINE [1] span #-}
1008 -- | 'spanByte' breaks its ByteString argument at the first
1009 -- occurence of a byte other than its argument. It is more efficient
1012 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1014 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1015 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1016 go (p `plusPtr` s) 0
1019 go p i | i >= l = return (ps, empty)
1020 | otherwise = do c' <- peekByteOff p i
1022 then return (unsafeTake i ps, unsafeDrop i ps)
1024 {-# INLINE spanByte #-}
1027 "FPS specialise span (x==)" forall x.
1028 span ((==) x) = spanByte x
1031 #if __GLASGOW_HASKELL__ >= 605
1033 "FPS specialise span (==x)" forall x.
1034 span (==x) = spanByte x
1038 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1041 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1045 -- > spanEnd (not . isSpace) ps
1047 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1049 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1050 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1052 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1053 -- separators, where the predicate returns True for a separator element.
1054 -- The resulting components do not contain the separators. Two adjacent
1055 -- separators result in an empty component in the output. eg.
1057 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1058 -- > splitWith (=='a') [] == []
1060 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1062 #if defined(__GLASGOW_HASKELL__)
1063 splitWith _pred (PS _ _ 0) = []
1064 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1065 where pred# c# = pred_ (W8# c#)
1068 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1069 splitLoop pred' p 0 off' len' fp'
1071 splitLoop :: (Word# -> Bool)
1073 -> Int -> Int -> Int
1077 splitLoop pred' p idx' off' len' fp'
1078 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1079 | idx' >= len' = return [PS fp' off' idx']
1081 w <- peekElemOff p (off'+idx')
1082 if pred' (case w of W8# w# -> w#)
1083 then return (PS fp' off' idx' :
1084 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1085 else splitLoop pred' p (idx'+1) off' len' fp'
1086 {-# INLINE splitWith #-}
1089 splitWith _ (PS _ _ 0) = []
1090 splitWith p ps = loop p ps
1093 loop q qs = if null rest then [chunk]
1094 else chunk : loop q (unsafeTail rest)
1095 where (chunk,rest) = break q qs
1098 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1099 -- argument, consuming the delimiter. I.e.
1101 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1102 -- > split 'a' "aXaXaXa" == ["","X","X","X",""]
1103 -- > split 'x' "x" == ["",""]
1107 -- > join [c] . split c == id
1108 -- > split == splitWith . (==)
1110 -- As for all splitting functions in this library, this function does
1111 -- not copy the substrings, it just constructs new 'ByteStrings' that
1112 -- are slices of the original.
1114 split :: Word8 -> ByteString -> [ByteString]
1115 split _ (PS _ _ 0) = []
1116 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1117 let ptr = p `plusPtr` s
1121 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1122 w (fromIntegral (l-n))
1124 then [PS x (s+n) (l-n)]
1125 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1128 {-# INLINE split #-}
1131 -- slower. but stays inside Haskell.
1132 split _ (PS _ _ 0) = []
1133 split (W8# w#) (PS fp off len) = splitWith' off len fp
1135 splitWith' off' len' fp' = withPtr fp $ \p ->
1136 splitLoop p 0 off' len' fp'
1138 splitLoop :: Ptr Word8
1139 -> Int -> Int -> Int
1144 splitLoop p idx' off' len' fp'
1145 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1146 | idx' >= len' = return [PS fp' off' idx']
1148 (W8# x#) <- peekElemOff p (off'+idx')
1149 if word2Int# w# ==# word2Int# x#
1150 then return (PS fp' off' idx' :
1151 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1152 else splitLoop p (idx'+1) off' len' fp'
1156 -- | Like 'splitWith', except that sequences of adjacent separators are
1157 -- treated as a single separator. eg.
1159 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1161 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1162 tokens f = P.filter (not.null) . splitWith f
1163 {-# INLINE tokens #-}
1166 -- | The 'group' function takes a ByteString and returns a list of
1167 -- ByteStrings such that the concatenation of the result is equal to the
1168 -- argument. Moreover, each sublist in the result contains only equal
1169 -- elements. For example,
1171 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1173 -- It is a special case of 'groupBy', which allows the programmer to
1174 -- supply their own equality test. It is about 40% faster than
1176 group :: ByteString -> [ByteString]
1179 | otherwise = ys : group zs
1181 (ys, zs) = spanByte (unsafeHead xs) xs
1183 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1184 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1187 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1189 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1191 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1192 -- 'ByteString's and concatenates the list after interspersing the first
1193 -- argument between each element of the list.
1194 join :: ByteString -> [ByteString] -> ByteString
1195 join s = concat . (List.intersperse s)
1196 {-# INLINE [1] join #-}
1199 "FPS specialise join c -> joinByte" forall c s1 s2 .
1200 join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1204 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1205 -- with a char. Around 4 times faster than the generalised join.
1207 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1208 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1209 withForeignPtr ffp $ \fp ->
1210 withForeignPtr fgp $ \gp -> do
1211 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1212 poke (ptr `plusPtr` l) c
1213 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1215 len = length f + length g + 1
1216 {-# INLINE joinWithByte #-}
1218 -- ---------------------------------------------------------------------
1219 -- Indexing ByteStrings
1221 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1222 index :: ByteString -> Int -> Word8
1224 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1225 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1226 ++ ", length = " ++ show (length ps))
1227 | otherwise = ps `unsafeIndex` n
1228 {-# INLINE index #-}
1230 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1231 -- element in the given 'ByteString' which is equal to the query
1232 -- element, or 'Nothing' if there is no such element.
1233 -- This implementation uses memchr(3).
1234 elemIndex :: Word8 -> ByteString -> Maybe Int
1235 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1236 let p' = p `plusPtr` s
1237 q <- memchr p' c (fromIntegral l)
1238 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1239 {-# INLINE elemIndex #-}
1241 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1242 -- element in the given 'ByteString' which is equal to the query
1243 -- element, or 'Nothing' if there is no such element. The following
1246 -- > elemIndexEnd c xs ==
1247 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1249 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1250 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1251 go (p `plusPtr` s) (l-1)
1254 go p i | i < 0 = return Nothing
1255 | otherwise = do ch' <- peekByteOff p i
1257 then return $ Just i
1259 {-# INLINE elemIndexEnd #-}
1261 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1262 -- the indices of all elements equal to the query element, in ascending order.
1263 -- This implementation uses memchr(3).
1264 elemIndices :: Word8 -> ByteString -> [Int]
1265 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1266 let ptr = p `plusPtr` s
1269 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1270 w (fromIntegral (l - n))
1273 else let i = q `minusPtr` ptr
1276 {-# INLINE elemIndices #-}
1280 elemIndices :: Word8 -> ByteString -> [Int]
1281 elemIndices c ps = loop 0 ps
1283 loop _ ps' | null ps' = []
1284 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1285 | otherwise = loop (n+1) (unsafeTail ps')
1288 -- | count returns the number of times its argument appears in the ByteString
1290 -- > count = length . elemIndices
1292 -- But more efficiently than using length on the intermediate list.
1293 count :: Word8 -> ByteString -> Int
1294 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1295 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1296 {-# INLINE count #-}
1300 -- around 30% slower
1302 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1303 go (p `plusPtr` s) (fromIntegral m) 0
1305 go :: Ptr Word8 -> CSize -> Int -> IO Int
1311 else do let k = fromIntegral $ q `minusPtr` p
1312 go (q `plusPtr` 1) (l-k-1) (i+1)
1315 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1316 -- returns the index of the first element in the ByteString
1317 -- satisfying the predicate.
1318 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1319 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1322 go ptr n | n >= l = return Nothing
1323 | otherwise = do w <- peek ptr
1325 then return (Just n)
1326 else go (ptr `plusPtr` 1) (n+1)
1327 {-# INLINE findIndex #-}
1329 -- | The 'findIndices' function extends 'findIndex', by returning the
1330 -- indices of all elements satisfying the predicate, in ascending order.
1331 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1332 findIndices p ps = loop 0 ps
1335 loop n qs | null qs = []
1336 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1337 | otherwise = loop (n+1) (unsafeTail qs)
1339 -- ---------------------------------------------------------------------
1340 -- Searching ByteStrings
1342 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1343 elem :: Word8 -> ByteString -> Bool
1344 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1347 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1348 notElem :: Word8 -> ByteString -> Bool
1349 notElem c ps = not (elem c ps)
1350 {-# INLINE notElem #-}
1352 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1353 -- returns a ByteString containing those characters that satisfy the
1354 -- predicate. This function is subject to array fusion.
1355 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1356 #if defined(LOOPU_FUSION)
1357 filter p = loopArr . loopU (filterEFL p) NoAcc
1358 #elif defined(LOOPUP_FUSION)
1359 filter p = loopArr . loopUp (filterEFL p) NoAcc
1360 #elif defined(LOOPNOACC_FUSION)
1361 filter p = loopArr . loopNoAcc (filterEFL p)
1363 filter f = loopArr . loopFilter f
1365 {-# INLINE filter #-}
1368 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1369 -- around 2x faster for some one-shot applications.
1370 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1371 filter' k ps@(PS x s l)
1373 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1374 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1375 return $! t `minusPtr` p -- actual length
1378 go f t end | f == end = return t
1382 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1383 else go (f `plusPtr` 1) t end
1384 {-# INLINE filter' #-}
1388 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1389 -- case of filtering a single byte. It is more efficient to use
1390 -- /filterByte/ in this case.
1392 -- > filterByte == filter . (==)
1394 -- filterByte is around 10x faster, and uses much less space, than its
1395 -- filter equivalent
1396 filterByte :: Word8 -> ByteString -> ByteString
1397 filterByte w ps = replicate (count w ps) w
1398 {-# INLINE filterByte #-}
1401 "FPS specialise filter (== x)" forall x.
1402 filter ((==) x) = filterByte x
1405 #if __GLASGOW_HASKELL__ >= 605
1407 "FPS specialise filter (== x)" forall x.
1408 filter (== x) = filterByte x
1413 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1414 -- case of filtering a single byte out of a list. It is more efficient
1415 -- to use /filterNotByte/ in this case.
1417 -- > filterNotByte == filter . (/=)
1419 -- filterNotByte is around 2x faster than its filter equivalent.
1420 filterNotByte :: Word8 -> ByteString -> ByteString
1421 filterNotByte w = filter (/= w)
1422 {-# INLINE filterNotByte #-}
1425 "FPS specialise filter (x /=)" forall x.
1426 filter ((/=) x) = filterNotByte x
1429 #if __GLASGOW_HASKELL__ >= 605
1431 "FPS specialise filter (/= x)" forall x.
1432 filter (/= x) = filterNotByte x
1436 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1437 -- and returns the first element in matching the predicate, or 'Nothing'
1438 -- if there is no such element.
1440 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1442 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1443 find f p = case findIndex f p of
1444 Just n -> Just (p `unsafeIndex` n)
1450 -- fuseable, but we don't want to walk the whole array.
1452 find k = foldl findEFL Nothing
1453 where findEFL a@(Just _) _ = a
1454 findEFL _ c | k c = Just c
1455 | otherwise = Nothing
1458 -- ---------------------------------------------------------------------
1459 -- Searching for substrings
1461 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1462 -- iff the first is a prefix of the second.
1463 isPrefixOf :: ByteString -> ByteString -> Bool
1464 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1467 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1468 withForeignPtr x2 $ \p2 -> do
1469 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1472 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1473 -- iff the first is a suffix of the second.
1475 -- The following holds:
1477 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1479 -- However, the real implemenation uses memcmp to compare the end of the
1480 -- string only, with no reverse required..
1481 isSuffixOf :: ByteString -> ByteString -> Bool
1482 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1485 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1486 withForeignPtr x2 $ \p2 -> do
1487 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1490 -- | Check whether one string is a substring of another. @isSubstringOf
1491 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1492 isSubstringOf :: ByteString -- ^ String to search for.
1493 -> ByteString -- ^ String to search in.
1495 isSubstringOf p s = not $ P.null $ findSubstrings p s
1497 -- | Get the first index of a substring in another string,
1498 -- or 'Nothing' if the string is not found.
1499 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1500 findSubstring :: ByteString -- ^ String to search for.
1501 -> ByteString -- ^ String to seach in.
1503 findSubstring = (listToMaybe .) . findSubstrings
1505 -- | Find the indexes of all (possibly overlapping) occurances of a
1506 -- substring in a string. This function uses the Knuth-Morris-Pratt
1507 -- string matching algorithm.
1508 findSubstrings :: ByteString -- ^ String to search for.
1509 -> ByteString -- ^ String to seach in.
1512 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1514 patc x = pat `unsafeIndex` x
1515 strc x = str `unsafeIndex` x
1517 -- maybe we should make kmpNext a UArray before using it in search?
1518 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1519 kmpNextL p _ | null p = []
1520 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1522 x = if not (null ps) && unsafeHead ps == patc j'
1523 then kmpNext Array.! j' else j'
1525 search i j = match ++ rest -- i: position in string, j: position in pattern
1526 where match = if j == m then [(i - j)] else []
1527 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1528 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1531 -- ---------------------------------------------------------------------
1534 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1535 -- corresponding pairs of bytes. If one input ByteString is short,
1536 -- excess elements of the longer ByteString are discarded. This is
1537 -- equivalent to a pair of 'unpack' operations.
1538 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1540 | null ps || null qs = []
1541 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1543 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1544 -- the first argument, instead of a tupling function. For example,
1545 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1546 -- corresponding sums.
1547 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1549 | null ps || null qs = []
1550 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1551 #if defined(__GLASGOW_HASKELL__)
1552 {-# INLINE [1] zipWith #-}
1556 -- | A specialised version of zipWith for the common case of a
1557 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1558 -- are used to automatically covert zipWith into zipWith' when a pack is
1559 -- performed on the result of zipWith, but we also export it for
1562 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1563 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1564 withForeignPtr fp $ \a ->
1565 withForeignPtr fq $ \b ->
1566 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1568 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1571 | n >= len = return ()
1573 x <- peekByteOff p1 n
1574 y <- peekByteOff p2 n
1575 pokeByteOff r n (f x y)
1576 zipWith_ (n+1) p1 p2 r
1579 {-# INLINE zipWith' #-}
1583 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1584 zipWith f p q = unpack (zipWith' f p q)
1588 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1589 -- ByteStrings. Note that this performs two 'pack' operations.
1590 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1591 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1592 {-# INLINE unzip #-}
1594 -- ---------------------------------------------------------------------
1597 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1598 inits :: ByteString -> [ByteString]
1599 inits (PS x s l) = [PS x s n | n <- [0..l]]
1601 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1602 tails :: ByteString -> [ByteString]
1603 tails p | null p = [empty]
1604 | otherwise = p : tails (unsafeTail p)
1606 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1608 -- ---------------------------------------------------------------------
1609 -- ** Ordered 'ByteString's
1611 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1612 sort :: ByteString -> ByteString
1613 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1615 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1616 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1619 go 256 _ = return ()
1620 go i ptr = do n <- peekElemOff arr i
1621 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1622 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1626 sort :: ByteString -> ByteString
1627 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1628 memcpy p (f `plusPtr` s) l
1629 c_qsort p l -- inplace
1632 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1634 -- Try some linear sorts: radix, counting
1637 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1638 -- sortBy f ps = undefined
1640 -- ---------------------------------------------------------------------
1641 -- Low level constructors
1643 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1644 -- finalizer associated to it. The ByteString length is calculated using
1645 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1646 packCString :: CString -> ByteString
1647 packCString cstr = unsafePerformIO $ do
1648 fp <- newForeignPtr_ (castPtr cstr)
1650 return $! PS fp 0 (fromIntegral l)
1652 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1653 -- have /no/ finalizer associated with it. This operation has /O(1)/
1654 -- complexity as we already know the final size, so no /strlen(3)/ is
1656 packCStringLen :: CStringLen -> ByteString
1657 packCStringLen (ptr,len) = unsafePerformIO $ do
1658 fp <- newForeignPtr_ (castPtr ptr)
1659 return $! PS fp 0 (fromIntegral len)
1661 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1662 -- have a @free(3)@ finalizer associated to it.
1663 packMallocCString :: CString -> ByteString
1664 packMallocCString cstr = unsafePerformIO $ do
1665 fp <- newForeignFreePtr (castPtr cstr)
1666 len <- c_strlen cstr
1667 return $! PS fp 0 (fromIntegral len)
1669 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1670 -- null-terminated @CString@. The @CString@ will be freed
1671 -- automatically. This is a memcpy(3).
1672 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1673 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1674 where alloc = withForeignPtr ps $ \p -> do
1675 buf <- c_malloc (fromIntegral l+1)
1676 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1677 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1678 return (castPtr buf)
1680 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1681 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1682 useAsCStringLen = unsafeUseAsCStringLen
1685 -- why were we doing this?
1687 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1688 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1690 -- alloc = withForeignPtr ps $ \p -> do
1691 -- buf <- c_malloc (fromIntegral l+1)
1692 -- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1693 -- poke (buf `plusPtr` l) (0::Word8) -- n.b.
1694 -- return $! (castPtr buf, l)
1697 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1698 -- This is mainly useful to allow the rest of the data pointed
1699 -- to by the 'ByteString' to be garbage collected, for example
1700 -- if a large string has been read in, and only a small part of it
1701 -- is needed in the rest of the program.
1702 copy :: ByteString -> ByteString
1703 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1704 memcpy p (f `plusPtr` s) (fromIntegral l)
1706 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1707 -- CString is going to be deallocated from C land.
1708 copyCString :: CString -> IO ByteString
1709 copyCString cstr = do
1710 len <- c_strlen cstr
1711 copyCStringLen (cstr, fromIntegral len)
1713 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1714 copyCStringLen :: CStringLen -> IO ByteString
1715 copyCStringLen (cstr, len) = create len $ \p ->
1716 memcpy p (castPtr cstr) (fromIntegral len)
1718 -- ---------------------------------------------------------------------
1721 -- | Read a line from stdin.
1722 getLine :: IO ByteString
1723 getLine = hGetLine stdin
1726 -- | Lazily construct a list of lines of ByteStrings. This will be much
1727 -- better on memory consumption than using 'hGetContents >>= lines'
1728 -- If you're considering this, a better choice might be to use
1729 -- Data.ByteString.Lazy
1730 hGetLines :: Handle -> IO [ByteString]
1733 go = unsafeInterleaveIO $ do
1743 -- | Read a line from a handle
1745 hGetLine :: Handle -> IO ByteString
1746 #if !defined(__GLASGOW_HASKELL__)
1747 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
1749 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1750 case haBufferMode handle_ of
1751 NoBuffering -> error "no buffering"
1752 _other -> hGetLineBuffered handle_
1755 hGetLineBuffered handle_ = do
1756 let ref = haBuffer handle_
1757 buf <- readIORef ref
1758 hGetLineBufferedLoop handle_ ref buf 0 []
1760 hGetLineBufferedLoop handle_ ref
1761 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1763 off <- findEOL r w raw
1764 let new_len = len + off - r
1765 xs <- mkPS raw r off
1767 -- if eol == True, then off is the offset of the '\n'
1768 -- otherwise off == w and the buffer is now empty.
1770 then do if (w == off + 1)
1771 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1772 else writeIORef ref buf{ bufRPtr = off + 1 }
1773 mkBigPS new_len (xs:xss)
1775 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1776 buf{ bufWPtr=0, bufRPtr=0 }
1778 -- Nothing indicates we caught an EOF, and we may have a
1779 -- partial line to return.
1781 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1783 then mkBigPS new_len (xs:xss)
1786 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1788 -- find the end-of-line character, if there is one
1792 (c,r') <- readCharFromBuffer raw r
1794 then return r -- NB. not r': don't include the '\n'
1795 else findEOL r' w raw
1797 maybeFillReadBuffer fd is_line is_stream buf = catch
1798 (do buf' <- fillReadBuffer fd is_line is_stream buf
1800 (\e -> if isEOFError e then return Nothing else ioError e)
1802 -- TODO, rewrite to use normal memcpy
1803 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1804 mkPS buf start end =
1805 let len = end - start
1806 in create len $ \p -> do
1807 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1810 mkBigPS :: Int -> [ByteString] -> IO ByteString
1811 mkBigPS _ [ps] = return ps
1812 mkBigPS _ pss = return $! concat (P.reverse pss)
1816 -- ---------------------------------------------------------------------
1819 -- | Outputs a 'ByteString' to the specified 'Handle'.
1820 hPut :: Handle -> ByteString -> IO ()
1821 hPut _ (PS _ _ 0) = return ()
1822 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1824 -- | A synonym for @hPut@, for compatibility
1825 hPutStr :: Handle -> ByteString -> IO ()
1828 -- | Write a ByteString to a handle, appending a newline byte
1829 hPutStrLn :: Handle -> ByteString -> IO ()
1831 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1832 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1834 -- | Write a ByteString to stdout
1835 putStr :: ByteString -> IO ()
1836 putStr = hPut stdout
1838 -- | Write a ByteString to stdout, appending a newline byte
1839 putStrLn :: ByteString -> IO ()
1840 putStrLn = hPutStrLn stdout
1842 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1843 -- is far more efficient than reading the characters into a 'String'
1844 -- and then using 'pack'.
1845 hGet :: Handle -> Int -> IO ByteString
1846 hGet _ 0 = return empty
1847 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1849 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1850 -- waiting for data to become available, instead it returns only whatever data
1852 hGetNonBlocking :: Handle -> Int -> IO ByteString
1853 #if defined(__GLASGOW_HASKELL__)
1854 hGetNonBlocking _ 0 = return empty
1855 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1857 hGetNonBlocking = hGet
1860 -- | Read entire handle contents into a 'ByteString'.
1861 -- This function reads chunks at a time, doubling the chunksize on each
1862 -- read. The final buffer is then realloced to the appropriate size. For
1863 -- files > half of available memory, this may lead to memory exhaustion.
1864 -- Consider using 'readFile' in this case.
1866 -- As with 'hGet', the string representation in the file is assumed to
1869 hGetContents :: Handle -> IO ByteString
1871 let start_size = 1024
1872 p <- mallocArray start_size
1873 i <- hGetBuf h p start_size
1875 then do p' <- reallocArray p i
1876 fp <- newForeignFreePtr p'
1882 p' <- reallocArray p s'
1883 i <- hGetBuf h (p' `plusPtr` s) s
1885 then do let i' = s + i
1886 p'' <- reallocArray p' i'
1887 fp <- newForeignFreePtr p''
1888 return $! PS fp 0 i'
1891 -- | getContents. Equivalent to hGetContents stdin
1892 getContents :: IO ByteString
1893 getContents = hGetContents stdin
1895 -- | The interact function takes a function of type @ByteString -> ByteString@
1896 -- as its argument. The entire input from the standard input device is passed
1897 -- to this function as its argument, and the resulting string is output on the
1898 -- standard output device. It's great for writing one line programs!
1899 interact :: (ByteString -> ByteString) -> IO ()
1900 interact transformer = putStr . transformer =<< getContents
1902 -- | Read an entire file strictly into a 'ByteString'. This is far more
1903 -- efficient than reading the characters into a 'String' and then using
1904 -- 'pack'. It also may be more efficient than opening the file and
1905 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1906 -- for 'text mode' use the Char8 version of this function.
1907 readFile :: FilePath -> IO ByteString
1908 readFile f = bracket (openBinaryFile f ReadMode) hClose
1909 (\h -> hFileSize h >>= hGet h . fromIntegral)
1911 -- | Write a 'ByteString' to a file.
1912 writeFile :: FilePath -> ByteString -> IO ()
1913 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1916 -- | Append a 'ByteString' to a file.
1917 appendFile :: FilePath -> ByteString -> IO ()
1918 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1923 -- Disable until we can move it into a portable .hsc file
1926 -- | Like readFile, this reads an entire file directly into a
1927 -- 'ByteString', but it is even more efficient. It involves directly
1928 -- mapping the file to memory. This has the advantage that the contents
1929 -- of the file never need to be copied. Also, under memory pressure the
1930 -- page may simply be discarded, while in the case of readFile it would
1931 -- need to be written to swap. If you read many small files, mmapFile
1932 -- will be less memory-efficient than readFile, since each mmapFile
1933 -- takes up a separate page of memory. Also, you can run into bus
1934 -- errors if the file is modified. As with 'readFile', the string
1935 -- representation in the file is assumed to be ISO-8859-1.
1937 -- On systems without mmap, this is the same as a readFile.
1939 mmapFile :: FilePath -> IO ByteString
1940 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1942 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1944 h <- openBinaryFile f ReadMode
1945 l <- fromIntegral `fmap` hFileSize h
1946 -- Don't bother mmaping small files because each mmapped file takes up
1947 -- at least one full VM block.
1949 then do thefp <- mallocByteString l
1950 withForeignPtr thefp $ \p-> hGetBuf h p l
1955 fd <- fromIntegral `fmap` handleToFd h
1957 fp <- if p == nullPtr
1958 then do thefp <- mallocByteString l
1959 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1962 -- The munmap leads to crashes on OpenBSD.
1963 -- maybe there's a use after unmap in there somewhere?
1964 -- Bulat suggests adding the hClose to the
1965 -- finalizer, excellent idea.
1966 #if !defined(__OpenBSD__)
1967 let unmap = c_munmap p l >> return ()
1969 let unmap = return ()
1971 fp <- newForeignPtr p unmap
1976 where mmap_limit = 16*1024
1979 -- ---------------------------------------------------------------------
1980 -- Internal utilities
1982 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1983 -- of the string if no element is found, rather than Nothing.
1984 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1985 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1988 go ptr n | n >= l = return l
1989 | otherwise = do w <- peek ptr
1992 else go (ptr `plusPtr` 1) (n+1)
1993 {-# INLINE findIndexOrEnd #-}
1995 -- | Perform an operation with a temporary ByteString
1996 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1997 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1998 {-# INLINE withPtr #-}
2000 -- Common up near identical calls to `error' to reduce the number
2001 -- constant strings created when compiled:
2002 errorEmptyList :: String -> a
2003 errorEmptyList fun = moduleError fun "empty ByteString"
2004 {-# NOINLINE errorEmptyList #-}
2006 moduleError :: String -> String -> a
2007 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2008 {-# NOINLINE moduleError #-}
2010 -- Find from the end of the string using predicate
2011 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2012 STRICT2(findFromEndUntil)
2013 findFromEndUntil f ps@(PS x s l) =
2015 else if f (last ps) then l
2016 else findFromEndUntil f (PS x s (l-1))
2018 {-# INLINE newForeignFreePtr #-}
2019 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2020 newForeignFreePtr p = newForeignPtr c_free_finalizer p