1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 singleton, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
51 cons, -- :: Word8 -> ByteString -> ByteString
52 snoc, -- :: ByteString -> Word8 -> ByteString
53 append, -- :: ByteString -> ByteString -> ByteString
54 head, -- :: ByteString -> Word8
55 last, -- :: ByteString -> Word8
56 tail, -- :: ByteString -> ByteString
57 init, -- :: ByteString -> ByteString
58 null, -- :: ByteString -> Bool
59 length, -- :: ByteString -> Int
61 -- * Transformating ByteStrings
62 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
63 reverse, -- :: ByteString -> ByteString
64 intersperse, -- :: Word8 -> ByteString -> ByteString
65 transpose, -- :: [ByteString] -> [ByteString]
67 -- * Reducing 'ByteString's (folds)
68 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
69 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
70 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
71 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
74 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
75 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 concat, -- :: [ByteString] -> ByteString
80 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
81 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
82 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 maximum, -- :: ByteString -> Word8
84 minimum, -- :: ByteString -> Word8
86 -- * Building ByteStrings
88 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
89 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
90 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
93 -- ** Accumulating maps
94 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
95 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
98 -- ** Unfolding ByteStrings
99 replicate, -- :: Int -> Word8 -> ByteString
100 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
101 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
105 -- ** Breaking strings
106 take, -- :: Int -> ByteString -> ByteString
107 drop, -- :: Int -> ByteString -> ByteString
108 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
109 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
113 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
114 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
117 inits, -- :: ByteString -> [ByteString]
118 tails, -- :: ByteString -> [ByteString]
120 -- ** Breaking into many substrings
121 split, -- :: Word8 -> ByteString -> [ByteString]
122 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
124 -- ** Joining strings
125 join, -- :: ByteString -> [ByteString] -> ByteString
128 isPrefixOf, -- :: ByteString -> ByteString -> Bool
129 isSuffixOf, -- :: ByteString -> ByteString -> Bool
131 -- ** Search for arbitrary substrings
132 isSubstringOf, -- :: ByteString -> ByteString -> Bool
133 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
134 findSubstrings, -- :: ByteString -> ByteString -> [Int]
136 -- * Searching ByteStrings
138 -- ** Searching by equality
139 -- | These functions use memchr(3) to efficiently search the ByteString
140 elem, -- :: Word8 -> ByteString -> Bool
141 notElem, -- :: Word8 -> ByteString -> Bool
143 -- ** Searching with a predicate
144 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
148 -- * Indexing ByteStrings
149 index, -- :: ByteString -> Int -> Word8
150 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
151 elemIndices, -- :: Word8 -> ByteString -> [Int]
152 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
153 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
154 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
155 count, -- :: Word8 -> ByteString -> Int
157 -- * Zipping and unzipping ByteStrings
158 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
159 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
160 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
162 -- * Ordered ByteStrings
163 sort, -- :: ByteString -> ByteString
165 -- * Low level CString conversions
167 -- ** Packing CStrings and pointers
168 packCString, -- :: CString -> ByteString
169 packCStringLen, -- :: CString -> ByteString
170 packMallocCString, -- :: CString -> ByteString
172 -- ** Using ByteStrings as CStrings
173 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
174 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
176 -- ** Copying ByteStrings
177 -- | These functions perform memcpy(3) operations
178 copy, -- :: ByteString -> ByteString
179 copyCString, -- :: CString -> IO ByteString
180 copyCStringLen, -- :: CStringLen -> IO ByteString
182 -- * I\/O with 'ByteString's
184 -- ** Standard input and output
185 getLine, -- :: IO ByteString
186 getContents, -- :: IO ByteString
187 putStr, -- :: ByteString -> IO ()
188 putStrLn, -- :: ByteString -> IO ()
189 interact, -- :: (ByteString -> ByteString) -> IO ()
192 readFile, -- :: FilePath -> IO ByteString
193 writeFile, -- :: FilePath -> ByteString -> IO ()
194 appendFile, -- :: FilePath -> ByteString -> IO ()
195 -- mmapFile, -- :: FilePath -> IO ByteString
197 -- ** I\/O with Handles
198 hGetLine, -- :: Handle -> IO ByteString
199 hGetContents, -- :: Handle -> IO ByteString
200 hGet, -- :: Handle -> Int -> IO ByteString
201 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
202 hPut, -- :: Handle -> ByteString -> IO ()
203 hPutStr, -- :: Handle -> ByteString -> IO ()
204 hPutStrLn, -- :: Handle -> ByteString -> IO ()
206 #if defined(__GLASGOW_HASKELL__)
207 -- * Fusion utilities
208 unpackList, -- eek, otherwise it gets thrown away by the simplifier
209 lengthU, maximumU, minimumU
214 import qualified Prelude as P
215 import Prelude hiding (reverse,head,tail,last,init,null
216 ,length,map,lines,foldl,foldr,unlines
217 ,concat,any,take,drop,splitAt,takeWhile
218 ,dropWhile,span,break,elem,filter,maximum
219 ,minimum,all,concatMap,foldl1,foldr1
220 ,scanl,scanl1,scanr,scanr1
221 ,readFile,writeFile,appendFile,replicate
222 ,getContents,getLine,putStr,putStrLn,interact
223 ,zip,zipWith,unzip,notElem)
225 import Data.ByteString.Base
226 import Data.ByteString.Fusion
228 import qualified Data.List as List
230 import Data.Word (Word8)
231 import Data.Maybe (listToMaybe)
232 import Data.Array (listArray)
233 import qualified Data.Array as Array ((!))
235 -- Control.Exception.bracket not available in yhc or nhc
236 import Control.Exception (bracket, assert)
237 import qualified Control.Exception as Exception
238 import Control.Monad (when)
240 import Foreign.C.String (CString, CStringLen)
241 import Foreign.C.Types (CSize)
242 import Foreign.ForeignPtr
243 import Foreign.Marshal.Array
245 import Foreign.Storable (Storable(..))
247 -- hGetBuf and hPutBuf not available in yhc or nhc
248 import System.IO (stdin,stdout,hClose,hFileSize
249 ,hGetBuf,hPutBuf,openBinaryFile
252 import Data.Monoid (Monoid, mempty, mappend, mconcat)
254 #if !defined(__GLASGOW_HASKELL__)
255 import System.IO.Unsafe
256 import qualified System.Environment
257 import qualified System.IO (hGetLine)
260 #if defined(__GLASGOW_HASKELL__)
262 import System.IO (hGetBufNonBlocking)
263 import System.IO.Error (isEOFError)
266 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
267 import GHC.Base (build)
268 import GHC.Word hiding (Word8)
269 import GHC.Ptr (Ptr(..))
270 import GHC.ST (ST(..))
275 -- -----------------------------------------------------------------------------
277 -- Useful macros, until we have bang patterns
280 #define STRICT1(f) f a | a `seq` False = undefined
281 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
282 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
283 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
284 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
286 -- -----------------------------------------------------------------------------
288 instance Eq ByteString
291 instance Ord ByteString
292 where compare = compareBytes
294 instance Monoid ByteString where
300 instance Arbitrary PackedString where
301 arbitrary = P.pack `fmap` arbitrary
302 coarbitrary s = coarbitrary (P.unpack s)
305 -- | /O(n)/ Equality on the 'ByteString' type.
306 eq :: ByteString -> ByteString -> Bool
307 eq a@(PS p s l) b@(PS p' s' l')
308 | l /= l' = False -- short cut on length
309 | p == p' && s == s' = True -- short cut for the same string
310 | otherwise = compareBytes a b == EQ
313 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
314 compareBytes :: ByteString -> ByteString -> Ordering
315 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
316 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
317 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
318 | otherwise = inlinePerformIO $
319 withForeignPtr x1 $ \p1 ->
320 withForeignPtr x2 $ \p2 -> do
321 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
322 return $! case i `compare` 0 of
323 EQ -> l1 `compare` l2
325 {-# INLINE compareBytes #-}
329 -- About 4x slower over 32M
331 compareBytes :: ByteString -> ByteString -> Ordering
332 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
333 withForeignPtr fp1 $ \p1 ->
334 withForeignPtr fp2 $ \p2 ->
335 cmp (p1 `plusPtr` off1)
336 (p2 `plusPtr` off2) 0 len1 len2
338 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
340 cmp p1 p2 n len1 len2
341 | n == len1 = if n == len2 then return EQ else return LT
342 | n == len2 = return GT
344 (a :: Word8) <- peekByteOff p1 n
345 (b :: Word8) <- peekByteOff p2 n
346 case a `compare` b of
347 EQ -> cmp p1 p2 (n+1) len1 len2
350 {-# INLINE compareBytes #-}
353 -- -----------------------------------------------------------------------------
354 -- Introducing and eliminating 'ByteString's
356 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
357 singleton :: Word8 -> ByteString
358 singleton c = unsafeCreate 1 $ \p -> poke p c
359 {-# INLINE [1] singleton #-}
362 -- XXX The unsafePerformIO is critical!
366 -- singleton 255 `compare` singleton 127
370 -- case mallocByteString 2 of
371 -- ForeignPtr f internals ->
372 -- case writeWord8OffAddr# f 0 255 of _ ->
373 -- case writeWord8OffAddr# f 0 127 of _ ->
374 -- case eqAddr# f f of
375 -- False -> case compare (GHC.Prim.plusAddr# f 0)
376 -- (GHC.Prim.plusAddr# f 0)
380 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
382 -- For applications with large numbers of string literals, pack can be a
383 -- bottleneck. In such cases, consider using packAddress (GHC only).
384 pack :: [Word8] -> ByteString
386 #if !defined(__GLASGOW_HASKELL__)
388 pack str = unsafeCreate (P.length str) $ \p -> go p str
391 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
393 #else /* hack away */
395 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
397 go _ _ [] = return ()
398 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
400 writeByte p i c = ST $ \s# ->
401 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
405 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
406 unpack :: ByteString -> [Word8]
408 #if !defined(__GLASGOW_HASKELL__)
410 unpack (PS _ _ 0) = []
411 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
412 go (p `plusPtr` s) (l - 1) []
415 go p 0 acc = peek p >>= \e -> return (e : acc)
416 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
417 {-# INLINE unpack #-}
421 unpack ps = build (unpackFoldr ps)
422 {-# INLINE unpack #-}
425 -- critical this isn't strict in the acc
426 -- as it will break in the presence of list fusion. this is a known
427 -- issue with seq and build/foldr rewrite rules, which rely on lazy
428 -- demanding to avoid bottoms in the list.
430 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
431 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
432 let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
433 loop _ (-1) acc = return acc
436 loop q (n-1) (a `f` acc)
437 loop (p `plusPtr` off) (len-1) ch
438 {-# INLINE [0] unpackFoldr #-}
440 unpackList :: ByteString -> [Word8]
441 unpackList (PS fp off len) = withPtr fp $ \p -> do
443 loop _ (-1) acc = return acc
446 loop q (n-1) (a : acc)
447 loop (p `plusPtr` off) (len-1) []
450 "FPS unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
455 -- ---------------------------------------------------------------------
458 -- | /O(1)/ Test whether a ByteString is empty.
459 null :: ByteString -> Bool
460 null (PS _ _ l) = assert (l >= 0) $ l <= 0
463 -- ---------------------------------------------------------------------
464 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
465 length :: ByteString -> Int
466 length (PS _ _ l) = assert (l >= 0) $ l
469 -- length/loop fusion. When taking the length of any fuseable loop,
470 -- rewrite it as a foldl', and thus avoid allocating the result buffer
471 -- worth around 10% in speed testing.
474 #if defined(__GLASGOW_HASKELL__)
475 {-# INLINE [1] length #-}
478 lengthU :: ByteString -> Int
479 lengthU = foldl' (const . (+1)) (0::Int)
480 {-# INLINE lengthU #-}
485 "FPS length/loop" forall loop s .
486 length (loopArr (loopWrapper loop s)) =
487 lengthU (loopArr (loopWrapper loop s))
491 ------------------------------------------------------------------------
493 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
494 -- complexity, as it requires a memcpy.
495 cons :: Word8 -> ByteString -> ByteString
496 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
498 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
501 -- | /O(n)/ Append a byte to the end of a 'ByteString'
502 snoc :: ByteString -> Word8 -> ByteString
503 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
504 memcpy p (f `plusPtr` s) (fromIntegral l)
505 poke (p `plusPtr` l) c
510 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
511 -- An exception will be thrown in the case of an empty ByteString.
512 head :: ByteString -> Word8
514 | l <= 0 = errorEmptyList "head"
515 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
518 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
519 -- An exception will be thrown in the case of an empty ByteString.
520 tail :: ByteString -> ByteString
522 | l <= 0 = errorEmptyList "tail"
523 | otherwise = PS p (s+1) (l-1)
526 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
527 -- An exception will be thrown in the case of an empty ByteString.
528 last :: ByteString -> Word8
530 | null ps = errorEmptyList "last"
531 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
534 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
535 -- An exception will be thrown in the case of an empty ByteString.
536 init :: ByteString -> ByteString
538 | null ps = errorEmptyList "init"
539 | otherwise = PS p s (l-1)
542 -- | /O(n)/ Append two ByteStrings
543 append :: ByteString -> ByteString -> ByteString
544 append xs ys | null xs = ys
546 | otherwise = concat [xs,ys]
547 {-# INLINE append #-}
549 -- ---------------------------------------------------------------------
552 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
553 -- element of @xs@. This function is subject to array fusion.
554 map :: (Word8 -> Word8) -> ByteString -> ByteString
555 #if defined(LOOPU_FUSION)
556 map f = loopArr . loopU (mapEFL f) NoAcc
557 #elif defined(LOOPUP_FUSION)
558 map f = loopArr . loopUp (mapEFL f) NoAcc
559 #elif defined(LOOPNOACC_FUSION)
560 map f = loopArr . loopNoAcc (mapEFL f)
562 map f = loopArr . loopMap f
567 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
568 -- slightly faster for one-shot cases.
569 map' :: (Word8 -> Word8) -> ByteString -> ByteString
570 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
571 create len $ map_ 0 (a `plusPtr` s)
573 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
576 | n >= len = return ()
578 x <- peekByteOff p1 n
579 pokeByteOff p2 n (f x)
584 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
585 reverse :: ByteString -> ByteString
586 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
587 c_reverse p (f `plusPtr` s) (fromIntegral l)
589 -- todo, fuseable version
591 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
592 -- 'ByteString' and \`intersperses\' that byte between the elements of
593 -- the 'ByteString'. It is analogous to the intersperse function on
595 intersperse :: Word8 -> ByteString -> ByteString
596 intersperse c ps@(PS x s l)
598 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
599 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
602 intersperse c = pack . List.intersperse c . unpack
605 -- | The 'transpose' function transposes the rows and columns of its
606 -- 'ByteString' argument.
607 transpose :: [ByteString] -> [ByteString]
608 transpose ps = P.map pack (List.transpose (P.map unpack ps))
610 -- ---------------------------------------------------------------------
611 -- Reducing 'ByteString's
613 -- | 'foldl', applied to a binary operator, a starting value (typically
614 -- the left-identity of the operator), and a ByteString, reduces the
615 -- ByteString using the binary operator, from left to right.
616 -- This function is subject to array fusion.
617 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
618 #if !defined(LOOPU_FUSION)
619 foldl f z = loopAcc . loopUp (foldEFL f) z
621 foldl f z = loopAcc . loopU (foldEFL f) z
627 -- About twice as fast with 6.4.1, but not fuseable
628 -- A simple fold . map is enough to make it worth while.
630 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
631 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
634 lgo z p q | p == q = return z
635 | otherwise = do c <- peek p
636 lgo (f z c) (p `plusPtr` 1) q
639 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
640 -- Though actually foldl is also strict in the accumulator.
641 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
643 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
644 {-# INLINE foldl' #-}
646 -- | 'foldr', applied to a binary operator, a starting value
647 -- (typically the right-identity of the operator), and a ByteString,
648 -- reduces the ByteString using the binary operator, from right to left.
649 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
650 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
653 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
654 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
655 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
656 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
659 go z p q | p == q = return z
660 | otherwise = do c <- peek p
661 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
662 {-# INLINE [1] foldr' #-}
664 -- | 'foldl1' is a variant of 'foldl' that has no starting value
665 -- argument, and thus must be applied to non-empty 'ByteStrings'.
666 -- This function is subject to array fusion.
667 -- An exception will be thrown in the case of an empty ByteString.
668 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
670 | null ps = errorEmptyList "foldl1"
671 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
672 {-# INLINE foldl1 #-}
674 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
675 -- An exception will be thrown in the case of an empty ByteString.
676 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
678 | null ps = errorEmptyList "foldl1'"
679 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
680 {-# INLINE foldl1' #-}
682 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
683 -- and thus must be applied to non-empty 'ByteString's
684 -- An exception will be thrown in the case of an empty ByteString.
685 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
687 | null ps = errorEmptyList "foldr1"
688 | otherwise = foldr f (last ps) (init ps)
689 {-# INLINE foldr1 #-}
691 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
693 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
695 | null ps = errorEmptyList "foldr1"
696 | otherwise = foldr' f (last ps) (init ps)
697 {-# INLINE [1] foldr1' #-}
699 -- ---------------------------------------------------------------------
702 -- | /O(n)/ Concatenate a list of ByteStrings.
703 concat :: [ByteString] -> ByteString
706 concat xs = unsafeCreate len $ \ptr -> go xs ptr
707 where len = P.sum . P.map length $ xs
710 go (PS p s l:ps) ptr = do
711 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
712 go ps (ptr `plusPtr` l)
714 -- | Map a function over a 'ByteString' and concatenate the results
715 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
716 concatMap f = concat . foldr ((:) . f) []
718 -- foldr (append . f) empty
720 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
721 -- any element of the 'ByteString' satisfies the predicate.
722 any :: (Word8 -> Bool) -> ByteString -> Bool
723 any _ (PS _ _ 0) = False
724 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
725 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
728 go p q | p == q = return False
729 | otherwise = do c <- peek p
730 if f c then return True
731 else go (p `plusPtr` 1) q
735 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
736 -- if all elements of the 'ByteString' satisfy the predicate.
737 all :: (Word8 -> Bool) -> ByteString -> Bool
738 all _ (PS _ _ 0) = True
739 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
740 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
743 go p q | p == q = return True -- end of list
744 | otherwise = do c <- peek p
746 then go (p `plusPtr` 1) q
749 ------------------------------------------------------------------------
751 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
752 -- This function will fuse.
753 -- An exception will be thrown in the case of an empty ByteString.
754 maximum :: ByteString -> Word8
755 maximum xs@(PS x s l)
756 | null xs = errorEmptyList "maximum"
757 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
758 c_maximum (p `plusPtr` s) (fromIntegral l)
760 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
761 -- This function will fuse.
762 -- An exception will be thrown in the case of an empty ByteString.
763 minimum :: ByteString -> Word8
764 minimum xs@(PS x s l)
765 | null xs = errorEmptyList "minimum"
766 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
767 c_minimum (p `plusPtr` s) (fromIntegral l)
770 -- minimum/maximum/loop fusion. As for length (and other folds), when we
771 -- see we're applied after a fuseable op, switch from using the C
772 -- version, to the fuseable version. The result should then avoid
773 -- allocating a buffer.
776 #if defined(__GLASGOW_HASKELL__)
777 {-# INLINE [1] minimum #-}
778 {-# INLINE [1] maximum #-}
781 maximumU :: ByteString -> Word8
782 maximumU = foldl1' max
783 {-# INLINE maximumU #-}
785 minimumU :: ByteString -> Word8
786 minimumU = foldl1' min
787 {-# INLINE minimumU #-}
791 "FPS minimum/loop" forall loop s .
792 minimum (loopArr (loopWrapper loop s)) =
793 minimumU (loopArr (loopWrapper loop s))
795 "FPS maximum/loop" forall loop s .
796 maximum (loopArr (loopWrapper loop s)) =
797 maximumU (loopArr (loopWrapper loop s))
801 ------------------------------------------------------------------------
803 -- | The 'mapAccumL' function behaves like a combination of 'map' and
804 -- 'foldl'; it applies a function to each element of a ByteString,
805 -- passing an accumulating parameter from left to right, and returning a
806 -- final value of this accumulator together with the new list.
807 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
808 #if !defined(LOOPU_FUSION)
809 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
811 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
813 {-# INLINE mapAccumL #-}
815 -- | The 'mapAccumR' function behaves like a combination of 'map' and
816 -- 'foldr'; it applies a function to each element of a ByteString,
817 -- passing an accumulating parameter from right to left, and returning a
818 -- final value of this accumulator together with the new ByteString.
819 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
820 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
821 {-# INLINE mapAccumR #-}
823 -- | /O(n)/ map Word8 functions, provided with the index at each position
824 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
825 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
826 {-# INLINE mapIndexed #-}
828 -- ---------------------------------------------------------------------
829 -- Building ByteStrings
831 -- | 'scanl' is similar to 'foldl', but returns a list of successive
832 -- reduced values from the left. This function will fuse.
834 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
838 -- > last (scanl f z xs) == foldl f z xs.
839 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
840 #if !defined(LOOPU_FUSION)
841 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
843 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
846 -- n.b. haskell's List scan returns a list one bigger than the
847 -- input, so we need to snoc here to get some extra space, however,
848 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
851 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
852 -- This function will fuse.
854 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
855 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
858 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
859 {-# INLINE scanl1 #-}
861 -- | scanr is the right-to-left dual of scanl.
862 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
863 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
866 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
867 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
870 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
871 {-# INLINE scanr1 #-}
873 -- ---------------------------------------------------------------------
874 -- Unfolds and replicates
876 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
877 -- the value of every element. The following holds:
879 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
881 -- This implemenation uses @memset(3)@
882 replicate :: Int -> Word8 -> ByteString
885 | otherwise = unsafeCreate w $ \ptr ->
886 memset ptr c (fromIntegral w) >> return ()
888 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
889 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
890 -- ByteString from a seed value. The function takes the element and
891 -- returns 'Nothing' if it is done producing the ByteString or returns
892 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
893 -- and @b@ is the seed value for further production.
897 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
898 -- > == pack [0, 1, 2, 3, 4, 5]
900 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
901 unfoldr f = concat . unfoldChunk 32 64
902 where unfoldChunk n n' x =
903 case unfoldrN n f x of
904 (s, Nothing) -> s : []
905 (s, Just x') -> s : unfoldChunk n' (n+n') x'
907 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
908 -- value. However, the length of the result is limited by the first
909 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
910 -- when the maximum length of the result is known.
912 -- The following equation relates 'unfoldrN' and 'unfoldr':
914 -- > unfoldrN n f s == take n (unfoldr f s)
916 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
918 | i < 0 = (empty, Just x0)
919 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
923 Nothing -> return (0, n, Nothing)
925 | n == i -> return (0, n, Just x)
926 | otherwise -> do poke p w
927 go (p `plusPtr` 1) x' (n+1)
929 -- ---------------------------------------------------------------------
932 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
933 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
934 take :: Int -> ByteString -> ByteString
938 | otherwise = PS x s n
941 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
942 -- elements, or @[]@ if @n > 'length' xs@.
943 drop :: Int -> ByteString -> ByteString
947 | otherwise = PS x (s+n) (l-n)
950 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
951 splitAt :: Int -> ByteString -> (ByteString, ByteString)
952 splitAt n ps@(PS x s l)
953 | n <= 0 = (empty, ps)
954 | n >= l = (ps, empty)
955 | otherwise = (PS x s n, PS x (s+n) (l-n))
956 {-# INLINE splitAt #-}
958 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
959 -- returns the longest prefix (possibly empty) of @xs@ of elements that
961 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
962 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
963 {-# INLINE takeWhile #-}
965 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
966 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
967 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
968 {-# INLINE dropWhile #-}
970 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
971 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
972 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
973 {-# INLINE [1] break #-}
976 "FPS specialise break (x==)" forall x.
977 break ((==) x) = breakByte x
980 #if __GLASGOW_HASKELL__ >= 605
982 "FPS specialise break (==x)" forall x.
983 break (==x) = breakByte x
987 -- | 'breakByte' breaks its ByteString argument at the first occurence
988 -- of the specified byte. It is more efficient than 'break' as it is
989 -- implemented with @memchr(3)@. I.e.
991 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
993 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
994 breakByte c p = case elemIndex c p of
996 Just n -> (unsafeTake n p, unsafeDrop n p)
997 {-# INLINE breakByte #-}
999 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1001 -- breakEnd p == spanEnd (not.p)
1002 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1003 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1005 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1006 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1007 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1008 span p ps = break (not . p) ps
1009 {-# INLINE [1] span #-}
1011 -- | 'spanByte' breaks its ByteString argument at the first
1012 -- occurence of a byte other than its argument. It is more efficient
1015 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1017 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1018 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1019 go (p `plusPtr` s) 0
1022 go p i | i >= l = return (ps, empty)
1023 | otherwise = do c' <- peekByteOff p i
1025 then return (unsafeTake i ps, unsafeDrop i ps)
1027 {-# INLINE spanByte #-}
1030 "FPS specialise span (x==)" forall x.
1031 span ((==) x) = spanByte x
1034 #if __GLASGOW_HASKELL__ >= 605
1036 "FPS specialise span (==x)" forall x.
1037 span (==x) = spanByte x
1041 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1044 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1048 -- > spanEnd (not . isSpace) ps
1050 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1052 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1053 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1055 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1056 -- separators, where the predicate returns True for a separator element.
1057 -- The resulting components do not contain the separators. Two adjacent
1058 -- separators result in an empty component in the output. eg.
1060 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1061 -- > splitWith (=='a') [] == []
1063 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1065 #if defined(__GLASGOW_HASKELL__)
1066 splitWith _pred (PS _ _ 0) = []
1067 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1068 where pred# c# = pred_ (W8# c#)
1071 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1072 splitLoop pred' p 0 off' len' fp'
1074 splitLoop :: (Word# -> Bool)
1076 -> Int -> Int -> Int
1080 splitLoop pred' p idx' off' len' fp'
1081 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1082 | idx' >= len' = return [PS fp' off' idx']
1084 w <- peekElemOff p (off'+idx')
1085 if pred' (case w of W8# w# -> w#)
1086 then return (PS fp' off' idx' :
1087 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1088 else splitLoop pred' p (idx'+1) off' len' fp'
1089 {-# INLINE splitWith #-}
1092 splitWith _ (PS _ _ 0) = []
1093 splitWith p ps = loop p ps
1096 loop q qs = if null rest then [chunk]
1097 else chunk : loop q (unsafeTail rest)
1098 where (chunk,rest) = break q qs
1101 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1102 -- argument, consuming the delimiter. I.e.
1104 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1105 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1106 -- > split 'x' "x" == ["",""]
1110 -- > join [c] . split c == id
1111 -- > split == splitWith . (==)
1113 -- As for all splitting functions in this library, this function does
1114 -- not copy the substrings, it just constructs new 'ByteStrings' that
1115 -- are slices of the original.
1117 split :: Word8 -> ByteString -> [ByteString]
1118 split _ (PS _ _ 0) = []
1119 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1120 let ptr = p `plusPtr` s
1124 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1125 w (fromIntegral (l-n))
1127 then [PS x (s+n) (l-n)]
1128 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1131 {-# INLINE split #-}
1134 -- slower. but stays inside Haskell.
1135 split _ (PS _ _ 0) = []
1136 split (W8# w#) (PS fp off len) = splitWith' off len fp
1138 splitWith' off' len' fp' = withPtr fp $ \p ->
1139 splitLoop p 0 off' len' fp'
1141 splitLoop :: Ptr Word8
1142 -> Int -> Int -> Int
1147 splitLoop p idx' off' len' fp'
1148 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1149 | idx' >= len' = return [PS fp' off' idx']
1151 (W8# x#) <- peekElemOff p (off'+idx')
1152 if word2Int# w# ==# word2Int# x#
1153 then return (PS fp' off' idx' :
1154 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1155 else splitLoop p (idx'+1) off' len' fp'
1159 -- | Like 'splitWith', except that sequences of adjacent separators are
1160 -- treated as a single separator. eg.
1162 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1164 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1165 tokens f = P.filter (not.null) . splitWith f
1166 {-# INLINE tokens #-}
1169 -- | The 'group' function takes a ByteString and returns a list of
1170 -- ByteStrings such that the concatenation of the result is equal to the
1171 -- argument. Moreover, each sublist in the result contains only equal
1172 -- elements. For example,
1174 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1176 -- It is a special case of 'groupBy', which allows the programmer to
1177 -- supply their own equality test. It is about 40% faster than
1179 group :: ByteString -> [ByteString]
1182 | otherwise = ys : group zs
1184 (ys, zs) = spanByte (unsafeHead xs) xs
1186 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1187 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1190 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1192 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1194 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1195 -- 'ByteString's and concatenates the list after interspersing the first
1196 -- argument between each element of the list.
1197 join :: ByteString -> [ByteString] -> ByteString
1198 join s = concat . (List.intersperse s)
1199 {-# INLINE [1] join #-}
1202 "FPS specialise join c -> joinByte" forall c s1 s2 .
1203 join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1207 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1208 -- with a char. Around 4 times faster than the generalised join.
1210 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1211 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1212 withForeignPtr ffp $ \fp ->
1213 withForeignPtr fgp $ \gp -> do
1214 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1215 poke (ptr `plusPtr` l) c
1216 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1218 len = length f + length g + 1
1219 {-# INLINE joinWithByte #-}
1221 -- ---------------------------------------------------------------------
1222 -- Indexing ByteStrings
1224 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1225 index :: ByteString -> Int -> Word8
1227 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1228 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1229 ++ ", length = " ++ show (length ps))
1230 | otherwise = ps `unsafeIndex` n
1231 {-# INLINE index #-}
1233 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1234 -- element in the given 'ByteString' which is equal to the query
1235 -- element, or 'Nothing' if there is no such element.
1236 -- This implementation uses memchr(3).
1237 elemIndex :: Word8 -> ByteString -> Maybe Int
1238 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1239 let p' = p `plusPtr` s
1240 q <- memchr p' c (fromIntegral l)
1241 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1242 {-# INLINE elemIndex #-}
1244 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1245 -- element in the given 'ByteString' which is equal to the query
1246 -- element, or 'Nothing' if there is no such element. The following
1249 -- > elemIndexEnd c xs ==
1250 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1252 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1253 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1254 go (p `plusPtr` s) (l-1)
1257 go p i | i < 0 = return Nothing
1258 | otherwise = do ch' <- peekByteOff p i
1260 then return $ Just i
1262 {-# INLINE elemIndexEnd #-}
1264 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1265 -- the indices of all elements equal to the query element, in ascending order.
1266 -- This implementation uses memchr(3).
1267 elemIndices :: Word8 -> ByteString -> [Int]
1268 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1269 let ptr = p `plusPtr` s
1272 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1273 w (fromIntegral (l - n))
1276 else let i = q `minusPtr` ptr
1279 {-# INLINE elemIndices #-}
1283 elemIndices :: Word8 -> ByteString -> [Int]
1284 elemIndices c ps = loop 0 ps
1286 loop _ ps' | null ps' = []
1287 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1288 | otherwise = loop (n+1) (unsafeTail ps')
1291 -- | count returns the number of times its argument appears in the ByteString
1293 -- > count = length . elemIndices
1295 -- But more efficiently than using length on the intermediate list.
1296 count :: Word8 -> ByteString -> Int
1297 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1298 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1299 {-# INLINE count #-}
1303 -- around 30% slower
1305 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1306 go (p `plusPtr` s) (fromIntegral m) 0
1308 go :: Ptr Word8 -> CSize -> Int -> IO Int
1314 else do let k = fromIntegral $ q `minusPtr` p
1315 go (q `plusPtr` 1) (l-k-1) (i+1)
1318 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1319 -- returns the index of the first element in the ByteString
1320 -- satisfying the predicate.
1321 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1322 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1325 go ptr n | n >= l = return Nothing
1326 | otherwise = do w <- peek ptr
1328 then return (Just n)
1329 else go (ptr `plusPtr` 1) (n+1)
1330 {-# INLINE findIndex #-}
1332 -- | The 'findIndices' function extends 'findIndex', by returning the
1333 -- indices of all elements satisfying the predicate, in ascending order.
1334 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1335 findIndices p ps = loop 0 ps
1338 loop n qs | null qs = []
1339 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1340 | otherwise = loop (n+1) (unsafeTail qs)
1342 -- ---------------------------------------------------------------------
1343 -- Searching ByteStrings
1345 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1346 elem :: Word8 -> ByteString -> Bool
1347 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1350 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1351 notElem :: Word8 -> ByteString -> Bool
1352 notElem c ps = not (elem c ps)
1353 {-# INLINE notElem #-}
1355 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1356 -- returns a ByteString containing those characters that satisfy the
1357 -- predicate. This function is subject to array fusion.
1358 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1359 #if defined(LOOPU_FUSION)
1360 filter p = loopArr . loopU (filterEFL p) NoAcc
1361 #elif defined(LOOPUP_FUSION)
1362 filter p = loopArr . loopUp (filterEFL p) NoAcc
1363 #elif defined(LOOPNOACC_FUSION)
1364 filter p = loopArr . loopNoAcc (filterEFL p)
1366 filter f = loopArr . loopFilter f
1368 {-# INLINE filter #-}
1371 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1372 -- around 2x faster for some one-shot applications.
1373 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1374 filter' k ps@(PS x s l)
1376 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1377 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1378 return $! t `minusPtr` p -- actual length
1381 go f t end | f == end = return t
1385 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1386 else go (f `plusPtr` 1) t end
1387 {-# INLINE filter' #-}
1391 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1392 -- case of filtering a single byte. It is more efficient to use
1393 -- /filterByte/ in this case.
1395 -- > filterByte == filter . (==)
1397 -- filterByte is around 10x faster, and uses much less space, than its
1398 -- filter equivalent
1399 filterByte :: Word8 -> ByteString -> ByteString
1400 filterByte w ps = replicate (count w ps) w
1401 {-# INLINE filterByte #-}
1404 "FPS specialise filter (== x)" forall x.
1405 filter ((==) x) = filterByte x
1408 #if __GLASGOW_HASKELL__ >= 605
1410 "FPS specialise filter (== x)" forall x.
1411 filter (== x) = filterByte x
1416 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1417 -- case of filtering a single byte out of a list. It is more efficient
1418 -- to use /filterNotByte/ in this case.
1420 -- > filterNotByte == filter . (/=)
1422 -- filterNotByte is around 2x faster than its filter equivalent.
1423 filterNotByte :: Word8 -> ByteString -> ByteString
1424 filterNotByte w = filter (/= w)
1425 {-# INLINE filterNotByte #-}
1428 "FPS specialise filter (x /=)" forall x.
1429 filter ((/=) x) = filterNotByte x
1432 #if __GLASGOW_HASKELL__ >= 605
1434 "FPS specialise filter (/= x)" forall x.
1435 filter (/= x) = filterNotByte x
1439 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1440 -- and returns the first element in matching the predicate, or 'Nothing'
1441 -- if there is no such element.
1443 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1445 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1446 find f p = case findIndex f p of
1447 Just n -> Just (p `unsafeIndex` n)
1453 -- fuseable, but we don't want to walk the whole array.
1455 find k = foldl findEFL Nothing
1456 where findEFL a@(Just _) _ = a
1457 findEFL _ c | k c = Just c
1458 | otherwise = Nothing
1461 -- ---------------------------------------------------------------------
1462 -- Searching for substrings
1464 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1465 -- iff the first is a prefix of the second.
1466 isPrefixOf :: ByteString -> ByteString -> Bool
1467 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1470 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1471 withForeignPtr x2 $ \p2 -> do
1472 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1475 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1476 -- iff the first is a suffix of the second.
1478 -- The following holds:
1480 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1482 -- However, the real implemenation uses memcmp to compare the end of the
1483 -- string only, with no reverse required..
1484 isSuffixOf :: ByteString -> ByteString -> Bool
1485 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1488 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1489 withForeignPtr x2 $ \p2 -> do
1490 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1493 -- | Check whether one string is a substring of another. @isSubstringOf
1494 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1495 isSubstringOf :: ByteString -- ^ String to search for.
1496 -> ByteString -- ^ String to search in.
1498 isSubstringOf p s = not $ P.null $ findSubstrings p s
1500 -- | Get the first index of a substring in another string,
1501 -- or 'Nothing' if the string is not found.
1502 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1503 findSubstring :: ByteString -- ^ String to search for.
1504 -> ByteString -- ^ String to seach in.
1506 findSubstring = (listToMaybe .) . findSubstrings
1508 -- | Find the indexes of all (possibly overlapping) occurances of a
1509 -- substring in a string. This function uses the Knuth-Morris-Pratt
1510 -- string matching algorithm.
1511 findSubstrings :: ByteString -- ^ String to search for.
1512 -> ByteString -- ^ String to seach in.
1515 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1517 patc x = pat `unsafeIndex` x
1518 strc x = str `unsafeIndex` x
1520 -- maybe we should make kmpNext a UArray before using it in search?
1521 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1522 kmpNextL p _ | null p = []
1523 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1525 x = if not (null ps) && unsafeHead ps == patc j'
1526 then kmpNext Array.! j' else j'
1528 search i j = match ++ rest -- i: position in string, j: position in pattern
1529 where match = if j == m then [(i - j)] else []
1530 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1531 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1534 -- ---------------------------------------------------------------------
1537 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1538 -- corresponding pairs of bytes. If one input ByteString is short,
1539 -- excess elements of the longer ByteString are discarded. This is
1540 -- equivalent to a pair of 'unpack' operations.
1541 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1543 | null ps || null qs = []
1544 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1546 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1547 -- the first argument, instead of a tupling function. For example,
1548 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1549 -- corresponding sums.
1550 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1552 | null ps || null qs = []
1553 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1554 #if defined(__GLASGOW_HASKELL__)
1555 {-# INLINE [1] zipWith #-}
1559 -- | A specialised version of zipWith for the common case of a
1560 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1561 -- are used to automatically covert zipWith into zipWith' when a pack is
1562 -- performed on the result of zipWith, but we also export it for
1565 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1566 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1567 withForeignPtr fp $ \a ->
1568 withForeignPtr fq $ \b ->
1569 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1571 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1574 | n >= len = return ()
1576 x <- peekByteOff p1 n
1577 y <- peekByteOff p2 n
1578 pokeByteOff r n (f x y)
1579 zipWith_ (n+1) p1 p2 r
1582 {-# INLINE zipWith' #-}
1586 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1587 zipWith f p q = unpack (zipWith' f p q)
1591 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1592 -- ByteStrings. Note that this performs two 'pack' operations.
1593 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1594 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1595 {-# INLINE unzip #-}
1597 -- ---------------------------------------------------------------------
1600 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1601 inits :: ByteString -> [ByteString]
1602 inits (PS x s l) = [PS x s n | n <- [0..l]]
1604 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1605 tails :: ByteString -> [ByteString]
1606 tails p | null p = [empty]
1607 | otherwise = p : tails (unsafeTail p)
1609 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1611 -- ---------------------------------------------------------------------
1612 -- ** Ordered 'ByteString's
1614 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1615 sort :: ByteString -> ByteString
1616 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1618 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1619 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1622 go 256 _ = return ()
1623 go i ptr = do n <- peekElemOff arr i
1624 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1625 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1629 sort :: ByteString -> ByteString
1630 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1631 memcpy p (f `plusPtr` s) l
1632 c_qsort p l -- inplace
1635 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1637 -- Try some linear sorts: radix, counting
1640 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1641 -- sortBy f ps = undefined
1643 -- ---------------------------------------------------------------------
1644 -- Low level constructors
1646 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1647 -- finalizer associated to it. The ByteString length is calculated using
1648 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1649 packCString :: CString -> ByteString
1650 packCString cstr = unsafePerformIO $ do
1651 fp <- newForeignPtr_ (castPtr cstr)
1653 return $! PS fp 0 (fromIntegral l)
1655 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1656 -- have /no/ finalizer associated with it. This operation has /O(1)/
1657 -- complexity as we already know the final size, so no /strlen(3)/ is
1659 packCStringLen :: CStringLen -> ByteString
1660 packCStringLen (ptr,len) = unsafePerformIO $ do
1661 fp <- newForeignPtr_ (castPtr ptr)
1662 return $! PS fp 0 (fromIntegral len)
1664 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1665 -- have a @free(3)@ finalizer associated to it.
1666 packMallocCString :: CString -> ByteString
1667 packMallocCString cstr = unsafePerformIO $ do
1668 fp <- newForeignFreePtr (castPtr cstr)
1669 len <- c_strlen cstr
1670 return $! PS fp 0 (fromIntegral len)
1672 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1673 -- null-terminated @CString@. The @CString@ will be freed
1674 -- automatically. This is a memcpy(3).
1675 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1676 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1677 where alloc = withForeignPtr ps $ \p -> do
1678 buf <- c_malloc (fromIntegral l+1)
1679 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1680 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1681 return (castPtr buf)
1683 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1684 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1685 useAsCStringLen = unsafeUseAsCStringLen
1688 -- why were we doing this?
1690 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1691 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1693 -- alloc = withForeignPtr ps $ \p -> do
1694 -- buf <- c_malloc (fromIntegral l+1)
1695 -- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1696 -- poke (buf `plusPtr` l) (0::Word8) -- n.b.
1697 -- return $! (castPtr buf, l)
1700 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1701 -- This is mainly useful to allow the rest of the data pointed
1702 -- to by the 'ByteString' to be garbage collected, for example
1703 -- if a large string has been read in, and only a small part of it
1704 -- is needed in the rest of the program.
1705 copy :: ByteString -> ByteString
1706 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1707 memcpy p (f `plusPtr` s) (fromIntegral l)
1709 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1710 -- CString is going to be deallocated from C land.
1711 copyCString :: CString -> IO ByteString
1712 copyCString cstr = do
1713 len <- c_strlen cstr
1714 copyCStringLen (cstr, fromIntegral len)
1716 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1717 copyCStringLen :: CStringLen -> IO ByteString
1718 copyCStringLen (cstr, len) = create len $ \p ->
1719 memcpy p (castPtr cstr) (fromIntegral len)
1721 -- ---------------------------------------------------------------------
1724 -- | Read a line from stdin.
1725 getLine :: IO ByteString
1726 getLine = hGetLine stdin
1729 -- | Lazily construct a list of lines of ByteStrings. This will be much
1730 -- better on memory consumption than using 'hGetContents >>= lines'
1731 -- If you're considering this, a better choice might be to use
1732 -- Data.ByteString.Lazy
1733 hGetLines :: Handle -> IO [ByteString]
1736 go = unsafeInterleaveIO $ do
1746 -- | Read a line from a handle
1748 hGetLine :: Handle -> IO ByteString
1749 #if !defined(__GLASGOW_HASKELL__)
1750 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
1752 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1753 case haBufferMode handle_ of
1754 NoBuffering -> error "no buffering"
1755 _other -> hGetLineBuffered handle_
1758 hGetLineBuffered handle_ = do
1759 let ref = haBuffer handle_
1760 buf <- readIORef ref
1761 hGetLineBufferedLoop handle_ ref buf 0 []
1763 hGetLineBufferedLoop handle_ ref
1764 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1766 off <- findEOL r w raw
1767 let new_len = len + off - r
1768 xs <- mkPS raw r off
1770 -- if eol == True, then off is the offset of the '\n'
1771 -- otherwise off == w and the buffer is now empty.
1773 then do if (w == off + 1)
1774 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1775 else writeIORef ref buf{ bufRPtr = off + 1 }
1776 mkBigPS new_len (xs:xss)
1778 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1779 buf{ bufWPtr=0, bufRPtr=0 }
1781 -- Nothing indicates we caught an EOF, and we may have a
1782 -- partial line to return.
1784 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1786 then mkBigPS new_len (xs:xss)
1789 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1791 -- find the end-of-line character, if there is one
1795 (c,r') <- readCharFromBuffer raw r
1797 then return r -- NB. not r': don't include the '\n'
1798 else findEOL r' w raw
1800 maybeFillReadBuffer fd is_line is_stream buf = catch
1801 (do buf' <- fillReadBuffer fd is_line is_stream buf
1803 (\e -> if isEOFError e then return Nothing else ioError e)
1805 -- TODO, rewrite to use normal memcpy
1806 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1807 mkPS buf start end =
1808 let len = end - start
1809 in create len $ \p -> do
1810 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1813 mkBigPS :: Int -> [ByteString] -> IO ByteString
1814 mkBigPS _ [ps] = return ps
1815 mkBigPS _ pss = return $! concat (P.reverse pss)
1819 -- ---------------------------------------------------------------------
1822 -- | Outputs a 'ByteString' to the specified 'Handle'.
1823 hPut :: Handle -> ByteString -> IO ()
1824 hPut _ (PS _ _ 0) = return ()
1825 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1827 -- | A synonym for @hPut@, for compatibility
1828 hPutStr :: Handle -> ByteString -> IO ()
1831 -- | Write a ByteString to a handle, appending a newline byte
1832 hPutStrLn :: Handle -> ByteString -> IO ()
1834 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1835 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1837 -- | Write a ByteString to stdout
1838 putStr :: ByteString -> IO ()
1839 putStr = hPut stdout
1841 -- | Write a ByteString to stdout, appending a newline byte
1842 putStrLn :: ByteString -> IO ()
1843 putStrLn = hPutStrLn stdout
1845 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1846 -- is far more efficient than reading the characters into a 'String'
1847 -- and then using 'pack'.
1848 hGet :: Handle -> Int -> IO ByteString
1849 hGet _ 0 = return empty
1850 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1852 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1853 -- waiting for data to become available, instead it returns only whatever data
1855 hGetNonBlocking :: Handle -> Int -> IO ByteString
1856 #if defined(__GLASGOW_HASKELL__)
1857 hGetNonBlocking _ 0 = return empty
1858 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1860 hGetNonBlocking = hGet
1863 -- | Read entire handle contents into a 'ByteString'.
1864 -- This function reads chunks at a time, doubling the chunksize on each
1865 -- read. The final buffer is then realloced to the appropriate size. For
1866 -- files > half of available memory, this may lead to memory exhaustion.
1867 -- Consider using 'readFile' in this case.
1869 -- As with 'hGet', the string representation in the file is assumed to
1872 hGetContents :: Handle -> IO ByteString
1874 let start_size = 1024
1875 p <- mallocArray start_size
1876 i <- hGetBuf h p start_size
1878 then do p' <- reallocArray p i
1879 fp <- newForeignFreePtr p'
1885 p' <- reallocArray p s'
1886 i <- hGetBuf h (p' `plusPtr` s) s
1888 then do let i' = s + i
1889 p'' <- reallocArray p' i'
1890 fp <- newForeignFreePtr p''
1891 return $! PS fp 0 i'
1894 -- | getContents. Equivalent to hGetContents stdin
1895 getContents :: IO ByteString
1896 getContents = hGetContents stdin
1898 -- | The interact function takes a function of type @ByteString -> ByteString@
1899 -- as its argument. The entire input from the standard input device is passed
1900 -- to this function as its argument, and the resulting string is output on the
1901 -- standard output device. It's great for writing one line programs!
1902 interact :: (ByteString -> ByteString) -> IO ()
1903 interact transformer = putStr . transformer =<< getContents
1905 -- | Read an entire file strictly into a 'ByteString'. This is far more
1906 -- efficient than reading the characters into a 'String' and then using
1907 -- 'pack'. It also may be more efficient than opening the file and
1908 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1909 -- for 'text mode' use the Char8 version of this function.
1910 readFile :: FilePath -> IO ByteString
1911 readFile f = bracket (openBinaryFile f ReadMode) hClose
1912 (\h -> hFileSize h >>= hGet h . fromIntegral)
1914 -- | Write a 'ByteString' to a file.
1915 writeFile :: FilePath -> ByteString -> IO ()
1916 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1919 -- | Append a 'ByteString' to a file.
1920 appendFile :: FilePath -> ByteString -> IO ()
1921 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1926 -- Disable until we can move it into a portable .hsc file
1929 -- | Like readFile, this reads an entire file directly into a
1930 -- 'ByteString', but it is even more efficient. It involves directly
1931 -- mapping the file to memory. This has the advantage that the contents
1932 -- of the file never need to be copied. Also, under memory pressure the
1933 -- page may simply be discarded, while in the case of readFile it would
1934 -- need to be written to swap. If you read many small files, mmapFile
1935 -- will be less memory-efficient than readFile, since each mmapFile
1936 -- takes up a separate page of memory. Also, you can run into bus
1937 -- errors if the file is modified. As with 'readFile', the string
1938 -- representation in the file is assumed to be ISO-8859-1.
1940 -- On systems without mmap, this is the same as a readFile.
1942 mmapFile :: FilePath -> IO ByteString
1943 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1945 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1947 h <- openBinaryFile f ReadMode
1948 l <- fromIntegral `fmap` hFileSize h
1949 -- Don't bother mmaping small files because each mmapped file takes up
1950 -- at least one full VM block.
1952 then do thefp <- mallocByteString l
1953 withForeignPtr thefp $ \p-> hGetBuf h p l
1958 fd <- fromIntegral `fmap` handleToFd h
1960 fp <- if p == nullPtr
1961 then do thefp <- mallocByteString l
1962 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1965 -- The munmap leads to crashes on OpenBSD.
1966 -- maybe there's a use after unmap in there somewhere?
1967 -- Bulat suggests adding the hClose to the
1968 -- finalizer, excellent idea.
1969 #if !defined(__OpenBSD__)
1970 let unmap = c_munmap p l >> return ()
1972 let unmap = return ()
1974 fp <- newForeignPtr p unmap
1979 where mmap_limit = 16*1024
1982 -- ---------------------------------------------------------------------
1983 -- Internal utilities
1985 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1986 -- of the string if no element is found, rather than Nothing.
1987 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1988 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1991 go ptr n | n >= l = return l
1992 | otherwise = do w <- peek ptr
1995 else go (ptr `plusPtr` 1) (n+1)
1996 {-# INLINE findIndexOrEnd #-}
1998 -- | Perform an operation with a temporary ByteString
1999 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2000 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2001 {-# INLINE withPtr #-}
2003 -- Common up near identical calls to `error' to reduce the number
2004 -- constant strings created when compiled:
2005 errorEmptyList :: String -> a
2006 errorEmptyList fun = moduleError fun "empty ByteString"
2007 {-# NOINLINE errorEmptyList #-}
2009 moduleError :: String -> String -> a
2010 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2011 {-# NOINLINE moduleError #-}
2013 -- Find from the end of the string using predicate
2014 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2015 STRICT2(findFromEndUntil)
2016 findFromEndUntil f ps@(PS x s l) =
2018 else if f (last ps) then l
2019 else findFromEndUntil f (PS x s (l-1))
2021 {-# INLINE newForeignFreePtr #-}
2022 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2023 newForeignFreePtr p = newForeignPtr c_free_finalizer p