1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 singleton, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
51 cons, -- :: Word8 -> ByteString -> ByteString
52 snoc, -- :: ByteString -> Word8 -> ByteString
53 append, -- :: ByteString -> ByteString -> ByteString
54 head, -- :: ByteString -> Word8
55 last, -- :: ByteString -> Word8
56 tail, -- :: ByteString -> ByteString
57 init, -- :: ByteString -> ByteString
58 null, -- :: ByteString -> Bool
59 length, -- :: ByteString -> Int
61 -- * Transformating ByteStrings
62 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
63 reverse, -- :: ByteString -> ByteString
64 intersperse, -- :: Word8 -> ByteString -> ByteString
65 transpose, -- :: [ByteString] -> [ByteString]
67 -- * Reducing 'ByteString's (folds)
68 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
69 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
70 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
71 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
74 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
75 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 concat, -- :: [ByteString] -> ByteString
80 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
81 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
82 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 maximum, -- :: ByteString -> Word8
84 minimum, -- :: ByteString -> Word8
86 -- * Building ByteStrings
88 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
89 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
90 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
93 -- ** Accumulating maps
94 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
95 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
98 -- ** Unfolding ByteStrings
99 replicate, -- :: Int -> Word8 -> ByteString
100 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
101 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
105 -- ** Breaking strings
106 take, -- :: Int -> ByteString -> ByteString
107 drop, -- :: Int -> ByteString -> ByteString
108 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
109 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
113 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
114 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
117 inits, -- :: ByteString -> [ByteString]
118 tails, -- :: ByteString -> [ByteString]
120 -- ** Breaking into many substrings
121 split, -- :: Word8 -> ByteString -> [ByteString]
122 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
124 -- ** Joining strings
125 join, -- :: ByteString -> [ByteString] -> ByteString
128 isPrefixOf, -- :: ByteString -> ByteString -> Bool
129 isSuffixOf, -- :: ByteString -> ByteString -> Bool
131 -- ** Search for arbitrary substrings
132 isSubstringOf, -- :: ByteString -> ByteString -> Bool
133 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
134 findSubstrings, -- :: ByteString -> ByteString -> [Int]
136 -- * Searching ByteStrings
138 -- ** Searching by equality
139 -- | These functions use memchr(3) to efficiently search the ByteString
140 elem, -- :: Word8 -> ByteString -> Bool
141 notElem, -- :: Word8 -> ByteString -> Bool
143 -- ** Searching with a predicate
144 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
148 -- * Indexing ByteStrings
149 index, -- :: ByteString -> Int -> Word8
150 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
151 elemIndices, -- :: Word8 -> ByteString -> [Int]
152 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
153 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
154 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
155 count, -- :: Word8 -> ByteString -> Int
157 -- * Zipping and unzipping ByteStrings
158 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
159 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
160 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
162 -- * Ordered ByteStrings
163 sort, -- :: ByteString -> ByteString
165 -- * Low level CString conversions
167 -- ** Packing CStrings and pointers
168 packCString, -- :: CString -> ByteString
169 packCStringLen, -- :: CString -> ByteString
170 packMallocCString, -- :: CString -> ByteString
172 -- ** Using ByteStrings as CStrings
173 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
174 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
176 -- ** Copying ByteStrings
177 -- | These functions perform memcpy(3) operations
178 copy, -- :: ByteString -> ByteString
179 copyCString, -- :: CString -> IO ByteString
180 copyCStringLen, -- :: CStringLen -> IO ByteString
182 -- * I\/O with 'ByteString's
184 -- ** Standard input and output
185 getLine, -- :: IO ByteString
186 getContents, -- :: IO ByteString
187 putStr, -- :: ByteString -> IO ()
188 putStrLn, -- :: ByteString -> IO ()
189 interact, -- :: (ByteString -> ByteString) -> IO ()
192 readFile, -- :: FilePath -> IO ByteString
193 writeFile, -- :: FilePath -> ByteString -> IO ()
194 appendFile, -- :: FilePath -> ByteString -> IO ()
195 -- mmapFile, -- :: FilePath -> IO ByteString
197 -- ** I\/O with Handles
198 hGetLine, -- :: Handle -> IO ByteString
199 hGetContents, -- :: Handle -> IO ByteString
200 hGet, -- :: Handle -> Int -> IO ByteString
201 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
202 hPut, -- :: Handle -> ByteString -> IO ()
203 hPutStr, -- :: Handle -> ByteString -> IO ()
204 hPutStrLn, -- :: Handle -> ByteString -> IO ()
206 #if defined(__GLASGOW_HASKELL__)
207 -- * Fusion utilities
208 unpackList, -- eek, otherwise it gets thrown away by the simplifier
209 lengthU, maximumU, minimumU
214 import qualified Prelude as P
215 import Prelude hiding (reverse,head,tail,last,init,null
216 ,length,map,lines,foldl,foldr,unlines
217 ,concat,any,take,drop,splitAt,takeWhile
218 ,dropWhile,span,break,elem,filter,maximum
219 ,minimum,all,concatMap,foldl1,foldr1
220 ,scanl,scanl1,scanr,scanr1
221 ,readFile,writeFile,appendFile,replicate
222 ,getContents,getLine,putStr,putStrLn,interact
223 ,zip,zipWith,unzip,notElem)
225 import Data.ByteString.Base
226 import Data.ByteString.Fusion
228 import qualified Data.List as List
230 import Data.Word (Word8)
231 import Data.Maybe (listToMaybe)
232 import Data.Array (listArray)
233 import qualified Data.Array as Array ((!))
235 -- Control.Exception.bracket not available in yhc or nhc
236 import Control.Exception (bracket, assert)
237 import qualified Control.Exception as Exception
238 import Control.Monad (when)
240 import Foreign.C.String (CString, CStringLen)
241 import Foreign.C.Types (CSize)
242 import Foreign.ForeignPtr
243 import Foreign.Marshal.Array
245 import Foreign.Storable (Storable(..))
247 -- hGetBuf and hPutBuf not available in yhc or nhc
248 import System.IO (stdin,stdout,hClose,hFileSize
249 ,hGetBuf,hPutBuf,openBinaryFile
252 import Data.Monoid (Monoid, mempty, mappend, mconcat)
254 #if !defined(__GLASGOW_HASKELL__)
255 import System.IO.Unsafe
256 import qualified System.Environment
257 import qualified System.IO (hGetLine)
260 #if defined(__GLASGOW_HASKELL__)
262 import System.IO (hGetBufNonBlocking)
263 import System.IO.Error (isEOFError)
266 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
267 import GHC.Base (build)
268 import GHC.Word hiding (Word8)
269 import GHC.Ptr (Ptr(..))
270 import GHC.ST (ST(..))
275 -- -----------------------------------------------------------------------------
277 -- Useful macros, until we have bang patterns
280 #define STRICT1(f) f a | a `seq` False = undefined
281 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
282 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
283 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
284 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
286 -- -----------------------------------------------------------------------------
288 instance Eq ByteString
291 instance Ord ByteString
292 where compare = compareBytes
294 instance Show ByteString where
295 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
297 instance Read ByteString where
298 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
300 instance Monoid ByteString where
306 instance Arbitrary PackedString where
307 arbitrary = P.pack `fmap` arbitrary
308 coarbitrary s = coarbitrary (P.unpack s)
311 -- | /O(n)/ Equality on the 'ByteString' type.
312 eq :: ByteString -> ByteString -> Bool
313 eq a@(PS p s l) b@(PS p' s' l')
314 | l /= l' = False -- short cut on length
315 | p == p' && s == s' = True -- short cut for the same string
316 | otherwise = compareBytes a b == EQ
319 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
320 compareBytes :: ByteString -> ByteString -> Ordering
321 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
322 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
323 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
324 | otherwise = inlinePerformIO $
325 withForeignPtr x1 $ \p1 ->
326 withForeignPtr x2 $ \p2 -> do
327 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
328 return $! case i `compare` 0 of
329 EQ -> l1 `compare` l2
331 {-# INLINE compareBytes #-}
335 -- About 4x slower over 32M
337 compareBytes :: ByteString -> ByteString -> Ordering
338 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
339 withForeignPtr fp1 $ \p1 ->
340 withForeignPtr fp2 $ \p2 ->
341 cmp (p1 `plusPtr` off1)
342 (p2 `plusPtr` off2) 0 len1 len2
344 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
346 cmp p1 p2 n len1 len2
347 | n == len1 = if n == len2 then return EQ else return LT
348 | n == len2 = return GT
350 (a :: Word8) <- peekByteOff p1 n
351 (b :: Word8) <- peekByteOff p2 n
352 case a `compare` b of
353 EQ -> cmp p1 p2 (n+1) len1 len2
356 {-# INLINE compareBytes #-}
359 -- -----------------------------------------------------------------------------
360 -- Introducing and eliminating 'ByteString's
362 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
363 singleton :: Word8 -> ByteString
364 singleton c = unsafeCreate 1 $ \p -> poke p c
365 {-# INLINE [1] singleton #-}
368 -- XXX The unsafePerformIO is critical!
372 -- singleton 255 `compare` singleton 127
376 -- case mallocByteString 2 of
377 -- ForeignPtr f internals ->
378 -- case writeWord8OffAddr# f 0 255 of _ ->
379 -- case writeWord8OffAddr# f 0 127 of _ ->
380 -- case eqAddr# f f of
381 -- False -> case compare (GHC.Prim.plusAddr# f 0)
382 -- (GHC.Prim.plusAddr# f 0)
386 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
388 -- For applications with large numbers of string literals, pack can be a
389 -- bottleneck. In such cases, consider using packAddress (GHC only).
390 pack :: [Word8] -> ByteString
392 #if !defined(__GLASGOW_HASKELL__)
394 pack str = unsafeCreate (P.length str) $ \p -> go p str
397 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
399 #else /* hack away */
401 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
403 go _ _ [] = return ()
404 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
406 writeByte p i c = ST $ \s# ->
407 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
411 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
412 unpack :: ByteString -> [Word8]
414 #if !defined(__GLASGOW_HASKELL__)
416 unpack (PS _ _ 0) = []
417 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
418 go (p `plusPtr` s) (l - 1) []
421 go p 0 acc = peek p >>= \e -> return (e : acc)
422 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
423 {-# INLINE unpack #-}
427 unpack ps = build (unpackFoldr ps)
428 {-# INLINE unpack #-}
431 -- critical this isn't strict in the acc
432 -- as it will break in the presence of list fusion. this is a known
433 -- issue with seq and build/foldr rewrite rules, which rely on lazy
434 -- demanding to avoid bottoms in the list.
436 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
437 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
438 let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
439 loop _ (-1) acc = return acc
442 loop q (n-1) (a `f` acc)
443 loop (p `plusPtr` off) (len-1) ch
444 {-# INLINE [0] unpackFoldr #-}
446 unpackList :: ByteString -> [Word8]
447 unpackList (PS fp off len) = withPtr fp $ \p -> do
449 loop _ (-1) acc = return acc
452 loop q (n-1) (a : acc)
453 loop (p `plusPtr` off) (len-1) []
456 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
461 ------------------------------------------------------------------------
463 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
464 -- conversion function
465 packWith :: (a -> Word8) -> [a] -> ByteString
466 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
470 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
471 {-# INLINE packWith #-}
472 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
474 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
475 unpackWith :: (Word8 -> a) -> ByteString -> [a]
476 unpackWith _ (PS _ _ 0) = []
477 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
478 go (p `plusPtr` s) (l - 1) []
481 go p 0 acc = peek p >>= \e -> return (k e : acc)
482 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
483 {-# INLINE unpackWith #-}
484 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
486 -- ---------------------------------------------------------------------
489 -- | /O(1)/ Test whether a ByteString is empty.
490 null :: ByteString -> Bool
491 null (PS _ _ l) = assert (l >= 0) $ l <= 0
494 -- ---------------------------------------------------------------------
495 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
496 length :: ByteString -> Int
497 length (PS _ _ l) = assert (l >= 0) $ l
500 -- length/loop fusion. When taking the length of any fuseable loop,
501 -- rewrite it as a foldl', and thus avoid allocating the result buffer
502 -- worth around 10% in speed testing.
505 #if defined(__GLASGOW_HASKELL__)
506 {-# INLINE [1] length #-}
509 lengthU :: ByteString -> Int
510 lengthU = foldl' (const . (+1)) (0::Int)
511 {-# INLINE lengthU #-}
516 "length/loop" forall loop s .
517 length (loopArr (loopWrapper loop s)) =
518 lengthU (loopArr (loopWrapper loop s))
522 ------------------------------------------------------------------------
524 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
525 -- complexity, as it requires a memcpy.
526 cons :: Word8 -> ByteString -> ByteString
527 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
529 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
532 -- | /O(n)/ Append a byte to the end of a 'ByteString'
533 snoc :: ByteString -> Word8 -> ByteString
534 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
535 memcpy p (f `plusPtr` s) (fromIntegral l)
536 poke (p `plusPtr` l) c
541 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
542 -- An exception will be thrown in the case of an empty ByteString.
543 head :: ByteString -> Word8
545 | l <= 0 = errorEmptyList "head"
546 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
549 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
550 -- An exception will be thrown in the case of an empty ByteString.
551 tail :: ByteString -> ByteString
553 | l <= 0 = errorEmptyList "tail"
554 | otherwise = PS p (s+1) (l-1)
557 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
558 -- An exception will be thrown in the case of an empty ByteString.
559 last :: ByteString -> Word8
561 | null ps = errorEmptyList "last"
562 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
565 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
566 -- An exception will be thrown in the case of an empty ByteString.
567 init :: ByteString -> ByteString
569 | null ps = errorEmptyList "init"
570 | otherwise = PS p s (l-1)
573 -- | /O(n)/ Append two ByteStrings
574 append :: ByteString -> ByteString -> ByteString
575 append xs ys | null xs = ys
577 | otherwise = concat [xs,ys]
578 {-# INLINE append #-}
580 -- ---------------------------------------------------------------------
583 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
584 -- element of @xs@. This function is subject to array fusion.
585 map :: (Word8 -> Word8) -> ByteString -> ByteString
586 #if defined(LOOPU_FUSION)
587 map f = loopArr . loopU (mapEFL f) NoAcc
588 #elif defined(LOOPUP_FUSION)
589 map f = loopArr . loopUp (mapEFL f) NoAcc
590 #elif defined(LOOPNOACC_FUSION)
591 map f = loopArr . loopNoAcc (mapEFL f)
593 map f = loopArr . loopMap f
598 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
599 -- slightly faster for one-shot cases.
600 map' :: (Word8 -> Word8) -> ByteString -> ByteString
601 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
602 create len $ map_ 0 (a `plusPtr` s)
604 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
607 | n >= len = return ()
609 x <- peekByteOff p1 n
610 pokeByteOff p2 n (f x)
615 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
616 reverse :: ByteString -> ByteString
617 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
618 c_reverse p (f `plusPtr` s) (fromIntegral l)
620 -- todo, fuseable version
622 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
623 -- 'ByteString' and \`intersperses\' that byte between the elements of
624 -- the 'ByteString'. It is analogous to the intersperse function on
626 intersperse :: Word8 -> ByteString -> ByteString
627 intersperse c ps@(PS x s l)
629 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
630 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
633 intersperse c = pack . List.intersperse c . unpack
636 -- | The 'transpose' function transposes the rows and columns of its
637 -- 'ByteString' argument.
638 transpose :: [ByteString] -> [ByteString]
639 transpose ps = P.map pack (List.transpose (P.map unpack ps))
641 -- ---------------------------------------------------------------------
642 -- Reducing 'ByteString's
644 -- | 'foldl', applied to a binary operator, a starting value (typically
645 -- the left-identity of the operator), and a ByteString, reduces the
646 -- ByteString using the binary operator, from left to right.
647 -- This function is subject to array fusion.
648 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
649 #if !defined(LOOPU_FUSION)
650 foldl f z = loopAcc . loopUp (foldEFL f) z
652 foldl f z = loopAcc . loopU (foldEFL f) z
658 -- About twice as fast with 6.4.1, but not fuseable
659 -- A simple fold . map is enough to make it worth while.
661 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
662 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
665 lgo z p q | p == q = return z
666 | otherwise = do c <- peek p
667 lgo (f z c) (p `plusPtr` 1) q
670 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
671 -- Though actually foldl is also strict in the accumulator.
672 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
674 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
675 {-# INLINE foldl' #-}
677 -- | 'foldr', applied to a binary operator, a starting value
678 -- (typically the right-identity of the operator), and a ByteString,
679 -- reduces the ByteString using the binary operator, from right to left.
680 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
681 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
684 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
685 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
686 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
687 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
690 go z p q | p == q = return z
691 | otherwise = do c <- peek p
692 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
693 {-# INLINE [1] foldr' #-}
695 -- | 'foldl1' is a variant of 'foldl' that has no starting value
696 -- argument, and thus must be applied to non-empty 'ByteStrings'.
697 -- This function is subject to array fusion.
698 -- An exception will be thrown in the case of an empty ByteString.
699 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
701 | null ps = errorEmptyList "foldl1"
702 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
703 {-# INLINE foldl1 #-}
705 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
706 -- An exception will be thrown in the case of an empty ByteString.
707 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
709 | null ps = errorEmptyList "foldl1'"
710 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
711 {-# INLINE foldl1' #-}
713 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
714 -- and thus must be applied to non-empty 'ByteString's
715 -- An exception will be thrown in the case of an empty ByteString.
716 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
718 | null ps = errorEmptyList "foldr1"
719 | otherwise = foldr f (last ps) (init ps)
720 {-# INLINE foldr1 #-}
722 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
724 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
726 | null ps = errorEmptyList "foldr1"
727 | otherwise = foldr' f (last ps) (init ps)
728 {-# INLINE [1] foldr1' #-}
730 -- ---------------------------------------------------------------------
733 -- | /O(n)/ Concatenate a list of ByteStrings.
734 concat :: [ByteString] -> ByteString
737 concat xs = unsafeCreate len $ \ptr -> go xs ptr
738 where len = P.sum . P.map length $ xs
741 go (PS p s l:ps) ptr = do
742 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
743 go ps (ptr `plusPtr` l)
745 -- | Map a function over a 'ByteString' and concatenate the results
746 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
747 concatMap f = concat . foldr ((:) . f) []
749 -- foldr (append . f) empty
751 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
752 -- any element of the 'ByteString' satisfies the predicate.
753 any :: (Word8 -> Bool) -> ByteString -> Bool
754 any _ (PS _ _ 0) = False
755 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
756 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
759 go p q | p == q = return False
760 | otherwise = do c <- peek p
761 if f c then return True
762 else go (p `plusPtr` 1) q
766 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
767 -- if all elements of the 'ByteString' satisfy the predicate.
768 all :: (Word8 -> Bool) -> ByteString -> Bool
769 all _ (PS _ _ 0) = True
770 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
771 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
774 go p q | p == q = return True -- end of list
775 | otherwise = do c <- peek p
777 then go (p `plusPtr` 1) q
780 ------------------------------------------------------------------------
782 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
783 -- This function will fuse.
784 -- An exception will be thrown in the case of an empty ByteString.
785 maximum :: ByteString -> Word8
786 maximum xs@(PS x s l)
787 | null xs = errorEmptyList "maximum"
788 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
789 c_maximum (p `plusPtr` s) (fromIntegral l)
791 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
792 -- This function will fuse.
793 -- An exception will be thrown in the case of an empty ByteString.
794 minimum :: ByteString -> Word8
795 minimum xs@(PS x s l)
796 | null xs = errorEmptyList "minimum"
797 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
798 c_minimum (p `plusPtr` s) (fromIntegral l)
801 -- minimum/maximum/loop fusion. As for length (and other folds), when we
802 -- see we're applied after a fuseable op, switch from using the C
803 -- version, to the fuseable version. The result should then avoid
804 -- allocating a buffer.
807 #if defined(__GLASGOW_HASKELL__)
808 {-# INLINE [1] minimum #-}
809 {-# INLINE [1] maximum #-}
812 maximumU :: ByteString -> Word8
813 maximumU = foldl1' max
814 {-# INLINE maximumU #-}
816 minimumU :: ByteString -> Word8
817 minimumU = foldl1' min
818 {-# INLINE minimumU #-}
822 "minimum/loop" forall loop s .
823 minimum (loopArr (loopWrapper loop s)) =
824 minimumU (loopArr (loopWrapper loop s))
826 "maximum/loop" forall loop s .
827 maximum (loopArr (loopWrapper loop s)) =
828 maximumU (loopArr (loopWrapper loop s))
832 ------------------------------------------------------------------------
834 -- | The 'mapAccumL' function behaves like a combination of 'map' and
835 -- 'foldl'; it applies a function to each element of a ByteString,
836 -- passing an accumulating parameter from left to right, and returning a
837 -- final value of this accumulator together with the new list.
838 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
839 #if !defined(LOOPU_FUSION)
840 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
842 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
844 {-# INLINE mapAccumL #-}
846 -- | The 'mapAccumR' function behaves like a combination of 'map' and
847 -- 'foldr'; it applies a function to each element of a ByteString,
848 -- passing an accumulating parameter from right to left, and returning a
849 -- final value of this accumulator together with the new ByteString.
850 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
851 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
852 {-# INLINE mapAccumR #-}
854 -- | /O(n)/ map Word8 functions, provided with the index at each position
855 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
856 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
857 {-# INLINE mapIndexed #-}
859 -- ---------------------------------------------------------------------
860 -- Building ByteStrings
862 -- | 'scanl' is similar to 'foldl', but returns a list of successive
863 -- reduced values from the left. This function will fuse.
865 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
869 -- > last (scanl f z xs) == foldl f z xs.
870 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
871 #if !defined(LOOPU_FUSION)
872 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
874 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
877 -- n.b. haskell's List scan returns a list one bigger than the
878 -- input, so we need to snoc here to get some extra space, however,
879 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
882 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
883 -- This function will fuse.
885 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
886 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
889 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
890 {-# INLINE scanl1 #-}
892 -- | scanr is the right-to-left dual of scanl.
893 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
894 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
897 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
898 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
901 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
902 {-# INLINE scanr1 #-}
904 -- ---------------------------------------------------------------------
905 -- Unfolds and replicates
907 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
908 -- the value of every element. The following holds:
910 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
912 -- This implemenation uses @memset(3)@
913 replicate :: Int -> Word8 -> ByteString
916 | otherwise = unsafeCreate w $ \ptr ->
917 memset ptr c (fromIntegral w) >> return ()
919 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
920 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
921 -- ByteString from a seed value. The function takes the element and
922 -- returns 'Nothing' if it is done producing the ByteString or returns
923 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
924 -- and @b@ is the seed value for further production.
928 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
929 -- > == pack [0, 1, 2, 3, 4, 5]
931 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
932 unfoldr f = concat . unfoldChunk 32 64
933 where unfoldChunk n n' x =
934 case unfoldrN n f x of
935 (s, Nothing) -> s : []
936 (s, Just x') -> s : unfoldChunk n' (n+n') x'
938 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
939 -- value. However, the length of the result is limited by the first
940 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
941 -- when the maximum length of the result is known.
943 -- The following equation relates 'unfoldrN' and 'unfoldr':
945 -- > unfoldrN n f s == take n (unfoldr f s)
947 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
949 | i < 0 = (empty, Just x0)
950 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
954 Nothing -> return (0, n, Nothing)
956 | n == i -> return (0, n, Just x)
957 | otherwise -> do poke p w
958 go (p `plusPtr` 1) x' (n+1)
960 -- ---------------------------------------------------------------------
963 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
964 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
965 take :: Int -> ByteString -> ByteString
969 | otherwise = PS x s n
972 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
973 -- elements, or @[]@ if @n > 'length' xs@.
974 drop :: Int -> ByteString -> ByteString
978 | otherwise = PS x (s+n) (l-n)
981 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
982 splitAt :: Int -> ByteString -> (ByteString, ByteString)
983 splitAt n ps@(PS x s l)
984 | n <= 0 = (empty, ps)
985 | n >= l = (ps, empty)
986 | otherwise = (PS x s n, PS x (s+n) (l-n))
987 {-# INLINE splitAt #-}
989 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
990 -- returns the longest prefix (possibly empty) of @xs@ of elements that
992 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
993 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
994 {-# INLINE takeWhile #-}
996 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
997 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
998 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
999 {-# INLINE dropWhile #-}
1001 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1002 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1003 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1004 {-# INLINE [1] break #-}
1007 "FPS specialise break (x==)" forall x.
1008 break ((==) x) = breakByte x
1011 #if __GLASGOW_HASKELL__ >= 605
1013 "FPS specialise break (==x)" forall x.
1014 break (==x) = breakByte x
1018 -- | 'breakByte' breaks its ByteString argument at the first occurence
1019 -- of the specified byte. It is more efficient than 'break' as it is
1020 -- implemented with @memchr(3)@. I.e.
1022 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1024 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1025 breakByte c p = case elemIndex c p of
1026 Nothing -> (p,empty)
1027 Just n -> (unsafeTake n p, unsafeDrop n p)
1028 {-# INLINE breakByte #-}
1030 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1032 -- breakEnd p == spanEnd (not.p)
1033 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1034 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1036 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1037 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1038 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1039 span p ps = break (not . p) ps
1040 {-# INLINE [1] span #-}
1042 -- | 'spanByte' breaks its ByteString argument at the first
1043 -- occurence of a byte other than its argument. It is more efficient
1046 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1048 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1049 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1050 go (p `plusPtr` s) 0
1053 go p i | i >= l = return (ps, empty)
1054 | otherwise = do c' <- peekByteOff p i
1056 then return (unsafeTake i ps, unsafeDrop i ps)
1058 {-# INLINE spanByte #-}
1061 "FPS specialise span (x==)" forall x.
1062 span ((==) x) = spanByte x
1065 #if __GLASGOW_HASKELL__ >= 605
1067 "FPS specialise span (==x)" forall x.
1068 span (==x) = spanByte x
1072 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1075 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1079 -- > spanEnd (not . isSpace) ps
1081 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1083 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1084 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1086 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1087 -- separators, where the predicate returns True for a separator element.
1088 -- The resulting components do not contain the separators. Two adjacent
1089 -- separators result in an empty component in the output. eg.
1091 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1092 -- > splitWith (=='a') [] == []
1094 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1096 #if defined(__GLASGOW_HASKELL__)
1097 splitWith _pred (PS _ _ 0) = []
1098 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1099 where pred# c# = pred_ (W8# c#)
1102 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1103 splitLoop pred' p 0 off' len' fp'
1105 splitLoop :: (Word# -> Bool)
1107 -> Int -> Int -> Int
1111 splitLoop pred' p idx' off' len' fp'
1112 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1113 | idx' >= len' = return [PS fp' off' idx']
1115 w <- peekElemOff p (off'+idx')
1116 if pred' (case w of W8# w# -> w#)
1117 then return (PS fp' off' idx' :
1118 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1119 else splitLoop pred' p (idx'+1) off' len' fp'
1120 {-# INLINE splitWith #-}
1123 splitWith _ (PS _ _ 0) = []
1124 splitWith p ps = loop p ps
1127 loop q qs = if null rest then [chunk]
1128 else chunk : loop q (unsafeTail rest)
1129 where (chunk,rest) = break q qs
1132 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1133 -- argument, consuming the delimiter. I.e.
1135 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1136 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1137 -- > split 'x' "x" == ["",""]
1141 -- > join [c] . split c == id
1142 -- > split == splitWith . (==)
1144 -- As for all splitting functions in this library, this function does
1145 -- not copy the substrings, it just constructs new 'ByteStrings' that
1146 -- are slices of the original.
1148 split :: Word8 -> ByteString -> [ByteString]
1149 split _ (PS _ _ 0) = []
1150 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1151 let ptr = p `plusPtr` s
1155 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1156 w (fromIntegral (l-n))
1158 then [PS x (s+n) (l-n)]
1159 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1162 {-# INLINE split #-}
1165 -- slower. but stays inside Haskell.
1166 split _ (PS _ _ 0) = []
1167 split (W8# w#) (PS fp off len) = splitWith' off len fp
1169 splitWith' off' len' fp' = withPtr fp $ \p ->
1170 splitLoop p 0 off' len' fp'
1172 splitLoop :: Ptr Word8
1173 -> Int -> Int -> Int
1178 splitLoop p idx' off' len' fp'
1179 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1180 | idx' >= len' = return [PS fp' off' idx']
1182 (W8# x#) <- peekElemOff p (off'+idx')
1183 if word2Int# w# ==# word2Int# x#
1184 then return (PS fp' off' idx' :
1185 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1186 else splitLoop p (idx'+1) off' len' fp'
1190 -- | Like 'splitWith', except that sequences of adjacent separators are
1191 -- treated as a single separator. eg.
1193 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1195 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1196 tokens f = P.filter (not.null) . splitWith f
1197 {-# INLINE tokens #-}
1200 -- | The 'group' function takes a ByteString and returns a list of
1201 -- ByteStrings such that the concatenation of the result is equal to the
1202 -- argument. Moreover, each sublist in the result contains only equal
1203 -- elements. For example,
1205 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1207 -- It is a special case of 'groupBy', which allows the programmer to
1208 -- supply their own equality test. It is about 40% faster than
1210 group :: ByteString -> [ByteString]
1213 | otherwise = ys : group zs
1215 (ys, zs) = spanByte (unsafeHead xs) xs
1217 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1218 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1221 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1223 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1225 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1226 -- 'ByteString's and concatenates the list after interspersing the first
1227 -- argument between each element of the list.
1228 join :: ByteString -> [ByteString] -> ByteString
1229 join s = concat . (List.intersperse s)
1230 {-# INLINE [1] join #-}
1233 "FPS specialise join c -> joinByte" forall c s1 s2 .
1234 join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1238 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1239 -- with a char. Around 4 times faster than the generalised join.
1241 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1242 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1243 withForeignPtr ffp $ \fp ->
1244 withForeignPtr fgp $ \gp -> do
1245 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1246 poke (ptr `plusPtr` l) c
1247 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1249 len = length f + length g + 1
1250 {-# INLINE joinWithByte #-}
1252 -- ---------------------------------------------------------------------
1253 -- Indexing ByteStrings
1255 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1256 index :: ByteString -> Int -> Word8
1258 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1259 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1260 ++ ", length = " ++ show (length ps))
1261 | otherwise = ps `unsafeIndex` n
1262 {-# INLINE index #-}
1264 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1265 -- element in the given 'ByteString' which is equal to the query
1266 -- element, or 'Nothing' if there is no such element.
1267 -- This implementation uses memchr(3).
1268 elemIndex :: Word8 -> ByteString -> Maybe Int
1269 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1270 let p' = p `plusPtr` s
1271 q <- memchr p' c (fromIntegral l)
1272 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1273 {-# INLINE elemIndex #-}
1275 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1276 -- element in the given 'ByteString' which is equal to the query
1277 -- element, or 'Nothing' if there is no such element. The following
1280 -- > elemIndexEnd c xs ==
1281 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1283 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1284 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1285 go (p `plusPtr` s) (l-1)
1288 go p i | i < 0 = return Nothing
1289 | otherwise = do ch' <- peekByteOff p i
1291 then return $ Just i
1293 {-# INLINE elemIndexEnd #-}
1295 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1296 -- the indices of all elements equal to the query element, in ascending order.
1297 -- This implementation uses memchr(3).
1298 elemIndices :: Word8 -> ByteString -> [Int]
1299 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1300 let ptr = p `plusPtr` s
1303 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1304 w (fromIntegral (l - n))
1307 else let i = q `minusPtr` ptr
1310 {-# INLINE elemIndices #-}
1314 elemIndices :: Word8 -> ByteString -> [Int]
1315 elemIndices c ps = loop 0 ps
1317 loop _ ps' | null ps' = []
1318 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1319 | otherwise = loop (n+1) (unsafeTail ps')
1322 -- | count returns the number of times its argument appears in the ByteString
1324 -- > count = length . elemIndices
1326 -- But more efficiently than using length on the intermediate list.
1327 count :: Word8 -> ByteString -> Int
1328 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1329 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1330 {-# INLINE count #-}
1334 -- around 30% slower
1336 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1337 go (p `plusPtr` s) (fromIntegral m) 0
1339 go :: Ptr Word8 -> CSize -> Int -> IO Int
1345 else do let k = fromIntegral $ q `minusPtr` p
1346 go (q `plusPtr` 1) (l-k-1) (i+1)
1349 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1350 -- returns the index of the first element in the ByteString
1351 -- satisfying the predicate.
1352 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1353 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1356 go ptr n | n >= l = return Nothing
1357 | otherwise = do w <- peek ptr
1359 then return (Just n)
1360 else go (ptr `plusPtr` 1) (n+1)
1361 {-# INLINE findIndex #-}
1363 -- | The 'findIndices' function extends 'findIndex', by returning the
1364 -- indices of all elements satisfying the predicate, in ascending order.
1365 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1366 findIndices p ps = loop 0 ps
1369 loop n qs | null qs = []
1370 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1371 | otherwise = loop (n+1) (unsafeTail qs)
1373 -- ---------------------------------------------------------------------
1374 -- Searching ByteStrings
1376 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1377 elem :: Word8 -> ByteString -> Bool
1378 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1381 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1382 notElem :: Word8 -> ByteString -> Bool
1383 notElem c ps = not (elem c ps)
1384 {-# INLINE notElem #-}
1386 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1387 -- returns a ByteString containing those characters that satisfy the
1388 -- predicate. This function is subject to array fusion.
1389 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1390 #if defined(LOOPU_FUSION)
1391 filter p = loopArr . loopU (filterEFL p) NoAcc
1392 #elif defined(LOOPUP_FUSION)
1393 filter p = loopArr . loopUp (filterEFL p) NoAcc
1394 #elif defined(LOOPNOACC_FUSION)
1395 filter p = loopArr . loopNoAcc (filterEFL p)
1397 filter f = loopArr . loopFilter f
1399 {-# INLINE filter #-}
1402 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1403 -- around 2x faster for some one-shot applications.
1404 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1405 filter' k ps@(PS x s l)
1407 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1408 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1409 return $! t `minusPtr` p -- actual length
1412 go f t end | f == end = return t
1416 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1417 else go (f `plusPtr` 1) t end
1418 {-# INLINE filter' #-}
1422 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1423 -- case of filtering a single byte. It is more efficient to use
1424 -- /filterByte/ in this case.
1426 -- > filterByte == filter . (==)
1428 -- filterByte is around 10x faster, and uses much less space, than its
1429 -- filter equivalent
1430 filterByte :: Word8 -> ByteString -> ByteString
1431 filterByte w ps = replicate (count w ps) w
1432 {-# INLINE filterByte #-}
1435 "FPS specialise filter (== x)" forall x.
1436 filter ((==) x) = filterByte x
1439 #if __GLASGOW_HASKELL__ >= 605
1441 "FPS specialise filter (== x)" forall x.
1442 filter (== x) = filterByte x
1447 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1448 -- case of filtering a single byte out of a list. It is more efficient
1449 -- to use /filterNotByte/ in this case.
1451 -- > filterNotByte == filter . (/=)
1453 -- filterNotByte is around 2x faster than its filter equivalent.
1454 filterNotByte :: Word8 -> ByteString -> ByteString
1455 filterNotByte w = filter (/= w)
1456 {-# INLINE filterNotByte #-}
1459 "FPS specialise filter (x /=)" forall x.
1460 filter ((/=) x) = filterNotByte x
1463 #if __GLASGOW_HASKELL__ >= 605
1465 "FPS specialise filter (/= x)" forall x.
1466 filter (/= x) = filterNotByte x
1470 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1471 -- and returns the first element in matching the predicate, or 'Nothing'
1472 -- if there is no such element.
1474 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1476 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1477 find f p = case findIndex f p of
1478 Just n -> Just (p `unsafeIndex` n)
1484 -- fuseable, but we don't want to walk the whole array.
1486 find k = foldl findEFL Nothing
1487 where findEFL a@(Just _) _ = a
1488 findEFL _ c | k c = Just c
1489 | otherwise = Nothing
1492 -- ---------------------------------------------------------------------
1493 -- Searching for substrings
1495 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1496 -- iff the first is a prefix of the second.
1497 isPrefixOf :: ByteString -> ByteString -> Bool
1498 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1501 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1502 withForeignPtr x2 $ \p2 -> do
1503 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1506 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1507 -- iff the first is a suffix of the second.
1509 -- The following holds:
1511 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1513 -- However, the real implemenation uses memcmp to compare the end of the
1514 -- string only, with no reverse required..
1515 isSuffixOf :: ByteString -> ByteString -> Bool
1516 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1519 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1520 withForeignPtr x2 $ \p2 -> do
1521 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1524 -- | Check whether one string is a substring of another. @isSubstringOf
1525 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1526 isSubstringOf :: ByteString -- ^ String to search for.
1527 -> ByteString -- ^ String to search in.
1529 isSubstringOf p s = not $ P.null $ findSubstrings p s
1531 -- | Get the first index of a substring in another string,
1532 -- or 'Nothing' if the string is not found.
1533 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1534 findSubstring :: ByteString -- ^ String to search for.
1535 -> ByteString -- ^ String to seach in.
1537 findSubstring = (listToMaybe .) . findSubstrings
1539 -- | Find the indexes of all (possibly overlapping) occurances of a
1540 -- substring in a string. This function uses the Knuth-Morris-Pratt
1541 -- string matching algorithm.
1542 findSubstrings :: ByteString -- ^ String to search for.
1543 -> ByteString -- ^ String to seach in.
1546 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1548 patc x = pat `unsafeIndex` x
1549 strc x = str `unsafeIndex` x
1551 -- maybe we should make kmpNext a UArray before using it in search?
1552 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1553 kmpNextL p _ | null p = []
1554 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1556 x = if not (null ps) && unsafeHead ps == patc j'
1557 then kmpNext Array.! j' else j'
1559 search i j = match ++ rest -- i: position in string, j: position in pattern
1560 where match = if j == m then [(i - j)] else []
1561 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1562 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1565 -- ---------------------------------------------------------------------
1568 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1569 -- corresponding pairs of bytes. If one input ByteString is short,
1570 -- excess elements of the longer ByteString are discarded. This is
1571 -- equivalent to a pair of 'unpack' operations.
1572 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1574 | null ps || null qs = []
1575 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1577 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1578 -- the first argument, instead of a tupling function. For example,
1579 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1580 -- corresponding sums.
1581 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1583 | null ps || null qs = []
1584 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1587 -- | A specialised version of zipWith for the common case of a
1588 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1589 -- are used to automatically covert zipWith into zipWith' when a pack is
1590 -- performed on the result of zipWith, but we also export it for
1593 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1594 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1595 withForeignPtr fp $ \a ->
1596 withForeignPtr fq $ \b ->
1597 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1599 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1602 | n >= len = return ()
1604 x <- peekByteOff p1 n
1605 y <- peekByteOff p2 n
1606 pokeByteOff r n (f x y)
1607 zipWith_ (n+1) p1 p2 r
1610 {-# INLINE zipWith' #-}
1614 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1615 zipWith f p q = unpack (zipWith' f p q)
1618 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1619 -- ByteStrings. Note that this performs two 'pack' operations.
1620 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1621 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1622 {-# INLINE unzip #-}
1624 -- ---------------------------------------------------------------------
1627 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1628 inits :: ByteString -> [ByteString]
1629 inits (PS x s l) = [PS x s n | n <- [0..l]]
1631 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1632 tails :: ByteString -> [ByteString]
1633 tails p | null p = [empty]
1634 | otherwise = p : tails (unsafeTail p)
1636 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1638 -- ---------------------------------------------------------------------
1639 -- ** Ordered 'ByteString's
1641 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1642 sort :: ByteString -> ByteString
1643 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1645 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1646 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1649 go 256 _ = return ()
1650 go i ptr = do n <- peekElemOff arr i
1651 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1652 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1656 sort :: ByteString -> ByteString
1657 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1658 memcpy p (f `plusPtr` s) l
1659 c_qsort p l -- inplace
1662 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1664 -- Try some linear sorts: radix, counting
1667 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1668 -- sortBy f ps = undefined
1670 -- ---------------------------------------------------------------------
1671 -- Low level constructors
1673 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1674 -- finalizer associated to it. The ByteString length is calculated using
1675 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1676 packCString :: CString -> ByteString
1677 packCString cstr = unsafePerformIO $ do
1678 fp <- newForeignPtr_ (castPtr cstr)
1680 return $! PS fp 0 (fromIntegral l)
1682 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1683 -- have /no/ finalizer associated with it. This operation has /O(1)/
1684 -- complexity as we already know the final size, so no /strlen(3)/ is
1686 packCStringLen :: CStringLen -> ByteString
1687 packCStringLen (ptr,len) = unsafePerformIO $ do
1688 fp <- newForeignPtr_ (castPtr ptr)
1689 return $! PS fp 0 (fromIntegral len)
1691 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1692 -- have a @free(3)@ finalizer associated to it.
1693 packMallocCString :: CString -> ByteString
1694 packMallocCString cstr = unsafePerformIO $ do
1695 fp <- newForeignFreePtr (castPtr cstr)
1696 len <- c_strlen cstr
1697 return $! PS fp 0 (fromIntegral len)
1699 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1700 -- null-terminated @CString@. The @CString@ will be freed
1701 -- automatically. This is a memcpy(3).
1702 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1703 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1704 where alloc = withForeignPtr ps $ \p -> do
1705 buf <- c_malloc (fromIntegral l+1)
1706 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1707 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1708 return (castPtr buf)
1710 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1711 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1712 useAsCStringLen = unsafeUseAsCStringLen
1715 -- why were we doing this?
1717 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1718 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1720 -- alloc = withForeignPtr ps $ \p -> do
1721 -- buf <- c_malloc (fromIntegral l+1)
1722 -- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1723 -- poke (buf `plusPtr` l) (0::Word8) -- n.b.
1724 -- return $! (castPtr buf, l)
1727 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1728 -- This is mainly useful to allow the rest of the data pointed
1729 -- to by the 'ByteString' to be garbage collected, for example
1730 -- if a large string has been read in, and only a small part of it
1731 -- is needed in the rest of the program.
1732 copy :: ByteString -> ByteString
1733 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1734 memcpy p (f `plusPtr` s) (fromIntegral l)
1736 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1737 -- CString is going to be deallocated from C land.
1738 copyCString :: CString -> IO ByteString
1739 copyCString cstr = do
1740 len <- c_strlen cstr
1741 copyCStringLen (cstr, fromIntegral len)
1743 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1744 copyCStringLen :: CStringLen -> IO ByteString
1745 copyCStringLen (cstr, len) = create len $ \p ->
1746 memcpy p (castPtr cstr) (fromIntegral len)
1748 -- ---------------------------------------------------------------------
1751 -- | Read a line from stdin.
1752 getLine :: IO ByteString
1753 getLine = hGetLine stdin
1756 -- | Lazily construct a list of lines of ByteStrings. This will be much
1757 -- better on memory consumption than using 'hGetContents >>= lines'
1758 -- If you're considering this, a better choice might be to use
1759 -- Data.ByteString.Lazy
1760 hGetLines :: Handle -> IO [ByteString]
1763 go = unsafeInterleaveIO $ do
1773 -- | Read a line from a handle
1775 hGetLine :: Handle -> IO ByteString
1776 #if !defined(__GLASGOW_HASKELL__)
1778 string <- System.IO.hGetLine h
1779 return $ packWith c2w string
1781 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1782 case haBufferMode handle_ of
1783 NoBuffering -> error "no buffering"
1784 _other -> hGetLineBuffered handle_
1787 hGetLineBuffered handle_ = do
1788 let ref = haBuffer handle_
1789 buf <- readIORef ref
1790 hGetLineBufferedLoop handle_ ref buf 0 []
1792 hGetLineBufferedLoop handle_ ref
1793 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1795 off <- findEOL r w raw
1796 let new_len = len + off - r
1797 xs <- mkPS raw r off
1799 -- if eol == True, then off is the offset of the '\n'
1800 -- otherwise off == w and the buffer is now empty.
1802 then do if (w == off + 1)
1803 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1804 else writeIORef ref buf{ bufRPtr = off + 1 }
1805 mkBigPS new_len (xs:xss)
1807 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1808 buf{ bufWPtr=0, bufRPtr=0 }
1810 -- Nothing indicates we caught an EOF, and we may have a
1811 -- partial line to return.
1813 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1815 then mkBigPS new_len (xs:xss)
1818 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1820 -- find the end-of-line character, if there is one
1824 (c,r') <- readCharFromBuffer raw r
1826 then return r -- NB. not r': don't include the '\n'
1827 else findEOL r' w raw
1829 maybeFillReadBuffer fd is_line is_stream buf = catch
1830 (do buf' <- fillReadBuffer fd is_line is_stream buf
1832 (\e -> if isEOFError e then return Nothing else ioError e)
1834 -- TODO, rewrite to use normal memcpy
1835 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1836 mkPS buf start end =
1837 let len = end - start
1838 in create len $ \p -> do
1839 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1842 mkBigPS :: Int -> [ByteString] -> IO ByteString
1843 mkBigPS _ [ps] = return ps
1844 mkBigPS _ pss = return $! concat (P.reverse pss)
1848 -- ---------------------------------------------------------------------
1851 -- | Outputs a 'ByteString' to the specified 'Handle'.
1852 hPut :: Handle -> ByteString -> IO ()
1853 hPut _ (PS _ _ 0) = return ()
1854 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1856 -- | A synonym for @hPut@, for compatibility
1857 hPutStr :: Handle -> ByteString -> IO ()
1860 -- | Write a ByteString to a handle, appending a newline byte
1861 hPutStrLn :: Handle -> ByteString -> IO ()
1863 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1864 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1866 -- | Write a ByteString to stdout
1867 putStr :: ByteString -> IO ()
1868 putStr = hPut stdout
1870 -- | Write a ByteString to stdout, appending a newline byte
1871 putStrLn :: ByteString -> IO ()
1872 putStrLn = hPutStrLn stdout
1874 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1875 -- is far more efficient than reading the characters into a 'String'
1876 -- and then using 'pack'.
1877 hGet :: Handle -> Int -> IO ByteString
1878 hGet _ 0 = return empty
1879 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1881 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1882 -- waiting for data to become available, instead it returns only whatever data
1884 hGetNonBlocking :: Handle -> Int -> IO ByteString
1885 #if defined(__GLASGOW_HASKELL__)
1886 hGetNonBlocking _ 0 = return empty
1887 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1889 hGetNonBlocking = hGet
1892 -- | Read entire handle contents into a 'ByteString'.
1893 -- This function reads chunks at a time, doubling the chunksize on each
1894 -- read. The final buffer is then realloced to the appropriate size. For
1895 -- files > half of available memory, this may lead to memory exhaustion.
1896 -- Consider using 'readFile' in this case.
1898 -- As with 'hGet', the string representation in the file is assumed to
1901 hGetContents :: Handle -> IO ByteString
1903 let start_size = 1024
1904 p <- mallocArray start_size
1905 i <- hGetBuf h p start_size
1907 then do p' <- reallocArray p i
1908 fp <- newForeignFreePtr p'
1914 p' <- reallocArray p s'
1915 i <- hGetBuf h (p' `plusPtr` s) s
1917 then do let i' = s + i
1918 p'' <- reallocArray p' i'
1919 fp <- newForeignFreePtr p''
1920 return $! PS fp 0 i'
1923 -- | getContents. Equivalent to hGetContents stdin
1924 getContents :: IO ByteString
1925 getContents = hGetContents stdin
1927 -- | The interact function takes a function of type @ByteString -> ByteString@
1928 -- as its argument. The entire input from the standard input device is passed
1929 -- to this function as its argument, and the resulting string is output on the
1930 -- standard output device. It's great for writing one line programs!
1931 interact :: (ByteString -> ByteString) -> IO ()
1932 interact transformer = putStr . transformer =<< getContents
1934 -- | Read an entire file strictly into a 'ByteString'. This is far more
1935 -- efficient than reading the characters into a 'String' and then using
1936 -- 'pack'. It also may be more efficient than opening the file and
1937 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1938 -- for 'text mode' use the Char8 version of this function.
1939 readFile :: FilePath -> IO ByteString
1940 readFile f = bracket (openBinaryFile f ReadMode) hClose
1941 (\h -> hFileSize h >>= hGet h . fromIntegral)
1943 -- | Write a 'ByteString' to a file.
1944 writeFile :: FilePath -> ByteString -> IO ()
1945 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1948 -- | Append a 'ByteString' to a file.
1949 appendFile :: FilePath -> ByteString -> IO ()
1950 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1955 -- Disable until we can move it into a portable .hsc file
1958 -- | Like readFile, this reads an entire file directly into a
1959 -- 'ByteString', but it is even more efficient. It involves directly
1960 -- mapping the file to memory. This has the advantage that the contents
1961 -- of the file never need to be copied. Also, under memory pressure the
1962 -- page may simply be discarded, while in the case of readFile it would
1963 -- need to be written to swap. If you read many small files, mmapFile
1964 -- will be less memory-efficient than readFile, since each mmapFile
1965 -- takes up a separate page of memory. Also, you can run into bus
1966 -- errors if the file is modified. As with 'readFile', the string
1967 -- representation in the file is assumed to be ISO-8859-1.
1969 -- On systems without mmap, this is the same as a readFile.
1971 mmapFile :: FilePath -> IO ByteString
1972 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1974 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1976 h <- openBinaryFile f ReadMode
1977 l <- fromIntegral `fmap` hFileSize h
1978 -- Don't bother mmaping small files because each mmapped file takes up
1979 -- at least one full VM block.
1981 then do thefp <- mallocByteString l
1982 withForeignPtr thefp $ \p-> hGetBuf h p l
1987 fd <- fromIntegral `fmap` handleToFd h
1989 fp <- if p == nullPtr
1990 then do thefp <- mallocByteString l
1991 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1994 -- The munmap leads to crashes on OpenBSD.
1995 -- maybe there's a use after unmap in there somewhere?
1996 -- Bulat suggests adding the hClose to the
1997 -- finalizer, excellent idea.
1998 #if !defined(__OpenBSD__)
1999 let unmap = c_munmap p l >> return ()
2001 let unmap = return ()
2003 fp <- newForeignPtr p unmap
2008 where mmap_limit = 16*1024
2011 -- ---------------------------------------------------------------------
2012 -- Internal utilities
2014 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
2015 -- of the string if no element is found, rather than Nothing.
2016 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2017 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
2020 go ptr n | n >= l = return l
2021 | otherwise = do w <- peek ptr
2024 else go (ptr `plusPtr` 1) (n+1)
2025 {-# INLINE findIndexOrEnd #-}
2027 -- | Perform an operation with a temporary ByteString
2028 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2029 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2030 {-# INLINE withPtr #-}
2032 -- Common up near identical calls to `error' to reduce the number
2033 -- constant strings created when compiled:
2034 errorEmptyList :: String -> a
2035 errorEmptyList fun = moduleError fun "empty ByteString"
2036 {-# NOINLINE errorEmptyList #-}
2038 moduleError :: String -> String -> a
2039 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2040 {-# NOINLINE moduleError #-}
2042 -- Find from the end of the string using predicate
2043 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2044 STRICT2(findFromEndUntil)
2045 findFromEndUntil f ps@(PS x s l) =
2047 else if f (last ps) then l
2048 else findFromEndUntil f (PS x s (l-1))
2050 {-# INLINE newForeignFreePtr #-}
2051 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2052 newForeignFreePtr p = newForeignPtr c_free_finalizer p