1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 singleton, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 append, -- :: ByteString -> ByteString -> ByteString
56 head, -- :: ByteString -> Word8
57 last, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 init, -- :: ByteString -> ByteString
60 null, -- :: ByteString -> Bool
61 length, -- :: ByteString -> Int
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
66 reverse, -- :: ByteString -> ByteString
67 intersperse, -- :: Word8 -> ByteString -> ByteString
68 transpose, -- :: [ByteString] -> [ByteString]
70 -- * Reducing 'ByteString's (folds)
71 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
72 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
73 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
74 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
75 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
76 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 concat, -- :: [ByteString] -> ByteString
80 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
81 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
82 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 maximum, -- :: ByteString -> Word8
84 minimum, -- :: ByteString -> Word8
86 -- * Building ByteStrings
88 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
89 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
90 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
93 -- ** Accumulating maps
94 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
95 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
98 -- ** Unfolding ByteStrings
99 replicate, -- :: Int -> Word8 -> ByteString
100 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
101 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
105 -- ** Breaking strings
106 take, -- :: Int -> ByteString -> ByteString
107 drop, -- :: Int -> ByteString -> ByteString
108 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
109 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
113 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
114 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
117 inits, -- :: ByteString -> [ByteString]
118 tails, -- :: ByteString -> [ByteString]
120 -- ** Breaking and dropping on specific bytes
121 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
122 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
124 -- ** Breaking into many substrings
125 split, -- :: Word8 -> ByteString -> [ByteString]
126 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
127 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
129 -- ** Joining strings
130 join, -- :: ByteString -> [ByteString] -> ByteString
131 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
134 isPrefixOf, -- :: ByteString -> ByteString -> Bool
135 isSuffixOf, -- :: ByteString -> ByteString -> Bool
137 -- ** Search for arbitrary substrings
138 isSubstringOf, -- :: ByteString -> ByteString -> Bool
139 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
140 findSubstrings, -- :: ByteString -> ByteString -> [Int]
142 -- * Searching ByteStrings
144 -- ** Searching by equality
145 -- | These functions use memchr(3) to efficiently search the ByteString
146 elem, -- :: Word8 -> ByteString -> Bool
147 notElem, -- :: Word8 -> ByteString -> Bool
148 filterByte, -- :: Word8 -> ByteString -> ByteString
149 filterNotByte, -- :: Word8 -> ByteString -> ByteString
151 -- ** Searching with a predicate
152 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
153 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
154 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
155 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
157 -- * Indexing ByteStrings
158 index, -- :: ByteString -> Int -> Word8
159 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
160 elemIndices, -- :: Word8 -> ByteString -> [Int]
161 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
162 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
163 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
164 count, -- :: Word8 -> ByteString -> Int
165 findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int
167 -- * Zipping and unzipping ByteStrings
168 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
169 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
171 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
173 -- * Ordered ByteStrings
174 sort, -- :: ByteString -> ByteString
176 -- * Low level CString conversions
178 -- ** Packing CStrings and pointers
179 packCString, -- :: CString -> ByteString
180 packCStringLen, -- :: CString -> ByteString
181 packMallocCString, -- :: CString -> ByteString
183 -- ** Using ByteStrings as CStrings
184 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
185 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
187 -- ** Copying ByteStrings
188 -- | These functions perform memcpy(3) operations
189 copy, -- :: ByteString -> ByteString
190 copyCString, -- :: CString -> IO ByteString
191 copyCStringLen, -- :: CStringLen -> IO ByteString
193 -- * I\/O with 'ByteString's
195 -- ** Standard input and output
197 #if defined(__GLASGOW_HASKELL__)
198 getLine, -- :: IO ByteString
200 getContents, -- :: IO ByteString
201 putStr, -- :: ByteString -> IO ()
202 putStrLn, -- :: ByteString -> IO ()
205 readFile, -- :: FilePath -> IO ByteString
206 writeFile, -- :: FilePath -> ByteString -> IO ()
207 appendFile, -- :: FilePath -> ByteString -> IO ()
208 -- mmapFile, -- :: FilePath -> IO ByteString
210 -- ** I\/O with Handles
211 #if defined(__GLASGOW_HASKELL__)
212 getArgs, -- :: IO [ByteString]
213 hGetLine, -- :: Handle -> IO ByteString
214 hGetLines, -- :: Handle -> IO [ByteString]
215 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
217 hGetContents, -- :: Handle -> IO ByteString
218 hGet, -- :: Handle -> Int -> IO ByteString
219 hPut, -- :: Handle -> ByteString -> IO ()
220 hPutStr, -- :: Handle -> ByteString -> IO ()
221 hPutStrLn, -- :: Handle -> ByteString -> IO ()
223 -- * Fusion utilities
224 #if defined(__GLASGOW_HASKELL__)
225 unpackList, -- eek, otherwise it gets thrown away by the simplifier
227 lengthU, maximumU, minimumU
230 import qualified Prelude as P
231 import Prelude hiding (reverse,head,tail,last,init,null
232 ,length,map,lines,foldl,foldr,unlines
233 ,concat,any,take,drop,splitAt,takeWhile
234 ,dropWhile,span,break,elem,filter,maximum
235 ,minimum,all,concatMap,foldl1,foldr1
236 ,scanl,scanl1,scanr,scanr1
237 ,readFile,writeFile,appendFile,replicate
238 ,getContents,getLine,putStr,putStrLn
239 ,zip,zipWith,unzip,notElem)
241 import Data.ByteString.Base
242 import Data.ByteString.Fusion
244 import qualified Data.List as List
246 import Data.Word (Word8)
247 import Data.Maybe (listToMaybe)
248 import Data.Array (listArray)
249 import qualified Data.Array as Array ((!))
251 -- Control.Exception.bracket not available in yhc or nhc
252 import Control.Exception (bracket, assert)
253 import Control.Monad (when)
255 import Foreign.C.String (CString, CStringLen)
256 import Foreign.C.Types (CSize)
257 import Foreign.ForeignPtr
258 import Foreign.Marshal.Array
260 import Foreign.Storable (Storable(..))
262 -- hGetBuf and hPutBuf not available in yhc or nhc
263 import System.IO (stdin,stdout,hClose,hFileSize
264 ,hGetBuf,hPutBuf,openBinaryFile
267 import Data.Monoid (Monoid, mempty, mappend, mconcat)
269 #if !defined(__GLASGOW_HASKELL__)
270 import System.IO.Unsafe
273 #if defined(__GLASGOW_HASKELL__)
275 import System.IO (hGetBufNonBlocking)
276 import System.IO.Error (isEOFError)
278 import Foreign.Marshal (alloca)
279 import qualified Foreign.Concurrent as FC (newForeignPtr)
282 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
283 -- import GHC.Base (build)
284 import GHC.Word hiding (Word8)
285 import GHC.Ptr (Ptr(..))
286 import GHC.ST (ST(..))
291 -- -----------------------------------------------------------------------------
293 -- Useful macros, until we have bang patterns
296 #define STRICT1(f) f a | a `seq` False = undefined
297 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
298 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
299 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
300 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
302 -- -----------------------------------------------------------------------------
304 instance Eq ByteString
307 instance Ord ByteString
308 where compare = compareBytes
310 instance Show ByteString where
311 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
313 instance Read ByteString where
314 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
316 instance Monoid ByteString where
322 instance Arbitrary PackedString where
323 arbitrary = P.pack `fmap` arbitrary
324 coarbitrary s = coarbitrary (P.unpack s)
327 -- | /O(n)/ Equality on the 'ByteString' type.
328 eq :: ByteString -> ByteString -> Bool
329 eq a@(PS p s l) b@(PS p' s' l')
330 | l /= l' = False -- short cut on length
331 | p == p' && s == s' = True -- short cut for the same string
332 | otherwise = compareBytes a b == EQ
335 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
336 compareBytes :: ByteString -> ByteString -> Ordering
337 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
338 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
339 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
340 | otherwise = inlinePerformIO $
341 withForeignPtr x1 $ \p1 ->
342 withForeignPtr x2 $ \p2 -> do
343 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
344 return $! case i `compare` 0 of
345 EQ -> l1 `compare` l2
347 {-# INLINE compareBytes #-}
351 -- About 4x slower over 32M
353 compareBytes :: ByteString -> ByteString -> Ordering
354 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
355 withForeignPtr fp1 $ \p1 ->
356 withForeignPtr fp2 $ \p2 ->
357 cmp (p1 `plusPtr` off1)
358 (p2 `plusPtr` off2) 0 len1 len2
360 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
362 cmp p1 p2 n len1 len2
363 | n == len1 = if n == len2 then return EQ else return LT
364 | n == len2 = return GT
366 (a :: Word8) <- peekByteOff p1 n
367 (b :: Word8) <- peekByteOff p2 n
368 case a `compare` b of
369 EQ -> cmp p1 p2 (n+1) len1 len2
372 {-# INLINE compareBytes #-}
375 -- -----------------------------------------------------------------------------
376 -- Introducing and eliminating 'ByteString's
378 -- | /O(1)/ The empty 'ByteString'
380 empty = unsafeCreate 0 $ const $ return ()
381 {-# NOINLINE empty #-}
383 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
384 singleton :: Word8 -> ByteString
385 singleton c = unsafeCreate 1 $ \p -> poke p c
386 {-# INLINE singleton #-}
389 -- XXX The unsafePerformIO is critical!
393 -- singleton 255 `compare` singleton 127
397 -- case mallocByteString 2 of
398 -- ForeignPtr f internals ->
399 -- case writeWord8OffAddr# f 0 255 of _ ->
400 -- case writeWord8OffAddr# f 0 127 of _ ->
401 -- case eqAddr# f f of
402 -- False -> case compare (GHC.Prim.plusAddr# f 0)
403 -- (GHC.Prim.plusAddr# f 0)
407 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
409 -- For applications with large numbers of string literals, pack can be a
410 -- bottleneck. In such cases, consider using packAddress (GHC only).
411 pack :: [Word8] -> ByteString
413 #if !defined(__GLASGOW_HASKELL__)
415 pack str = unsafeCreate (P.length str) $ \p -> go p str
418 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
420 #else /* hack away */
422 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
424 go _ _ [] = return ()
425 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
427 writeByte p i c = ST $ \s# ->
428 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
432 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
433 unpack :: ByteString -> [Word8]
435 -- #if !defined(__GLASGOW_HASKELL__)
437 unpack (PS _ _ 0) = []
438 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
439 go (p `plusPtr` s) (l - 1) []
442 go p 0 acc = peek p >>= \e -> return (e : acc)
443 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
444 {-# INLINE unpack #-}
449 -- Interacting with head/build fusion rule in ghc 6.5. Disable for now
452 -- unpack ps = build (unpackFoldr ps)
453 -- {-# INLINE unpack #-}
455 unpackList :: ByteString -> [Word8]
456 unpackList (PS fp off len) = withPtr fp $ \p -> do
458 loop _ (-1) acc = return acc
461 loop q (n-1) (a : acc)
462 loop (p `plusPtr` off) (len-1) []
465 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
468 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
469 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
471 loop _ (-1) acc = return acc
474 loop q (n-1) (a `f` acc)
475 loop (p `plusPtr` off) (len-1) ch
476 {-# INLINE [0] unpackFoldr #-}
480 ------------------------------------------------------------------------
482 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
483 -- conversion function
484 packWith :: (a -> Word8) -> [a] -> ByteString
485 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
489 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
490 {-# INLINE packWith #-}
491 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
493 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
494 unpackWith :: (Word8 -> a) -> ByteString -> [a]
495 unpackWith _ (PS _ _ 0) = []
496 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
497 go (p `plusPtr` s) (l - 1) []
500 go p 0 acc = peek p >>= \e -> return (k e : acc)
501 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
502 {-# INLINE unpackWith #-}
503 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
505 -- ---------------------------------------------------------------------
508 -- | /O(1)/ Test whether a ByteString is empty.
509 null :: ByteString -> Bool
510 null (PS _ _ l) = assert (l >= 0) $ l <= 0
513 -- ---------------------------------------------------------------------
514 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
515 length :: ByteString -> Int
516 length (PS _ _ l) = assert (l >= 0) $ l
519 -- length/loop fusion. When taking the length of any fuseable loop,
520 -- rewrite it as a foldl', and thus avoid allocating the result buffer
521 -- worth around 10% in speed testing.
524 #if defined(__GLASGOW_HASKELL__)
525 {-# INLINE [1] length #-}
528 lengthU :: ByteString -> Int
529 lengthU = foldl' (const . (+1)) (0::Int)
530 {-# INLINE lengthU #-}
535 "length/loop" forall loop s .
536 length (loopArr (loopWrapper loop s)) =
537 lengthU (loopArr (loopWrapper loop s))
541 ------------------------------------------------------------------------
543 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
544 -- complexity, as it requires a memcpy.
545 cons :: Word8 -> ByteString -> ByteString
546 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
548 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
551 -- | /O(n)/ Append a byte to the end of a 'ByteString'
552 snoc :: ByteString -> Word8 -> ByteString
553 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
554 memcpy p (f `plusPtr` s) (fromIntegral l)
555 poke (p `plusPtr` l) c
560 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
561 -- An exception will be thrown in the case of an empty ByteString.
562 head :: ByteString -> Word8
564 | l <= 0 = errorEmptyList "head"
565 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
568 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
569 -- An exception will be thrown in the case of an empty ByteString.
570 tail :: ByteString -> ByteString
572 | l <= 0 = errorEmptyList "tail"
573 | otherwise = PS p (s+1) (l-1)
576 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
577 -- An exception will be thrown in the case of an empty ByteString.
578 last :: ByteString -> Word8
580 | null ps = errorEmptyList "last"
581 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
584 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
585 -- An exception will be thrown in the case of an empty ByteString.
586 init :: ByteString -> ByteString
588 | null ps = errorEmptyList "init"
589 | otherwise = PS p s (l-1)
592 -- | /O(n)/ Append two ByteStrings
593 append :: ByteString -> ByteString -> ByteString
594 append xs ys | null xs = ys
596 | otherwise = concat [xs,ys]
597 {-# INLINE append #-}
599 -- ---------------------------------------------------------------------
602 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
603 -- element of @xs@. This function is subject to array fusion.
604 map :: (Word8 -> Word8) -> ByteString -> ByteString
605 #if defined(LOOPU_FUSION)
606 map f = loopArr . loopU (mapEFL f) NoAcc
607 #elif defined(LOOPUP_FUSION)
608 map f = loopArr . loopUp (mapEFL f) NoAcc
609 #elif defined(LOOPNOACC_FUSION)
610 map f = loopArr . loopNoAcc (mapEFL f)
612 map f = loopArr . loopMap f
616 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
617 -- slightly faster for one-shot cases.
618 map' :: (Word8 -> Word8) -> ByteString -> ByteString
619 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
620 create len $ map_ 0 (a `plusPtr` s)
622 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
625 | n >= len = return ()
627 x <- peekByteOff p1 n
628 pokeByteOff p2 n (f x)
632 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
633 reverse :: ByteString -> ByteString
634 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
635 c_reverse p (f `plusPtr` s) (fromIntegral l)
637 -- todo, fuseable version
639 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
640 -- 'ByteString' and \`intersperses\' that byte between the elements of
641 -- the 'ByteString'. It is analogous to the intersperse function on
643 intersperse :: Word8 -> ByteString -> ByteString
644 intersperse c ps@(PS x s l)
646 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
647 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
650 intersperse c = pack . List.intersperse c . unpack
653 -- | The 'transpose' function transposes the rows and columns of its
654 -- 'ByteString' argument.
655 transpose :: [ByteString] -> [ByteString]
656 transpose ps = P.map pack (List.transpose (P.map unpack ps))
658 -- ---------------------------------------------------------------------
659 -- Reducing 'ByteString's
661 -- | 'foldl', applied to a binary operator, a starting value (typically
662 -- the left-identity of the operator), and a ByteString, reduces the
663 -- ByteString using the binary operator, from left to right.
664 -- This function is subject to array fusion.
665 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
666 #if !defined(LOOPU_FUSION)
667 foldl f z = loopAcc . loopUp (foldEFL f) z
669 foldl f z = loopAcc . loopU (foldEFL f) z
675 -- About twice as fast with 6.4.1, but not fuseable
676 -- A simple fold . map is enough to make it worth while.
678 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
679 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
682 lgo z p q | p == q = return z
683 | otherwise = do c <- peek p
684 lgo (f z c) (p `plusPtr` 1) q
687 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
688 -- Though actually foldl is also strict in the accumulator.
689 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
691 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
692 {-# INLINE foldl' #-}
694 -- | 'foldr', applied to a binary operator, a starting value
695 -- (typically the right-identity of the operator), and a ByteString,
696 -- reduces the ByteString using the binary operator, from right to left.
697 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
698 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
701 -- | 'foldl1' is a variant of 'foldl' that has no starting value
702 -- argument, and thus must be applied to non-empty 'ByteStrings'.
703 -- This function is subject to array fusion.
704 -- An exception will be thrown in the case of an empty ByteString.
705 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
707 | null ps = errorEmptyList "foldl1"
708 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
709 {-# INLINE foldl1 #-}
711 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
712 -- An exception will be thrown in the case of an empty ByteString.
713 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
715 | null ps = errorEmptyList "foldl1'"
716 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
717 {-# INLINE foldl1' #-}
719 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
720 -- and thus must be applied to non-empty 'ByteString's
721 -- An exception will be thrown in the case of an empty ByteString.
722 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
724 | null ps = errorEmptyList "foldr1"
725 | otherwise = foldr f (last ps) (init ps)
726 {-# INLINE foldr1 #-}
728 -- ---------------------------------------------------------------------
731 -- | /O(n)/ Concatenate a list of ByteStrings.
732 concat :: [ByteString] -> ByteString
735 concat xs = unsafeCreate len $ \ptr -> go xs ptr
736 where len = P.sum . P.map length $ xs
739 go (PS p s l:ps) ptr = do
740 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
741 go ps (ptr `plusPtr` l)
743 -- | Map a function over a 'ByteString' and concatenate the results
744 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
745 concatMap f = concat . foldr ((:) . f) []
747 -- foldr (append . f) empty
749 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
750 -- any element of the 'ByteString' satisfies the predicate.
751 any :: (Word8 -> Bool) -> ByteString -> Bool
752 any _ (PS _ _ 0) = False
753 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
754 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
757 go p q | p == q = return False
758 | otherwise = do c <- peek p
759 if f c then return True
760 else go (p `plusPtr` 1) q
764 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
765 -- if all elements of the 'ByteString' satisfy the predicate.
766 all :: (Word8 -> Bool) -> ByteString -> Bool
767 all _ (PS _ _ 0) = True
768 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
769 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
772 go p q | p == q = return True -- end of list
773 | otherwise = do c <- peek p
775 then go (p `plusPtr` 1) q
778 ------------------------------------------------------------------------
780 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
781 -- This function will fuse.
782 -- An exception will be thrown in the case of an empty ByteString.
783 maximum :: ByteString -> Word8
784 maximum xs@(PS x s l)
785 | null xs = errorEmptyList "maximum"
786 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
787 c_maximum (p `plusPtr` s) (fromIntegral l)
789 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
790 -- This function will fuse.
791 -- An exception will be thrown in the case of an empty ByteString.
792 minimum :: ByteString -> Word8
793 minimum xs@(PS x s l)
794 | null xs = errorEmptyList "minimum"
795 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
796 c_minimum (p `plusPtr` s) (fromIntegral l)
799 -- minimum/maximum/loop fusion. As for length (and other folds), when we
800 -- see we're applied after a fuseable op, switch from using the C
801 -- version, to the fuseable version. The result should then avoid
802 -- allocating a buffer.
805 #if defined(__GLASGOW_HASKELL__)
806 {-# INLINE [1] minimum #-}
807 {-# INLINE [1] maximum #-}
810 maximumU :: ByteString -> Word8
811 maximumU = foldl1' max
812 {-# INLINE maximumU #-}
814 minimumU :: ByteString -> Word8
815 minimumU = foldl1' min
816 {-# INLINE minimumU #-}
820 "minimum/loop" forall loop s .
821 minimum (loopArr (loopWrapper loop s)) =
822 minimumU (loopArr (loopWrapper loop s))
824 "maximum/loop" forall loop s .
825 maximum (loopArr (loopWrapper loop s)) =
826 maximumU (loopArr (loopWrapper loop s))
830 ------------------------------------------------------------------------
832 -- | The 'mapAccumL' function behaves like a combination of 'map' and
833 -- 'foldl'; it applies a function to each element of a ByteString,
834 -- passing an accumulating parameter from left to right, and returning a
835 -- final value of this accumulator together with the new list.
836 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
837 #if !defined(LOOPU_FUSION)
838 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
840 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
842 {-# INLINE mapAccumL #-}
844 -- | The 'mapAccumR' function behaves like a combination of 'map' and
845 -- 'foldr'; it applies a function to each element of a ByteString,
846 -- passing an accumulating parameter from right to left, and returning a
847 -- final value of this accumulator together with the new ByteString.
848 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
849 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
850 {-# INLINE mapAccumR #-}
852 -- | /O(n)/ map Word8 functions, provided with the index at each position
853 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
854 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
855 {-# INLINE mapIndexed #-}
857 -- ---------------------------------------------------------------------
858 -- Building ByteStrings
860 -- | 'scanl' is similar to 'foldl', but returns a list of successive
861 -- reduced values from the left. This function will fuse.
863 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
867 -- > last (scanl f z xs) == foldl f z xs.
868 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
869 #if !defined(LOOPU_FUSION)
870 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
872 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
875 -- n.b. haskell's List scan returns a list one bigger than the
876 -- input, so we need to snoc here to get some extra space, however,
877 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
880 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
881 -- This function will fuse.
883 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
884 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
887 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
888 {-# INLINE scanl1 #-}
890 -- | scanr is the right-to-left dual of scanl.
891 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
892 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
895 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
896 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
899 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
900 {-# INLINE scanr1 #-}
902 -- ---------------------------------------------------------------------
903 -- Unfolds and replicates
905 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
906 -- the value of every element. The following holds:
908 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
910 -- This implemenation uses @memset(3)@
911 replicate :: Int -> Word8 -> ByteString
914 | otherwise = unsafeCreate w $ \ptr ->
915 memset ptr c (fromIntegral w) >> return ()
917 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
918 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
919 -- ByteString from a seed value. The function takes the element and
920 -- returns 'Nothing' if it is done producing the ByteString or returns
921 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
922 -- and @b@ is the seed value for further production.
926 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
927 -- > == pack [0, 1, 2, 3, 4, 5]
929 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
930 unfoldr f = concat . unfoldChunk 32 64
931 where unfoldChunk n n' x =
932 case unfoldrN n f x of
933 (s, Nothing) -> s : []
934 (s, Just x') -> s : unfoldChunk n' (n+n') x'
936 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
937 -- value. However, the length of the result is limited by the first
938 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
939 -- when the maximum length of the result is known.
941 -- The following equation relates 'unfoldrN' and 'unfoldr':
943 -- > unfoldrN n f s == take n (unfoldr f s)
945 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
947 | i < 0 = (empty, Just x0)
948 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
952 Nothing -> return (0, n, Nothing)
954 | n == i -> return (0, n, Just x)
955 | otherwise -> do poke p w
956 go (p `plusPtr` 1) x' (n+1)
958 -- ---------------------------------------------------------------------
961 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
962 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
963 take :: Int -> ByteString -> ByteString
967 | otherwise = PS x s n
970 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
971 -- elements, or @[]@ if @n > 'length' xs@.
972 drop :: Int -> ByteString -> ByteString
976 | otherwise = PS x (s+n) (l-n)
979 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
980 splitAt :: Int -> ByteString -> (ByteString, ByteString)
981 splitAt n ps@(PS x s l)
982 | n <= 0 = (empty, ps)
983 | n >= l = (ps, empty)
984 | otherwise = (PS x s n, PS x (s+n) (l-n))
985 {-# INLINE splitAt #-}
987 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
988 -- returns the longest prefix (possibly empty) of @xs@ of elements that
990 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
991 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
992 {-# INLINE takeWhile #-}
994 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
995 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
996 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
997 {-# INLINE dropWhile #-}
999 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1000 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1001 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1002 {-# INLINE break #-}
1004 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1006 -- breakEnd p == spanEnd (not.p)
1007 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1008 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1010 -- | 'breakByte' breaks its ByteString argument at the first occurence
1011 -- of the specified byte. It is more efficient than 'break' as it is
1012 -- implemented with @memchr(3)@. I.e.
1014 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1016 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1017 breakByte c p = case elemIndex c p of
1018 Nothing -> (p,empty)
1019 Just n -> (unsafeTake n p, unsafeDrop n p)
1020 {-# INLINE breakByte #-}
1022 -- | 'spanByte' breaks its ByteString argument at the first
1023 -- occurence of a byte other than its argument. It is more efficient
1026 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1028 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1029 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1030 go (p `plusPtr` s) 0
1033 go p i | i >= l = return (ps, empty)
1034 | otherwise = do c' <- peekByteOff p i
1036 then return (unsafeTake i ps, unsafeDrop i ps)
1038 {-# INLINE spanByte #-}
1040 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1041 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1042 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1043 span p ps = break (not . p) ps
1046 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1049 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1053 -- > spanEnd (not . isSpace) ps
1055 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1057 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1058 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1060 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1061 -- separators, where the predicate returns True for a separator element.
1062 -- The resulting components do not contain the separators. Two adjacent
1063 -- separators result in an empty component in the output. eg.
1065 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1066 -- > splitWith (=='a') [] == []
1068 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1070 #if defined(__GLASGOW_HASKELL__)
1071 splitWith _pred (PS _ _ 0) = []
1072 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1073 where pred# c# = pred_ (W8# c#)
1076 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1077 splitLoop pred' p 0 off' len' fp'
1079 splitLoop :: (Word# -> Bool)
1081 -> Int -> Int -> Int
1085 splitLoop pred' p idx' off' len' fp'
1086 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1087 | idx' >= len' = return [PS fp' off' idx']
1089 w <- peekElemOff p (off'+idx')
1090 if pred' (case w of W8# w# -> w#)
1091 then return (PS fp' off' idx' :
1092 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1093 else splitLoop pred' p (idx'+1) off' len' fp'
1094 {-# INLINE splitWith #-}
1097 splitWith _ (PS _ _ 0) = []
1098 splitWith p ps = loop p ps
1101 loop q qs = if null rest then [chunk]
1102 else chunk : loop q (unsafeTail rest)
1103 where (chunk,rest) = break q qs
1106 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1107 -- argument, consuming the delimiter. I.e.
1109 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1110 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1111 -- > split 'x' "x" == ["",""]
1115 -- > join [c] . split c == id
1116 -- > split == splitWith . (==)
1118 -- As for all splitting functions in this library, this function does
1119 -- not copy the substrings, it just constructs new 'ByteStrings' that
1120 -- are slices of the original.
1122 split :: Word8 -> ByteString -> [ByteString]
1123 split _ (PS _ _ 0) = []
1124 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1125 let ptr = p `plusPtr` s
1129 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1130 w (fromIntegral (l-n))
1132 then [PS x (s+n) (l-n)]
1133 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1136 {-# INLINE split #-}
1139 -- slower. but stays inside Haskell.
1140 split _ (PS _ _ 0) = []
1141 split (W8# w#) (PS fp off len) = splitWith' off len fp
1143 splitWith' off' len' fp' = withPtr fp $ \p ->
1144 splitLoop p 0 off' len' fp'
1146 splitLoop :: Ptr Word8
1147 -> Int -> Int -> Int
1152 splitLoop p idx' off' len' fp'
1153 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1154 | idx' >= len' = return [PS fp' off' idx']
1156 (W8# x#) <- peekElemOff p (off'+idx')
1157 if word2Int# w# ==# word2Int# x#
1158 then return (PS fp' off' idx' :
1159 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1160 else splitLoop p (idx'+1) off' len' fp'
1163 -- | Like 'splitWith', except that sequences of adjacent separators are
1164 -- treated as a single separator. eg.
1166 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1168 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1169 tokens f = P.filter (not.null) . splitWith f
1170 {-# INLINE tokens #-}
1172 -- | The 'group' function takes a ByteString and returns a list of
1173 -- ByteStrings such that the concatenation of the result is equal to the
1174 -- argument. Moreover, each sublist in the result contains only equal
1175 -- elements. For example,
1177 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1179 -- It is a special case of 'groupBy', which allows the programmer to
1180 -- supply their own equality test. It is about 40% faster than
1182 group :: ByteString -> [ByteString]
1185 | otherwise = ys : group zs
1187 (ys, zs) = spanByte (unsafeHead xs) xs
1189 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1190 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1193 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1195 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1197 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1198 -- 'ByteString's and concatenates the list after interspersing the first
1199 -- argument between each element of the list.
1200 join :: ByteString -> [ByteString] -> ByteString
1201 join s = concat . (List.intersperse s)
1205 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1206 -- with a char. Around 4 times faster than the generalised join.
1208 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1209 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1210 withForeignPtr ffp $ \fp ->
1211 withForeignPtr fgp $ \gp -> do
1212 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1213 poke (ptr `plusPtr` l) c
1214 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1216 len = length f + length g + 1
1217 {-# INLINE joinWithByte #-}
1219 -- ---------------------------------------------------------------------
1220 -- Indexing ByteStrings
1222 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1223 index :: ByteString -> Int -> Word8
1225 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1226 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1227 ++ ", length = " ++ show (length ps))
1228 | otherwise = ps `unsafeIndex` n
1229 {-# INLINE index #-}
1231 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1232 -- element in the given 'ByteString' which is equal to the query
1233 -- element, or 'Nothing' if there is no such element.
1234 -- This implementation uses memchr(3).
1235 elemIndex :: Word8 -> ByteString -> Maybe Int
1236 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1237 let p' = p `plusPtr` s
1238 q <- memchr p' c (fromIntegral l)
1239 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1240 {-# INLINE elemIndex #-}
1242 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1243 -- element in the given 'ByteString' which is equal to the query
1244 -- element, or 'Nothing' if there is no such element. The following
1247 -- > elemIndexEnd c xs ==
1248 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1250 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1251 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1252 go (p `plusPtr` s) (l-1)
1255 go p i | i < 0 = return Nothing
1256 | otherwise = do ch' <- peekByteOff p i
1258 then return $ Just i
1260 {-# INLINE elemIndexEnd #-}
1262 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1263 -- the indices of all elements equal to the query element, in ascending order.
1264 -- This implementation uses memchr(3).
1265 elemIndices :: Word8 -> ByteString -> [Int]
1266 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1267 let ptr = p `plusPtr` s
1270 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1271 w (fromIntegral (l - n))
1274 else let i = q `minusPtr` ptr
1277 {-# INLINE elemIndices #-}
1281 elemIndices :: Word8 -> ByteString -> [Int]
1282 elemIndices c ps = loop 0 ps
1284 loop _ ps' | null ps' = []
1285 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1286 | otherwise = loop (n+1) (unsafeTail ps')
1289 -- | count returns the number of times its argument appears in the ByteString
1291 -- > count = length . elemIndices
1293 -- But more efficiently than using length on the intermediate list.
1294 count :: Word8 -> ByteString -> Int
1295 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1296 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1297 {-# INLINE count #-}
1301 -- around 30% slower
1303 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1304 go (p `plusPtr` s) (fromIntegral m) 0
1306 go :: Ptr Word8 -> CSize -> Int -> IO Int
1312 else do let k = fromIntegral $ q `minusPtr` p
1313 go (q `plusPtr` 1) (l-k-1) (i+1)
1316 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1317 -- returns the index of the first element in the ByteString
1318 -- satisfying the predicate.
1319 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1320 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1323 go ptr n | n >= l = return Nothing
1324 | otherwise = do w <- peek ptr
1326 then return (Just n)
1327 else go (ptr `plusPtr` 1) (n+1)
1328 {-# INLINE findIndex #-}
1330 -- | The 'findIndices' function extends 'findIndex', by returning the
1331 -- indices of all elements satisfying the predicate, in ascending order.
1332 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1333 findIndices p ps = loop 0 ps
1336 loop n qs | null qs = []
1337 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1338 | otherwise = loop (n+1) (unsafeTail qs)
1340 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1341 -- of the string if no element is found, rather than Nothing.
1342 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1343 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1346 go ptr n | n >= l = return l
1347 | otherwise = do w <- peek ptr
1350 else go (ptr `plusPtr` 1) (n+1)
1351 {-# INLINE findIndexOrEnd #-}
1353 -- ---------------------------------------------------------------------
1354 -- Searching ByteStrings
1356 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1357 elem :: Word8 -> ByteString -> Bool
1358 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1361 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1362 notElem :: Word8 -> ByteString -> Bool
1363 notElem c ps = not (elem c ps)
1364 {-# INLINE notElem #-}
1366 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1367 -- returns a ByteString containing those characters that satisfy the
1368 -- predicate. This function is subject to array fusion.
1369 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1370 #if defined(LOOPU_FUSION)
1371 filter p = loopArr . loopU (filterEFL p) NoAcc
1372 #elif defined(LOOPUP_FUSION)
1373 filter p = loopArr . loopUp (filterEFL p) NoAcc
1374 #elif defined(LOOPNOACC_FUSION)
1375 filter p = loopArr . loopNoAcc (filterEFL p)
1377 filter f = loopArr . loopFilter f
1379 {-# INLINE filter #-}
1381 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1382 -- around 2x faster for some one-shot applications.
1383 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1384 filter' k ps@(PS x s l)
1386 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1387 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1388 return $! t `minusPtr` p -- actual length
1391 go f t end | f == end = return t
1395 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1396 else go (f `plusPtr` 1) t end
1397 {-# INLINE filter' #-}
1400 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1401 -- case of filtering a single byte. It is more efficient to use
1402 -- /filterByte/ in this case.
1404 -- > filterByte == filter . (==)
1406 -- filterByte is around 10x faster, and uses much less space, than its
1407 -- filter equivalent
1408 filterByte :: Word8 -> ByteString -> ByteString
1409 filterByte w ps = replicate (count w ps) w
1410 {-# INLINE filterByte #-}
1413 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1414 -- case of filtering a single byte out of a list. It is more efficient
1415 -- to use /filterNotByte/ in this case.
1417 -- > filterNotByte == filter . (/=)
1419 -- filterNotByte is around 2x faster than its filter equivalent.
1420 filterNotByte :: Word8 -> ByteString -> ByteString
1421 filterNotByte w = filter' (/= w)
1422 {-# INLINE filterNotByte #-}
1424 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1425 -- and returns the first element in matching the predicate, or 'Nothing'
1426 -- if there is no such element.
1428 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1430 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1431 find f p = case findIndex f p of
1432 Just n -> Just (p `unsafeIndex` n)
1438 -- fuseable, but we don't want to walk the whole array.
1440 find k = foldl findEFL Nothing
1441 where findEFL a@(Just _) _ = a
1442 findEFL _ c | k c = Just c
1443 | otherwise = Nothing
1446 -- ---------------------------------------------------------------------
1447 -- Searching for substrings
1449 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1450 -- iff the first is a prefix of the second.
1451 isPrefixOf :: ByteString -> ByteString -> Bool
1452 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1455 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1456 withForeignPtr x2 $ \p2 -> do
1457 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1460 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1461 -- iff the first is a suffix of the second.
1463 -- The following holds:
1465 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1467 -- However, the real implemenation uses memcmp to compare the end of the
1468 -- string only, with no reverse required..
1469 isSuffixOf :: ByteString -> ByteString -> Bool
1470 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1473 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1474 withForeignPtr x2 $ \p2 -> do
1475 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1478 -- | Check whether one string is a substring of another. @isSubstringOf
1479 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1480 isSubstringOf :: ByteString -- ^ String to search for.
1481 -> ByteString -- ^ String to search in.
1483 isSubstringOf p s = not $ P.null $ findSubstrings p s
1485 -- | Get the first index of a substring in another string,
1486 -- or 'Nothing' if the string is not found.
1487 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1488 findSubstring :: ByteString -- ^ String to search for.
1489 -> ByteString -- ^ String to seach in.
1491 findSubstring = (listToMaybe .) . findSubstrings
1493 -- | Find the indexes of all (possibly overlapping) occurances of a
1494 -- substring in a string. This function uses the Knuth-Morris-Pratt
1495 -- string matching algorithm.
1496 findSubstrings :: ByteString -- ^ String to search for.
1497 -> ByteString -- ^ String to seach in.
1500 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1502 patc x = pat `unsafeIndex` x
1503 strc x = str `unsafeIndex` x
1505 -- maybe we should make kmpNext a UArray before using it in search?
1506 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1507 kmpNextL p _ | null p = []
1508 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1510 x = if not (null ps) && unsafeHead ps == patc j'
1511 then kmpNext Array.! j' else j'
1513 search i j = match ++ rest -- i: position in string, j: position in pattern
1514 where match = if j == m then [(i - j)] else []
1515 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1516 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1519 -- ---------------------------------------------------------------------
1522 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1523 -- corresponding pairs of bytes. If one input ByteString is short,
1524 -- excess elements of the longer ByteString are discarded. This is
1525 -- equivalent to a pair of 'unpack' operations.
1526 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1528 | null ps || null qs = []
1529 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1531 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1532 -- the first argument, instead of a tupling function. For example,
1533 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1534 -- corresponding sums.
1535 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1537 | null ps || null qs = []
1538 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1541 -- | A specialised version of zipWith for the common case of a
1542 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1543 -- are used to automatically covert zipWith into zipWith' when a pack is
1544 -- performed on the result of zipWith, but we also export it for
1547 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1548 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1549 withForeignPtr fp $ \a ->
1550 withForeignPtr fq $ \b ->
1551 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1553 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1556 | n >= len = return ()
1558 x <- peekByteOff p1 n
1559 y <- peekByteOff p2 n
1560 pokeByteOff r n (f x y)
1561 zipWith_ (n+1) p1 p2 r
1564 {-# INLINE zipWith' #-}
1568 "Specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1569 pack (zipWith f p q) = zipWith' f p q
1572 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1573 -- ByteStrings. Note that this performs two 'pack' operations.
1574 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1575 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1576 {-# INLINE unzip #-}
1578 -- ---------------------------------------------------------------------
1581 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1582 inits :: ByteString -> [ByteString]
1583 inits (PS x s l) = [PS x s n | n <- [0..l]]
1585 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1586 tails :: ByteString -> [ByteString]
1587 tails p | null p = [empty]
1588 | otherwise = p : tails (unsafeTail p)
1590 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1592 -- ---------------------------------------------------------------------
1593 -- ** Ordered 'ByteString's
1595 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1596 sort :: ByteString -> ByteString
1597 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1599 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1600 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1603 go 256 _ = return ()
1604 go i ptr = do n <- peekElemOff arr i
1605 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1606 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1610 sort :: ByteString -> ByteString
1611 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1612 memcpy p (f `plusPtr` s) l
1613 c_qsort p l -- inplace
1616 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1618 -- Try some linear sorts: radix, counting
1621 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1622 -- sortBy f ps = undefined
1624 -- ---------------------------------------------------------------------
1625 -- Low level constructors
1627 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1628 -- finalizer associated to it. The ByteString length is calculated using
1629 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1630 packCString :: CString -> ByteString
1631 packCString cstr = unsafePerformIO $ do
1632 fp <- newForeignPtr_ (castPtr cstr)
1634 return $! PS fp 0 (fromIntegral l)
1636 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1637 -- have /no/ finalizer associated with it. This operation has /O(1)/
1638 -- complexity as we already know the final size, so no /strlen(3)/ is
1640 packCStringLen :: CStringLen -> ByteString
1641 packCStringLen (ptr,len) = unsafePerformIO $ do
1642 fp <- newForeignPtr_ (castPtr ptr)
1643 return $! PS fp 0 (fromIntegral len)
1645 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1646 -- have a @free(3)@ finalizer associated to it.
1647 packMallocCString :: CString -> ByteString
1648 packMallocCString cstr = unsafePerformIO $ do
1649 fp <- newForeignFreePtr (castPtr cstr)
1650 len <- c_strlen cstr
1651 return $! PS fp 0 (fromIntegral len)
1653 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1654 -- null-terminated @CString@. The @CString@ will be freed
1655 -- automatically. This is a memcpy(3).
1656 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1657 useAsCString ps f = useAsCStringLen ps (\(s,_) -> f s)
1659 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1660 -- @CStringLen@. The @CStringLen@ will be freed automatically. This is a
1662 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1663 useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1665 alloc = withForeignPtr ps $ \p -> do
1666 buf <- c_malloc (fromIntegral l+1)
1667 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1668 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1669 return $! (castPtr buf, l)
1671 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1672 -- This is mainly useful to allow the rest of the data pointed
1673 -- to by the 'ByteString' to be garbage collected, for example
1674 -- if a large string has been read in, and only a small part of it
1675 -- is needed in the rest of the program.
1676 copy :: ByteString -> ByteString
1677 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1678 memcpy p (f `plusPtr` s) (fromIntegral l)
1680 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1681 -- CString is going to be deallocated from C land.
1682 copyCString :: CString -> IO ByteString
1683 copyCString cstr = do
1684 len <- c_strlen cstr
1685 copyCStringLen (cstr, fromIntegral len)
1687 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1688 copyCStringLen :: CStringLen -> IO ByteString
1689 copyCStringLen (cstr, len) = create len $ \p ->
1690 memcpy p (castPtr cstr) (fromIntegral len)
1692 -- ---------------------------------------------------------------------
1695 #if defined(__GLASGOW_HASKELL__)
1697 -- | getLine, read a line from stdin.
1698 getLine :: IO ByteString
1699 getLine = hGetLine stdin
1701 -- | Lazily construct a list of lines of ByteStrings. This will be much
1702 -- better on memory consumption than using lines =<< getContents.
1703 hGetLines :: Handle -> IO [ByteString]
1706 go = unsafeInterleaveIO $ do
1707 ms <- catch (hGetLine h >>= return . Just)
1708 (\_ -> return Nothing)
1710 Nothing -> return []
1711 Just s -> do ss <- go
1714 -- | hGetLine. read a ByteString from a handle
1715 hGetLine :: Handle -> IO ByteString
1716 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1717 case haBufferMode handle_ of
1718 NoBuffering -> error "no buffering"
1719 _other -> hGetLineBuffered handle_
1722 hGetLineBuffered handle_ = do
1723 let ref = haBuffer handle_
1724 buf <- readIORef ref
1725 hGetLineBufferedLoop handle_ ref buf 0 []
1727 hGetLineBufferedLoop handle_ ref
1728 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1730 off <- findEOL r w raw
1731 let new_len = len + off - r
1732 xs <- mkPS raw r off
1734 -- if eol == True, then off is the offset of the '\n'
1735 -- otherwise off == w and the buffer is now empty.
1737 then do if (w == off + 1)
1738 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1739 else writeIORef ref buf{ bufRPtr = off + 1 }
1740 mkBigPS new_len (xs:xss)
1742 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1743 buf{ bufWPtr=0, bufRPtr=0 }
1745 -- Nothing indicates we caught an EOF, and we may have a
1746 -- partial line to return.
1748 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1750 then mkBigPS new_len (xs:xss)
1753 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1755 -- find the end-of-line character, if there is one
1759 (c,r') <- readCharFromBuffer raw r
1761 then return r -- NB. not r': don't include the '\n'
1762 else findEOL r' w raw
1764 maybeFillReadBuffer fd is_line is_stream buf = catch
1765 (do buf' <- fillReadBuffer fd is_line is_stream buf
1767 (\e -> if isEOFError e then return Nothing else ioError e)
1769 -- TODO, rewrite to use normal memcpy
1770 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1771 mkPS buf start end =
1772 let len = end - start
1773 in create len $ \p -> do
1774 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1777 mkBigPS :: Int -> [ByteString] -> IO ByteString
1778 mkBigPS _ [ps] = return ps
1779 mkBigPS _ pss = return $! concat (P.reverse pss)
1783 -- ---------------------------------------------------------------------
1786 -- | Outputs a 'ByteString' to the specified 'Handle'.
1787 hPut :: Handle -> ByteString -> IO ()
1788 hPut _ (PS _ _ 0) = return ()
1789 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1791 -- | A synonym for @hPut@, for compatibility
1792 hPutStr :: Handle -> ByteString -> IO ()
1795 -- | Write a ByteString to a handle, appending a newline byte
1796 hPutStrLn :: Handle -> ByteString -> IO ()
1798 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1799 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1801 -- | Write a ByteString to stdout
1802 putStr :: ByteString -> IO ()
1803 putStr = hPut stdout
1805 -- | Write a ByteString to stdout, appending a newline byte
1806 putStrLn :: ByteString -> IO ()
1807 putStrLn = hPutStrLn stdout
1809 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1810 -- is far more efficient than reading the characters into a 'String'
1811 -- and then using 'pack'.
1812 hGet :: Handle -> Int -> IO ByteString
1813 hGet _ 0 = return empty
1814 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1816 #if defined(__GLASGOW_HASKELL__)
1817 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1818 -- waiting for data to become available, instead it returns only whatever data
1820 hGetNonBlocking :: Handle -> Int -> IO ByteString
1821 hGetNonBlocking _ 0 = return empty
1822 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1825 -- | Read entire handle contents into a 'ByteString'.
1826 -- This function reads chunks at a time, doubling the chunksize on each
1827 -- read. The final buffer is then realloced to the appropriate size. For
1828 -- files > half of available memory, this may lead to memory exhaustion.
1829 -- Consider using 'readFile' in this case.
1831 -- As with 'hGet', the string representation in the file is assumed to
1834 hGetContents :: Handle -> IO ByteString
1836 let start_size = 1024
1837 p <- mallocArray start_size
1838 i <- hGetBuf h p start_size
1840 then do p' <- reallocArray p i
1841 fp <- newForeignFreePtr p'
1847 p' <- reallocArray p s'
1848 i <- hGetBuf h (p' `plusPtr` s) s
1850 then do let i' = s + i
1851 p'' <- reallocArray p' i'
1852 fp <- newForeignFreePtr p''
1853 return $! PS fp 0 i'
1856 -- | getContents. Equivalent to hGetContents stdin
1857 getContents :: IO ByteString
1858 getContents = hGetContents stdin
1860 -- | Read an entire file strictly into a 'ByteString'. This is far more
1861 -- efficient than reading the characters into a 'String' and then using
1862 -- 'pack'. It also may be more efficient than opening the file and
1863 -- reading it using hGet.
1864 readFile :: FilePath -> IO ByteString
1865 readFile f = bracket (openBinaryFile f ReadMode) hClose
1866 (\h -> hFileSize h >>= hGet h . fromIntegral)
1868 -- | Write a 'ByteString' to a file.
1869 writeFile :: FilePath -> ByteString -> IO ()
1870 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1873 -- | Append a 'ByteString' to a file.
1874 appendFile :: FilePath -> ByteString -> IO ()
1875 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1876 (\hdl -> hPut hdl txt)
1880 -- Disable until we can move it into a portable .hsc file
1883 -- | Like readFile, this reads an entire file directly into a
1884 -- 'ByteString', but it is even more efficient. It involves directly
1885 -- mapping the file to memory. This has the advantage that the contents
1886 -- of the file never need to be copied. Also, under memory pressure the
1887 -- page may simply be discarded, while in the case of readFile it would
1888 -- need to be written to swap. If you read many small files, mmapFile
1889 -- will be less memory-efficient than readFile, since each mmapFile
1890 -- takes up a separate page of memory. Also, you can run into bus
1891 -- errors if the file is modified. As with 'readFile', the string
1892 -- representation in the file is assumed to be ISO-8859-1.
1894 -- On systems without mmap, this is the same as a readFile.
1896 mmapFile :: FilePath -> IO ByteString
1897 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1899 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1901 h <- openBinaryFile f ReadMode
1902 l <- fromIntegral `fmap` hFileSize h
1903 -- Don't bother mmaping small files because each mmapped file takes up
1904 -- at least one full VM block.
1906 then do thefp <- mallocByteString l
1907 withForeignPtr thefp $ \p-> hGetBuf h p l
1912 fd <- fromIntegral `fmap` handleToFd h
1914 fp <- if p == nullPtr
1915 then do thefp <- mallocByteString l
1916 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1919 -- The munmap leads to crashes on OpenBSD.
1920 -- maybe there's a use after unmap in there somewhere?
1921 -- Bulat suggests adding the hClose to the
1922 -- finalizer, excellent idea.
1923 #if !defined(__OpenBSD__)
1924 let unmap = c_munmap p l >> return ()
1926 let unmap = return ()
1928 fp <- FC.newForeignPtr p unmap
1933 where mmap_limit = 16*1024
1936 #if defined(__GLASGOW_HASKELL__)
1938 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1940 getArgs :: IO [ByteString]
1942 alloca $ \ p_argc ->
1943 alloca $ \ p_argv -> do
1944 getProgArgv p_argc p_argv
1945 p <- fromIntegral `fmap` peek p_argc
1947 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1950 -- ---------------------------------------------------------------------
1951 -- Internal utilities
1953 -- | Perform an operation with a temporary ByteString
1954 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1955 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1956 {-# INLINE withPtr #-}
1958 -- Common up near identical calls to `error' to reduce the number
1959 -- constant strings created when compiled:
1960 errorEmptyList :: String -> a
1961 errorEmptyList fun = moduleError fun "empty ByteString"
1962 {-# NOINLINE errorEmptyList #-}
1964 moduleError :: String -> String -> a
1965 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
1966 {-# NOINLINE moduleError #-}
1968 -- Find from the end of the string using predicate
1969 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1970 STRICT2(findFromEndUntil)
1971 findFromEndUntil f ps@(PS x s l) =
1973 else if f (last ps) then l
1974 else findFromEndUntil f (PS x s (l-1))
1976 {-# INLINE newForeignFreePtr #-}
1977 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1978 #if defined(__GLASGOW_HASKELL__)
1979 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1981 newForeignFreePtr p = newForeignPtr c_free_finalizer p