1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 singleton, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 append, -- :: ByteString -> ByteString -> ByteString
56 head, -- :: ByteString -> Word8
57 last, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 init, -- :: ByteString -> ByteString
60 null, -- :: ByteString -> Bool
61 length, -- :: ByteString -> Int
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
66 reverse, -- :: ByteString -> ByteString
67 intersperse, -- :: Word8 -> ByteString -> ByteString
68 transpose, -- :: [ByteString] -> [ByteString]
70 -- * Reducing 'ByteString's (folds)
71 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
72 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
73 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
74 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
75 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
76 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 concat, -- :: [ByteString] -> ByteString
80 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
81 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
82 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 maximum, -- :: ByteString -> Word8
84 minimum, -- :: ByteString -> Word8
86 -- * Building ByteStrings
88 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
89 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
90 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
93 -- ** Accumulating maps
94 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
95 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
98 -- ** Unfolding ByteStrings
99 replicate, -- :: Int -> Word8 -> ByteString
100 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
101 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
105 -- ** Breaking strings
106 take, -- :: Int -> ByteString -> ByteString
107 drop, -- :: Int -> ByteString -> ByteString
108 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
109 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
113 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
114 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
117 inits, -- :: ByteString -> [ByteString]
118 tails, -- :: ByteString -> [ByteString]
120 -- ** Breaking and dropping on specific bytes
121 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
122 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
124 -- ** Breaking into many substrings
125 split, -- :: Word8 -> ByteString -> [ByteString]
126 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
127 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
129 -- ** Joining strings
130 join, -- :: ByteString -> [ByteString] -> ByteString
131 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
134 isPrefixOf, -- :: ByteString -> ByteString -> Bool
135 isSuffixOf, -- :: ByteString -> ByteString -> Bool
137 -- ** Search for arbitrary substrings
138 isSubstringOf, -- :: ByteString -> ByteString -> Bool
139 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
140 findSubstrings, -- :: ByteString -> ByteString -> [Int]
142 -- * Searching ByteStrings
144 -- ** Searching by equality
145 -- | These functions use memchr(3) to efficiently search the ByteString
146 elem, -- :: Word8 -> ByteString -> Bool
147 notElem, -- :: Word8 -> ByteString -> Bool
148 filterByte, -- :: Word8 -> ByteString -> ByteString
149 filterNotByte, -- :: Word8 -> ByteString -> ByteString
151 -- ** Searching with a predicate
152 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
153 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
154 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
155 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
157 -- * Indexing ByteStrings
158 index, -- :: ByteString -> Int -> Word8
159 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
160 elemIndices, -- :: Word8 -> ByteString -> [Int]
161 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
162 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
163 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
164 count, -- :: Word8 -> ByteString -> Int
165 findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int
167 -- * Zipping and unzipping ByteStrings
168 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
169 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
171 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
173 -- * Ordered ByteStrings
174 sort, -- :: ByteString -> ByteString
176 -- * Low level CString conversions
178 -- ** Packing CStrings and pointers
179 packCString, -- :: CString -> ByteString
180 packCStringLen, -- :: CString -> ByteString
181 packMallocCString, -- :: CString -> ByteString
183 -- ** Using ByteStrings as CStrings
184 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
185 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
187 -- ** Copying ByteStrings
188 -- | These functions perform memcpy(3) operations
189 copy, -- :: ByteString -> ByteString
190 copyCString, -- :: CString -> IO ByteString
191 copyCStringLen, -- :: CStringLen -> IO ByteString
193 -- * I\/O with 'ByteString's
195 -- ** Standard input and output
197 #if defined(__GLASGOW_HASKELL__)
198 getLine, -- :: IO ByteString
200 getContents, -- :: IO ByteString
201 putStr, -- :: ByteString -> IO ()
202 putStrLn, -- :: ByteString -> IO ()
205 readFile, -- :: FilePath -> IO ByteString
206 writeFile, -- :: FilePath -> ByteString -> IO ()
207 appendFile, -- :: FilePath -> ByteString -> IO ()
208 -- mmapFile, -- :: FilePath -> IO ByteString
210 -- ** I\/O with Handles
211 #if defined(__GLASGOW_HASKELL__)
212 getArgs, -- :: IO [ByteString]
213 hGetLine, -- :: Handle -> IO ByteString
214 hGetLines, -- :: Handle -> IO [ByteString]
215 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
217 hGetContents, -- :: Handle -> IO ByteString
218 hGet, -- :: Handle -> Int -> IO ByteString
219 hPut, -- :: Handle -> ByteString -> IO ()
220 hPutStr, -- :: Handle -> ByteString -> IO ()
221 hPutStrLn, -- :: Handle -> ByteString -> IO ()
223 -- * Fusion utilities
224 #if defined(__GLASGOW_HASKELL__)
225 unpackList, -- eek, otherwise it gets thrown away by the simplifier
227 lengthU, maximumU, minimumU
230 import qualified Prelude as P
231 import Prelude hiding (reverse,head,tail,last,init,null
232 ,length,map,lines,foldl,foldr,unlines
233 ,concat,any,take,drop,splitAt,takeWhile
234 ,dropWhile,span,break,elem,filter,maximum
235 ,minimum,all,concatMap,foldl1,foldr1
236 ,scanl,scanl1,scanr,scanr1
237 ,readFile,writeFile,appendFile,replicate
238 ,getContents,getLine,putStr,putStrLn
239 ,zip,zipWith,unzip,notElem)
241 import Data.ByteString.Base
242 import Data.ByteString.Fusion
244 import qualified Data.List as List
246 import Data.Word (Word8)
247 import Data.Maybe (listToMaybe)
248 import Data.Array (listArray)
249 import qualified Data.Array as Array ((!))
251 -- Control.Exception.bracket not available in yhc or nhc
252 import Control.Exception (bracket, assert)
253 import Control.Monad (when)
255 import Foreign.C.String (CString, CStringLen)
256 import Foreign.C.Types (CSize)
257 import Foreign.ForeignPtr
258 import Foreign.Marshal.Array
260 import Foreign.Storable (Storable(..))
262 -- hGetBuf and hPutBuf not available in yhc or nhc
263 import System.IO (stdin,stdout,hClose,hFileSize
264 ,hGetBuf,hPutBuf,openBinaryFile
267 import Data.Monoid (Monoid, mempty, mappend, mconcat)
269 #if !defined(__GLASGOW_HASKELL__)
270 import System.IO.Unsafe
273 #if defined(__GLASGOW_HASKELL__)
275 import System.IO (hGetBufNonBlocking)
276 import System.IO.Error (isEOFError)
278 import Foreign.Marshal (alloca)
279 import qualified Foreign.Concurrent as FC (newForeignPtr)
282 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
283 import GHC.Base (build)
284 import GHC.Word hiding (Word8)
285 import GHC.Ptr (Ptr(..))
286 import GHC.ST (ST(..))
291 -- -----------------------------------------------------------------------------
293 -- Useful macros, until we have bang patterns
296 #define STRICT1(f) f a | a `seq` False = undefined
297 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
298 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
299 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
300 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
302 -- -----------------------------------------------------------------------------
304 instance Eq ByteString
307 instance Ord ByteString
308 where compare = compareBytes
310 instance Show ByteString where
311 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
313 instance Read ByteString where
314 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
316 instance Monoid ByteString where
322 instance Arbitrary PackedString where
323 arbitrary = P.pack `fmap` arbitrary
324 coarbitrary s = coarbitrary (P.unpack s)
327 -- | /O(n)/ Equality on the 'ByteString' type.
328 eq :: ByteString -> ByteString -> Bool
329 eq a@(PS p s l) b@(PS p' s' l')
330 | l /= l' = False -- short cut on length
331 | p == p' && s == s' = True -- short cut for the same string
332 | otherwise = compareBytes a b == EQ
335 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
336 compareBytes :: ByteString -> ByteString -> Ordering
337 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
338 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
339 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
340 | otherwise = inlinePerformIO $
341 withForeignPtr x1 $ \p1 ->
342 withForeignPtr x2 $ \p2 -> do
343 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
344 return $! case i `compare` 0 of
345 EQ -> l1 `compare` l2
347 {-# INLINE compareBytes #-}
351 -- About 4x slower over 32M
353 compareBytes :: ByteString -> ByteString -> Ordering
354 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
355 withForeignPtr fp1 $ \p1 ->
356 withForeignPtr fp2 $ \p2 ->
357 cmp (p1 `plusPtr` off1)
358 (p2 `plusPtr` off2) 0 len1 len2
360 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
362 cmp p1 p2 n len1 len2
363 | n == len1 = if n == len2 then return EQ else return LT
364 | n == len2 = return GT
366 (a :: Word8) <- peekByteOff p1 n
367 (b :: Word8) <- peekByteOff p2 n
368 case a `compare` b of
369 EQ -> cmp p1 p2 (n+1) len1 len2
372 {-# INLINE compareBytes #-}
375 -- -----------------------------------------------------------------------------
376 -- Introducing and eliminating 'ByteString's
378 -- | /O(1)/ The empty 'ByteString'
380 empty = unsafeCreate 0 $ const $ return ()
381 {-# NOINLINE empty #-}
383 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
384 singleton :: Word8 -> ByteString
385 singleton c = unsafeCreate 1 $ \p -> poke p c
386 {-# INLINE singleton #-}
389 -- XXX The unsafePerformIO is critical!
393 -- singleton 255 `compare` singleton 127
397 -- case mallocByteString 2 of
398 -- ForeignPtr f internals ->
399 -- case writeWord8OffAddr# f 0 255 of _ ->
400 -- case writeWord8OffAddr# f 0 127 of _ ->
401 -- case eqAddr# f f of
402 -- False -> case compare (GHC.Prim.plusAddr# f 0)
403 -- (GHC.Prim.plusAddr# f 0)
407 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
409 -- For applications with large numbers of string literals, pack can be a
410 -- bottleneck. In such cases, consider using packAddress (GHC only).
411 pack :: [Word8] -> ByteString
413 #if !defined(__GLASGOW_HASKELL__)
415 pack str = unsafeCreate (P.length str) $ \p -> go p str
418 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
420 #else /* hack away */
422 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
424 go _ _ [] = return ()
425 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
427 writeByte p i c = ST $ \s# ->
428 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
432 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
433 unpack :: ByteString -> [Word8]
435 #if !defined(__GLASGOW_HASKELL__)
437 unpack (PS _ _ 0) = []
438 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
439 go (p `plusPtr` s) (l - 1) []
442 go p 0 acc = peek p >>= \e -> return (e : acc)
443 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
444 {-# INLINE unpack #-}
449 -- Interacting with head/build fusion rule in ghc 6.5. Disable for now
452 unpack ps = build (unpackFoldr ps)
453 {-# INLINE unpack #-}
456 -- critical this isn't strict in the acc
457 -- as it will break in the presence of list fusion. this is a known
458 -- issue with seq and rewrite rules
460 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
461 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
462 let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
463 loop _ (-1) acc = return acc
466 loop q (n-1) (a `f` acc)
467 loop (p `plusPtr` off) (len-1) ch
468 {-# INLINE [0] unpackFoldr #-}
470 unpackList :: ByteString -> [Word8]
471 unpackList (PS fp off len) = withPtr fp $ \p -> do
473 loop _ (-1) acc = return acc
476 loop q (n-1) (a : acc)
477 loop (p `plusPtr` off) (len-1) []
480 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
485 ------------------------------------------------------------------------
487 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
488 -- conversion function
489 packWith :: (a -> Word8) -> [a] -> ByteString
490 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
494 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
495 {-# INLINE packWith #-}
496 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
498 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
499 unpackWith :: (Word8 -> a) -> ByteString -> [a]
500 unpackWith _ (PS _ _ 0) = []
501 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
502 go (p `plusPtr` s) (l - 1) []
505 go p 0 acc = peek p >>= \e -> return (k e : acc)
506 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
507 {-# INLINE unpackWith #-}
508 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
510 -- ---------------------------------------------------------------------
513 -- | /O(1)/ Test whether a ByteString is empty.
514 null :: ByteString -> Bool
515 null (PS _ _ l) = assert (l >= 0) $ l <= 0
518 -- ---------------------------------------------------------------------
519 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
520 length :: ByteString -> Int
521 length (PS _ _ l) = assert (l >= 0) $ l
524 -- length/loop fusion. When taking the length of any fuseable loop,
525 -- rewrite it as a foldl', and thus avoid allocating the result buffer
526 -- worth around 10% in speed testing.
529 #if defined(__GLASGOW_HASKELL__)
530 {-# INLINE [1] length #-}
533 lengthU :: ByteString -> Int
534 lengthU = foldl' (const . (+1)) (0::Int)
535 {-# INLINE lengthU #-}
540 "length/loop" forall loop s .
541 length (loopArr (loopWrapper loop s)) =
542 lengthU (loopArr (loopWrapper loop s))
546 ------------------------------------------------------------------------
548 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
549 -- complexity, as it requires a memcpy.
550 cons :: Word8 -> ByteString -> ByteString
551 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
553 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
556 -- | /O(n)/ Append a byte to the end of a 'ByteString'
557 snoc :: ByteString -> Word8 -> ByteString
558 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
559 memcpy p (f `plusPtr` s) (fromIntegral l)
560 poke (p `plusPtr` l) c
565 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
566 -- An exception will be thrown in the case of an empty ByteString.
567 head :: ByteString -> Word8
569 | l <= 0 = errorEmptyList "head"
570 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
573 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
574 -- An exception will be thrown in the case of an empty ByteString.
575 tail :: ByteString -> ByteString
577 | l <= 0 = errorEmptyList "tail"
578 | otherwise = PS p (s+1) (l-1)
581 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
582 -- An exception will be thrown in the case of an empty ByteString.
583 last :: ByteString -> Word8
585 | null ps = errorEmptyList "last"
586 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
589 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
590 -- An exception will be thrown in the case of an empty ByteString.
591 init :: ByteString -> ByteString
593 | null ps = errorEmptyList "init"
594 | otherwise = PS p s (l-1)
597 -- | /O(n)/ Append two ByteStrings
598 append :: ByteString -> ByteString -> ByteString
599 append xs ys | null xs = ys
601 | otherwise = concat [xs,ys]
602 {-# INLINE append #-}
604 -- ---------------------------------------------------------------------
607 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
608 -- element of @xs@. This function is subject to array fusion.
609 map :: (Word8 -> Word8) -> ByteString -> ByteString
610 #if defined(LOOPU_FUSION)
611 map f = loopArr . loopU (mapEFL f) NoAcc
612 #elif defined(LOOPUP_FUSION)
613 map f = loopArr . loopUp (mapEFL f) NoAcc
614 #elif defined(LOOPNOACC_FUSION)
615 map f = loopArr . loopNoAcc (mapEFL f)
617 map f = loopArr . loopMap f
621 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
622 -- slightly faster for one-shot cases.
623 map' :: (Word8 -> Word8) -> ByteString -> ByteString
624 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
625 create len $ map_ 0 (a `plusPtr` s)
627 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
630 | n >= len = return ()
632 x <- peekByteOff p1 n
633 pokeByteOff p2 n (f x)
637 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
638 reverse :: ByteString -> ByteString
639 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
640 c_reverse p (f `plusPtr` s) (fromIntegral l)
642 -- todo, fuseable version
644 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
645 -- 'ByteString' and \`intersperses\' that byte between the elements of
646 -- the 'ByteString'. It is analogous to the intersperse function on
648 intersperse :: Word8 -> ByteString -> ByteString
649 intersperse c ps@(PS x s l)
651 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
652 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
655 intersperse c = pack . List.intersperse c . unpack
658 -- | The 'transpose' function transposes the rows and columns of its
659 -- 'ByteString' argument.
660 transpose :: [ByteString] -> [ByteString]
661 transpose ps = P.map pack (List.transpose (P.map unpack ps))
663 -- ---------------------------------------------------------------------
664 -- Reducing 'ByteString's
666 -- | 'foldl', applied to a binary operator, a starting value (typically
667 -- the left-identity of the operator), and a ByteString, reduces the
668 -- ByteString using the binary operator, from left to right.
669 -- This function is subject to array fusion.
670 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
671 #if !defined(LOOPU_FUSION)
672 foldl f z = loopAcc . loopUp (foldEFL f) z
674 foldl f z = loopAcc . loopU (foldEFL f) z
680 -- About twice as fast with 6.4.1, but not fuseable
681 -- A simple fold . map is enough to make it worth while.
683 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
684 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
687 lgo z p q | p == q = return z
688 | otherwise = do c <- peek p
689 lgo (f z c) (p `plusPtr` 1) q
692 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
693 -- Though actually foldl is also strict in the accumulator.
694 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
696 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
697 {-# INLINE foldl' #-}
699 -- | 'foldr', applied to a binary operator, a starting value
700 -- (typically the right-identity of the operator), and a ByteString,
701 -- reduces the ByteString using the binary operator, from right to left.
702 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
703 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
706 -- | 'foldl1' is a variant of 'foldl' that has no starting value
707 -- argument, and thus must be applied to non-empty 'ByteStrings'.
708 -- This function is subject to array fusion.
709 -- An exception will be thrown in the case of an empty ByteString.
710 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
712 | null ps = errorEmptyList "foldl1"
713 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
714 {-# INLINE foldl1 #-}
716 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
717 -- An exception will be thrown in the case of an empty ByteString.
718 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
720 | null ps = errorEmptyList "foldl1'"
721 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
722 {-# INLINE foldl1' #-}
724 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
725 -- and thus must be applied to non-empty 'ByteString's
726 -- An exception will be thrown in the case of an empty ByteString.
727 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
729 | null ps = errorEmptyList "foldr1"
730 | otherwise = foldr f (last ps) (init ps)
731 {-# INLINE foldr1 #-}
733 -- ---------------------------------------------------------------------
736 -- | /O(n)/ Concatenate a list of ByteStrings.
737 concat :: [ByteString] -> ByteString
740 concat xs = unsafeCreate len $ \ptr -> go xs ptr
741 where len = P.sum . P.map length $ xs
744 go (PS p s l:ps) ptr = do
745 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
746 go ps (ptr `plusPtr` l)
748 -- | Map a function over a 'ByteString' and concatenate the results
749 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
750 concatMap f = concat . foldr ((:) . f) []
752 -- foldr (append . f) empty
754 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
755 -- any element of the 'ByteString' satisfies the predicate.
756 any :: (Word8 -> Bool) -> ByteString -> Bool
757 any _ (PS _ _ 0) = False
758 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
759 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
762 go p q | p == q = return False
763 | otherwise = do c <- peek p
764 if f c then return True
765 else go (p `plusPtr` 1) q
769 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
770 -- if all elements of the 'ByteString' satisfy the predicate.
771 all :: (Word8 -> Bool) -> ByteString -> Bool
772 all _ (PS _ _ 0) = True
773 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
774 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
777 go p q | p == q = return True -- end of list
778 | otherwise = do c <- peek p
780 then go (p `plusPtr` 1) q
783 ------------------------------------------------------------------------
785 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
786 -- This function will fuse.
787 -- An exception will be thrown in the case of an empty ByteString.
788 maximum :: ByteString -> Word8
789 maximum xs@(PS x s l)
790 | null xs = errorEmptyList "maximum"
791 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
792 c_maximum (p `plusPtr` s) (fromIntegral l)
794 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
795 -- This function will fuse.
796 -- An exception will be thrown in the case of an empty ByteString.
797 minimum :: ByteString -> Word8
798 minimum xs@(PS x s l)
799 | null xs = errorEmptyList "minimum"
800 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
801 c_minimum (p `plusPtr` s) (fromIntegral l)
804 -- minimum/maximum/loop fusion. As for length (and other folds), when we
805 -- see we're applied after a fuseable op, switch from using the C
806 -- version, to the fuseable version. The result should then avoid
807 -- allocating a buffer.
810 #if defined(__GLASGOW_HASKELL__)
811 {-# INLINE [1] minimum #-}
812 {-# INLINE [1] maximum #-}
815 maximumU :: ByteString -> Word8
816 maximumU = foldl1' max
817 {-# INLINE maximumU #-}
819 minimumU :: ByteString -> Word8
820 minimumU = foldl1' min
821 {-# INLINE minimumU #-}
825 "minimum/loop" forall loop s .
826 minimum (loopArr (loopWrapper loop s)) =
827 minimumU (loopArr (loopWrapper loop s))
829 "maximum/loop" forall loop s .
830 maximum (loopArr (loopWrapper loop s)) =
831 maximumU (loopArr (loopWrapper loop s))
835 ------------------------------------------------------------------------
837 -- | The 'mapAccumL' function behaves like a combination of 'map' and
838 -- 'foldl'; it applies a function to each element of a ByteString,
839 -- passing an accumulating parameter from left to right, and returning a
840 -- final value of this accumulator together with the new list.
841 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
842 #if !defined(LOOPU_FUSION)
843 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
845 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
847 {-# INLINE mapAccumL #-}
849 -- | The 'mapAccumR' function behaves like a combination of 'map' and
850 -- 'foldr'; it applies a function to each element of a ByteString,
851 -- passing an accumulating parameter from right to left, and returning a
852 -- final value of this accumulator together with the new ByteString.
853 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
854 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
855 {-# INLINE mapAccumR #-}
857 -- | /O(n)/ map Word8 functions, provided with the index at each position
858 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
859 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
860 {-# INLINE mapIndexed #-}
862 -- ---------------------------------------------------------------------
863 -- Building ByteStrings
865 -- | 'scanl' is similar to 'foldl', but returns a list of successive
866 -- reduced values from the left. This function will fuse.
868 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
872 -- > last (scanl f z xs) == foldl f z xs.
873 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
874 #if !defined(LOOPU_FUSION)
875 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
877 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
880 -- n.b. haskell's List scan returns a list one bigger than the
881 -- input, so we need to snoc here to get some extra space, however,
882 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
885 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
886 -- This function will fuse.
888 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
889 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
892 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
893 {-# INLINE scanl1 #-}
895 -- | scanr is the right-to-left dual of scanl.
896 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
897 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
900 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
901 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
904 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
905 {-# INLINE scanr1 #-}
907 -- ---------------------------------------------------------------------
908 -- Unfolds and replicates
910 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
911 -- the value of every element. The following holds:
913 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
915 -- This implemenation uses @memset(3)@
916 replicate :: Int -> Word8 -> ByteString
919 | otherwise = unsafeCreate w $ \ptr ->
920 memset ptr c (fromIntegral w) >> return ()
922 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
923 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
924 -- ByteString from a seed value. The function takes the element and
925 -- returns 'Nothing' if it is done producing the ByteString or returns
926 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
927 -- and @b@ is the seed value for further production.
931 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
932 -- > == pack [0, 1, 2, 3, 4, 5]
934 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
935 unfoldr f = concat . unfoldChunk 32 64
936 where unfoldChunk n n' x =
937 case unfoldrN n f x of
938 (s, Nothing) -> s : []
939 (s, Just x') -> s : unfoldChunk n' (n+n') x'
941 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
942 -- value. However, the length of the result is limited by the first
943 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
944 -- when the maximum length of the result is known.
946 -- The following equation relates 'unfoldrN' and 'unfoldr':
948 -- > unfoldrN n f s == take n (unfoldr f s)
950 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
952 | i < 0 = (empty, Just x0)
953 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
957 Nothing -> return (0, n, Nothing)
959 | n == i -> return (0, n, Just x)
960 | otherwise -> do poke p w
961 go (p `plusPtr` 1) x' (n+1)
963 -- ---------------------------------------------------------------------
966 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
967 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
968 take :: Int -> ByteString -> ByteString
972 | otherwise = PS x s n
975 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
976 -- elements, or @[]@ if @n > 'length' xs@.
977 drop :: Int -> ByteString -> ByteString
981 | otherwise = PS x (s+n) (l-n)
984 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
985 splitAt :: Int -> ByteString -> (ByteString, ByteString)
986 splitAt n ps@(PS x s l)
987 | n <= 0 = (empty, ps)
988 | n >= l = (ps, empty)
989 | otherwise = (PS x s n, PS x (s+n) (l-n))
990 {-# INLINE splitAt #-}
992 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
993 -- returns the longest prefix (possibly empty) of @xs@ of elements that
995 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
996 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
997 {-# INLINE takeWhile #-}
999 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
1000 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
1001 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
1002 {-# INLINE dropWhile #-}
1004 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1005 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1006 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1007 {-# INLINE break #-}
1009 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1011 -- breakEnd p == spanEnd (not.p)
1012 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1013 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1015 -- | 'breakByte' breaks its ByteString argument at the first occurence
1016 -- of the specified byte. It is more efficient than 'break' as it is
1017 -- implemented with @memchr(3)@. I.e.
1019 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1021 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1022 breakByte c p = case elemIndex c p of
1023 Nothing -> (p,empty)
1024 Just n -> (unsafeTake n p, unsafeDrop n p)
1025 {-# INLINE breakByte #-}
1027 -- | 'spanByte' breaks its ByteString argument at the first
1028 -- occurence of a byte other than its argument. It is more efficient
1031 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1033 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1034 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1035 go (p `plusPtr` s) 0
1038 go p i | i >= l = return (ps, empty)
1039 | otherwise = do c' <- peekByteOff p i
1041 then return (unsafeTake i ps, unsafeDrop i ps)
1043 {-# INLINE spanByte #-}
1045 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1046 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1047 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1048 span p ps = break (not . p) ps
1051 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1054 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1058 -- > spanEnd (not . isSpace) ps
1060 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1062 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1063 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1065 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1066 -- separators, where the predicate returns True for a separator element.
1067 -- The resulting components do not contain the separators. Two adjacent
1068 -- separators result in an empty component in the output. eg.
1070 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1071 -- > splitWith (=='a') [] == []
1073 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1075 #if defined(__GLASGOW_HASKELL__)
1076 splitWith _pred (PS _ _ 0) = []
1077 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1078 where pred# c# = pred_ (W8# c#)
1081 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1082 splitLoop pred' p 0 off' len' fp'
1084 splitLoop :: (Word# -> Bool)
1086 -> Int -> Int -> Int
1090 splitLoop pred' p idx' off' len' fp'
1091 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1092 | idx' >= len' = return [PS fp' off' idx']
1094 w <- peekElemOff p (off'+idx')
1095 if pred' (case w of W8# w# -> w#)
1096 then return (PS fp' off' idx' :
1097 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1098 else splitLoop pred' p (idx'+1) off' len' fp'
1099 {-# INLINE splitWith #-}
1102 splitWith _ (PS _ _ 0) = []
1103 splitWith p ps = loop p ps
1106 loop q qs = if null rest then [chunk]
1107 else chunk : loop q (unsafeTail rest)
1108 where (chunk,rest) = break q qs
1111 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1112 -- argument, consuming the delimiter. I.e.
1114 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1115 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1116 -- > split 'x' "x" == ["",""]
1120 -- > join [c] . split c == id
1121 -- > split == splitWith . (==)
1123 -- As for all splitting functions in this library, this function does
1124 -- not copy the substrings, it just constructs new 'ByteStrings' that
1125 -- are slices of the original.
1127 split :: Word8 -> ByteString -> [ByteString]
1128 split _ (PS _ _ 0) = []
1129 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1130 let ptr = p `plusPtr` s
1134 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1135 w (fromIntegral (l-n))
1137 then [PS x (s+n) (l-n)]
1138 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1141 {-# INLINE split #-}
1144 -- slower. but stays inside Haskell.
1145 split _ (PS _ _ 0) = []
1146 split (W8# w#) (PS fp off len) = splitWith' off len fp
1148 splitWith' off' len' fp' = withPtr fp $ \p ->
1149 splitLoop p 0 off' len' fp'
1151 splitLoop :: Ptr Word8
1152 -> Int -> Int -> Int
1157 splitLoop p idx' off' len' fp'
1158 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1159 | idx' >= len' = return [PS fp' off' idx']
1161 (W8# x#) <- peekElemOff p (off'+idx')
1162 if word2Int# w# ==# word2Int# x#
1163 then return (PS fp' off' idx' :
1164 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1165 else splitLoop p (idx'+1) off' len' fp'
1168 -- | Like 'splitWith', except that sequences of adjacent separators are
1169 -- treated as a single separator. eg.
1171 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1173 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1174 tokens f = P.filter (not.null) . splitWith f
1175 {-# INLINE tokens #-}
1177 -- | The 'group' function takes a ByteString and returns a list of
1178 -- ByteStrings such that the concatenation of the result is equal to the
1179 -- argument. Moreover, each sublist in the result contains only equal
1180 -- elements. For example,
1182 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1184 -- It is a special case of 'groupBy', which allows the programmer to
1185 -- supply their own equality test. It is about 40% faster than
1187 group :: ByteString -> [ByteString]
1190 | otherwise = ys : group zs
1192 (ys, zs) = spanByte (unsafeHead xs) xs
1194 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1195 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1198 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1200 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1202 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1203 -- 'ByteString's and concatenates the list after interspersing the first
1204 -- argument between each element of the list.
1205 join :: ByteString -> [ByteString] -> ByteString
1206 join s = concat . (List.intersperse s)
1210 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1211 -- with a char. Around 4 times faster than the generalised join.
1213 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1214 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1215 withForeignPtr ffp $ \fp ->
1216 withForeignPtr fgp $ \gp -> do
1217 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1218 poke (ptr `plusPtr` l) c
1219 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1221 len = length f + length g + 1
1222 {-# INLINE joinWithByte #-}
1224 -- ---------------------------------------------------------------------
1225 -- Indexing ByteStrings
1227 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1228 index :: ByteString -> Int -> Word8
1230 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1231 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1232 ++ ", length = " ++ show (length ps))
1233 | otherwise = ps `unsafeIndex` n
1234 {-# INLINE index #-}
1236 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1237 -- element in the given 'ByteString' which is equal to the query
1238 -- element, or 'Nothing' if there is no such element.
1239 -- This implementation uses memchr(3).
1240 elemIndex :: Word8 -> ByteString -> Maybe Int
1241 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1242 let p' = p `plusPtr` s
1243 q <- memchr p' c (fromIntegral l)
1244 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1245 {-# INLINE elemIndex #-}
1247 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1248 -- element in the given 'ByteString' which is equal to the query
1249 -- element, or 'Nothing' if there is no such element. The following
1252 -- > elemIndexEnd c xs ==
1253 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1255 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1256 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1257 go (p `plusPtr` s) (l-1)
1260 go p i | i < 0 = return Nothing
1261 | otherwise = do ch' <- peekByteOff p i
1263 then return $ Just i
1265 {-# INLINE elemIndexEnd #-}
1267 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1268 -- the indices of all elements equal to the query element, in ascending order.
1269 -- This implementation uses memchr(3).
1270 elemIndices :: Word8 -> ByteString -> [Int]
1271 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1272 let ptr = p `plusPtr` s
1275 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1276 w (fromIntegral (l - n))
1279 else let i = q `minusPtr` ptr
1282 {-# INLINE elemIndices #-}
1286 elemIndices :: Word8 -> ByteString -> [Int]
1287 elemIndices c ps = loop 0 ps
1289 loop _ ps' | null ps' = []
1290 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1291 | otherwise = loop (n+1) (unsafeTail ps')
1294 -- | count returns the number of times its argument appears in the ByteString
1296 -- > count = length . elemIndices
1298 -- But more efficiently than using length on the intermediate list.
1299 count :: Word8 -> ByteString -> Int
1300 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1301 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1302 {-# INLINE count #-}
1306 -- around 30% slower
1308 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1309 go (p `plusPtr` s) (fromIntegral m) 0
1311 go :: Ptr Word8 -> CSize -> Int -> IO Int
1317 else do let k = fromIntegral $ q `minusPtr` p
1318 go (q `plusPtr` 1) (l-k-1) (i+1)
1321 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1322 -- returns the index of the first element in the ByteString
1323 -- satisfying the predicate.
1324 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1325 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1328 go ptr n | n >= l = return Nothing
1329 | otherwise = do w <- peek ptr
1331 then return (Just n)
1332 else go (ptr `plusPtr` 1) (n+1)
1333 {-# INLINE findIndex #-}
1335 -- | The 'findIndices' function extends 'findIndex', by returning the
1336 -- indices of all elements satisfying the predicate, in ascending order.
1337 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1338 findIndices p ps = loop 0 ps
1341 loop n qs | null qs = []
1342 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1343 | otherwise = loop (n+1) (unsafeTail qs)
1345 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1346 -- of the string if no element is found, rather than Nothing.
1347 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1348 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1351 go ptr n | n >= l = return l
1352 | otherwise = do w <- peek ptr
1355 else go (ptr `plusPtr` 1) (n+1)
1356 {-# INLINE findIndexOrEnd #-}
1358 -- ---------------------------------------------------------------------
1359 -- Searching ByteStrings
1361 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1362 elem :: Word8 -> ByteString -> Bool
1363 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1366 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1367 notElem :: Word8 -> ByteString -> Bool
1368 notElem c ps = not (elem c ps)
1369 {-# INLINE notElem #-}
1371 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1372 -- returns a ByteString containing those characters that satisfy the
1373 -- predicate. This function is subject to array fusion.
1374 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1375 #if defined(LOOPU_FUSION)
1376 filter p = loopArr . loopU (filterEFL p) NoAcc
1377 #elif defined(LOOPUP_FUSION)
1378 filter p = loopArr . loopUp (filterEFL p) NoAcc
1379 #elif defined(LOOPNOACC_FUSION)
1380 filter p = loopArr . loopNoAcc (filterEFL p)
1382 filter f = loopArr . loopFilter f
1384 {-# INLINE filter #-}
1386 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1387 -- around 2x faster for some one-shot applications.
1388 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1389 filter' k ps@(PS x s l)
1391 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1392 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1393 return $! t `minusPtr` p -- actual length
1396 go f t end | f == end = return t
1400 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1401 else go (f `plusPtr` 1) t end
1402 {-# INLINE filter' #-}
1405 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1406 -- case of filtering a single byte. It is more efficient to use
1407 -- /filterByte/ in this case.
1409 -- > filterByte == filter . (==)
1411 -- filterByte is around 10x faster, and uses much less space, than its
1412 -- filter equivalent
1413 filterByte :: Word8 -> ByteString -> ByteString
1414 filterByte w ps = replicate (count w ps) w
1415 {-# INLINE filterByte #-}
1418 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1419 -- case of filtering a single byte out of a list. It is more efficient
1420 -- to use /filterNotByte/ in this case.
1422 -- > filterNotByte == filter . (/=)
1424 -- filterNotByte is around 2x faster than its filter equivalent.
1425 filterNotByte :: Word8 -> ByteString -> ByteString
1426 filterNotByte w = filter' (/= w)
1427 {-# INLINE filterNotByte #-}
1429 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1430 -- and returns the first element in matching the predicate, or 'Nothing'
1431 -- if there is no such element.
1433 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1435 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1436 find f p = case findIndex f p of
1437 Just n -> Just (p `unsafeIndex` n)
1443 -- fuseable, but we don't want to walk the whole array.
1445 find k = foldl findEFL Nothing
1446 where findEFL a@(Just _) _ = a
1447 findEFL _ c | k c = Just c
1448 | otherwise = Nothing
1451 -- ---------------------------------------------------------------------
1452 -- Searching for substrings
1454 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1455 -- iff the first is a prefix of the second.
1456 isPrefixOf :: ByteString -> ByteString -> Bool
1457 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1460 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1461 withForeignPtr x2 $ \p2 -> do
1462 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1465 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1466 -- iff the first is a suffix of the second.
1468 -- The following holds:
1470 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1472 -- However, the real implemenation uses memcmp to compare the end of the
1473 -- string only, with no reverse required..
1474 isSuffixOf :: ByteString -> ByteString -> Bool
1475 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1478 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1479 withForeignPtr x2 $ \p2 -> do
1480 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1483 -- | Check whether one string is a substring of another. @isSubstringOf
1484 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1485 isSubstringOf :: ByteString -- ^ String to search for.
1486 -> ByteString -- ^ String to search in.
1488 isSubstringOf p s = not $ P.null $ findSubstrings p s
1490 -- | Get the first index of a substring in another string,
1491 -- or 'Nothing' if the string is not found.
1492 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1493 findSubstring :: ByteString -- ^ String to search for.
1494 -> ByteString -- ^ String to seach in.
1496 findSubstring = (listToMaybe .) . findSubstrings
1498 -- | Find the indexes of all (possibly overlapping) occurances of a
1499 -- substring in a string. This function uses the Knuth-Morris-Pratt
1500 -- string matching algorithm.
1501 findSubstrings :: ByteString -- ^ String to search for.
1502 -> ByteString -- ^ String to seach in.
1505 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1507 patc x = pat `unsafeIndex` x
1508 strc x = str `unsafeIndex` x
1510 -- maybe we should make kmpNext a UArray before using it in search?
1511 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1512 kmpNextL p _ | null p = []
1513 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1515 x = if not (null ps) && unsafeHead ps == patc j'
1516 then kmpNext Array.! j' else j'
1518 search i j = match ++ rest -- i: position in string, j: position in pattern
1519 where match = if j == m then [(i - j)] else []
1520 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1521 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1524 -- ---------------------------------------------------------------------
1527 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1528 -- corresponding pairs of bytes. If one input ByteString is short,
1529 -- excess elements of the longer ByteString are discarded. This is
1530 -- equivalent to a pair of 'unpack' operations.
1531 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1533 | null ps || null qs = []
1534 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1536 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1537 -- the first argument, instead of a tupling function. For example,
1538 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1539 -- corresponding sums.
1540 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1542 | null ps || null qs = []
1543 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1546 -- | A specialised version of zipWith for the common case of a
1547 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1548 -- are used to automatically covert zipWith into zipWith' when a pack is
1549 -- performed on the result of zipWith, but we also export it for
1552 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1553 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1554 withForeignPtr fp $ \a ->
1555 withForeignPtr fq $ \b ->
1556 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1558 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1561 | n >= len = return ()
1563 x <- peekByteOff p1 n
1564 y <- peekByteOff p2 n
1565 pokeByteOff r n (f x y)
1566 zipWith_ (n+1) p1 p2 r
1569 {-# INLINE zipWith' #-}
1573 "Specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1574 pack (zipWith f p q) = zipWith' f p q
1577 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1578 -- ByteStrings. Note that this performs two 'pack' operations.
1579 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1580 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1581 {-# INLINE unzip #-}
1583 -- ---------------------------------------------------------------------
1586 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1587 inits :: ByteString -> [ByteString]
1588 inits (PS x s l) = [PS x s n | n <- [0..l]]
1590 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1591 tails :: ByteString -> [ByteString]
1592 tails p | null p = [empty]
1593 | otherwise = p : tails (unsafeTail p)
1595 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1597 -- ---------------------------------------------------------------------
1598 -- ** Ordered 'ByteString's
1600 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1601 sort :: ByteString -> ByteString
1602 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1604 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1605 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1608 go 256 _ = return ()
1609 go i ptr = do n <- peekElemOff arr i
1610 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1611 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1615 sort :: ByteString -> ByteString
1616 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1617 memcpy p (f `plusPtr` s) l
1618 c_qsort p l -- inplace
1621 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1623 -- Try some linear sorts: radix, counting
1626 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1627 -- sortBy f ps = undefined
1629 -- ---------------------------------------------------------------------
1630 -- Low level constructors
1632 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1633 -- finalizer associated to it. The ByteString length is calculated using
1634 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1635 packCString :: CString -> ByteString
1636 packCString cstr = unsafePerformIO $ do
1637 fp <- newForeignPtr_ (castPtr cstr)
1639 return $! PS fp 0 (fromIntegral l)
1641 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1642 -- have /no/ finalizer associated with it. This operation has /O(1)/
1643 -- complexity as we already know the final size, so no /strlen(3)/ is
1645 packCStringLen :: CStringLen -> ByteString
1646 packCStringLen (ptr,len) = unsafePerformIO $ do
1647 fp <- newForeignPtr_ (castPtr ptr)
1648 return $! PS fp 0 (fromIntegral len)
1650 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1651 -- have a @free(3)@ finalizer associated to it.
1652 packMallocCString :: CString -> ByteString
1653 packMallocCString cstr = unsafePerformIO $ do
1654 fp <- newForeignFreePtr (castPtr cstr)
1655 len <- c_strlen cstr
1656 return $! PS fp 0 (fromIntegral len)
1658 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1659 -- null-terminated @CString@. The @CString@ will be freed
1660 -- automatically. This is a memcpy(3).
1661 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1662 useAsCString ps f = useAsCStringLen ps (\(s,_) -> f s)
1664 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1665 -- @CStringLen@. The @CStringLen@ will be freed automatically. This is a
1667 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1668 useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1670 alloc = withForeignPtr ps $ \p -> do
1671 buf <- c_malloc (fromIntegral l+1)
1672 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1673 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1674 return $! (castPtr buf, l)
1676 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1677 -- This is mainly useful to allow the rest of the data pointed
1678 -- to by the 'ByteString' to be garbage collected, for example
1679 -- if a large string has been read in, and only a small part of it
1680 -- is needed in the rest of the program.
1681 copy :: ByteString -> ByteString
1682 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1683 memcpy p (f `plusPtr` s) (fromIntegral l)
1685 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1686 -- CString is going to be deallocated from C land.
1687 copyCString :: CString -> IO ByteString
1688 copyCString cstr = do
1689 len <- c_strlen cstr
1690 copyCStringLen (cstr, fromIntegral len)
1692 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1693 copyCStringLen :: CStringLen -> IO ByteString
1694 copyCStringLen (cstr, len) = create len $ \p ->
1695 memcpy p (castPtr cstr) (fromIntegral len)
1697 -- ---------------------------------------------------------------------
1700 #if defined(__GLASGOW_HASKELL__)
1702 -- | getLine, read a line from stdin.
1703 getLine :: IO ByteString
1704 getLine = hGetLine stdin
1706 -- | Lazily construct a list of lines of ByteStrings. This will be much
1707 -- better on memory consumption than using lines =<< getContents.
1708 hGetLines :: Handle -> IO [ByteString]
1711 go = unsafeInterleaveIO $ do
1712 ms <- catch (hGetLine h >>= return . Just)
1713 (\_ -> return Nothing)
1715 Nothing -> return []
1716 Just s -> do ss <- go
1719 -- | hGetLine. read a ByteString from a handle
1720 hGetLine :: Handle -> IO ByteString
1721 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1722 case haBufferMode handle_ of
1723 NoBuffering -> error "no buffering"
1724 _other -> hGetLineBuffered handle_
1727 hGetLineBuffered handle_ = do
1728 let ref = haBuffer handle_
1729 buf <- readIORef ref
1730 hGetLineBufferedLoop handle_ ref buf 0 []
1732 hGetLineBufferedLoop handle_ ref
1733 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1735 off <- findEOL r w raw
1736 let new_len = len + off - r
1737 xs <- mkPS raw r off
1739 -- if eol == True, then off is the offset of the '\n'
1740 -- otherwise off == w and the buffer is now empty.
1742 then do if (w == off + 1)
1743 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1744 else writeIORef ref buf{ bufRPtr = off + 1 }
1745 mkBigPS new_len (xs:xss)
1747 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1748 buf{ bufWPtr=0, bufRPtr=0 }
1750 -- Nothing indicates we caught an EOF, and we may have a
1751 -- partial line to return.
1753 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1755 then mkBigPS new_len (xs:xss)
1758 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1760 -- find the end-of-line character, if there is one
1764 (c,r') <- readCharFromBuffer raw r
1766 then return r -- NB. not r': don't include the '\n'
1767 else findEOL r' w raw
1769 maybeFillReadBuffer fd is_line is_stream buf = catch
1770 (do buf' <- fillReadBuffer fd is_line is_stream buf
1772 (\e -> if isEOFError e then return Nothing else ioError e)
1774 -- TODO, rewrite to use normal memcpy
1775 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1776 mkPS buf start end =
1777 let len = end - start
1778 in create len $ \p -> do
1779 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1782 mkBigPS :: Int -> [ByteString] -> IO ByteString
1783 mkBigPS _ [ps] = return ps
1784 mkBigPS _ pss = return $! concat (P.reverse pss)
1788 -- ---------------------------------------------------------------------
1791 -- | Outputs a 'ByteString' to the specified 'Handle'.
1792 hPut :: Handle -> ByteString -> IO ()
1793 hPut _ (PS _ _ 0) = return ()
1794 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1796 -- | A synonym for @hPut@, for compatibility
1797 hPutStr :: Handle -> ByteString -> IO ()
1800 -- | Write a ByteString to a handle, appending a newline byte
1801 hPutStrLn :: Handle -> ByteString -> IO ()
1803 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1804 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1806 -- | Write a ByteString to stdout
1807 putStr :: ByteString -> IO ()
1808 putStr = hPut stdout
1810 -- | Write a ByteString to stdout, appending a newline byte
1811 putStrLn :: ByteString -> IO ()
1812 putStrLn = hPutStrLn stdout
1814 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1815 -- is far more efficient than reading the characters into a 'String'
1816 -- and then using 'pack'.
1817 hGet :: Handle -> Int -> IO ByteString
1818 hGet _ 0 = return empty
1819 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1821 #if defined(__GLASGOW_HASKELL__)
1822 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1823 -- waiting for data to become available, instead it returns only whatever data
1825 hGetNonBlocking :: Handle -> Int -> IO ByteString
1826 hGetNonBlocking _ 0 = return empty
1827 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1830 -- | Read entire handle contents into a 'ByteString'.
1831 -- This function reads chunks at a time, doubling the chunksize on each
1832 -- read. The final buffer is then realloced to the appropriate size. For
1833 -- files > half of available memory, this may lead to memory exhaustion.
1834 -- Consider using 'readFile' in this case.
1836 -- As with 'hGet', the string representation in the file is assumed to
1839 hGetContents :: Handle -> IO ByteString
1841 let start_size = 1024
1842 p <- mallocArray start_size
1843 i <- hGetBuf h p start_size
1845 then do p' <- reallocArray p i
1846 fp <- newForeignFreePtr p'
1852 p' <- reallocArray p s'
1853 i <- hGetBuf h (p' `plusPtr` s) s
1855 then do let i' = s + i
1856 p'' <- reallocArray p' i'
1857 fp <- newForeignFreePtr p''
1858 return $! PS fp 0 i'
1861 -- | getContents. Equivalent to hGetContents stdin
1862 getContents :: IO ByteString
1863 getContents = hGetContents stdin
1865 -- | Read an entire file strictly into a 'ByteString'. This is far more
1866 -- efficient than reading the characters into a 'String' and then using
1867 -- 'pack'. It also may be more efficient than opening the file and
1868 -- reading it using hGet.
1869 readFile :: FilePath -> IO ByteString
1870 readFile f = bracket (openBinaryFile f ReadMode) hClose
1871 (\h -> hFileSize h >>= hGet h . fromIntegral)
1873 -- | Write a 'ByteString' to a file.
1874 writeFile :: FilePath -> ByteString -> IO ()
1875 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1878 -- | Append a 'ByteString' to a file.
1879 appendFile :: FilePath -> ByteString -> IO ()
1880 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1881 (\hdl -> hPut hdl txt)
1885 -- Disable until we can move it into a portable .hsc file
1888 -- | Like readFile, this reads an entire file directly into a
1889 -- 'ByteString', but it is even more efficient. It involves directly
1890 -- mapping the file to memory. This has the advantage that the contents
1891 -- of the file never need to be copied. Also, under memory pressure the
1892 -- page may simply be discarded, while in the case of readFile it would
1893 -- need to be written to swap. If you read many small files, mmapFile
1894 -- will be less memory-efficient than readFile, since each mmapFile
1895 -- takes up a separate page of memory. Also, you can run into bus
1896 -- errors if the file is modified. As with 'readFile', the string
1897 -- representation in the file is assumed to be ISO-8859-1.
1899 -- On systems without mmap, this is the same as a readFile.
1901 mmapFile :: FilePath -> IO ByteString
1902 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1904 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1906 h <- openBinaryFile f ReadMode
1907 l <- fromIntegral `fmap` hFileSize h
1908 -- Don't bother mmaping small files because each mmapped file takes up
1909 -- at least one full VM block.
1911 then do thefp <- mallocByteString l
1912 withForeignPtr thefp $ \p-> hGetBuf h p l
1917 fd <- fromIntegral `fmap` handleToFd h
1919 fp <- if p == nullPtr
1920 then do thefp <- mallocByteString l
1921 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1924 -- The munmap leads to crashes on OpenBSD.
1925 -- maybe there's a use after unmap in there somewhere?
1926 -- Bulat suggests adding the hClose to the
1927 -- finalizer, excellent idea.
1928 #if !defined(__OpenBSD__)
1929 let unmap = c_munmap p l >> return ()
1931 let unmap = return ()
1933 fp <- FC.newForeignPtr p unmap
1938 where mmap_limit = 16*1024
1941 #if defined(__GLASGOW_HASKELL__)
1943 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1945 getArgs :: IO [ByteString]
1947 alloca $ \ p_argc ->
1948 alloca $ \ p_argv -> do
1949 getProgArgv p_argc p_argv
1950 p <- fromIntegral `fmap` peek p_argc
1952 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1955 -- ---------------------------------------------------------------------
1956 -- Internal utilities
1958 -- | Perform an operation with a temporary ByteString
1959 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1960 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1961 {-# INLINE withPtr #-}
1963 -- Common up near identical calls to `error' to reduce the number
1964 -- constant strings created when compiled:
1965 errorEmptyList :: String -> a
1966 errorEmptyList fun = moduleError fun "empty ByteString"
1967 {-# NOINLINE errorEmptyList #-}
1969 moduleError :: String -> String -> a
1970 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
1971 {-# NOINLINE moduleError #-}
1973 -- Find from the end of the string using predicate
1974 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1975 STRICT2(findFromEndUntil)
1976 findFromEndUntil f ps@(PS x s l) =
1978 else if f (last ps) then l
1979 else findFromEndUntil f (PS x s (l-1))
1981 {-# INLINE newForeignFreePtr #-}
1982 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1983 #if defined(__GLASGOW_HASKELL__)
1984 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1986 newForeignFreePtr p = newForeignPtr c_free_finalizer p