1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: Word8 -> ByteString -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
74 -- * Reducing 'ByteString's
75 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
81 concat, -- :: [ByteString] -> ByteString
82 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
83 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 maximum, -- :: ByteString -> Word8
86 minimum, -- :: ByteString -> Word8
87 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
89 -- * Generating and unfolding ByteStrings
90 replicate, -- :: Int -> Word8 -> ByteString
91 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
95 -- ** Breaking strings
96 take, -- :: Int -> ByteString -> ByteString
97 drop, -- :: Int -> ByteString -> ByteString
98 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
99 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
100 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
101 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
102 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
103 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
105 -- ** Breaking and dropping on specific bytes
106 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
107 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
108 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
109 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
111 -- ** Breaking into many substrings
112 split, -- :: Word8 -> ByteString -> [ByteString]
113 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
114 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
118 -- ** Joining strings
119 join, -- :: ByteString -> [ByteString] -> ByteString
120 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
122 -- * Indexing ByteStrings
123 index, -- :: ByteString -> Int -> Word8
124 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
125 elemIndices, -- :: Word8 -> ByteString -> [Int]
126 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
127 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
128 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
129 count, -- :: Word8 -> ByteString -> Int
131 -- * Ordered ByteStrings
132 sort, -- :: ByteString -> ByteString
134 -- * Searching ByteStrings
136 -- ** Searching by equality
137 -- | These functions use memchr(3) to efficiently search the ByteString
139 elem, -- :: Word8 -> ByteString -> Bool
140 notElem, -- :: Word8 -> ByteString -> Bool
141 filterByte, -- :: Word8 -> ByteString -> ByteString
142 filterNotByte, -- :: Word8 -> ByteString -> ByteString
144 -- ** Searching with a predicate
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
148 -- ** Prefixes and suffixes
149 -- | These functions use memcmp(3) to efficiently compare substrings
150 isPrefixOf, -- :: ByteString -> ByteString -> Bool
151 isSuffixOf, -- :: ByteString -> ByteString -> Bool
153 -- ** Search for arbitrary substrings
154 isSubstringOf, -- :: ByteString -> ByteString -> Bool
155 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
156 findSubstrings, -- :: ByteString -> ByteString -> [Int]
158 -- * Zipping and unzipping ByteStrings
159 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
160 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
161 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
163 -- * Unchecked access
164 unsafeHead, -- :: ByteString -> Word8
165 unsafeTail, -- :: ByteString -> ByteString
166 unsafeIndex, -- :: ByteString -> Int -> Word8
168 -- * Low level introduction and elimination
169 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
170 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
171 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
172 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
173 skipIndex, -- :: ByteString -> Int
175 -- ** Packing CStrings and pointers
176 packCString, -- :: CString -> ByteString
177 packCStringLen, -- :: CString -> ByteString
178 packMallocCString, -- :: CString -> ByteString
180 #if defined(__GLASGOW_HASKELL__)
181 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
182 packAddress, -- :: Addr# -> ByteString
183 unsafePackAddress, -- :: Int -> Addr# -> ByteString
184 unsafeFinalize, -- :: ByteString -> IO ()
187 -- ** Using ByteStrings as CStrings
188 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
189 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
190 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
192 -- ** Copying ByteStrings
193 -- | These functions perform memcpy(3) operations
194 copy, -- :: ByteString -> ByteString
195 copyCString, -- :: CString -> ByteString
196 copyCStringLen, -- :: CStringLen -> ByteString
198 -- * I\/O with @ByteString@s
200 -- ** Standard input and output
202 #if defined(__GLASGOW_HASKELL__)
203 getLine, -- :: IO ByteString
205 getContents, -- :: IO ByteString
206 putStr, -- :: ByteString -> IO ()
207 putStrLn, -- :: ByteString -> IO ()
210 readFile, -- :: FilePath -> IO ByteString
211 writeFile, -- :: FilePath -> ByteString -> IO ()
212 -- mmapFile, -- :: FilePath -> IO ByteString
214 -- ** I\/O with Handles
215 #if defined(__GLASGOW_HASKELL__)
216 getArgs, -- :: IO [ByteString]
217 hGetLine, -- :: Handle -> IO ByteString
218 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
220 hGetContents, -- :: Handle -> IO ByteString
221 hGet, -- :: Handle -> Int -> IO ByteString
222 hPut, -- :: Handle -> ByteString -> IO ()
224 -- * Fusion utilities
225 #if defined(__GLASGOW_HASKELL__)
226 unpackList, -- eek, otherwise it gets thrown away by the simplifier
229 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
230 loopU, mapEFL, filterEFL, foldEFL,
235 import qualified Prelude as P
236 import Prelude hiding (reverse,head,tail,last,init,null
237 ,length,map,lines,foldl,foldr,unlines
238 ,concat,any,take,drop,splitAt,takeWhile
239 ,dropWhile,span,break,elem,filter,maximum
240 ,minimum,all,concatMap,foldl1,foldr1
241 ,readFile,writeFile,replicate
242 ,getContents,getLine,putStr,putStrLn
243 ,zip,zipWith,unzip,notElem)
245 import qualified Data.List as List
248 import Data.Word (Word8)
249 import Data.Maybe (listToMaybe)
250 import Data.Array (listArray)
251 import qualified Data.Array as Array ((!))
253 -- Control.Exception.bracket not available in yhc or nhc
254 import Control.Exception (bracket)
255 import Control.Monad (when)
257 import Foreign.C.String (CString, CStringLen)
258 import Foreign.C.Types (CSize, CInt)
259 import Foreign.ForeignPtr
260 import Foreign.Marshal.Array
262 import Foreign.Storable (Storable(..))
264 -- hGetBuf and hPutBuf not available in yhc or nhc
265 import System.IO (stdin,stdout,hClose,hFileSize
266 ,hGetBuf,hPutBuf,openBinaryFile
269 #if !defined(__GLASGOW_HASKELL__)
270 import System.IO.Unsafe
273 #if defined(__GLASGOW_HASKELL__)
275 import Data.Generics (Data(..), Typeable(..))
277 import System.IO (hGetBufNonBlocking)
278 import System.IO.Error (isEOFError)
280 import Foreign.Marshal (alloca)
281 import qualified Foreign.Concurrent as FC (newForeignPtr)
284 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
285 import GHC.Base (build, unsafeChr)
286 import GHC.Word hiding (Word8)
287 import GHC.Ptr (Ptr(..))
288 import GHC.ST (ST(..))
293 -- CFILES stuff is Hugs only
294 {-# CFILES cbits/fpstring.c #-}
296 -- -----------------------------------------------------------------------------
298 -- Useful macros, until we have bang patterns
301 #define STRICT1(f) f a | a `seq` False = undefined
302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
307 -- -----------------------------------------------------------------------------
309 -- | A space-efficient representation of a Word8 vector, supporting many
310 -- efficient operations. A 'ByteString' contains 8-bit characters only.
312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
318 #if defined(__GLASGOW_HASKELL__)
319 deriving (Data, Typeable)
322 instance Eq ByteString
325 instance Ord ByteString
326 where compare = compareBytes
328 instance Show ByteString where
329 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
331 instance Read ByteString where
332 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
335 instance Arbitrary PackedString where
336 arbitrary = P.pack `fmap` arbitrary
337 coarbitrary s = coarbitrary (P.unpack s)
340 -- | /O(n)/ Equality on the 'ByteString' type.
341 eq :: ByteString -> ByteString -> Bool
342 eq a@(PS p s l) b@(PS p' s' l')
343 | l /= l' = False -- short cut on length
344 | p == p' && s == s' = True -- short cut for the same string
345 | otherwise = compareBytes a b == EQ
348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
349 compareBytes :: ByteString -> ByteString -> Ordering
350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
351 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
352 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
353 | otherwise = inlinePerformIO $
354 withForeignPtr x1 $ \p1 ->
355 withForeignPtr x2 $ \p2 -> do
356 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
357 return $ case i `compare` 0 of
358 EQ -> l1 `compare` l2
360 {-# INLINE compareBytes #-}
364 -- About 4x slower over 32M
366 compareBytes :: ByteString -> ByteString -> Ordering
367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
368 withForeignPtr fp1 $ \p1 ->
369 withForeignPtr fp2 $ \p2 ->
370 cmp (p1 `plusPtr` off1)
371 (p2 `plusPtr` off2) 0 len1 len2
373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
375 cmp p1 p2 n len1 len2
376 | n == len1 = if n == len2 then return EQ else return LT
377 | n == len2 = return GT
379 (a :: Word8) <- peekByteOff p1 n
380 (b :: Word8) <- peekByteOff p2 n
381 case a `compare` b of
382 EQ -> cmp p1 p2 (n+1) len1 len2
385 {-# INLINE compareBytes #-}
388 -- -----------------------------------------------------------------------------
389 -- Introducing and eliminating 'ByteString's
391 -- | /O(1)/ The empty 'ByteString'
393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
394 {-# NOINLINE empty #-}
396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
397 packByte :: Word8 -> ByteString
398 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
399 withForeignPtr fp $ \p -> poke p c
401 {-# INLINE packByte #-}
404 -- XXX must use unsafePerformIO, not inlinePerformIO here, otherwise ghc
407 -- packByte 255 `compare` packByte 127
411 -- case mallocByteString 2 of
412 -- ForeignPtr f internals ->
413 -- case writeWord8OffAddr# f 0 255 of _ ->
414 -- case writeWord8OffAddr# f 0 127 of _ ->
415 -- case eqAddr# f f of
416 -- False -> case compare (GHC.Prim.plusAddr# f 0)
417 -- (GHC.Prim.plusAddr# f 0)
421 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
423 -- For applications with large numbers of string literals, pack can be a
424 -- bottleneck. In such cases, consider using packAddress (GHC only).
425 pack :: [Word8] -> ByteString
427 #if !defined(__GLASGOW_HASKELL__)
429 pack str = create (P.length str) $ \p -> go p str
432 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
434 #else /* hack away */
436 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
438 go _ _ [] = return ()
439 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
441 writeByte p i c = ST $ \s# ->
442 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
446 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
447 unpack :: ByteString -> [Word8]
449 #if !defined(__GLASGOW_HASKELL__)
451 unpack (PS _ _ 0) = []
452 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
453 go (p `plusPtr` s) (l - 1) []
456 go p 0 acc = peek p >>= \e -> return (e : acc)
457 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
458 {-# INLINE unpack #-}
462 unpack ps = build (unpackFoldr ps)
463 {-# INLINE unpack #-}
465 unpackList :: ByteString -> [Word8]
466 unpackList (PS fp off len) = withPtr fp $ \p -> do
468 loop _ (-1) acc = return acc
471 loop q (n-1) (a : acc)
472 loop (p `plusPtr` off) (len-1) []
475 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
478 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
479 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
481 loop _ (-1) acc = return acc
484 loop q (n-1) (a `f` acc)
485 loop (p `plusPtr` off) (len-1) ch
486 {-# INLINE [0] unpackFoldr #-}
490 ------------------------------------------------------------------------
492 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
493 -- conversion function
494 packWith :: (a -> Word8) -> [a] -> ByteString
495 packWith k str = create (P.length str) $ \p -> go p str
499 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
500 {-# INLINE packWith #-}
501 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
503 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
504 unpackWith :: (Word8 -> a) -> ByteString -> [a]
505 unpackWith _ (PS _ _ 0) = []
506 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
507 go (p `plusPtr` s) (l - 1) []
510 go p 0 acc = peek p >>= \e -> return (k e : acc)
511 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
512 {-# INLINE unpackWith #-}
513 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
515 -- ---------------------------------------------------------------------
518 -- | /O(1)/ Test whether a ByteString is empty.
519 null :: ByteString -> Bool
520 null (PS _ _ l) = l == 0
523 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
524 length :: ByteString -> Int
525 length (PS _ _ l) = l
526 {-# INLINE length #-}
528 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
529 -- complexity, as it requires a memcpy.
530 cons :: Word8 -> ByteString -> ByteString
531 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
532 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
538 -- | /O(n)/ Append a byte to the end of a 'ByteString'
539 snoc :: ByteString -> Word8 -> ByteString
540 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
541 memcpy p (f `plusPtr` s) l
542 poke (p `plusPtr` l) c
547 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
548 head :: ByteString -> Word8
550 | null ps = errorEmptyList "head"
551 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
554 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
555 tail :: ByteString -> ByteString
557 | l <= 0 = errorEmptyList "tail"
558 | otherwise = PS p (s+1) (l-1)
561 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
562 last :: ByteString -> Word8
564 | null ps = errorEmptyList "last"
565 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
568 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
569 init :: ByteString -> ByteString
571 | l <= 0 = errorEmptyList "init"
572 | otherwise = PS p s (l-1)
575 -- | /O(n)/ Append two ByteStrings
576 append :: ByteString -> ByteString -> ByteString
577 append xs ys | null xs = ys
579 | otherwise = concat [xs,ys]
580 {-# INLINE append #-}
582 -- ---------------------------------------------------------------------
585 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
586 -- element of @xs@. This function is subject to array fusion.
587 map :: (Word8 -> Word8) -> ByteString -> ByteString
588 map f = loopArr . loopU (mapEFL f) noAL
591 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
592 -- slightly faster for one-shot cases.
593 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
595 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
596 np <- mallocByteString (len+1)
597 withForeignPtr np $ \p -> do
598 map_ 0 (a `plusPtr` s) p
601 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
604 | n >= len = return ()
606 x <- peekByteOff p1 n
607 pokeByteOff p2 n (f x)
611 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
612 reverse :: ByteString -> ByteString
613 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
614 c_reverse p (f `plusPtr` s) l
617 reverse = pack . P.reverse . unpack
620 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
621 -- 'ByteString' and \`intersperses\' that byte between the elements of
622 -- the 'ByteString'. It is analogous to the intersperse function on
624 intersperse :: Word8 -> ByteString -> ByteString
625 intersperse c ps@(PS x s l)
627 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
628 c_intersperse p (f `plusPtr` s) l c
631 intersperse c = pack . List.intersperse c . unpack
634 -- | The 'transpose' function transposes the rows and columns of its
635 -- 'ByteString' argument.
636 transpose :: [ByteString] -> [ByteString]
637 transpose ps = P.map pack (List.transpose (P.map unpack ps))
639 -- ---------------------------------------------------------------------
640 -- Reducing 'ByteString's
642 -- | 'foldl', applied to a binary operator, a starting value (typically
643 -- the left-identity of the operator), and a ByteString, reduces the
644 -- ByteString using the binary operator, from left to right.
645 -- This function is subject to array fusion.
646 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
647 foldl f z = loopAcc . loopU (foldEFL f) z
652 -- About twice as fast with 6.4.1, but not fuseable
653 -- A simple fold . map is enough to make it worth while.
655 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
656 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
659 lgo z p q | p == q = return z
660 | otherwise = do c <- peek p
661 lgo (f z c) (p `plusPtr` 1) q
664 -- | 'foldr', applied to a binary operator, a starting value
665 -- (typically the right-identity of the operator), and a ByteString,
666 -- reduces the ByteString using the binary operator, from right to left.
667 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
668 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
669 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
672 go p q | p == q = return z
673 | otherwise = do c <- peek p
674 ws <- go (p `plusPtr` 1) q
677 -- | 'foldl1' is a variant of 'foldl' that has no starting value
678 -- argument, and thus must be applied to non-empty 'ByteStrings'.
679 -- This function is subject to array fusion.
680 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
682 | null ps = errorEmptyList "foldl1"
683 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
685 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
686 -- and thus must be applied to non-empty 'ByteString's
687 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
689 | null ps = errorEmptyList "foldr1"
690 | otherwise = foldr f (last ps) (init ps)
692 -- ---------------------------------------------------------------------
695 -- | /O(n)/ Concatenate a list of ByteStrings.
696 concat :: [ByteString] -> ByteString
699 concat xs = create len $ \ptr -> go xs ptr
700 where len = P.sum . P.map length $ xs
703 go (PS p s l:ps) ptr = do
704 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) l
705 go ps (ptr `plusPtr` l)
707 -- | Map a function over a 'ByteString' and concatenate the results
708 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
709 concatMap f = foldr (append . f) empty
711 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
712 -- any element of the 'ByteString' satisfies the predicate.
713 any :: (Word8 -> Bool) -> ByteString -> Bool
714 any _ (PS _ _ 0) = False
715 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
716 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
719 go p q | p == q = return False
720 | otherwise = do c <- peek p
721 if f c then return True
722 else go (p `plusPtr` 1) q
726 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
727 -- if all elements of the 'ByteString' satisfy the predicate.
728 all :: (Word8 -> Bool) -> ByteString -> Bool
729 all _ (PS _ _ 0) = True
730 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
731 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
734 go p q | p == q = return True -- end of list
735 | otherwise = do c <- peek p
737 then go (p `plusPtr` 1) q
741 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
742 maximum :: ByteString -> Word8
743 maximum xs@(PS x s l)
744 | null xs = errorEmptyList "maximum"
745 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
746 return $ c_maximum (p `plusPtr` s) l
747 {-# INLINE maximum #-}
749 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
750 minimum :: ByteString -> Word8
751 minimum xs@(PS x s l)
752 | null xs = errorEmptyList "minimum"
753 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
754 return $ c_minimum (p `plusPtr` s) l
755 {-# INLINE minimum #-}
757 -- fusion is too slow here (10x)
760 maximum xs@(PS x s l)
761 | null xs = errorEmptyList "maximum"
762 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
764 maximum_ (p `plusPtr` s) 0 l w
766 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
770 | otherwise = do w <- peekByteOff ptr n
771 maximum_ ptr (n+1) m (if w > c then w else c)
773 minimum xs@(PS x s l)
774 | null xs = errorEmptyList "minimum"
775 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
777 minimum_ (p `plusPtr` s) 0 l w
779 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
783 | otherwise = do w <- peekByteOff ptr n
784 minimum_ ptr (n+1) m (if w < c then w else c)
787 -- | /O(n)/ map Word8 functions, provided with the index at each position
788 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
789 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
790 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
792 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
794 go n f t p | f == p = return ()
795 | otherwise = do w <- peek f
797 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
799 -- ---------------------------------------------------------------------
800 -- Unfolds and replicates
802 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
803 -- the value of every element. The following holds:
805 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
807 -- This implemenation uses @memset(3)@
808 replicate :: Int -> Word8 -> ByteString
809 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
813 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
817 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
820 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
821 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
822 -- the element and returns 'Nothing' if it is done producing the
823 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
824 -- prepending to the ByteString and @b@ is used as the next element in a
827 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
828 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
829 -- final size of the ByteString as an argument. 'cons' can then be
830 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
831 -- linear complexity. The depth of the recursion is limited to this
832 -- size, but may be less. For lazy, infinite unfoldr, use
833 -- 'Data.List.unfoldr' (from 'Data.List').
837 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
839 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
841 -- > unfoldrN n == take n $ List.unfoldr
842 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
843 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
846 go q c n | n == i = return n -- stop if we reach `i'
847 | otherwise = case f c of
851 go (q `plusPtr` 1) new_c (n+1)
853 -- ---------------------------------------------------------------------
856 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
857 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
858 take :: Int -> ByteString -> ByteString
862 | otherwise = PS x s n
865 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
866 -- elements, or @[]@ if @n > 'length' xs@.
867 drop :: Int -> ByteString -> ByteString
871 | otherwise = PS x (s+n) (l-n)
874 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
875 splitAt :: Int -> ByteString -> (ByteString, ByteString)
876 splitAt n ps = (take n ps, drop n ps)
877 {-# INLINE splitAt #-}
879 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
880 -- returns the longest prefix (possibly empty) of @xs@ of elements that
882 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
883 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
884 {-# INLINE takeWhile #-}
886 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
887 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
888 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
889 {-# INLINE dropWhile #-}
891 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
892 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
893 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
896 -- | 'breakByte' breaks its ByteString argument at the first occurence
897 -- of the specified byte. It is more efficient than 'break' as it is
898 -- implemented with @memchr(3)@. I.e.
900 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
902 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
903 breakByte c p = case elemIndex c p of
905 Just n -> (take n p, drop n p)
906 {-# INLINE breakByte #-}
908 -- | 'spanByte' breaks its ByteString argument at the first
909 -- occurence of a byte other than its argument. It is more efficient
912 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
914 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
915 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
919 go p i | i >= l = return (ps, empty)
920 | otherwise = do c' <- peekByteOff p i
922 then return (take i ps, drop i ps)
924 {-# INLINE spanByte #-}
926 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
927 -- occurence of @w@. It behaves like 'break', except the delimiter is
928 -- not returned, and @Nothing@ is returned if the delimiter is not in
929 -- the ByteString. I.e.
931 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
933 -- > breakFirst c xs ==
934 -- > let (x,y) = break (== c) xs
935 -- > in if null y then Nothing else Just (x, drop 1 y))
937 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
938 breakFirst c p = case elemIndex c p of
940 Just n -> Just (take n p, drop (n+1) p)
941 {-# INLINE breakFirst #-}
943 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
946 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
948 -- and the following are equivalent:
950 -- > breakLast 'c' "abcdef"
951 -- > let (x,y) = break (=='c') (reverse "abcdef")
952 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
954 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
955 breakLast c p = case elemIndexLast c p of
957 Just n -> Just (take n p, drop (n+1) p)
958 {-# INLINE breakLast #-}
960 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
961 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
962 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
963 span p ps = break (not . p) ps
966 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
969 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
973 -- > spanEnd (not . isSpace) ps
975 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
977 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
978 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
980 -- | /O(n)/ Splits a 'ByteString' into components delimited by
981 -- separators, where the predicate returns True for a separator element.
982 -- The resulting components do not contain the separators. Two adjacent
983 -- separators result in an empty component in the output. eg.
985 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
986 -- > splitWith (=='a') [] == []
988 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
990 #if defined(__GLASGOW_HASKELL__)
991 splitWith _pred (PS _ _ 0) = []
992 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
993 where pred# c# = pred_ (W8# c#)
995 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
996 splitLoop pred' p 0 off' len' fp'
998 splitLoop :: (Word# -> Bool)
1000 -> Int -> Int -> Int
1004 splitLoop pred' p idx' off' len' fp'
1005 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1006 | idx' >= len' = return [PS fp' off' idx']
1008 w <- peekElemOff p (off'+idx')
1009 if pred' (case w of W8# w# -> w#)
1010 then return (PS fp' off' idx' :
1011 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
1012 else splitLoop pred' p (idx'+1) off' len' fp'
1013 {-# INLINE splitWith #-}
1016 splitWith _ (PS _ _ 0) = []
1017 splitWith p ps = splitWith' p ps
1020 splitWith' q qs = if null rest then [chunk]
1021 else chunk : splitWith' q (unsafeTail rest)
1022 where (chunk,rest) = break q qs
1025 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1026 -- argument, consuming the delimiter. I.e.
1028 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1029 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1030 -- > split 'x' "x" == ["",""]
1034 -- > join [c] . split c == id
1035 -- > split == splitWith . (==)
1037 -- As for all splitting functions in this library, this function does
1038 -- not copy the substrings, it just constructs new 'ByteStrings' that
1039 -- are slices of the original.
1041 split :: Word8 -> ByteString -> [ByteString]
1042 split _ (PS _ _ 0) = []
1043 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1044 let ptr = p `plusPtr` s
1048 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1050 then return [PS x (s+n) (l-n)]
1051 else do let i = q `minusPtr` ptr
1053 return $! PS x (s+n) (i-n) : ls
1055 {-# INLINE split #-}
1058 -- slower. but stays inside Haskell.
1059 split _ (PS _ _ 0) = []
1060 split (W8# w#) (PS fp off len) = splitWith' off len fp
1062 splitWith' off' len' fp' = withPtr fp $ \p ->
1063 splitLoop p 0 off' len' fp'
1065 splitLoop :: Ptr Word8
1066 -> Int -> Int -> Int
1071 splitLoop p idx' off' len' fp'
1072 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1073 | idx' >= len' = return [PS fp' off' idx']
1075 (W8# x#) <- peekElemOff p (off'+idx')
1076 if word2Int# w# ==# word2Int# x#
1077 then return (PS fp' off' idx' :
1078 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1079 else splitLoop p (idx'+1) off' len' fp'
1082 -- | Like 'splitWith', except that sequences of adjacent separators are
1083 -- treated as a single separator. eg.
1085 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1087 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1088 tokens f = P.filter (not.null) . splitWith f
1090 -- | The 'group' function takes a ByteString and returns a list of
1091 -- ByteStrings such that the concatenation of the result is equal to the
1092 -- argument. Moreover, each sublist in the result contains only equal
1093 -- elements. For example,
1095 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1097 -- It is a special case of 'groupBy', which allows the programmer to
1098 -- supply their own equality test. It is about 40% faster than
1100 group :: ByteString -> [ByteString]
1103 | otherwise = ys : group zs
1105 (ys, zs) = spanByte (unsafeHead xs) xs
1107 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1108 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1111 | otherwise = take n xs : groupBy k (drop n xs)
1113 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1115 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1116 -- 'ByteString's and concatenates the list after interspersing the first
1117 -- argument between each element of the list.
1118 join :: ByteString -> [ByteString] -> ByteString
1119 join filler pss = concat (splice pss)
1123 splice (x:y:xs) = x:filler:splice (y:xs)
1126 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1127 -- with a char. Around 4 times faster than the generalised join.
1129 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1130 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1131 withForeignPtr ffp $ \fp ->
1132 withForeignPtr fgp $ \gp -> do
1133 memcpy ptr (fp `plusPtr` s) l
1134 poke (ptr `plusPtr` l) c
1135 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1137 len = length f + length g + 1
1138 {-# INLINE joinWithByte #-}
1140 -- ---------------------------------------------------------------------
1141 -- Indexing ByteStrings
1143 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1144 index :: ByteString -> Int -> Word8
1146 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1147 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1148 ++ ", length = " ++ show (length ps)
1149 | otherwise = ps `unsafeIndex` n
1150 {-# INLINE index #-}
1152 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1153 -- element in the given 'ByteString' which is equal to the query
1154 -- element, or 'Nothing' if there is no such element.
1155 -- This implementation uses memchr(3).
1156 elemIndex :: Word8 -> ByteString -> Maybe Int
1157 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1158 let p' = p `plusPtr` s
1159 q = memchr p' c (fromIntegral l)
1160 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1161 {-# INLINE elemIndex #-}
1163 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1164 -- element in the given 'ByteString' which is equal to the query
1165 -- element, or 'Nothing' if there is no such element. The following
1168 -- > elemIndexLast c xs ==
1169 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1171 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1172 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1173 go (p `plusPtr` s) (l-1)
1176 go p i | i < 0 = return Nothing
1177 | otherwise = do ch' <- peekByteOff p i
1179 then return $ Just i
1181 {-# INLINE elemIndexLast #-}
1183 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1184 -- the indices of all elements equal to the query element, in ascending order.
1185 -- This implementation uses memchr(3).
1186 elemIndices :: Word8 -> ByteString -> [Int]
1187 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1188 let ptr = p `plusPtr` s
1192 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1195 else do let i = q `minusPtr` ptr
1202 elemIndices :: Word8 -> ByteString -> [Int]
1203 elemIndices c ps = loop 0 ps
1205 loop _ ps' | null ps' = []
1206 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1207 | otherwise = loop (n+1) (unsafeTail ps')
1210 -- | count returns the number of times its argument appears in the ByteString
1212 -- > count = length . elemIndices
1214 -- But more efficiently than using length on the intermediate list.
1215 count :: Word8 -> ByteString -> Int
1216 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1217 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1218 {-# INLINE count #-}
1222 -- around 30% slower
1224 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1225 go (p `plusPtr` s) (fromIntegral m) 0
1227 go :: Ptr Word8 -> CSize -> Int -> IO Int
1230 let q = memchr p w l
1233 else do let k = fromIntegral $ q `minusPtr` p
1234 go (q `plusPtr` 1) (l-k-1) (i+1)
1237 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1238 -- returns the index of the first element in the ByteString
1239 -- satisfying the predicate.
1240 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1241 findIndex = (listToMaybe .) . findIndices
1243 -- | The 'findIndices' function extends 'findIndex', by returning the
1244 -- indices of all elements satisfying the predicate, in ascending order.
1245 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1246 findIndices p ps = loop 0 ps
1249 loop _ qs | null qs = []
1250 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1251 | otherwise = loop (n+1) (unsafeTail qs)
1253 -- ---------------------------------------------------------------------
1254 -- Searching ByteStrings
1256 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1257 elem :: Word8 -> ByteString -> Bool
1258 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1261 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1262 notElem :: Word8 -> ByteString -> Bool
1263 notElem c ps = not (elem c ps)
1264 {-# INLINE notElem #-}
1267 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1268 -- case of filtering a single byte. It is more efficient to use
1269 -- /filterByte/ in this case.
1271 -- > filterByte == filter . (==)
1273 -- filterByte is around 10x faster, and uses much less space, than its
1274 -- filter equivalent
1275 filterByte :: Word8 -> ByteString -> ByteString
1276 filterByte w ps = replicate (count w ps) w
1279 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1280 -- case of filtering a single byte out of a list. It is more efficient
1281 -- to use /filterNotByte/ in this case.
1283 -- > filterNotByte == filter . (/=)
1285 -- filterNotByte is around 3x faster, and uses much less space, than its
1286 -- filter equivalent
1287 filterNotByte :: Word8 -> ByteString -> ByteString
1288 filterNotByte ch ps@(PS x s l)
1290 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1291 t <- go (f `plusPtr` s) p l
1292 return (t `minusPtr` p) -- actual length
1296 go f t e = do w <- peek f
1298 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1299 else go (f `plusPtr` 1) t (e-1)
1301 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1302 -- returns a ByteString containing those characters that satisfy the
1303 -- predicate. This function is subject to array fusion.
1304 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1305 filter p = loopArr . loopU (filterEFL p) noAL
1306 {-# INLINE filter #-}
1308 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1309 filterF k ps@(PS x s l)
1311 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1312 t <- go (f `plusPtr` s) p l
1313 return (t `minusPtr` p) -- actual length
1317 go f t e = do w <- peek f
1319 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1320 else go (f `plusPtr` 1) t (e - 1)
1321 {-# INLINE filterF #-}
1323 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1325 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1326 -- and returns the first element in matching the predicate, or 'Nothing'
1327 -- if there is no such element.
1328 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1329 find p ps = case filter p ps of
1330 q | null q -> Nothing
1331 | otherwise -> Just (unsafeHead q)
1333 -- ---------------------------------------------------------------------
1334 -- Searching for substrings
1336 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1337 -- iff the first is a prefix of the second.
1338 isPrefixOf :: ByteString -> ByteString -> Bool
1339 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1342 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1343 withForeignPtr x2 $ \p2 -> do
1344 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1347 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1348 -- iff the first is a suffix of the second.
1350 -- The following holds:
1352 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1354 -- However, the real implemenation uses memcmp to compare the end of the
1355 -- string only, with no reverse required..
1356 isSuffixOf :: ByteString -> ByteString -> Bool
1357 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1360 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1361 withForeignPtr x2 $ \p2 -> do
1362 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1365 -- | Check whether one string is a substring of another. @isSubstringOf
1366 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1367 isSubstringOf :: ByteString -- ^ String to search for.
1368 -> ByteString -- ^ String to search in.
1370 isSubstringOf p s = not $ P.null $ findSubstrings p s
1372 -- | Get the first index of a substring in another string,
1373 -- or 'Nothing' if the string is not found.
1374 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1375 findSubstring :: ByteString -- ^ String to search for.
1376 -> ByteString -- ^ String to seach in.
1378 findSubstring = (listToMaybe .) . findSubstrings
1380 -- | Find the indexes of all (possibly overlapping) occurances of a
1381 -- substring in a string. This function uses the Knuth-Morris-Pratt
1382 -- string matching algorithm.
1383 findSubstrings :: ByteString -- ^ String to search for.
1384 -> ByteString -- ^ String to seach in.
1387 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1389 patc x = pat `unsafeIndex` x
1390 strc x = str `unsafeIndex` x
1392 -- maybe we should make kmpNext a UArray before using it in search?
1393 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1394 kmpNextL p _ | null p = []
1395 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1397 x = if not (null ps) && unsafeHead ps == patc j'
1398 then kmpNext Array.! j' else j'
1400 search i j = match ++ rest -- i: position in string, j: position in pattern
1401 where match = if j == m then [(i - j)] else []
1402 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1403 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1406 -- ---------------------------------------------------------------------
1409 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1410 -- corresponding pairs of bytes. If one input ByteString is short,
1411 -- excess elements of the longer ByteString are discarded. This is
1412 -- equivalent to a pair of 'unpack' operations.
1413 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1415 | null ps || null qs = []
1416 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1418 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1419 -- the first argument, instead of a tupling function. For example,
1420 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1421 -- corresponding sums.
1422 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1424 | null ps || null qs = []
1425 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1427 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1428 -- ByteStrings. Note that this performs two 'pack' operations.
1429 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1430 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1431 {-# INLINE unzip #-}
1433 -- ---------------------------------------------------------------------
1436 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1437 inits :: ByteString -> [ByteString]
1438 inits (PS x s l) = [PS x s n | n <- [0..l]]
1440 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1441 tails :: ByteString -> [ByteString]
1442 tails p | null p = [empty]
1443 | otherwise = p : tails (unsafeTail p)
1445 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1447 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1448 elems :: ByteString -> [ByteString]
1449 elems (PS _ _ 0) = []
1450 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1451 {-# INLINE elems #-}
1453 -- ---------------------------------------------------------------------
1454 -- ** Ordered 'ByteString's
1456 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1457 sort :: ByteString -> ByteString
1458 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1460 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1461 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1464 go 256 _ = return ()
1465 go i ptr = do n <- peekElemOff arr i
1466 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1467 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1470 -- "countEach counts str l" counts the number of occurences of each Word8 in
1471 -- str, and stores the result in counts.
1472 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1474 countEach counts str l = go 0
1477 go i | i == l = return ()
1478 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1479 x <- peekElemOff counts k
1480 pokeElemOff counts k (x + 1)
1484 sort :: ByteString -> ByteString
1485 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1486 memcpy p (f `plusPtr` s) l
1487 c_qsort p l -- inplace
1491 sort = pack . List.sort . unpack
1494 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1496 -- Try some linear sorts: radix, counting
1499 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1500 -- sortBy f ps = undefined
1502 -- ---------------------------------------------------------------------
1504 -- Extensions to the basic interface
1507 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1508 -- check for the empty case, so there is an obligation on the programmer
1509 -- to provide a proof that the ByteString is non-empty.
1510 unsafeHead :: ByteString -> Word8
1511 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1512 {-# INLINE unsafeHead #-}
1514 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1515 -- check for the empty case. As with 'unsafeHead', the programmer must
1516 -- provide a separate proof that the ByteString is non-empty.
1517 unsafeTail :: ByteString -> ByteString
1518 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1519 {-# INLINE unsafeTail #-}
1521 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1522 -- This omits the bounds check, which means there is an accompanying
1523 -- obligation on the programmer to ensure the bounds are checked in some
1525 unsafeIndex :: ByteString -> Int -> Word8
1526 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1527 {-# INLINE unsafeIndex #-}
1529 -- ---------------------------------------------------------------------
1530 -- Low level constructors
1532 #if defined(__GLASGOW_HASKELL__)
1533 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1534 -- Addr\# (an arbitrary machine address assumed to point outside the
1535 -- garbage-collected heap) into a @ByteString@. A much faster way to
1536 -- create an Addr\# is with an unboxed string literal, than to pack a
1537 -- boxed string. A unboxed string literal is compiled to a static @char
1538 -- []@ by GHC. Establishing the length of the string requires a call to
1539 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1540 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1541 -- if you know the length of the string statically.
1545 -- > literalFS = packAddress "literal"#
1547 packAddress :: Addr# -> ByteString
1548 packAddress addr# = inlinePerformIO $ do
1549 p <- newForeignPtr_ cstr
1550 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1553 {-# INLINE packAddress #-}
1555 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1556 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1557 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1558 -- 'Addr\#' to the string, and the length of the string. Make sure the
1559 -- length is correct, otherwise use the safer 'packAddress' (where the
1560 -- length will be calculated once at runtime).
1561 unsafePackAddress :: Int -> Addr# -> ByteString
1562 unsafePackAddress len addr# = inlinePerformIO $ do
1563 p <- newForeignPtr_ cstr
1565 where cstr = Ptr addr#
1569 -- | /O(1)/ Build a ByteString from a ForeignPtr
1570 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1571 fromForeignPtr fp l = PS fp 0 l
1573 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1574 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1575 toForeignPtr (PS ps s l) = (ps, s, l)
1577 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1578 -- current 'ByteString' from any larger string it was created from, as
1580 skipIndex :: ByteString -> Int
1581 skipIndex (PS _ s _) = s
1582 {-# INLINE skipIndex #-}
1584 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1585 -- finalizer associated to it. The ByteString length is calculated using
1586 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1587 packCString :: CString -> ByteString
1588 packCString cstr = inlinePerformIO $ do
1589 fp <- newForeignPtr_ (castPtr cstr)
1590 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1592 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1593 -- have /no/ finalizer associated with it. This operation has /O(1)/
1594 -- complexity as we already know the final size, so no /strlen(3)/ is
1596 packCStringLen :: CStringLen -> ByteString
1597 packCStringLen (ptr,len) = inlinePerformIO $ do
1598 fp <- newForeignPtr_ (castPtr ptr)
1599 return $ PS fp 0 (fromIntegral len)
1601 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1602 -- have a @free(3)@ finalizer associated to it.
1603 packMallocCString :: CString -> ByteString
1604 packMallocCString cstr = inlinePerformIO $ do
1605 fp <- newForeignFreePtr (castPtr cstr)
1606 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1608 #if defined(__GLASGOW_HASKELL__)
1609 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1610 -- length, and an IO action representing a finalizer. This function is
1611 -- not available on Hugs.
1613 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1614 packCStringFinalizer p l f = do
1615 fp <- FC.newForeignPtr p f
1618 -- | Explicitly run the finaliser associated with a 'ByteString'.
1619 -- Further references to this value may generate invalid memory
1620 -- references. This operation is unsafe, as there may be other
1621 -- 'ByteStrings' referring to the same underlying pages. If you use
1622 -- this, you need to have a proof of some kind that all 'ByteString's
1623 -- ever generated from the underlying byte array are no longer live.
1624 unsafeFinalize :: ByteString -> IO ()
1625 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1629 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1630 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1631 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1632 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1634 alloc = withForeignPtr ps $ \p -> do
1635 buf <- c_malloc (fromIntegral l+1)
1636 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1637 poke (buf `plusPtr` l) (0::Word8)
1638 return $ castPtr buf
1640 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1641 -- Warning: modifying the @CString@ will affect the @ByteString@.
1642 -- Why is this function unsafe? It relies on the null byte at the end of
1643 -- the ByteString to be there. This is /not/ the case if your ByteString
1644 -- has been spliced from a larger string (i.e. with take or drop).
1645 -- Unless you can guarantee the null byte, you should use the safe
1646 -- version, which will copy the string first.
1648 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1649 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1651 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1652 -- This is mainly useful to allow the rest of the data pointed
1653 -- to by the 'ByteString' to be garbage collected, for example
1654 -- if a large string has been read in, and only a small part of it
1655 -- is needed in the rest of the program.
1656 copy :: ByteString -> ByteString
1657 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1659 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1660 -- CString is going to be deallocated from C land.
1661 copyCString :: CString -> ByteString
1662 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1664 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1665 copyCStringLen :: CStringLen -> ByteString
1666 copyCStringLen (cstr, len) = inlinePerformIO $ do
1667 fp <- mallocForeignPtrArray (len+1)
1668 withForeignPtr fp $ \p -> do
1669 memcpy p (castPtr cstr) len
1670 poke (p `plusPtr` len) (0 :: Word8)
1671 return $! PS fp 0 len
1673 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1674 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1675 -- This is analogous to unsafeUseAsCString, and comes with the same
1676 -- safety requirements.
1678 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1679 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1681 -- | Given the maximum size needed and a function to make the contents
1682 -- of a ByteString, generate makes the 'ByteString'. The generating
1683 -- function is required to return the actual final size (<= the maximum
1684 -- size), and the resulting byte array is realloced to this size. The
1685 -- string is padded at the end with a null byte.
1687 -- generate is the main mechanism for creating custom, efficient
1688 -- ByteString functions, using Haskell or C functions to fill the space.
1690 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1692 p <- mallocArray (i+1)
1694 p' <- reallocArray p (i'+1)
1695 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1696 fp <- newForeignFreePtr p'
1699 -- ---------------------------------------------------------------------
1702 #if defined(__GLASGOW_HASKELL__)
1704 -- | getLine, read a line from stdin.
1705 getLine :: IO ByteString
1706 getLine = hGetLine stdin
1708 -- | hGetLine. read a ByteString from a handle
1709 hGetLine :: Handle -> IO ByteString
1710 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1711 case haBufferMode handle_ of
1712 NoBuffering -> error "no buffering"
1713 _other -> hGetLineBuffered handle_
1716 hGetLineBuffered handle_ = do
1717 let ref = haBuffer handle_
1718 buf <- readIORef ref
1719 hGetLineBufferedLoop handle_ ref buf 0 []
1721 hGetLineBufferedLoop handle_ ref
1722 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1724 off <- findEOL r w raw
1725 let new_len = len + off - r
1726 xs <- mkPS raw r off
1728 -- if eol == True, then off is the offset of the '\n'
1729 -- otherwise off == w and the buffer is now empty.
1731 then do if (w == off + 1)
1732 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1733 else writeIORef ref buf{ bufRPtr = off + 1 }
1734 mkBigPS new_len (xs:xss)
1736 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1737 buf{ bufWPtr=0, bufRPtr=0 }
1739 -- Nothing indicates we caught an EOF, and we may have a
1740 -- partial line to return.
1742 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1744 then mkBigPS new_len (xs:xss)
1747 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1749 -- find the end-of-line character, if there is one
1753 (c,r') <- readCharFromBuffer raw r
1755 then return r -- NB. not r': don't include the '\n'
1756 else findEOL r' w raw
1758 maybeFillReadBuffer fd is_line is_stream buf = catch
1759 (do buf' <- fillReadBuffer fd is_line is_stream buf
1761 (\e -> if isEOFError e then return Nothing else ioError e)
1763 -- TODO, rewrite to use normal memcpy
1764 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1765 mkPS buf start end = do
1766 let len = end - start
1767 fp <- mallocByteString len
1768 withForeignPtr fp $ \p -> do
1769 memcpy_ptr_baoff p buf start (fromIntegral len)
1770 return (PS fp 0 len)
1772 mkBigPS :: Int -> [ByteString] -> IO ByteString
1773 mkBigPS _ [ps] = return ps
1774 mkBigPS _ pss = return $! concat (P.reverse pss)
1778 -- ---------------------------------------------------------------------
1781 -- | Outputs a 'ByteString' to the specified 'Handle'.
1782 hPut :: Handle -> ByteString -> IO ()
1783 hPut _ (PS _ _ 0) = return ()
1784 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1785 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1787 -- | Write a ByteString to stdout
1788 putStr :: ByteString -> IO ()
1789 putStr = hPut stdout
1791 -- | Write a ByteString to stdout, appending a newline byte
1792 putStrLn :: ByteString -> IO ()
1793 putStrLn ps = hPut stdout ps >> hPut stdout nl
1794 where nl = packByte 0x0a
1796 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1797 -- is far more efficient than reading the characters into a 'String'
1798 -- and then using 'pack'.
1799 hGet :: Handle -> Int -> IO ByteString
1800 hGet _ 0 = return empty
1801 hGet h i = do fp <- mallocByteString i
1802 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1805 #if defined(__GLASGOW_HASKELL__)
1806 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1807 -- waiting for data to become available, instead it returns only whatever data
1809 hGetNonBlocking :: Handle -> Int -> IO ByteString
1810 hGetNonBlocking _ 0 = return empty
1811 hGetNonBlocking h i = do
1812 fp <- mallocByteString i
1813 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1817 -- | Read entire handle contents into a 'ByteString'.
1819 -- As with 'hGet', the string representation in the file is assumed to
1822 hGetContents :: Handle -> IO ByteString
1824 let start_size = 1024
1825 p <- mallocArray start_size
1826 i <- hGetBuf h p start_size
1828 then do p' <- reallocArray p i
1829 fp <- newForeignFreePtr p'
1835 p' <- reallocArray p s'
1836 i <- hGetBuf h (p' `plusPtr` s) s
1838 then do let i' = s + i
1839 p'' <- reallocArray p' i'
1840 fp <- newForeignFreePtr p''
1844 -- | getContents. Equivalent to hGetContents stdin
1845 getContents :: IO ByteString
1846 getContents = hGetContents stdin
1848 -- | Read an entire file directly into a 'ByteString'. This is far more
1849 -- efficient than reading the characters into a 'String' and then using
1850 -- 'pack'. It also may be more efficient than opening the file and
1851 -- reading it using hGet.
1852 readFile :: FilePath -> IO ByteString
1854 h <- openBinaryFile f ReadMode
1856 s <- hGet h $ fromIntegral l
1860 -- | Write a 'ByteString' to a file.
1861 writeFile :: FilePath -> ByteString -> IO ()
1863 h <- openBinaryFile f WriteMode
1869 -- Disable until we can move it into a portable .hsc file
1872 -- | Like readFile, this reads an entire file directly into a
1873 -- 'ByteString', but it is even more efficient. It involves directly
1874 -- mapping the file to memory. This has the advantage that the contents
1875 -- of the file never need to be copied. Also, under memory pressure the
1876 -- page may simply be discarded, while in the case of readFile it would
1877 -- need to be written to swap. If you read many small files, mmapFile
1878 -- will be less memory-efficient than readFile, since each mmapFile
1879 -- takes up a separate page of memory. Also, you can run into bus
1880 -- errors if the file is modified. As with 'readFile', the string
1881 -- representation in the file is assumed to be ISO-8859-1.
1883 -- On systems without mmap, this is the same as a readFile.
1885 mmapFile :: FilePath -> IO ByteString
1886 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1888 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1890 h <- openBinaryFile f ReadMode
1891 l <- fromIntegral `fmap` hFileSize h
1892 -- Don't bother mmaping small files because each mmapped file takes up
1893 -- at least one full VM block.
1895 then do thefp <- mallocByteString l
1896 withForeignPtr thefp $ \p-> hGetBuf h p l
1901 fd <- fromIntegral `fmap` handleToFd h
1903 fp <- if p == nullPtr
1904 then do thefp <- mallocByteString l
1905 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1908 -- The munmap leads to crashes on OpenBSD.
1909 -- maybe there's a use after unmap in there somewhere?
1910 #if !defined(__OpenBSD__)
1911 let unmap = c_munmap p l >> return ()
1913 let unmap = return ()
1915 fp <- FC.newForeignPtr p unmap
1920 where mmap_limit = 16*1024
1923 #if defined(__GLASGOW_HASKELL__)
1925 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1927 getArgs :: IO [ByteString]
1929 alloca $ \ p_argc ->
1930 alloca $ \ p_argv -> do
1931 getProgArgv p_argc p_argv
1932 p <- fromIntegral `fmap` peek p_argc
1934 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1937 -- ---------------------------------------------------------------------
1938 -- Internal utilities
1940 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1941 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1942 -- convenience for ByteString construction.
1943 w2c :: Word8 -> Char
1944 #if !defined(__GLASGOW_HASKELL__)
1945 w2c = chr . fromIntegral
1947 w2c = unsafeChr . fromIntegral
1951 c2w :: Char -> Word8
1952 c2w = fromIntegral . ord
1955 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1956 -- is padded with a null byte.
1957 mallocByteString :: Int -> IO (ForeignPtr Word8)
1958 mallocByteString l = do
1959 fp <- mallocForeignPtrArray (l+1)
1960 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1963 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1964 -- argument gives the final size of the ByteString. Unlike 'generate'
1965 -- the ByteString is not reallocated if the final size is less than the
1966 -- estimated size. Also, unlike 'generate' ByteString's created this way
1967 -- are managed on the Haskell heap.
1968 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1969 create l write_ptr = inlinePerformIO $ do
1970 fp <- mallocByteString (l+1)
1971 withForeignPtr fp $ \p -> write_ptr p
1973 {-# INLINE create #-}
1975 -- | Perform an operation with a temporary ByteString
1976 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1977 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1978 {-# INLINE withPtr #-}
1980 -- Common up near identical calls to `error' to reduce the number
1981 -- constant strings created when compiled:
1982 errorEmptyList :: String -> a
1983 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1984 {-# INLINE errorEmptyList #-}
1986 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1987 -- of the string if no element is found, rather than Nothing.
1988 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1989 STRICT2(findIndexOrEnd)
1992 | f (unsafeHead ps) = 0
1993 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
1994 {-# INLINE findIndexOrEnd #-}
1996 -- Find from the end of the string using predicate
1997 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1998 STRICT2(findFromEndUntil)
1999 findFromEndUntil f ps@(PS x s l) =
2001 else if f (last ps) then l
2002 else findFromEndUntil f (PS x s (l-1))
2004 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2005 -- it exposes lots of things to further inlining
2007 {-# INLINE inlinePerformIO #-}
2008 inlinePerformIO :: IO a -> a
2009 #if defined(__GLASGOW_HASKELL__)
2010 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2012 inlinePerformIO = unsafePerformIO
2015 {-# INLINE newForeignFreePtr #-}
2016 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2017 #if defined(__GLASGOW_HASKELL__)
2018 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2020 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2023 -- ---------------------------------------------------------------------
2025 -- Standard C functions
2028 foreign import ccall unsafe "string.h strlen" c_strlen
2031 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2032 :: CInt -> IO (Ptr Word8)
2034 foreign import ccall unsafe "static stdlib.h free" c_free
2035 :: Ptr Word8 -> IO ()
2037 #if !defined(__GLASGOW_HASKELL__)
2038 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2039 :: FunPtr (Ptr Word8 -> IO ())
2042 foreign import ccall unsafe "string.h memset" memset
2043 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2045 foreign import ccall unsafe "string.h memchr" memchr
2046 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2048 foreign import ccall unsafe "string.h memcmp" memcmp
2049 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2051 foreign import ccall unsafe "string.h memcpy" memcpy
2052 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2054 -- ---------------------------------------------------------------------
2059 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2060 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2062 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2063 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2065 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2066 :: Ptr Word8 -> Int -> Word8
2068 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2069 :: Ptr Word8 -> Int -> Word8
2071 foreign import ccall unsafe "static fpstring.h count" c_count
2072 :: Ptr Word8 -> Int -> Word8 -> Int
2074 -- ---------------------------------------------------------------------
2078 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2079 :: Int -> Int -> IO (Ptr Word8)
2081 foreign import ccall unsafe "static unistd.h close" c_close
2084 # if !defined(__OpenBSD__)
2085 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2086 :: Ptr Word8 -> Int -> IO Int
2090 -- ---------------------------------------------------------------------
2091 -- Internal GHC Haskell magic
2093 #if defined(__GLASGOW_HASKELL__)
2094 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2095 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2097 foreign import ccall unsafe "__hscore_memcpy_src_off"
2098 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2101 -- ---------------------------------------------------------------------
2103 -- Functional array fusion for ByteStrings.
2105 -- From the Data Parallel Haskell project,
2106 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2109 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2110 -- the fact that no bottoms of this type are ever constructed; hence, we can
2111 -- assume @(_ :: NoAL) `seq` x = x@.
2115 -- | Special forms of loop arguments
2117 -- * These are common special cases for the three function arguments of gen
2118 -- and loop; we give them special names to make it easier to trigger RULES
2119 -- applying in the special cases represented by these arguments. The
2120 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2121 -- two simplifier phases.
2123 -- * In the case where the accumulator is not needed, it is better to always
2124 -- explicitly return a value `()', rather than just copy the input to the
2125 -- output, as the former gives GHC better local information.
2128 -- | Element function expressing a mapping only
2129 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2130 mapEFL f = \_ e -> (noAL, (Just $ f e))
2131 {-# INLINE [1] mapEFL #-}
2133 -- | Element function implementing a filter function only
2134 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2135 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2136 {-# INLINE [1] filterEFL #-}
2138 -- |Element function expressing a reduction only
2139 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2140 foldEFL f = \a e -> (f a e, Nothing)
2141 {-# INLINE [1] foldEFL #-}
2146 {-# INLINE [1] noAL #-}
2148 -- | Projection functions that are fusion friendly (as in, we determine when
2149 -- they are inlined)
2150 loopArr :: (ByteString, acc) -> ByteString
2151 loopArr (arr, _) = arr
2152 {-# INLINE [1] loopArr #-}
2154 loopAcc :: (ByteString, acc) -> acc
2155 loopAcc (_, acc) = acc
2156 {-# INLINE [1] loopAcc #-}
2158 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2159 loopSndAcc (arr, (_, acc)) = (arr, acc)
2160 {-# INLINE [1] loopSndAcc #-}
2162 ------------------------------------------------------------------------
2164 -- | Iteration over over ByteStrings
2165 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2166 -> acc -- ^ initial acc value
2167 -> ByteString -- ^ input ByteString
2168 -> (ByteString, acc)
2170 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2171 fp <- mallocByteString i
2172 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2173 (acc, i') <- go (a `plusPtr` s) p start
2175 then return (fp,i,acc) -- no realloc for map
2176 else do fp_ <- mallocByteString (i'+1) -- realloc
2177 withForeignPtr fp_ $ \p' -> do
2179 poke (p' `plusPtr` i') (0::Word8)
2182 return (PS ptr 0 n, acc)
2187 trans a_off ma_off acc
2188 | a_off >= i = return (acc, ma_off)
2190 x <- peekByteOff p a_off
2191 let (acc', oe) = f acc x
2192 ma_off' <- case oe of
2193 Nothing -> return ma_off
2194 Just e -> do pokeByteOff ma ma_off e
2196 trans (a_off+1) ma_off' acc'
2198 {-# INLINE [1] loopU #-}
2202 "array fusion!" forall em1 em2 start1 start2 arr.
2203 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2204 let em (acc1, acc2) e =
2206 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2209 (acc2', res) -> ((acc1', acc2'), res)
2210 in loopSndAcc (loopU em (start1, start2) arr)
2212 "loopArr/loopSndAcc" forall x.
2213 loopArr (loopSndAcc x) = loopArr x
2215 "seq/NoAL" forall (u::NoAL) e.