1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- Maintainer : dons@cse.unsw.edu.au
12 -- Stability : experimental
13 -- Portability : portable, requires ffi and cpp
14 -- Tested with : GHC 6.4.1 and Hugs March 2005
18 -- | A time and space-efficient implementation of byte vectors using
19 -- packed Word8 arrays, suitable for high performance use, both in terms
20 -- of large data quantities, or high speed requirements. Byte vectors
21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
22 -- be passed between C and Haskell with little effort.
24 -- This module is intended to be imported @qualified@, to avoid name
25 -- clashes with Prelude functions. eg.
27 -- > import qualified Data.ByteString as B
29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
30 -- UArray by Simon Marlow. Rewritten to support slices and use
31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
34 module Data.ByteString (
36 -- * The @ByteString@ type
37 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
39 -- * Introducing and eliminating 'ByteString's
40 empty, -- :: ByteString
41 packByte, -- :: Word8 -> ByteString
42 pack, -- :: [Word8] -> ByteString
43 unpack, -- :: ByteString -> [Word8]
44 packWith, -- :: (a -> Word8) -> [a] -> ByteString
45 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: Word8 -> ByteString -> ByteString
50 null, -- :: ByteString -> Bool
51 length, -- :: ByteString -> Int
52 head, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 last, -- :: ByteString -> Word8
55 init, -- :: ByteString -> ByteString
56 append, -- :: ByteString -> ByteString -> ByteString
58 -- * Special ByteStrings
59 inits, -- :: ByteString -> [ByteString]
60 tails, -- :: ByteString -> [ByteString]
61 elems, -- :: ByteString -> [ByteString]
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 reverse, -- :: ByteString -> ByteString
66 intersperse, -- :: Word8 -> ByteString -> ByteString
67 transpose, -- :: [ByteString] -> [ByteString]
69 -- * Reducing 'ByteString's
70 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
71 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
84 -- * Generating and unfolding ByteStrings
85 replicate, -- :: Int -> Word8 -> ByteString
86 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
90 -- ** Breaking strings
91 take, -- :: Int -> ByteString -> ByteString
92 drop, -- :: Int -> ByteString -> ByteString
93 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
94 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
95 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
96 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
97 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
98 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
100 -- ** Breaking and dropping on specific bytes
101 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
102 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
103 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
104 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
106 -- ** Breaking into many substrings
107 split, -- :: Word8 -> ByteString -> [ByteString]
108 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
109 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
110 group, -- :: ByteString -> [ByteString]
111 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
113 -- ** Joining strings
114 join, -- :: ByteString -> [ByteString] -> ByteString
115 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
117 -- * Indexing ByteStrings
118 index, -- :: ByteString -> Int -> Word8
119 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
120 elemIndices, -- :: Word8 -> ByteString -> [Int]
121 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
122 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
123 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
124 count, -- :: Word8 -> ByteString -> Int
126 -- * Ordered ByteStrings
127 sort, -- :: ByteString -> ByteString
129 -- * Searching ByteStrings
131 -- ** Searching by equality
132 -- | These functions use memchr(3) to efficiently search the ByteString
134 elem, -- :: Word8 -> ByteString -> Bool
135 notElem, -- :: Word8 -> ByteString -> Bool
136 filterByte, -- :: Word8 -> ByteString -> ByteString
137 filterNotByte, -- :: Word8 -> ByteString -> ByteString
139 -- ** Searching with a predicate
140 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
141 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
143 -- ** Prefixes and suffixes
144 -- | These functions use memcmp(3) to efficiently compare substrings
145 isPrefixOf, -- :: ByteString -> ByteString -> Bool
146 isSuffixOf, -- :: ByteString -> ByteString -> Bool
148 -- ** Search for arbitrary substrings
149 isSubstringOf, -- :: ByteString -> ByteString -> Bool
150 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
151 findSubstrings, -- :: ByteString -> ByteString -> [Int]
153 -- * Zipping and unzipping ByteStrings
154 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
155 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
156 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
158 -- * Unchecked access
159 unsafeHead, -- :: ByteString -> Word8
160 unsafeTail, -- :: ByteString -> ByteString
161 unsafeIndex, -- :: ByteString -> Int -> Word8
163 -- * Low level introduction and elimination
164 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
165 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
166 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
167 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
168 skipIndex, -- :: ByteString -> Int
170 -- ** Packing CStrings and pointers
171 packCString, -- :: CString -> ByteString
172 packCStringLen, -- :: CString -> ByteString
173 packMallocCString, -- :: CString -> ByteString
175 #if defined(__GLASGOW_HASKELL__)
176 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
177 packAddress, -- :: Addr# -> ByteString
178 unsafePackAddress, -- :: Int -> Addr# -> ByteString
179 unsafeFinalize, -- :: ByteString -> IO ()
182 -- ** Using ByteStrings as CStrings
183 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
184 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
185 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
187 -- ** Copying ByteStrings
188 -- | These functions perform memcpy(3) operations
189 copy, -- :: ByteString -> ByteString
190 copyCString, -- :: CString -> ByteString
191 copyCStringLen, -- :: CStringLen -> ByteString
193 -- * I\/O with @ByteString@s
195 -- ** Standard input and output
197 #if defined(__GLASGOW_HASKELL__)
198 getLine, -- :: IO ByteString
200 getContents, -- :: IO ByteString
201 putStr, -- :: ByteString -> IO ()
202 putStrLn, -- :: ByteString -> IO ()
205 readFile, -- :: FilePath -> IO ByteString
206 writeFile, -- :: FilePath -> ByteString -> IO ()
207 -- mmapFile, -- :: FilePath -> IO ByteString
209 -- ** I\/O with Handles
210 #if defined(__GLASGOW_HASKELL__)
211 getArgs, -- :: IO [ByteString]
212 hGetLine, -- :: Handle -> IO ByteString
213 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
215 hGetContents, -- :: Handle -> IO ByteString
216 hGet, -- :: Handle -> Int -> IO ByteString
217 hPut, -- :: Handle -> ByteString -> IO ()
219 #if defined(__GLASGOW_HASKELL__)
221 unpackList, -- eek, otherwise it gets thrown away by the simplifier
226 import qualified Prelude as P
227 import Prelude hiding (reverse,head,tail,last,init,null
228 ,length,map,lines,foldl,foldr,unlines
229 ,concat,any,take,drop,splitAt,takeWhile
230 ,dropWhile,span,break,elem,filter,maximum
231 ,minimum,all,concatMap,foldl1,foldr1
232 ,readFile,writeFile,replicate
233 ,getContents,getLine,putStr,putStrLn
234 ,zip,zipWith,unzip,notElem)
236 import qualified Data.List as List
239 import Data.Word (Word8)
240 import Data.Maybe (listToMaybe)
241 import Data.Array (listArray)
242 import qualified Data.Array as Array ((!))
244 -- Control.Exception.bracket not available in yhc or nhc
245 import Control.Exception (bracket)
246 import Control.Monad (when)
248 import Foreign.C.String (CString, CStringLen)
249 import Foreign.C.Types (CSize, CInt)
250 import Foreign.ForeignPtr
251 import Foreign.Marshal.Array
253 import Foreign.Storable (Storable(..))
255 -- hGetBuf and hPutBuf not available in yhc or nhc
256 import System.IO (stdin,stdout,hClose,hFileSize
257 ,hGetBuf,hPutBuf,openBinaryFile
260 #if !defined(__GLASGOW_HASKELL__)
261 import System.IO.Unsafe
264 #if defined(__GLASGOW_HASKELL__)
266 import Data.Generics (Data(..), Typeable(..))
268 import System.IO (hGetBufNonBlocking)
269 import System.IO.Error (isEOFError)
271 import Foreign.Marshal (alloca)
272 import qualified Foreign.Concurrent as FC (newForeignPtr)
275 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
276 import GHC.Base (build, unsafeChr)
277 import GHC.Word hiding (Word8)
278 import GHC.Ptr (Ptr(..))
279 import GHC.ST (ST(..))
284 -- CFILES stuff is Hugs only
285 {-# CFILES cbits/fpstring.c #-}
287 -- -----------------------------------------------------------------------------
289 -- Useful macros, until we have bang patterns
292 #define STRICT1(f) f a | a `seq` False = undefined
293 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
294 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
295 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
296 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
298 -- -----------------------------------------------------------------------------
300 -- | A space-efficient representation of a Word8 vector, supporting many
301 -- efficient operations. A 'ByteString' contains 8-bit characters only.
303 -- Instances of Eq, Ord, Read, Show, Data, Typeable
305 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
309 #if defined(__GLASGOW_HASKELL__)
310 deriving (Data, Typeable)
313 instance Eq ByteString
316 instance Ord ByteString
317 where compare = compareBytes
319 instance Show ByteString where
320 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
322 instance Read ByteString where
323 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
326 instance Arbitrary PackedString where
327 arbitrary = P.pack `fmap` arbitrary
328 coarbitrary s = coarbitrary (P.unpack s)
331 -- | /O(n)/ Equality on the 'ByteString' type.
332 eq :: ByteString -> ByteString -> Bool
333 eq a b = (compareBytes a b) == EQ
336 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
337 compareBytes :: ByteString -> ByteString -> Ordering
338 compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
339 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
340 withForeignPtr x1 $ \p1 ->
341 withForeignPtr x2 $ \p2 -> do
342 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
343 return $ case i `compare` 0 of
344 EQ -> l1 `compare` l2
346 {-# INLINE compareBytes #-}
350 -- About 4x slower over 32M
352 compareBytes :: ByteString -> ByteString -> Ordering
353 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
354 withForeignPtr fp1 $ \p1 ->
355 withForeignPtr fp2 $ \p2 ->
356 cmp (p1 `plusPtr` off1)
357 (p2 `plusPtr` off2) 0 len1 len2
359 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
361 cmp p1 p2 n len1 len2
362 | n == len1 = if n == len2 then return EQ else return LT
363 | n == len2 = return GT
365 (a :: Word8) <- peekByteOff p1 n
366 (b :: Word8) <- peekByteOff p2 n
367 case a `compare` b of
368 EQ -> cmp p1 p2 (n+1) len1 len2
371 {-# INLINE compareBytes #-}
374 -- -----------------------------------------------------------------------------
375 -- Introducing and eliminating 'ByteString's
377 -- | /O(1)/ The empty 'ByteString'
379 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
380 {-# NOINLINE empty #-}
382 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
383 packByte :: Word8 -> ByteString
384 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
385 withForeignPtr fp $ \p -> poke p c
387 {-# NOINLINE packByte #-}
389 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
391 -- For applications with large numbers of string literals, pack can be a
392 -- bottleneck. In such cases, consider using packAddress (GHC only).
393 pack :: [Word8] -> ByteString
395 #if !defined(__GLASGOW_HASKELL__)
397 pack str = create (P.length str) $ \p -> go p str
400 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
402 #else /* hack away */
404 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
406 go _ _ [] = return ()
407 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
409 writeByte p i c = ST $ \s# ->
410 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
414 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
415 unpack :: ByteString -> [Word8]
417 #if !defined(__GLASGOW_HASKELL__)
419 unpack (PS _ _ 0) = []
420 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
421 go (p `plusPtr` s) (l - 1) []
424 go p 0 acc = peek p >>= \e -> return (e : acc)
425 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
426 {-# INLINE unpack #-}
430 unpack ps = build (unpackFoldr ps)
431 {-# INLINE unpack #-}
433 unpackList :: ByteString -> [Word8]
434 unpackList (PS fp off len) = withPtr fp $ \p -> do
436 loop _ (-1) acc = return acc
439 loop q (n-1) (a : acc)
440 loop (p `plusPtr` off) (len-1) []
443 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
446 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
447 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
449 loop _ (-1) acc = return acc
452 loop q (n-1) (a `f` acc)
453 loop (p `plusPtr` off) (len-1) ch
454 {-# INLINE [0] unpackFoldr #-}
458 ------------------------------------------------------------------------
460 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
461 -- conversion function
462 packWith :: (a -> Word8) -> [a] -> ByteString
463 packWith k str = create (P.length str) $ \p -> go p str
467 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
468 {-# INLINE packWith #-}
469 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
471 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
472 unpackWith :: (Word8 -> a) -> ByteString -> [a]
473 unpackWith _ (PS _ _ 0) = []
474 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
475 go (p `plusPtr` s) (l - 1) []
478 go p 0 acc = peek p >>= \e -> return (k e : acc)
479 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
480 {-# INLINE unpackWith #-}
481 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
483 -- ---------------------------------------------------------------------
486 -- | /O(1)/ Test whether a ByteString is empty.
487 null :: ByteString -> Bool
488 null (PS _ _ l) = l == 0
491 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
492 length :: ByteString -> Int
493 length (PS _ _ l) = l
494 {-# INLINE length #-}
496 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
497 -- complexity, as it requires a memcpy.
498 cons :: Word8 -> ByteString -> ByteString
499 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
500 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
504 -- | /O(n)/ Append a byte to the end of a 'ByteString'
505 snoc :: ByteString -> Word8 -> ByteString
506 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
507 memcpy p (f `plusPtr` s) l
508 poke (p `plusPtr` l) c
511 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
512 head :: ByteString -> Word8
514 | null ps = errorEmptyList "head"
515 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
518 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
519 tail :: ByteString -> ByteString
521 | l <= 0 = errorEmptyList "tail"
522 | otherwise = PS p (s+1) (l-1)
525 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
526 last :: ByteString -> Word8
528 | null ps = errorEmptyList "last"
529 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
532 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
533 init :: ByteString -> ByteString
535 | l <= 0 = errorEmptyList "init"
536 | otherwise = PS p s (l-1)
539 -- | /O(n)/ Append two ByteStrings
540 append :: ByteString -> ByteString -> ByteString
541 append xs ys | null xs = ys
543 | otherwise = concat [xs,ys]
544 {-# INLINE append #-}
548 -- About 30% faster, but allocating in a big chunk isn't good for memory use
550 append :: ByteString -> ByteString -> ByteString
551 append xs@(PS ffp s l) ys@(PS fgp t m)
554 | otherwise = create len $ \ptr ->
555 withForeignPtr ffp $ \fp ->
556 withForeignPtr fgp $ \gp -> do
557 memcpy ptr (fp `plusPtr` s) l
558 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
559 where len = length xs + length ys
562 -- ---------------------------------------------------------------------
565 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
568 map :: (Word8 -> Word8) -> ByteString -> ByteString
569 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
570 new_fp <- mallocByteString len
571 withForeignPtr new_fp $ \new_p -> do
572 map_ f (len-1) (p `plusPtr` start) new_p
573 return (PS new_fp 0 len)
576 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
581 x <- peekByteOff p1 n
582 pokeByteOff p2 n (f x)
586 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
587 reverse :: ByteString -> ByteString
588 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
589 c_reverse p (f `plusPtr` s) l
592 reverse = pack . P.reverse . unpack
595 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
596 -- 'ByteString' and \`intersperses\' that byte between the elements of
597 -- the 'ByteString'. It is analogous to the intersperse function on
599 intersperse :: Word8 -> ByteString -> ByteString
600 intersperse c ps@(PS x s l)
602 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
603 c_intersperse p (f `plusPtr` s) l c
606 intersperse c = pack . List.intersperse c . unpack
609 -- | The 'transpose' function transposes the rows and columns of its
610 -- 'ByteString' argument.
611 transpose :: [ByteString] -> [ByteString]
612 transpose ps = P.map pack (List.transpose (P.map unpack ps))
614 -- ---------------------------------------------------------------------
615 -- Reducing 'ByteString's
617 -- | 'foldl', applied to a binary operator, a starting value (typically
618 -- the left-identity of the operator), and a ByteString, reduces the
619 -- ByteString using the binary operator, from left to right.
620 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
621 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
622 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
625 lgo z p q | p == q = return z
626 | otherwise = do c <- peek p
627 lgo (f z c) (p `plusPtr` 1) q
629 -- | 'foldr', applied to a binary operator, a starting value
630 -- (typically the right-identity of the operator), and a ByteString,
631 -- reduces the ByteString using the binary operator, from right to left.
632 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
633 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
634 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
637 go p q | p == q = return z
638 | otherwise = do c <- peek p
639 ws <- go (p `plusPtr` 1) q
642 -- | 'foldl1' is a variant of 'foldl' that has no starting value
643 -- argument, and thus must be applied to non-empty 'ByteStrings'.
644 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
646 | null ps = errorEmptyList "foldl1"
647 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
649 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
650 -- and thus must be applied to non-empty 'ByteString's
651 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
653 | null ps = errorEmptyList "foldr1"
654 | otherwise = foldr f (last ps) (init ps)
656 -- ---------------------------------------------------------------------
659 -- | /O(n)/ Concatenate a list of ByteStrings.
660 concat :: [ByteString] -> ByteString
663 concat xs = inlinePerformIO $ do
664 let start_size = 1024
665 p <- mallocArray start_size
668 where f ptr len _ [] = do
669 ptr' <- reallocArray ptr (len+1)
670 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
671 fp <- newForeignFreePtr ptr'
674 f ptr len to_go pss@(PS p s l:pss')
675 | l <= to_go = do withForeignPtr p $ \pf ->
676 memcpy (ptr `plusPtr` len)
678 f ptr (len + l) (to_go - l) pss'
680 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
681 ptr' <- reallocArray ptr new_total
682 f ptr' len (new_total - len) pss
684 -- | Map a function over a 'ByteString' and concatenate the results
685 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
686 concatMap f = foldr (append . f) empty
688 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
689 -- any element of the 'ByteString' satisfies the predicate.
690 any :: (Word8 -> Bool) -> ByteString -> Bool
691 any _ (PS _ _ 0) = False
692 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
693 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
696 go p q | p == q = return False
697 | otherwise = do c <- peek p
698 if f c then return True
699 else go (p `plusPtr` 1) q
701 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
702 -- if all elements of the 'ByteString' satisfy the predicate.
703 all :: (Word8 -> Bool) -> ByteString -> Bool
704 all _ (PS _ _ 0) = True
705 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
706 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
709 go p q | p == q = return True -- end of list
710 | otherwise = do c <- peek p
712 then go (p `plusPtr` 1) q
715 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
716 maximum :: ByteString -> Word8
717 maximum xs@(PS x s l)
718 | null xs = errorEmptyList "maximum"
719 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
720 return $ c_maximum (p `plusPtr` s) l
721 {-# INLINE maximum #-}
723 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
724 minimum :: ByteString -> Word8
725 minimum xs@(PS x s l)
726 | null xs = errorEmptyList "minimum"
727 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
728 return $ c_minimum (p `plusPtr` s) l
729 {-# INLINE minimum #-}
732 maximum xs@(PS x s l)
733 | null xs = errorEmptyList "maximum"
734 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
736 maximum_ (p `plusPtr` s) 0 l w
738 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
742 | otherwise = do w <- peekByteOff ptr n
743 maximum_ ptr (n+1) m (if w > c then w else c)
745 minimum xs@(PS x s l)
746 | null xs = errorEmptyList "minimum"
747 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
749 minimum_ (p `plusPtr` s) 0 l w
751 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
755 | otherwise = do w <- peekByteOff ptr n
756 minimum_ ptr (n+1) m (if w < c then w else c)
759 -- | /O(n)/ map Word8 functions, provided with the index at each position
760 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
761 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
762 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
764 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
766 go n f t p | f == p = return ()
767 | otherwise = do w <- peek f
769 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
771 -- ---------------------------------------------------------------------
772 -- Unfolds and replicates
774 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
775 -- the value of every element. The following holds:
777 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
779 -- This implemenation uses @memset(3)@
780 replicate :: Int -> Word8 -> ByteString
781 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
785 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
789 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
792 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
793 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
794 -- the element and returns 'Nothing' if it is done producing the
795 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
796 -- prepending to the ByteString and @b@ is used as the next element in a
799 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
800 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
801 -- final size of the ByteString as an argument. 'cons' can then be
802 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
803 -- linear complexity. The depth of the recursion is limited to this
804 -- size, but may be less. For lazy, infinite unfoldr, use
805 -- 'Data.List.unfoldr' (from 'Data.List').
809 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
811 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
813 -- > unfoldrN n == take n $ List.unfoldr
814 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
815 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
818 go q c n | n == i = return n -- stop if we reach `i'
819 | otherwise = case f c of
823 go (q `plusPtr` 1) new_c (n+1)
825 -- ---------------------------------------------------------------------
828 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
829 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
830 take :: Int -> ByteString -> ByteString
834 | otherwise = PS x s n
837 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
838 -- elements, or @[]@ if @n > 'length' xs@.
839 drop :: Int -> ByteString -> ByteString
843 | otherwise = PS x (s+n) (l-n)
846 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
847 splitAt :: Int -> ByteString -> (ByteString, ByteString)
848 splitAt n ps = (take n ps, drop n ps)
849 {-# INLINE splitAt #-}
851 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
852 -- returns the longest prefix (possibly empty) of @xs@ of elements that
854 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
855 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
856 {-# INLINE takeWhile #-}
858 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
859 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
860 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
861 {-# INLINE dropWhile #-}
863 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
864 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
865 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
868 -- | 'breakByte' breaks its ByteString argument at the first occurence
869 -- of the specified byte. It is more efficient than 'break' as it is
870 -- implemented with @memchr(3)@. I.e.
872 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
874 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
875 breakByte c p = case elemIndex c p of
877 Just n -> (take n p, drop n p)
878 {-# INLINE breakByte #-}
880 -- | 'spanByte' breaks its ByteString argument at the first
881 -- occurence of a byte other than its argument. It is more efficient
884 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
886 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
887 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
891 go p i | i >= l = return (ps, empty)
892 | otherwise = do c' <- peekByteOff p i
894 then return (take i ps, drop i ps)
896 {-# INLINE spanByte #-}
898 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
899 -- occurence of @w@. It behaves like 'break', except the delimiter is
900 -- not returned, and @Nothing@ is returned if the delimiter is not in
901 -- the ByteString. I.e.
903 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
905 -- > breakFirst c xs ==
906 -- > let (x,y) = break (== c) xs
907 -- > in if null y then Nothing else Just (x, drop 1 y))
909 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
910 breakFirst c p = case elemIndex c p of
912 Just n -> Just (take n p, drop (n+1) p)
913 {-# INLINE breakFirst #-}
915 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
918 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
920 -- and the following are equivalent:
922 -- > breakLast 'c' "abcdef"
923 -- > let (x,y) = break (=='c') (reverse "abcdef")
924 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
926 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
927 breakLast c p = case elemIndexLast c p of
929 Just n -> Just (take n p, drop (n+1) p)
930 {-# INLINE breakLast #-}
932 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
933 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
934 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
935 span p ps = break (not . p) ps
938 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
941 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
945 -- > spanEnd (not . isSpace) ps
947 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
949 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
950 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
952 -- | /O(n)/ Splits a 'ByteString' into components delimited by
953 -- separators, where the predicate returns True for a separator element.
954 -- The resulting components do not contain the separators. Two adjacent
955 -- separators result in an empty component in the output. eg.
957 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
958 -- > splitWith (=='a') [] == []
960 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
962 #if defined(__GLASGOW_HASKELL__)
963 splitWith _pred (PS _ _ 0) = []
964 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
965 where pred# c# = pred_ (W8# c#)
967 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
968 splitLoop pred' p 0 off' len' fp'
970 splitLoop :: (Word# -> Bool)
976 splitLoop pred' p idx' off' len' fp'
977 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
978 | idx' >= len' = return [PS fp' off' idx']
980 w <- peekElemOff p (off'+idx')
981 if pred' (case w of W8# w# -> w#)
982 then return (PS fp' off' idx' :
983 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
984 else splitLoop pred' p (idx'+1) off' len' fp'
985 {-# INLINE splitWith #-}
988 splitWith _ (PS _ _ 0) = []
989 splitWith p ps = splitWith' p ps
992 splitWith' q qs = if null rest then [chunk]
993 else chunk : splitWith' q (unsafeTail rest)
994 where (chunk,rest) = break q qs
997 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
998 -- argument, consuming the delimiter. I.e.
1000 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1001 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1002 -- > split 'x' "x" == ["",""]
1006 -- > join [c] . split c == id
1007 -- > split == splitWith . (==)
1009 -- As for all splitting functions in this library, this function does
1010 -- not copy the substrings, it just constructs new 'ByteStrings' that
1011 -- are slices of the original.
1013 split :: Word8 -> ByteString -> [ByteString]
1014 split _ (PS _ _ 0) = []
1015 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1016 let ptr = p `plusPtr` s
1020 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1022 then return [PS x (s+n) (l-n)]
1023 else do let i = q `minusPtr` ptr
1025 return $! PS x (s+n) (i-n) : ls
1027 {-# INLINE split #-}
1030 -- slower. but stays inside Haskell.
1031 split _ (PS _ _ 0) = []
1032 split (W8# w#) (PS fp off len) = splitWith' off len fp
1034 splitWith' off' len' fp' = withPtr fp $ \p ->
1035 splitLoop p 0 off' len' fp'
1037 splitLoop :: Ptr Word8
1038 -> Int -> Int -> Int
1043 splitLoop p idx' off' len' fp'
1044 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1045 | idx' >= len' = return [PS fp' off' idx']
1047 (W8# x#) <- peekElemOff p (off'+idx')
1048 if word2Int# w# ==# word2Int# x#
1049 then return (PS fp' off' idx' :
1050 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1051 else splitLoop p (idx'+1) off' len' fp'
1054 -- | Like 'splitWith', except that sequences of adjacent separators are
1055 -- treated as a single separator. eg.
1057 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1059 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1060 tokens f = P.filter (not.null) . splitWith f
1062 -- | The 'group' function takes a ByteString and returns a list of
1063 -- ByteStrings such that the concatenation of the result is equal to the
1064 -- argument. Moreover, each sublist in the result contains only equal
1065 -- elements. For example,
1067 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1069 -- It is a special case of 'groupBy', which allows the programmer to
1070 -- supply their own equality test. It is about 40% faster than
1072 group :: ByteString -> [ByteString]
1075 | otherwise = ys : group zs
1077 (ys, zs) = spanByte (unsafeHead xs) xs
1079 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1080 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1083 | otherwise = take n xs : groupBy k (drop n xs)
1085 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1087 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1088 -- 'ByteString's and concatenates the list after interspersing the first
1089 -- argument between each element of the list.
1090 join :: ByteString -> [ByteString] -> ByteString
1091 join filler pss = concat (splice pss)
1095 splice (x:y:xs) = x:filler:splice (y:xs)
1098 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1099 -- with a char. Around 4 times faster than the generalised join.
1101 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1102 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1103 withForeignPtr ffp $ \fp ->
1104 withForeignPtr fgp $ \gp -> do
1105 memcpy ptr (fp `plusPtr` s) l
1106 poke (ptr `plusPtr` l) c
1107 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1109 len = length f + length g + 1
1110 {-# INLINE joinWithByte #-}
1112 -- ---------------------------------------------------------------------
1113 -- Indexing ByteStrings
1115 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1116 index :: ByteString -> Int -> Word8
1118 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1119 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1120 ++ ", length = " ++ show (length ps)
1121 | otherwise = ps `unsafeIndex` n
1122 {-# INLINE index #-}
1124 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1125 -- element in the given 'ByteString' which is equal to the query
1126 -- element, or 'Nothing' if there is no such element.
1127 -- This implementation uses memchr(3).
1128 elemIndex :: Word8 -> ByteString -> Maybe Int
1129 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1130 let p' = p `plusPtr` s
1131 q = memchr p' c (fromIntegral l)
1132 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1133 {-# INLINE elemIndex #-}
1135 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1136 -- element in the given 'ByteString' which is equal to the query
1137 -- element, or 'Nothing' if there is no such element. The following
1140 -- > elemIndexLast c xs ==
1141 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1143 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1144 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1145 go (p `plusPtr` s) (l-1)
1148 go p i | i < 0 = return Nothing
1149 | otherwise = do ch' <- peekByteOff p i
1151 then return $ Just i
1153 {-# INLINE elemIndexLast #-}
1155 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1156 -- the indices of all elements equal to the query element, in ascending order.
1157 -- This implementation uses memchr(3).
1158 elemIndices :: Word8 -> ByteString -> [Int]
1159 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1160 let ptr = p `plusPtr` s
1164 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1167 else do let i = q `minusPtr` ptr
1174 elemIndices :: Word8 -> ByteString -> [Int]
1175 elemIndices c ps = loop 0 ps
1177 loop _ ps' | null ps' = []
1178 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1179 | otherwise = loop (n+1) (unsafeTail ps')
1182 -- | count returns the number of times its argument appears in the ByteString
1184 -- > count = length . elemIndices
1186 -- But more efficiently than using length on the intermediate list.
1187 count :: Word8 -> ByteString -> Int
1188 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1189 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1190 {-# INLINE count #-}
1194 -- around 30% slower
1196 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1197 go (p `plusPtr` s) (fromIntegral m) 0
1199 go :: Ptr Word8 -> CSize -> Int -> IO Int
1202 let q = memchr p w l
1205 else do let k = fromIntegral $ q `minusPtr` p
1206 go (q `plusPtr` 1) (l-k-1) (i+1)
1209 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1210 -- returns the index of the first element in the ByteString
1211 -- satisfying the predicate.
1212 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1213 findIndex = (listToMaybe .) . findIndices
1215 -- | The 'findIndices' function extends 'findIndex', by returning the
1216 -- indices of all elements satisfying the predicate, in ascending order.
1217 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1218 findIndices p ps = loop 0 ps
1221 loop _ qs | null qs = []
1222 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1223 | otherwise = loop (n+1) (unsafeTail qs)
1225 -- ---------------------------------------------------------------------
1226 -- Searching ByteStrings
1228 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1229 elem :: Word8 -> ByteString -> Bool
1230 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1233 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1234 notElem :: Word8 -> ByteString -> Bool
1235 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1236 {-# INLINE notElem #-}
1239 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1240 -- case of filtering a single byte. It is more efficient to use
1241 -- /filterByte/ in this case.
1243 -- > filterByte == filter . (==)
1245 -- filterByte is around 10x faster, and uses much less space, than its
1246 -- filter equivalent
1247 filterByte :: Word8 -> ByteString -> ByteString
1248 filterByte w ps = replicate (count w ps) w
1251 -- slower than the replicate version
1253 filterByte ch ps@(PS x s l)
1255 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1256 t <- go (f `plusPtr` s) p l
1257 return (t `minusPtr` p) -- actual length
1261 go f t e = do w <- peek f
1263 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1264 else go (f `plusPtr` 1) t (e-1)
1268 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1269 -- case of filtering a single byte out of a list. It is more efficient
1270 -- to use /filterNotByte/ in this case.
1272 -- > filterNotByte == filter . (/=)
1274 -- filterNotByte is around 3x faster, and uses much less space, than its
1275 -- filter equivalent
1276 filterNotByte :: Word8 -> ByteString -> ByteString
1277 filterNotByte ch ps@(PS x s l)
1279 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1280 t <- go (f `plusPtr` s) p l
1281 return (t `minusPtr` p) -- actual length
1285 go f t e = do w <- peek f
1287 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1288 else go (f `plusPtr` 1) t (e-1)
1290 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1291 -- returns a ByteString containing those characters that satisfy the
1293 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1294 filter k ps@(PS x s l)
1296 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1297 t <- go (f `plusPtr` s) p l
1298 return (t `minusPtr` p) -- actual length
1302 go f t e = do w <- peek f
1304 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1305 else go (f `plusPtr` 1) t (e - 1)
1307 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1309 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1310 -- and returns the first element in matching the predicate, or 'Nothing'
1311 -- if there is no such element.
1312 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1313 find p ps = case filter p ps of
1314 q | null q -> Nothing
1315 | otherwise -> Just (unsafeHead q)
1317 -- ---------------------------------------------------------------------
1318 -- Searching for substrings
1320 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1321 -- iff the first is a prefix of the second.
1322 isPrefixOf :: ByteString -> ByteString -> Bool
1323 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1326 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1327 withForeignPtr x2 $ \p2 -> do
1328 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1331 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1332 -- iff the first is a suffix of the second.
1334 -- The following holds:
1336 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1338 -- However, the real implemenation uses memcmp to compare the end of the
1339 -- string only, with no reverse required..
1340 isSuffixOf :: ByteString -> ByteString -> Bool
1341 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1344 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1345 withForeignPtr x2 $ \p2 -> do
1346 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1349 -- | Check whether one string is a substring of another. @isSubstringOf
1350 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1351 isSubstringOf :: ByteString -- ^ String to search for.
1352 -> ByteString -- ^ String to search in.
1354 isSubstringOf p s = not $ P.null $ findSubstrings p s
1356 -- | Get the first index of a substring in another string,
1357 -- or 'Nothing' if the string is not found.
1358 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1359 findSubstring :: ByteString -- ^ String to search for.
1360 -> ByteString -- ^ String to seach in.
1362 findSubstring = (listToMaybe .) . findSubstrings
1364 -- | Find the indexes of all (possibly overlapping) occurances of a
1365 -- substring in a string. This function uses the Knuth-Morris-Pratt
1366 -- string matching algorithm.
1367 findSubstrings :: ByteString -- ^ String to search for.
1368 -> ByteString -- ^ String to seach in.
1371 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1373 patc x = pat `unsafeIndex` x
1374 strc x = str `unsafeIndex` x
1376 -- maybe we should make kmpNext a UArray before using it in search?
1377 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1378 kmpNextL p _ | null p = []
1379 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1381 x = if not (null ps) && unsafeHead ps == patc j'
1382 then kmpNext Array.! j' else j'
1384 search i j = match ++ rest -- i: position in string, j: position in pattern
1385 where match = if j == m then [(i - j)] else []
1386 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1387 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1390 -- ---------------------------------------------------------------------
1393 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1394 -- corresponding pairs of bytes. If one input ByteString is short,
1395 -- excess elements of the longer ByteString are discarded. This is
1396 -- equivalent to a pair of 'unpack' operations.
1397 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1399 | null ps || null qs = []
1400 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1402 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1403 -- the first argument, instead of a tupling function. For example,
1404 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1405 -- corresponding sums.
1406 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1408 | null ps || null qs = []
1409 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1411 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1412 -- ByteStrings. Note that this performs two 'pack' operations.
1413 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1414 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1415 {-# INLINE unzip #-}
1417 -- ---------------------------------------------------------------------
1420 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1421 inits :: ByteString -> [ByteString]
1422 inits (PS x s l) = [PS x s n | n <- [0..l]]
1424 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1425 tails :: ByteString -> [ByteString]
1426 tails p | null p = [empty]
1427 | otherwise = p : tails (unsafeTail p)
1429 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1431 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1432 elems :: ByteString -> [ByteString]
1433 elems (PS _ _ 0) = []
1434 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1435 {-# INLINE elems #-}
1437 -- ---------------------------------------------------------------------
1438 -- ** Ordered 'ByteString's
1440 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1441 sort :: ByteString -> ByteString
1442 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1444 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1445 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1448 go 256 _ = return ()
1449 go i ptr = do n <- peekElemOff arr i
1450 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1451 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1454 -- "countEach counts str l" counts the number of occurences of each Word8 in
1455 -- str, and stores the result in counts.
1456 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1458 countEach counts str l = go 0
1461 go i | i == l = return ()
1462 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1463 x <- peekElemOff counts k
1464 pokeElemOff counts k (x + 1)
1468 sort :: ByteString -> ByteString
1469 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1470 memcpy p (f `plusPtr` s) l
1471 c_qsort p l -- inplace
1475 sort = pack . List.sort . unpack
1478 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1480 -- Try some linear sorts: radix, counting
1483 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1484 -- sortBy f ps = undefined
1486 -- ---------------------------------------------------------------------
1488 -- Extensions to the basic interface
1491 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1492 -- check for the empty case, so there is an obligation on the programmer
1493 -- to provide a proof that the ByteString is non-empty.
1494 unsafeHead :: ByteString -> Word8
1495 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1496 {-# INLINE unsafeHead #-}
1498 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1499 -- check for the empty case. As with 'unsafeHead', the programmer must
1500 -- provide a separate proof that the ByteString is non-empty.
1501 unsafeTail :: ByteString -> ByteString
1502 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1503 {-# INLINE unsafeTail #-}
1505 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1506 -- This omits the bounds check, which means there is an accompanying
1507 -- obligation on the programmer to ensure the bounds are checked in some
1509 unsafeIndex :: ByteString -> Int -> Word8
1510 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1511 {-# INLINE unsafeIndex #-}
1513 -- ---------------------------------------------------------------------
1514 -- Low level constructors
1516 #if defined(__GLASGOW_HASKELL__)
1517 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1518 -- Addr\# (an arbitrary machine address assumed to point outside the
1519 -- garbage-collected heap) into a @ByteString@. A much faster way to
1520 -- create an Addr\# is with an unboxed string literal, than to pack a
1521 -- boxed string. A unboxed string literal is compiled to a static @char
1522 -- []@ by GHC. Establishing the length of the string requires a call to
1523 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1524 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1525 -- if you know the length of the string statically.
1529 -- > literalFS = packAddress "literal"#
1531 packAddress :: Addr# -> ByteString
1532 packAddress addr# = inlinePerformIO $ do
1533 p <- newForeignPtr_ cstr
1534 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1537 {-# INLINE packAddress #-}
1539 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1540 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1541 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1542 -- 'Addr\#' to the string, and the length of the string. Make sure the
1543 -- length is correct, otherwise use the safer 'packAddress' (where the
1544 -- length will be calculated once at runtime).
1545 unsafePackAddress :: Int -> Addr# -> ByteString
1546 unsafePackAddress len addr# = inlinePerformIO $ do
1547 p <- newForeignPtr_ cstr
1549 where cstr = Ptr addr#
1553 -- | /O(1)/ Build a ByteString from a ForeignPtr
1554 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1555 fromForeignPtr fp l = PS fp 0 l
1557 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1558 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1559 toForeignPtr (PS ps s l) = (ps, s, l)
1561 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1562 -- current 'ByteString' from any larger string it was created from, as
1564 skipIndex :: ByteString -> Int
1565 skipIndex (PS _ s _) = s
1566 {-# INLINE skipIndex #-}
1568 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1569 -- finalizer associated to it. The ByteString length is calculated using
1570 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1571 packCString :: CString -> ByteString
1572 packCString cstr = inlinePerformIO $ do
1573 fp <- newForeignPtr_ (castPtr cstr)
1574 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1576 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1577 -- have /no/ finalizer associated with it. This operation has /O(1)/
1578 -- complexity as we already know the final size, so no /strlen(3)/ is
1580 packCStringLen :: CStringLen -> ByteString
1581 packCStringLen (ptr,len) = inlinePerformIO $ do
1582 fp <- newForeignPtr_ (castPtr ptr)
1583 return $ PS fp 0 (fromIntegral len)
1585 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1586 -- have a @free(3)@ finalizer associated to it.
1587 packMallocCString :: CString -> ByteString
1588 packMallocCString cstr = inlinePerformIO $ do
1589 fp <- newForeignFreePtr (castPtr cstr)
1590 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1592 #if defined(__GLASGOW_HASKELL__)
1593 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1594 -- length, and an IO action representing a finalizer. This function is
1595 -- not available on Hugs.
1597 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1598 packCStringFinalizer p l f = do
1599 fp <- FC.newForeignPtr p f
1602 -- | Explicitly run the finaliser associated with a 'ByteString'.
1603 -- Further references to this value may generate invalid memory
1604 -- references. This operation is unsafe, as there may be other
1605 -- 'ByteStrings' referring to the same underlying pages. If you use
1606 -- this, you need to have a proof of some kind that all 'ByteString's
1607 -- ever generated from the underlying byte array are no longer live.
1608 unsafeFinalize :: ByteString -> IO ()
1609 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1613 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1614 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1615 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1616 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1618 alloc = withForeignPtr ps $ \p -> do
1619 buf <- c_malloc (fromIntegral l+1)
1620 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1621 poke (buf `plusPtr` l) (0::Word8)
1622 return $ castPtr buf
1624 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1625 -- Warning: modifying the @CString@ will affect the @ByteString@.
1626 -- Why is this function unsafe? It relies on the null byte at the end of
1627 -- the ByteString to be there. This is /not/ the case if your ByteString
1628 -- has been spliced from a larger string (i.e. with take or drop).
1629 -- Unless you can guarantee the null byte, you should use the safe
1630 -- version, which will copy the string first.
1632 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1633 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1635 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1636 -- This is mainly useful to allow the rest of the data pointed
1637 -- to by the 'ByteString' to be garbage collected, for example
1638 -- if a large string has been read in, and only a small part of it
1639 -- is needed in the rest of the program.
1640 copy :: ByteString -> ByteString
1641 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1643 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1644 -- CString is going to be deallocated from C land.
1645 copyCString :: CString -> ByteString
1646 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1648 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1649 copyCStringLen :: CStringLen -> ByteString
1650 copyCStringLen (cstr, len) = inlinePerformIO $ do
1651 fp <- mallocForeignPtrArray (len+1)
1652 withForeignPtr fp $ \p -> do
1653 memcpy p (castPtr cstr) len
1654 poke (p `plusPtr` len) (0 :: Word8)
1655 return $! PS fp 0 len
1657 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1658 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1659 -- This is analogous to unsafeUseAsCString, and comes with the same
1660 -- safety requirements.
1662 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1663 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1665 -- | Given the maximum size needed and a function to make the contents
1666 -- of a ByteString, generate makes the 'ByteString'. The generating
1667 -- function is required to return the actual final size (<= the maximum
1668 -- size), and the resulting byte array is realloced to this size. The
1669 -- string is padded at the end with a null byte.
1671 -- generate is the main mechanism for creating custom, efficient
1672 -- ByteString functions, using Haskell or C functions to fill the space.
1674 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1678 p' <- reallocArray p (i'+1)
1679 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1680 fp <- newForeignFreePtr p'
1683 -- ---------------------------------------------------------------------
1686 #if defined(__GLASGOW_HASKELL__)
1688 -- | getLine, read a line from stdin.
1689 getLine :: IO ByteString
1690 getLine = hGetLine stdin
1692 -- | hGetLine. read a ByteString from a handle
1693 hGetLine :: Handle -> IO ByteString
1694 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1695 case haBufferMode handle_ of
1696 NoBuffering -> error "no buffering"
1697 _other -> hGetLineBuffered handle_
1700 hGetLineBuffered handle_ = do
1701 let ref = haBuffer handle_
1702 buf <- readIORef ref
1703 hGetLineBufferedLoop handle_ ref buf 0 []
1705 hGetLineBufferedLoop handle_ ref
1706 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1708 off <- findEOL r w raw
1709 let new_len = len + off - r
1710 xs <- mkPS raw r off
1712 -- if eol == True, then off is the offset of the '\n'
1713 -- otherwise off == w and the buffer is now empty.
1715 then do if (w == off + 1)
1716 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1717 else writeIORef ref buf{ bufRPtr = off + 1 }
1718 mkBigPS new_len (xs:xss)
1720 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1721 buf{ bufWPtr=0, bufRPtr=0 }
1723 -- Nothing indicates we caught an EOF, and we may have a
1724 -- partial line to return.
1726 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1728 then mkBigPS new_len (xs:xss)
1731 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1733 -- find the end-of-line character, if there is one
1737 (c,r') <- readCharFromBuffer raw r
1739 then return r -- NB. not r': don't include the '\n'
1740 else findEOL r' w raw
1742 maybeFillReadBuffer fd is_line is_stream buf = catch
1743 (do buf' <- fillReadBuffer fd is_line is_stream buf
1745 (\e -> if isEOFError e then return Nothing else ioError e)
1747 -- TODO, rewrite to use normal memcpy
1748 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1749 mkPS buf start end = do
1750 let len = end - start
1751 fp <- mallocByteString len
1752 withForeignPtr fp $ \p -> do
1753 memcpy_ptr_baoff p buf start (fromIntegral len)
1754 return (PS fp 0 len)
1756 mkBigPS :: Int -> [ByteString] -> IO ByteString
1757 mkBigPS _ [ps] = return ps
1758 mkBigPS _ pss = return $! concat (P.reverse pss)
1762 -- ---------------------------------------------------------------------
1765 -- | Outputs a 'ByteString' to the specified 'Handle'.
1766 hPut :: Handle -> ByteString -> IO ()
1767 hPut _ (PS _ _ 0) = return ()
1768 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1769 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1771 -- | Write a ByteString to stdout
1772 putStr :: ByteString -> IO ()
1773 putStr = hPut stdout
1775 -- | Write a ByteString to stdout, appending a newline byte
1776 putStrLn :: ByteString -> IO ()
1777 putStrLn ps = hPut stdout ps >> hPut stdout nl
1778 where nl = packByte 0x0a
1780 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1781 -- is far more efficient than reading the characters into a 'String'
1782 -- and then using 'pack'.
1783 hGet :: Handle -> Int -> IO ByteString
1784 hGet _ 0 = return empty
1785 hGet h i = do fp <- mallocByteString i
1786 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1789 #if defined(__GLASGOW_HASKELL__)
1790 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1791 -- waiting for data to become available, instead it returns only whatever data
1793 hGetNonBlocking :: Handle -> Int -> IO ByteString
1794 hGetNonBlocking _ 0 = return empty
1795 hGetNonBlocking h i = do
1796 fp <- mallocByteString i
1797 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1801 -- | Read entire handle contents into a 'ByteString'.
1803 -- As with 'hGet', the string representation in the file is assumed to
1806 hGetContents :: Handle -> IO ByteString
1808 let start_size = 1024
1809 p <- mallocArray start_size
1810 i <- hGetBuf h p start_size
1812 then do p' <- reallocArray p i
1813 fp <- newForeignFreePtr p'
1819 p' <- reallocArray p s'
1820 i <- hGetBuf h (p' `plusPtr` s) s
1822 then do let i' = s + i
1823 p'' <- reallocArray p' i'
1824 fp <- newForeignFreePtr p''
1828 -- | getContents. Equivalent to hGetContents stdin
1829 getContents :: IO ByteString
1830 getContents = hGetContents stdin
1832 -- | Read an entire file directly into a 'ByteString'. This is far more
1833 -- efficient than reading the characters into a 'String' and then using
1834 -- 'pack'. It also may be more efficient than opening the file and
1835 -- reading it using hGet.
1836 readFile :: FilePath -> IO ByteString
1838 h <- openBinaryFile f ReadMode
1840 s <- hGet h $ fromIntegral l
1844 -- | Write a 'ByteString' to a file.
1845 writeFile :: FilePath -> ByteString -> IO ()
1847 h <- openBinaryFile f WriteMode
1853 -- Disable until we can move it into a portable .hsc file
1856 -- | Like readFile, this reads an entire file directly into a
1857 -- 'ByteString', but it is even more efficient. It involves directly
1858 -- mapping the file to memory. This has the advantage that the contents
1859 -- of the file never need to be copied. Also, under memory pressure the
1860 -- page may simply be discarded, while in the case of readFile it would
1861 -- need to be written to swap. If you read many small files, mmapFile
1862 -- will be less memory-efficient than readFile, since each mmapFile
1863 -- takes up a separate page of memory. Also, you can run into bus
1864 -- errors if the file is modified. As with 'readFile', the string
1865 -- representation in the file is assumed to be ISO-8859-1.
1867 -- On systems without mmap, this is the same as a readFile.
1869 mmapFile :: FilePath -> IO ByteString
1870 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1872 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1874 h <- openBinaryFile f ReadMode
1875 l <- fromIntegral `fmap` hFileSize h
1876 -- Don't bother mmaping small files because each mmapped file takes up
1877 -- at least one full VM block.
1879 then do thefp <- mallocByteString l
1880 withForeignPtr thefp $ \p-> hGetBuf h p l
1885 fd <- fromIntegral `fmap` handleToFd h
1887 fp <- if p == nullPtr
1888 then do thefp <- mallocByteString l
1889 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1892 -- The munmap leads to crashes on OpenBSD.
1893 -- maybe there's a use after unmap in there somewhere?
1894 #if !defined(__OpenBSD__)
1895 let unmap = c_munmap p l >> return ()
1897 let unmap = return ()
1899 fp <- FC.newForeignPtr p unmap
1904 where mmap_limit = 16*1024
1907 #if defined(__GLASGOW_HASKELL__)
1909 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1911 getArgs :: IO [ByteString]
1913 alloca $ \ p_argc ->
1914 alloca $ \ p_argv -> do
1915 getProgArgv p_argc p_argv
1916 p <- fromIntegral `fmap` peek p_argc
1918 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1921 -- ---------------------------------------------------------------------
1922 -- Internal utilities
1924 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1925 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1926 -- convenience for ByteString construction.
1927 w2c :: Word8 -> Char
1928 #if !defined(__GLASGOW_HASKELL__)
1929 w2c = chr . fromIntegral
1931 w2c = unsafeChr . fromIntegral
1935 c2w :: Char -> Word8
1936 c2w = fromIntegral . ord
1939 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1940 -- is padded with a null byte.
1941 mallocByteString :: Int -> IO (ForeignPtr Word8)
1942 mallocByteString l = do
1943 fp <- mallocForeignPtrArray (l+1)
1944 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1947 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1948 -- argument gives the final size of the ByteString. Unlike 'generate'
1949 -- the ByteString is not reallocated if the final size is less than the
1950 -- estimated size. Also, unlike 'generate' ByteString's created this way
1951 -- are managed on the Haskell heap.
1952 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1953 create l write_ptr = inlinePerformIO $ do
1954 fp <- mallocByteString (l+1)
1955 withForeignPtr fp $ \p -> write_ptr p
1957 {-# INLINE create #-}
1959 -- | Perform an operation with a temporary ByteString
1960 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1961 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1962 {-# INLINE withPtr #-}
1964 -- Common up near identical calls to `error' to reduce the number
1965 -- constant strings created when compiled:
1966 errorEmptyList :: String -> a
1967 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1968 {-# INLINE errorEmptyList #-}
1970 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1971 -- of the string if no element is found, rather than Nothing.
1972 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1973 STRICT2(findIndexOrEnd)
1976 | f (unsafeHead ps) = 0
1977 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
1978 {-# INLINE findIndexOrEnd #-}
1980 -- Find from the end of the string using predicate
1981 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1982 STRICT2(findFromEndUntil)
1983 findFromEndUntil f ps@(PS x s l) =
1985 else if f (last ps) then l
1986 else findFromEndUntil f (PS x s (l-1))
1988 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1989 -- it exposes lots of things to further inlining
1991 {-# INLINE inlinePerformIO #-}
1992 inlinePerformIO :: IO a -> a
1993 #if defined(__GLASGOW_HASKELL__)
1994 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1996 inlinePerformIO = unsafePerformIO
1999 {-# INLINE newForeignFreePtr #-}
2000 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2001 #if defined(__GLASGOW_HASKELL__)
2002 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2004 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2007 -- ---------------------------------------------------------------------
2009 -- Standard C functions
2012 foreign import ccall unsafe "string.h strlen" c_strlen
2015 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2016 :: CInt -> IO (Ptr Word8)
2018 foreign import ccall unsafe "static stdlib.h free" c_free
2019 :: Ptr Word8 -> IO ()
2021 #if !defined(__GLASGOW_HASKELL__)
2022 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2023 :: FunPtr (Ptr Word8 -> IO ())
2026 foreign import ccall unsafe "string.h memset" memset
2027 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2029 foreign import ccall unsafe "string.h memchr" memchr
2030 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2032 foreign import ccall unsafe "string.h memcmp" memcmp
2033 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2035 foreign import ccall unsafe "string.h memcpy" memcpy
2036 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2038 -- ---------------------------------------------------------------------
2043 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2044 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2046 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2047 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2049 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2050 :: Ptr Word8 -> Int -> Word8
2052 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2053 :: Ptr Word8 -> Int -> Word8
2055 foreign import ccall unsafe "static fpstring.h count" c_count
2056 :: Ptr Word8 -> Int -> Word8 -> Int
2058 -- ---------------------------------------------------------------------
2062 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2063 :: Int -> Int -> IO (Ptr Word8)
2065 foreign import ccall unsafe "static unistd.h close" c_close
2068 # if !defined(__OpenBSD__)
2069 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2070 :: Ptr Word8 -> Int -> IO Int
2074 -- ---------------------------------------------------------------------
2075 -- Internal GHC Haskell magic
2077 #if defined(__GLASGOW_HASKELL__)
2078 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2079 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2081 foreign import ccall unsafe "__hscore_memcpy_src_off"
2082 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())