1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: Word8 -> ByteString -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
74 -- * Reducing 'ByteString's
75 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
81 concat, -- :: [ByteString] -> ByteString
82 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
83 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 maximum, -- :: ByteString -> Word8
86 minimum, -- :: ByteString -> Word8
87 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
89 -- * Generating and unfolding ByteStrings
90 replicate, -- :: Int -> Word8 -> ByteString
91 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
95 -- ** Breaking strings
96 take, -- :: Int -> ByteString -> ByteString
97 drop, -- :: Int -> ByteString -> ByteString
98 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
99 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
100 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
101 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
102 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
103 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
105 -- ** Breaking and dropping on specific bytes
106 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
107 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
108 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
109 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
111 -- ** Breaking into many substrings
112 split, -- :: Word8 -> ByteString -> [ByteString]
113 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
114 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
118 -- ** Joining strings
119 join, -- :: ByteString -> [ByteString] -> ByteString
120 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
122 -- * Indexing ByteStrings
123 index, -- :: ByteString -> Int -> Word8
124 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
125 elemIndices, -- :: Word8 -> ByteString -> [Int]
126 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
127 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
128 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
129 count, -- :: Word8 -> ByteString -> Int
131 -- * Ordered ByteStrings
132 sort, -- :: ByteString -> ByteString
134 -- * Searching ByteStrings
136 -- ** Searching by equality
137 -- | These functions use memchr(3) to efficiently search the ByteString
139 elem, -- :: Word8 -> ByteString -> Bool
140 notElem, -- :: Word8 -> ByteString -> Bool
141 filterByte, -- :: Word8 -> ByteString -> ByteString
142 filterNotByte, -- :: Word8 -> ByteString -> ByteString
144 -- ** Searching with a predicate
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
148 -- ** Prefixes and suffixes
149 -- | These functions use memcmp(3) to efficiently compare substrings
150 isPrefixOf, -- :: ByteString -> ByteString -> Bool
151 isSuffixOf, -- :: ByteString -> ByteString -> Bool
153 -- ** Search for arbitrary substrings
154 isSubstringOf, -- :: ByteString -> ByteString -> Bool
155 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
156 findSubstrings, -- :: ByteString -> ByteString -> [Int]
158 -- * Zipping and unzipping ByteStrings
159 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
160 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
161 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
163 -- * Unchecked access
164 unsafeHead, -- :: ByteString -> Word8
165 unsafeTail, -- :: ByteString -> ByteString
166 unsafeIndex, -- :: ByteString -> Int -> Word8
168 -- * Low level introduction and elimination
169 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
170 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
171 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
172 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
173 skipIndex, -- :: ByteString -> Int
175 -- ** Packing CStrings and pointers
176 packCString, -- :: CString -> ByteString
177 packCStringLen, -- :: CString -> ByteString
178 packMallocCString, -- :: CString -> ByteString
180 #if defined(__GLASGOW_HASKELL__)
181 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
182 packAddress, -- :: Addr# -> ByteString
183 unsafePackAddress, -- :: Int -> Addr# -> ByteString
184 unsafeFinalize, -- :: ByteString -> IO ()
187 -- ** Using ByteStrings as CStrings
188 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
189 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
190 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
192 -- ** Copying ByteStrings
193 -- | These functions perform memcpy(3) operations
194 copy, -- :: ByteString -> ByteString
195 copyCString, -- :: CString -> ByteString
196 copyCStringLen, -- :: CStringLen -> ByteString
198 -- * I\/O with @ByteString@s
200 -- ** Standard input and output
202 #if defined(__GLASGOW_HASKELL__)
203 getLine, -- :: IO ByteString
205 getContents, -- :: IO ByteString
206 putStr, -- :: ByteString -> IO ()
207 putStrLn, -- :: ByteString -> IO ()
210 readFile, -- :: FilePath -> IO ByteString
211 writeFile, -- :: FilePath -> ByteString -> IO ()
212 -- mmapFile, -- :: FilePath -> IO ByteString
214 -- ** I\/O with Handles
215 #if defined(__GLASGOW_HASKELL__)
216 getArgs, -- :: IO [ByteString]
217 hGetLine, -- :: Handle -> IO ByteString
218 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
220 hGetContents, -- :: Handle -> IO ByteString
221 hGet, -- :: Handle -> Int -> IO ByteString
222 hPut, -- :: Handle -> ByteString -> IO ()
224 -- * Fusion utilities
225 #if defined(__GLASGOW_HASKELL__)
226 unpackList, -- eek, otherwise it gets thrown away by the simplifier
229 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
230 loopU, mapEFL, filterEFL, foldEFL, fuseEFL,
235 import qualified Prelude as P
236 import Prelude hiding (reverse,head,tail,last,init,null
237 ,length,map,lines,foldl,foldr,unlines
238 ,concat,any,take,drop,splitAt,takeWhile
239 ,dropWhile,span,break,elem,filter,maximum
240 ,minimum,all,concatMap,foldl1,foldr1
241 ,readFile,writeFile,replicate
242 ,getContents,getLine,putStr,putStrLn
243 ,zip,zipWith,unzip,notElem)
245 import qualified Data.List as List
248 import Data.Word (Word8)
249 import Data.Maybe (listToMaybe)
250 import Data.Array (listArray)
251 import qualified Data.Array as Array ((!))
253 -- Control.Exception.bracket not available in yhc or nhc
254 import Control.Exception (bracket)
255 import Control.Monad (when)
257 import Foreign.C.String (CString, CStringLen)
258 import Foreign.C.Types (CSize, CInt)
259 import Foreign.ForeignPtr
260 import Foreign.Marshal.Array
262 import Foreign.Storable (Storable(..))
264 -- hGetBuf and hPutBuf not available in yhc or nhc
265 import System.IO (stdin,stdout,hClose,hFileSize
266 ,hGetBuf,hPutBuf,openBinaryFile
269 #if !defined(__GLASGOW_HASKELL__)
270 import System.IO.Unsafe
273 #if defined(__GLASGOW_HASKELL__)
275 import Data.Generics (Data(..), Typeable(..))
277 import System.IO (hGetBufNonBlocking)
278 import System.IO.Error (isEOFError)
280 import Foreign.Marshal (alloca)
281 import qualified Foreign.Concurrent as FC (newForeignPtr)
284 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
285 import GHC.Base (build, unsafeChr)
286 import GHC.Word hiding (Word8)
287 import GHC.Ptr (Ptr(..))
288 import GHC.ST (ST(..))
293 -- CFILES stuff is Hugs only
294 {-# CFILES cbits/fpstring.c #-}
296 -- -----------------------------------------------------------------------------
298 -- Useful macros, until we have bang patterns
301 #define STRICT1(f) f a | a `seq` False = undefined
302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
307 -- -----------------------------------------------------------------------------
309 -- | A space-efficient representation of a Word8 vector, supporting many
310 -- efficient operations. A 'ByteString' contains 8-bit characters only.
312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
318 #if defined(__GLASGOW_HASKELL__)
319 deriving (Data, Typeable)
322 instance Eq ByteString
325 instance Ord ByteString
326 where compare = compareBytes
328 instance Show ByteString where
329 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
331 instance Read ByteString where
332 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
335 instance Arbitrary PackedString where
336 arbitrary = P.pack `fmap` arbitrary
337 coarbitrary s = coarbitrary (P.unpack s)
340 -- | /O(n)/ Equality on the 'ByteString' type.
341 eq :: ByteString -> ByteString -> Bool
342 eq a@(PS p s l) b@(PS p' s' l')
343 | l /= l' = False -- short cut on length
344 | p == p' && s == s' = True -- short cut for the same string
345 | otherwise = compareBytes a b == EQ
348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
349 compareBytes :: ByteString -> ByteString -> Ordering
350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
351 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
352 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
353 | otherwise = inlinePerformIO $
354 withForeignPtr x1 $ \p1 ->
355 withForeignPtr x2 $ \p2 -> do
356 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
357 return $ case i `compare` 0 of
358 EQ -> l1 `compare` l2
360 {-# INLINE compareBytes #-}
364 -- About 4x slower over 32M
366 compareBytes :: ByteString -> ByteString -> Ordering
367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
368 withForeignPtr fp1 $ \p1 ->
369 withForeignPtr fp2 $ \p2 ->
370 cmp (p1 `plusPtr` off1)
371 (p2 `plusPtr` off2) 0 len1 len2
373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
375 cmp p1 p2 n len1 len2
376 | n == len1 = if n == len2 then return EQ else return LT
377 | n == len2 = return GT
379 (a :: Word8) <- peekByteOff p1 n
380 (b :: Word8) <- peekByteOff p2 n
381 case a `compare` b of
382 EQ -> cmp p1 p2 (n+1) len1 len2
385 {-# INLINE compareBytes #-}
388 -- -----------------------------------------------------------------------------
389 -- Introducing and eliminating 'ByteString's
391 -- | /O(1)/ The empty 'ByteString'
393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
394 {-# NOINLINE empty #-}
396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
397 packByte :: Word8 -> ByteString
398 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
399 withForeignPtr fp $ \p -> poke p c
401 {-# INLINE packByte #-}
404 -- XXX The unsafePerformIO is critical!
408 -- packByte 255 `compare` packByte 127
412 -- case mallocByteString 2 of
413 -- ForeignPtr f internals ->
414 -- case writeWord8OffAddr# f 0 255 of _ ->
415 -- case writeWord8OffAddr# f 0 127 of _ ->
416 -- case eqAddr# f f of
417 -- False -> case compare (GHC.Prim.plusAddr# f 0)
418 -- (GHC.Prim.plusAddr# f 0)
422 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
424 -- For applications with large numbers of string literals, pack can be a
425 -- bottleneck. In such cases, consider using packAddress (GHC only).
426 pack :: [Word8] -> ByteString
428 #if !defined(__GLASGOW_HASKELL__)
430 pack str = create (P.length str) $ \p -> go p str
433 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
435 #else /* hack away */
437 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
439 go _ _ [] = return ()
440 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
442 writeByte p i c = ST $ \s# ->
443 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
447 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
448 unpack :: ByteString -> [Word8]
450 #if !defined(__GLASGOW_HASKELL__)
452 unpack (PS _ _ 0) = []
453 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
454 go (p `plusPtr` s) (l - 1) []
457 go p 0 acc = peek p >>= \e -> return (e : acc)
458 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
459 {-# INLINE unpack #-}
463 unpack ps = build (unpackFoldr ps)
464 {-# INLINE unpack #-}
466 unpackList :: ByteString -> [Word8]
467 unpackList (PS fp off len) = withPtr fp $ \p -> do
469 loop _ (-1) acc = return acc
472 loop q (n-1) (a : acc)
473 loop (p `plusPtr` off) (len-1) []
476 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
479 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
480 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
482 loop _ (-1) acc = return acc
485 loop q (n-1) (a `f` acc)
486 loop (p `plusPtr` off) (len-1) ch
487 {-# INLINE [0] unpackFoldr #-}
491 ------------------------------------------------------------------------
493 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
494 -- conversion function
495 packWith :: (a -> Word8) -> [a] -> ByteString
496 packWith k str = create (P.length str) $ \p -> go p str
500 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
501 {-# INLINE packWith #-}
502 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
504 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
505 unpackWith :: (Word8 -> a) -> ByteString -> [a]
506 unpackWith _ (PS _ _ 0) = []
507 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
508 go (p `plusPtr` s) (l - 1) []
511 go p 0 acc = peek p >>= \e -> return (k e : acc)
512 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
513 {-# INLINE unpackWith #-}
514 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
516 -- ---------------------------------------------------------------------
519 -- | /O(1)/ Test whether a ByteString is empty.
520 null :: ByteString -> Bool
521 null (PS _ _ l) = l == 0
524 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
525 length :: ByteString -> Int
526 length (PS _ _ l) = l
527 {-# INLINE length #-}
529 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
530 -- complexity, as it requires a memcpy.
531 cons :: Word8 -> ByteString -> ByteString
532 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
533 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
539 -- | /O(n)/ Append a byte to the end of a 'ByteString'
540 snoc :: ByteString -> Word8 -> ByteString
541 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
542 memcpy p (f `plusPtr` s) (fromIntegral l)
543 poke (p `plusPtr` l) c
548 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
549 head :: ByteString -> Word8
551 | null ps = errorEmptyList "head"
552 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
555 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
556 tail :: ByteString -> ByteString
558 | l <= 0 = errorEmptyList "tail"
559 | otherwise = PS p (s+1) (l-1)
562 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
563 last :: ByteString -> Word8
565 | null ps = errorEmptyList "last"
566 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
569 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
570 init :: ByteString -> ByteString
572 | l <= 0 = errorEmptyList "init"
573 | otherwise = PS p s (l-1)
576 -- | /O(n)/ Append two ByteStrings
577 append :: ByteString -> ByteString -> ByteString
578 append xs ys | null xs = ys
580 | otherwise = concat [xs,ys]
581 {-# INLINE append #-}
583 -- ---------------------------------------------------------------------
586 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
587 -- element of @xs@. This function is subject to array fusion.
588 map :: (Word8 -> Word8) -> ByteString -> ByteString
589 map f = loopArr . loopU (mapEFL f) noAL
592 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
593 -- slightly faster for one-shot cases.
594 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
596 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
597 np <- mallocByteString (len+1)
598 withForeignPtr np $ \p -> do
599 map_ 0 (a `plusPtr` s) p
602 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
605 | n >= len = return ()
607 x <- peekByteOff p1 n
608 pokeByteOff p2 n (f x)
612 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
613 reverse :: ByteString -> ByteString
614 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
615 c_reverse p (f `plusPtr` s) (fromIntegral l)
618 reverse = pack . P.reverse . unpack
621 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
622 -- 'ByteString' and \`intersperses\' that byte between the elements of
623 -- the 'ByteString'. It is analogous to the intersperse function on
625 intersperse :: Word8 -> ByteString -> ByteString
626 intersperse c ps@(PS x s l)
628 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
629 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
632 intersperse c = pack . List.intersperse c . unpack
635 -- | The 'transpose' function transposes the rows and columns of its
636 -- 'ByteString' argument.
637 transpose :: [ByteString] -> [ByteString]
638 transpose ps = P.map pack (List.transpose (P.map unpack ps))
640 -- ---------------------------------------------------------------------
641 -- Reducing 'ByteString's
643 -- | 'foldl', applied to a binary operator, a starting value (typically
644 -- the left-identity of the operator), and a ByteString, reduces the
645 -- ByteString using the binary operator, from left to right.
646 -- This function is subject to array fusion.
647 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
648 foldl f z = loopAcc . loopU (foldEFL f) z
653 -- About twice as fast with 6.4.1, but not fuseable
654 -- A simple fold . map is enough to make it worth while.
656 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
657 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
660 lgo z p q | p == q = return z
661 | otherwise = do c <- peek p
662 lgo (f z c) (p `plusPtr` 1) q
665 -- | 'foldr', applied to a binary operator, a starting value
666 -- (typically the right-identity of the operator), and a ByteString,
667 -- reduces the ByteString using the binary operator, from right to left.
668 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
669 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
670 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
673 go p q | p == q = return z
674 | otherwise = do c <- peek p
675 ws <- go (p `plusPtr` 1) q
678 -- | 'foldl1' is a variant of 'foldl' that has no starting value
679 -- argument, and thus must be applied to non-empty 'ByteStrings'.
680 -- This function is subject to array fusion.
681 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
683 | null ps = errorEmptyList "foldl1"
684 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
686 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
687 -- and thus must be applied to non-empty 'ByteString's
688 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
690 | null ps = errorEmptyList "foldr1"
691 | otherwise = foldr f (last ps) (init ps)
693 -- ---------------------------------------------------------------------
696 -- | /O(n)/ Concatenate a list of ByteStrings.
697 concat :: [ByteString] -> ByteString
700 concat xs = create len $ \ptr -> go xs ptr
701 where len = P.sum . P.map length $ xs
704 go (PS p s l:ps) ptr = do
705 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
706 go ps (ptr `plusPtr` l)
708 -- | Map a function over a 'ByteString' and concatenate the results
709 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
710 concatMap f = foldr (append . f) empty
712 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
713 -- any element of the 'ByteString' satisfies the predicate.
714 any :: (Word8 -> Bool) -> ByteString -> Bool
715 any _ (PS _ _ 0) = False
716 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
717 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
720 go p q | p == q = return False
721 | otherwise = do c <- peek p
722 if f c then return True
723 else go (p `plusPtr` 1) q
727 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
728 -- if all elements of the 'ByteString' satisfy the predicate.
729 all :: (Word8 -> Bool) -> ByteString -> Bool
730 all _ (PS _ _ 0) = True
731 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
732 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
735 go p q | p == q = return True -- end of list
736 | otherwise = do c <- peek p
738 then go (p `plusPtr` 1) q
742 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
743 maximum :: ByteString -> Word8
744 maximum xs@(PS x s l)
745 | null xs = errorEmptyList "maximum"
746 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
747 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
748 {-# INLINE maximum #-}
750 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
751 minimum :: ByteString -> Word8
752 minimum xs@(PS x s l)
753 | null xs = errorEmptyList "minimum"
754 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
755 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
756 {-# INLINE minimum #-}
758 -- fusion is too slow here (10x)
761 maximum xs@(PS x s l)
762 | null xs = errorEmptyList "maximum"
763 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
765 maximum_ (p `plusPtr` s) 0 l w
767 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
771 | otherwise = do w <- peekByteOff ptr n
772 maximum_ ptr (n+1) m (if w > c then w else c)
774 minimum xs@(PS x s l)
775 | null xs = errorEmptyList "minimum"
776 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
778 minimum_ (p `plusPtr` s) 0 l w
780 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
784 | otherwise = do w <- peekByteOff ptr n
785 minimum_ ptr (n+1) m (if w < c then w else c)
788 -- | /O(n)/ map Word8 functions, provided with the index at each position
789 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
790 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
791 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
793 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
795 go n f t p | f == p = return ()
796 | otherwise = do w <- peek f
798 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
800 -- ---------------------------------------------------------------------
801 -- Unfolds and replicates
803 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
804 -- the value of every element. The following holds:
806 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
808 -- This implemenation uses @memset(3)@
809 replicate :: Int -> Word8 -> ByteString
810 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
814 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
818 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
821 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
822 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
823 -- the element and returns 'Nothing' if it is done producing the
824 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
825 -- prepending to the ByteString and @b@ is used as the next element in a
828 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
829 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
830 -- final size of the ByteString as an argument. 'cons' can then be
831 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
832 -- linear complexity. The depth of the recursion is limited to this
833 -- size, but may be less. For lazy, infinite unfoldr, use
834 -- 'Data.List.unfoldr' (from 'Data.List').
838 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
840 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
842 -- > unfoldrN n == take n $ List.unfoldr
843 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
844 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
847 go q c n | n == i = return n -- stop if we reach `i'
848 | otherwise = case f c of
852 go (q `plusPtr` 1) new_c (n+1)
854 -- ---------------------------------------------------------------------
857 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
858 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
859 take :: Int -> ByteString -> ByteString
863 | otherwise = PS x s n
866 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
867 -- elements, or @[]@ if @n > 'length' xs@.
868 drop :: Int -> ByteString -> ByteString
872 | otherwise = PS x (s+n) (l-n)
875 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
876 splitAt :: Int -> ByteString -> (ByteString, ByteString)
877 splitAt n ps = (take n ps, drop n ps)
878 {-# INLINE splitAt #-}
880 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
881 -- returns the longest prefix (possibly empty) of @xs@ of elements that
883 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
884 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
885 {-# INLINE takeWhile #-}
887 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
888 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
889 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
890 {-# INLINE dropWhile #-}
892 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
893 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
894 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
897 -- | 'breakByte' breaks its ByteString argument at the first occurence
898 -- of the specified byte. It is more efficient than 'break' as it is
899 -- implemented with @memchr(3)@. I.e.
901 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
903 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
904 breakByte c p = case elemIndex c p of
906 Just n -> (take n p, drop n p)
907 {-# INLINE breakByte #-}
909 -- | 'spanByte' breaks its ByteString argument at the first
910 -- occurence of a byte other than its argument. It is more efficient
913 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
915 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
916 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
920 go p i | i >= l = return (ps, empty)
921 | otherwise = do c' <- peekByteOff p i
923 then return (take i ps, drop i ps)
925 {-# INLINE spanByte #-}
927 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
928 -- occurence of @w@. It behaves like 'break', except the delimiter is
929 -- not returned, and @Nothing@ is returned if the delimiter is not in
930 -- the ByteString. I.e.
932 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
934 -- > breakFirst c xs ==
935 -- > let (x,y) = break (== c) xs
936 -- > in if null y then Nothing else Just (x, drop 1 y))
938 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
939 breakFirst c p = case elemIndex c p of
941 Just n -> Just (take n p, drop (n+1) p)
942 {-# INLINE breakFirst #-}
944 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
947 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
949 -- and the following are equivalent:
951 -- > breakLast 'c' "abcdef"
952 -- > let (x,y) = break (=='c') (reverse "abcdef")
953 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
955 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
956 breakLast c p = case elemIndexLast c p of
958 Just n -> Just (take n p, drop (n+1) p)
959 {-# INLINE breakLast #-}
961 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
962 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
963 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
964 span p ps = break (not . p) ps
967 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
970 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
974 -- > spanEnd (not . isSpace) ps
976 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
978 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
979 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
981 -- | /O(n)/ Splits a 'ByteString' into components delimited by
982 -- separators, where the predicate returns True for a separator element.
983 -- The resulting components do not contain the separators. Two adjacent
984 -- separators result in an empty component in the output. eg.
986 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
987 -- > splitWith (=='a') [] == []
989 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
991 #if defined(__GLASGOW_HASKELL__)
992 splitWith _pred (PS _ _ 0) = []
993 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
994 where pred# c# = pred_ (W8# c#)
997 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
998 splitLoop pred' p 0 off' len' fp'
1000 splitLoop :: (Word# -> Bool)
1002 -> Int -> Int -> Int
1006 splitLoop pred' p idx' off' len' fp'
1007 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1008 | idx' >= len' = return [PS fp' off' idx']
1010 w <- peekElemOff p (off'+idx')
1011 if pred' (case w of W8# w# -> w#)
1012 then return (PS fp' off' idx' :
1013 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1014 else splitLoop pred' p (idx'+1) off' len' fp'
1015 {-# INLINE splitWith #-}
1018 splitWith _ (PS _ _ 0) = []
1019 splitWith p ps = splitWith' p ps
1022 splitWith' q qs = if null rest then [chunk]
1023 else chunk : splitWith' q (unsafeTail rest)
1024 where (chunk,rest) = break q qs
1027 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1028 -- argument, consuming the delimiter. I.e.
1030 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1031 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1032 -- > split 'x' "x" == ["",""]
1036 -- > join [c] . split c == id
1037 -- > split == splitWith . (==)
1039 -- As for all splitting functions in this library, this function does
1040 -- not copy the substrings, it just constructs new 'ByteStrings' that
1041 -- are slices of the original.
1043 split :: Word8 -> ByteString -> [ByteString]
1044 split _ (PS _ _ 0) = []
1045 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1046 let ptr = p `plusPtr` s
1050 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1052 then [PS x (s+n) (l-n)]
1053 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1056 {-# INLINE split #-}
1059 -- slower. but stays inside Haskell.
1060 split _ (PS _ _ 0) = []
1061 split (W8# w#) (PS fp off len) = splitWith' off len fp
1063 splitWith' off' len' fp' = withPtr fp $ \p ->
1064 splitLoop p 0 off' len' fp'
1066 splitLoop :: Ptr Word8
1067 -> Int -> Int -> Int
1072 splitLoop p idx' off' len' fp'
1073 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1074 | idx' >= len' = return [PS fp' off' idx']
1076 (W8# x#) <- peekElemOff p (off'+idx')
1077 if word2Int# w# ==# word2Int# x#
1078 then return (PS fp' off' idx' :
1079 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1080 else splitLoop p (idx'+1) off' len' fp'
1083 -- | Like 'splitWith', except that sequences of adjacent separators are
1084 -- treated as a single separator. eg.
1086 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1088 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1089 tokens f = P.filter (not.null) . splitWith f
1091 -- | The 'group' function takes a ByteString and returns a list of
1092 -- ByteStrings such that the concatenation of the result is equal to the
1093 -- argument. Moreover, each sublist in the result contains only equal
1094 -- elements. For example,
1096 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1098 -- It is a special case of 'groupBy', which allows the programmer to
1099 -- supply their own equality test. It is about 40% faster than
1101 group :: ByteString -> [ByteString]
1104 | otherwise = ys : group zs
1106 (ys, zs) = spanByte (unsafeHead xs) xs
1108 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1109 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1112 | otherwise = take n xs : groupBy k (drop n xs)
1114 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1116 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1117 -- 'ByteString's and concatenates the list after interspersing the first
1118 -- argument between each element of the list.
1119 join :: ByteString -> [ByteString] -> ByteString
1120 join filler pss = concat (splice pss)
1124 splice (x:y:xs) = x:filler:splice (y:xs)
1127 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1128 -- with a char. Around 4 times faster than the generalised join.
1130 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1131 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1132 withForeignPtr ffp $ \fp ->
1133 withForeignPtr fgp $ \gp -> do
1134 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1135 poke (ptr `plusPtr` l) c
1136 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1138 len = length f + length g + 1
1139 {-# INLINE joinWithByte #-}
1141 -- ---------------------------------------------------------------------
1142 -- Indexing ByteStrings
1144 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1145 index :: ByteString -> Int -> Word8
1147 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1148 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1149 ++ ", length = " ++ show (length ps)
1150 | otherwise = ps `unsafeIndex` n
1151 {-# INLINE index #-}
1153 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1154 -- element in the given 'ByteString' which is equal to the query
1155 -- element, or 'Nothing' if there is no such element.
1156 -- This implementation uses memchr(3).
1157 elemIndex :: Word8 -> ByteString -> Maybe Int
1158 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1159 let p' = p `plusPtr` s
1160 q = memchr p' c (fromIntegral l)
1161 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1162 {-# INLINE elemIndex #-}
1164 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1165 -- element in the given 'ByteString' which is equal to the query
1166 -- element, or 'Nothing' if there is no such element. The following
1169 -- > elemIndexLast c xs ==
1170 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1172 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1173 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1174 go (p `plusPtr` s) (l-1)
1177 go p i | i < 0 = return Nothing
1178 | otherwise = do ch' <- peekByteOff p i
1180 then return $ Just i
1182 {-# INLINE elemIndexLast #-}
1184 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1185 -- the indices of all elements equal to the query element, in ascending order.
1186 -- This implementation uses memchr(3).
1187 elemIndices :: Word8 -> ByteString -> [Int]
1188 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1189 let ptr = p `plusPtr` s
1192 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1195 else let i = q `minusPtr` ptr
1201 elemIndices :: Word8 -> ByteString -> [Int]
1202 elemIndices c ps = loop 0 ps
1204 loop _ ps' | null ps' = []
1205 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1206 | otherwise = loop (n+1) (unsafeTail ps')
1209 -- | count returns the number of times its argument appears in the ByteString
1211 -- > count = length . elemIndices
1213 -- But more efficiently than using length on the intermediate list.
1214 count :: Word8 -> ByteString -> Int
1215 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1216 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1217 {-# INLINE count #-}
1221 -- around 30% slower
1223 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1224 go (p `plusPtr` s) (fromIntegral m) 0
1226 go :: Ptr Word8 -> CSize -> Int -> IO Int
1229 let q = memchr p w l
1232 else do let k = fromIntegral $ q `minusPtr` p
1233 go (q `plusPtr` 1) (l-k-1) (i+1)
1236 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1237 -- returns the index of the first element in the ByteString
1238 -- satisfying the predicate.
1239 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1240 findIndex k ps@(PS x s l)
1242 | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1245 go ptr n | n >= l = return Nothing
1246 | otherwise = do w <- peek ptr
1248 then return (Just n)
1249 else go (ptr `plusPtr` 1) (n+1)
1250 {-# INLINE findIndex #-}
1252 -- | The 'findIndices' function extends 'findIndex', by returning the
1253 -- indices of all elements satisfying the predicate, in ascending order.
1254 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1255 findIndices p ps = loop 0 ps
1258 loop n qs | null qs = []
1259 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1260 | otherwise = loop (n+1) (unsafeTail qs)
1262 -- ---------------------------------------------------------------------
1263 -- Searching ByteStrings
1265 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1266 elem :: Word8 -> ByteString -> Bool
1267 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1270 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1271 notElem :: Word8 -> ByteString -> Bool
1272 notElem c ps = not (elem c ps)
1273 {-# INLINE notElem #-}
1275 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1276 -- returns a ByteString containing those characters that satisfy the
1277 -- predicate. This function is subject to array fusion.
1278 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1279 filter p = loopArr . loopU (filterEFL p) noAL
1280 {-# INLINE filter #-}
1282 -- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
1283 -- around 2x faster for some one-shot applications.
1284 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1285 filterF k ps@(PS x s l)
1287 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1288 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1289 return (t `minusPtr` p) -- actual length
1292 go f t end | f == end = return t
1296 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1297 else go (f `plusPtr` 1) t end
1298 {-# INLINE filterF #-}
1301 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1302 -- case of filtering a single byte. It is more efficient to use
1303 -- /filterByte/ in this case.
1305 -- > filterByte == filter . (==)
1307 -- filterByte is around 10x faster, and uses much less space, than its
1308 -- filter equivalent
1309 filterByte :: Word8 -> ByteString -> ByteString
1310 filterByte w ps = replicate (count w ps) w
1311 {-# INLINE filterByte #-}
1314 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1315 -- case of filtering a single byte out of a list. It is more efficient
1316 -- to use /filterNotByte/ in this case.
1318 -- > filterNotByte == filter . (/=)
1320 -- filterNotByte is around 2x faster than its filter equivalent.
1321 filterNotByte :: Word8 -> ByteString -> ByteString
1322 filterNotByte w = filterF (/= w)
1323 {-# INLINE filterNotByte #-}
1325 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1326 -- and returns the first element in matching the predicate, or 'Nothing'
1327 -- if there is no such element.
1329 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1331 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1332 find f p = case findIndex f p of
1333 Just n -> Just (p `unsafeIndex` n)
1339 -- fuseable, but we don't want to walk the whole array.
1341 find k = foldl findEFL Nothing
1342 where findEFL a@(Just _) _ = a
1343 findEFL _ c | k c = Just c
1344 | otherwise = Nothing
1347 -- ---------------------------------------------------------------------
1348 -- Searching for substrings
1350 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1351 -- iff the first is a prefix of the second.
1352 isPrefixOf :: ByteString -> ByteString -> Bool
1353 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1356 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1357 withForeignPtr x2 $ \p2 -> do
1358 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1361 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1362 -- iff the first is a suffix of the second.
1364 -- The following holds:
1366 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1368 -- However, the real implemenation uses memcmp to compare the end of the
1369 -- string only, with no reverse required..
1370 isSuffixOf :: ByteString -> ByteString -> Bool
1371 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1374 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1375 withForeignPtr x2 $ \p2 -> do
1376 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1379 -- | Check whether one string is a substring of another. @isSubstringOf
1380 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1381 isSubstringOf :: ByteString -- ^ String to search for.
1382 -> ByteString -- ^ String to search in.
1384 isSubstringOf p s = not $ P.null $ findSubstrings p s
1386 -- | Get the first index of a substring in another string,
1387 -- or 'Nothing' if the string is not found.
1388 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1389 findSubstring :: ByteString -- ^ String to search for.
1390 -> ByteString -- ^ String to seach in.
1392 findSubstring = (listToMaybe .) . findSubstrings
1394 -- | Find the indexes of all (possibly overlapping) occurances of a
1395 -- substring in a string. This function uses the Knuth-Morris-Pratt
1396 -- string matching algorithm.
1397 findSubstrings :: ByteString -- ^ String to search for.
1398 -> ByteString -- ^ String to seach in.
1401 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1403 patc x = pat `unsafeIndex` x
1404 strc x = str `unsafeIndex` x
1406 -- maybe we should make kmpNext a UArray before using it in search?
1407 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1408 kmpNextL p _ | null p = []
1409 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1411 x = if not (null ps) && unsafeHead ps == patc j'
1412 then kmpNext Array.! j' else j'
1414 search i j = match ++ rest -- i: position in string, j: position in pattern
1415 where match = if j == m then [(i - j)] else []
1416 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1417 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1420 -- ---------------------------------------------------------------------
1423 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1424 -- corresponding pairs of bytes. If one input ByteString is short,
1425 -- excess elements of the longer ByteString are discarded. This is
1426 -- equivalent to a pair of 'unpack' operations.
1427 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1429 | null ps || null qs = []
1430 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1432 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1433 -- the first argument, instead of a tupling function. For example,
1434 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1435 -- corresponding sums.
1436 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1438 | null ps || null qs = []
1439 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1441 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1442 -- ByteStrings. Note that this performs two 'pack' operations.
1443 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1444 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1445 {-# INLINE unzip #-}
1447 -- ---------------------------------------------------------------------
1450 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1451 inits :: ByteString -> [ByteString]
1452 inits (PS x s l) = [PS x s n | n <- [0..l]]
1454 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1455 tails :: ByteString -> [ByteString]
1456 tails p | null p = [empty]
1457 | otherwise = p : tails (unsafeTail p)
1459 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1461 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1462 elems :: ByteString -> [ByteString]
1463 elems (PS _ _ 0) = []
1464 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1465 {-# INLINE elems #-}
1467 -- ---------------------------------------------------------------------
1468 -- ** Ordered 'ByteString's
1470 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1471 sort :: ByteString -> ByteString
1472 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1474 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1475 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1478 go 256 _ = return ()
1479 go i ptr = do n <- peekElemOff arr i
1480 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1481 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1484 -- "countEach counts str l" counts the number of occurences of each Word8 in
1485 -- str, and stores the result in counts.
1486 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1488 countEach counts str l = go 0
1491 go i | i == l = return ()
1492 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1493 x <- peekElemOff counts k
1494 pokeElemOff counts k (x + 1)
1498 sort :: ByteString -> ByteString
1499 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1500 memcpy p (f `plusPtr` s) l
1501 c_qsort p l -- inplace
1505 sort = pack . List.sort . unpack
1508 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1510 -- Try some linear sorts: radix, counting
1513 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1514 -- sortBy f ps = undefined
1516 -- ---------------------------------------------------------------------
1518 -- Extensions to the basic interface
1521 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1522 -- check for the empty case, so there is an obligation on the programmer
1523 -- to provide a proof that the ByteString is non-empty.
1524 unsafeHead :: ByteString -> Word8
1525 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1526 {-# INLINE unsafeHead #-}
1528 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1529 -- check for the empty case. As with 'unsafeHead', the programmer must
1530 -- provide a separate proof that the ByteString is non-empty.
1531 unsafeTail :: ByteString -> ByteString
1532 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1533 {-# INLINE unsafeTail #-}
1535 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1536 -- This omits the bounds check, which means there is an accompanying
1537 -- obligation on the programmer to ensure the bounds are checked in some
1539 unsafeIndex :: ByteString -> Int -> Word8
1540 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1541 {-# INLINE unsafeIndex #-}
1543 -- ---------------------------------------------------------------------
1544 -- Low level constructors
1546 #if defined(__GLASGOW_HASKELL__)
1547 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1548 -- Addr\# (an arbitrary machine address assumed to point outside the
1549 -- garbage-collected heap) into a @ByteString@. A much faster way to
1550 -- create an Addr\# is with an unboxed string literal, than to pack a
1551 -- boxed string. A unboxed string literal is compiled to a static @char
1552 -- []@ by GHC. Establishing the length of the string requires a call to
1553 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1554 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1555 -- if you know the length of the string statically.
1559 -- > literalFS = packAddress "literal"#
1561 packAddress :: Addr# -> ByteString
1562 packAddress addr# = inlinePerformIO $ do
1563 p <- newForeignPtr_ cstr
1564 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1567 {-# INLINE packAddress #-}
1569 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1570 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1571 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1572 -- 'Addr\#' to the string, and the length of the string. Make sure the
1573 -- length is correct, otherwise use the safer 'packAddress' (where the
1574 -- length will be calculated once at runtime).
1575 unsafePackAddress :: Int -> Addr# -> ByteString
1576 unsafePackAddress len addr# = inlinePerformIO $ do
1577 p <- newForeignPtr_ cstr
1579 where cstr = Ptr addr#
1583 -- | /O(1)/ Build a ByteString from a ForeignPtr
1584 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1585 fromForeignPtr fp l = PS fp 0 l
1587 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1588 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1589 toForeignPtr (PS ps s l) = (ps, s, l)
1591 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1592 -- current 'ByteString' from any larger string it was created from, as
1594 skipIndex :: ByteString -> Int
1595 skipIndex (PS _ s _) = s
1596 {-# INLINE skipIndex #-}
1598 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1599 -- finalizer associated to it. The ByteString length is calculated using
1600 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1601 packCString :: CString -> ByteString
1602 packCString cstr = inlinePerformIO $ do
1603 fp <- newForeignPtr_ (castPtr cstr)
1604 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1606 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1607 -- have /no/ finalizer associated with it. This operation has /O(1)/
1608 -- complexity as we already know the final size, so no /strlen(3)/ is
1610 packCStringLen :: CStringLen -> ByteString
1611 packCStringLen (ptr,len) = inlinePerformIO $ do
1612 fp <- newForeignPtr_ (castPtr ptr)
1613 return $ PS fp 0 (fromIntegral len)
1615 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1616 -- have a @free(3)@ finalizer associated to it.
1617 packMallocCString :: CString -> ByteString
1618 packMallocCString cstr = inlinePerformIO $ do
1619 fp <- newForeignFreePtr (castPtr cstr)
1620 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1622 #if defined(__GLASGOW_HASKELL__)
1623 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1624 -- length, and an IO action representing a finalizer. This function is
1625 -- not available on Hugs.
1627 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1628 packCStringFinalizer p l f = do
1629 fp <- FC.newForeignPtr p f
1632 -- | Explicitly run the finaliser associated with a 'ByteString'.
1633 -- Further references to this value may generate invalid memory
1634 -- references. This operation is unsafe, as there may be other
1635 -- 'ByteStrings' referring to the same underlying pages. If you use
1636 -- this, you need to have a proof of some kind that all 'ByteString's
1637 -- ever generated from the underlying byte array are no longer live.
1638 unsafeFinalize :: ByteString -> IO ()
1639 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1643 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1644 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1645 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1646 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1648 alloc = withForeignPtr ps $ \p -> do
1649 buf <- c_malloc (fromIntegral l+1)
1650 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1651 poke (buf `plusPtr` l) (0::Word8)
1652 return $ castPtr buf
1654 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1655 -- Warning: modifying the @CString@ will affect the @ByteString@.
1656 -- Why is this function unsafe? It relies on the null byte at the end of
1657 -- the ByteString to be there. This is /not/ the case if your ByteString
1658 -- has been spliced from a larger string (i.e. with take or drop).
1659 -- Unless you can guarantee the null byte, you should use the safe
1660 -- version, which will copy the string first.
1662 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1663 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1665 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1666 -- This is mainly useful to allow the rest of the data pointed
1667 -- to by the 'ByteString' to be garbage collected, for example
1668 -- if a large string has been read in, and only a small part of it
1669 -- is needed in the rest of the program.
1670 copy :: ByteString -> ByteString
1671 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1672 memcpy p (f `plusPtr` s) (fromIntegral l)
1674 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1675 -- CString is going to be deallocated from C land.
1676 copyCString :: CString -> ByteString
1677 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1679 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1680 copyCStringLen :: CStringLen -> ByteString
1681 copyCStringLen (cstr, len) = inlinePerformIO $ do
1682 fp <- mallocForeignPtrArray (len+1)
1683 withForeignPtr fp $ \p -> do
1684 memcpy p (castPtr cstr) (fromIntegral len)
1685 poke (p `plusPtr` len) (0 :: Word8)
1686 return $! PS fp 0 len
1688 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1689 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1690 -- This is analogous to unsafeUseAsCString, and comes with the same
1691 -- safety requirements.
1693 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1694 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1696 -- | Given the maximum size needed and a function to make the contents
1697 -- of a ByteString, generate makes the 'ByteString'. The generating
1698 -- function is required to return the actual final size (<= the maximum
1699 -- size), and the resulting byte array is realloced to this size. The
1700 -- string is padded at the end with a null byte.
1702 -- generate is the main mechanism for creating custom, efficient
1703 -- ByteString functions, using Haskell or C functions to fill the space.
1705 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1707 p <- mallocArray (i+1)
1709 p' <- reallocArray p (i'+1)
1710 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1711 fp <- newForeignFreePtr p'
1714 -- ---------------------------------------------------------------------
1717 #if defined(__GLASGOW_HASKELL__)
1719 -- | getLine, read a line from stdin.
1720 getLine :: IO ByteString
1721 getLine = hGetLine stdin
1723 -- | hGetLine. read a ByteString from a handle
1724 hGetLine :: Handle -> IO ByteString
1725 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1726 case haBufferMode handle_ of
1727 NoBuffering -> error "no buffering"
1728 _other -> hGetLineBuffered handle_
1731 hGetLineBuffered handle_ = do
1732 let ref = haBuffer handle_
1733 buf <- readIORef ref
1734 hGetLineBufferedLoop handle_ ref buf 0 []
1736 hGetLineBufferedLoop handle_ ref
1737 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1739 off <- findEOL r w raw
1740 let new_len = len + off - r
1741 xs <- mkPS raw r off
1743 -- if eol == True, then off is the offset of the '\n'
1744 -- otherwise off == w and the buffer is now empty.
1746 then do if (w == off + 1)
1747 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1748 else writeIORef ref buf{ bufRPtr = off + 1 }
1749 mkBigPS new_len (xs:xss)
1751 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1752 buf{ bufWPtr=0, bufRPtr=0 }
1754 -- Nothing indicates we caught an EOF, and we may have a
1755 -- partial line to return.
1757 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1759 then mkBigPS new_len (xs:xss)
1762 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1764 -- find the end-of-line character, if there is one
1768 (c,r') <- readCharFromBuffer raw r
1770 then return r -- NB. not r': don't include the '\n'
1771 else findEOL r' w raw
1773 maybeFillReadBuffer fd is_line is_stream buf = catch
1774 (do buf' <- fillReadBuffer fd is_line is_stream buf
1776 (\e -> if isEOFError e then return Nothing else ioError e)
1778 -- TODO, rewrite to use normal memcpy
1779 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1780 mkPS buf start end = do
1781 let len = end - start
1782 fp <- mallocByteString len
1783 withForeignPtr fp $ \p -> do
1784 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1785 return (PS fp 0 len)
1787 mkBigPS :: Int -> [ByteString] -> IO ByteString
1788 mkBigPS _ [ps] = return ps
1789 mkBigPS _ pss = return $! concat (P.reverse pss)
1793 -- ---------------------------------------------------------------------
1796 -- | Outputs a 'ByteString' to the specified 'Handle'.
1797 hPut :: Handle -> ByteString -> IO ()
1798 hPut _ (PS _ _ 0) = return ()
1799 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1800 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1802 -- | Write a ByteString to stdout
1803 putStr :: ByteString -> IO ()
1804 putStr = hPut stdout
1806 -- | Write a ByteString to stdout, appending a newline byte
1807 putStrLn :: ByteString -> IO ()
1808 putStrLn ps = hPut stdout ps >> hPut stdout nl
1809 where nl = packByte 0x0a
1811 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1812 -- is far more efficient than reading the characters into a 'String'
1813 -- and then using 'pack'.
1814 hGet :: Handle -> Int -> IO ByteString
1815 hGet _ 0 = return empty
1816 hGet h i = do fp <- mallocByteString i
1817 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1820 #if defined(__GLASGOW_HASKELL__)
1821 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1822 -- waiting for data to become available, instead it returns only whatever data
1824 hGetNonBlocking :: Handle -> Int -> IO ByteString
1825 hGetNonBlocking _ 0 = return empty
1826 hGetNonBlocking h i = do
1827 fp <- mallocByteString i
1828 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1832 -- | Read entire handle contents into a 'ByteString'.
1834 -- As with 'hGet', the string representation in the file is assumed to
1837 hGetContents :: Handle -> IO ByteString
1839 let start_size = 1024
1840 p <- mallocArray start_size
1841 i <- hGetBuf h p start_size
1843 then do p' <- reallocArray p i
1844 fp <- newForeignFreePtr p'
1850 p' <- reallocArray p s'
1851 i <- hGetBuf h (p' `plusPtr` s) s
1853 then do let i' = s + i
1854 p'' <- reallocArray p' i'
1855 fp <- newForeignFreePtr p''
1859 -- | getContents. Equivalent to hGetContents stdin
1860 getContents :: IO ByteString
1861 getContents = hGetContents stdin
1863 -- | Read an entire file directly into a 'ByteString'. This is far more
1864 -- efficient than reading the characters into a 'String' and then using
1865 -- 'pack'. It also may be more efficient than opening the file and
1866 -- reading it using hGet.
1867 readFile :: FilePath -> IO ByteString
1869 h <- openBinaryFile f ReadMode
1871 s <- hGet h $ fromIntegral l
1875 -- | Write a 'ByteString' to a file.
1876 writeFile :: FilePath -> ByteString -> IO ()
1878 h <- openBinaryFile f WriteMode
1884 -- Disable until we can move it into a portable .hsc file
1887 -- | Like readFile, this reads an entire file directly into a
1888 -- 'ByteString', but it is even more efficient. It involves directly
1889 -- mapping the file to memory. This has the advantage that the contents
1890 -- of the file never need to be copied. Also, under memory pressure the
1891 -- page may simply be discarded, while in the case of readFile it would
1892 -- need to be written to swap. If you read many small files, mmapFile
1893 -- will be less memory-efficient than readFile, since each mmapFile
1894 -- takes up a separate page of memory. Also, you can run into bus
1895 -- errors if the file is modified. As with 'readFile', the string
1896 -- representation in the file is assumed to be ISO-8859-1.
1898 -- On systems without mmap, this is the same as a readFile.
1900 mmapFile :: FilePath -> IO ByteString
1901 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1903 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1905 h <- openBinaryFile f ReadMode
1906 l <- fromIntegral `fmap` hFileSize h
1907 -- Don't bother mmaping small files because each mmapped file takes up
1908 -- at least one full VM block.
1910 then do thefp <- mallocByteString l
1911 withForeignPtr thefp $ \p-> hGetBuf h p l
1916 fd <- fromIntegral `fmap` handleToFd h
1918 fp <- if p == nullPtr
1919 then do thefp <- mallocByteString l
1920 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1923 -- The munmap leads to crashes on OpenBSD.
1924 -- maybe there's a use after unmap in there somewhere?
1925 #if !defined(__OpenBSD__)
1926 let unmap = c_munmap p l >> return ()
1928 let unmap = return ()
1930 fp <- FC.newForeignPtr p unmap
1935 where mmap_limit = 16*1024
1938 #if defined(__GLASGOW_HASKELL__)
1940 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1942 getArgs :: IO [ByteString]
1944 alloca $ \ p_argc ->
1945 alloca $ \ p_argv -> do
1946 getProgArgv p_argc p_argv
1947 p <- fromIntegral `fmap` peek p_argc
1949 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1952 -- ---------------------------------------------------------------------
1953 -- Internal utilities
1955 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1956 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1957 -- convenience for ByteString construction.
1958 w2c :: Word8 -> Char
1959 #if !defined(__GLASGOW_HASKELL__)
1960 w2c = chr . fromIntegral
1962 w2c = unsafeChr . fromIntegral
1966 c2w :: Char -> Word8
1967 c2w = fromIntegral . ord
1970 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1971 -- is padded with a null byte.
1972 mallocByteString :: Int -> IO (ForeignPtr Word8)
1973 mallocByteString l = do
1974 fp <- mallocForeignPtrArray (l+1)
1975 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1978 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1979 -- argument gives the final size of the ByteString. Unlike 'generate'
1980 -- the ByteString is not reallocated if the final size is less than the
1981 -- estimated size. Also, unlike 'generate' ByteString's created this way
1982 -- are managed on the Haskell heap.
1983 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1984 create l write_ptr = inlinePerformIO $ do
1985 fp <- mallocByteString (l+1)
1986 withForeignPtr fp $ \p -> write_ptr p
1988 {-# INLINE create #-}
1990 -- | Perform an operation with a temporary ByteString
1991 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1992 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1993 {-# INLINE withPtr #-}
1995 -- Common up near identical calls to `error' to reduce the number
1996 -- constant strings created when compiled:
1997 errorEmptyList :: String -> a
1998 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1999 {-# INLINE errorEmptyList #-}
2001 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2002 -- of the string if no element is found, rather than Nothing.
2003 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2004 STRICT2(findIndexOrEnd)
2007 | f (unsafeHead ps) = 0
2008 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
2009 {-# INLINE findIndexOrEnd #-}
2011 -- Find from the end of the string using predicate
2012 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2013 STRICT2(findFromEndUntil)
2014 findFromEndUntil f ps@(PS x s l) =
2016 else if f (last ps) then l
2017 else findFromEndUntil f (PS x s (l-1))
2019 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2020 -- it exposes lots of things to further inlining
2022 {-# INLINE inlinePerformIO #-}
2023 inlinePerformIO :: IO a -> a
2024 #if defined(__GLASGOW_HASKELL__)
2025 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2027 inlinePerformIO = unsafePerformIO
2030 {-# INLINE newForeignFreePtr #-}
2031 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2032 #if defined(__GLASGOW_HASKELL__)
2033 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2035 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2038 -- ---------------------------------------------------------------------
2040 -- Standard C functions
2043 foreign import ccall unsafe "string.h strlen" c_strlen
2046 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2047 :: CInt -> IO (Ptr Word8)
2049 foreign import ccall unsafe "static stdlib.h free" c_free
2050 :: Ptr Word8 -> IO ()
2052 #if !defined(__GLASGOW_HASKELL__)
2053 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2054 :: FunPtr (Ptr Word8 -> IO ())
2057 foreign import ccall unsafe "string.h memset" memset
2058 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2060 foreign import ccall unsafe "string.h memchr" memchr
2061 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2063 foreign import ccall unsafe "string.h memcmp" memcmp
2064 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO Int
2066 foreign import ccall unsafe "string.h memcpy" memcpy
2067 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2069 -- ---------------------------------------------------------------------
2074 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2075 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2077 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2078 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2080 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2081 :: Ptr Word8 -> CInt -> Word8
2083 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2084 :: Ptr Word8 -> CInt -> Word8
2086 foreign import ccall unsafe "static fpstring.h count" c_count
2087 :: Ptr Word8 -> CInt -> Word8 -> Int
2089 -- ---------------------------------------------------------------------
2093 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2094 :: Int -> Int -> IO (Ptr Word8)
2096 foreign import ccall unsafe "static unistd.h close" c_close
2099 # if !defined(__OpenBSD__)
2100 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2101 :: Ptr Word8 -> Int -> IO Int
2105 -- ---------------------------------------------------------------------
2106 -- Internal GHC Haskell magic
2108 #if defined(__GLASGOW_HASKELL__)
2109 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2110 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2112 foreign import ccall unsafe "__hscore_memcpy_src_off"
2113 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2116 -- ---------------------------------------------------------------------
2118 -- Functional array fusion for ByteStrings.
2120 -- From the Data Parallel Haskell project,
2121 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2124 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2125 -- the fact that no bottoms of this type are ever constructed; hence, we can
2126 -- assume @(_ :: NoAL) `seq` x = x@.
2130 -- | Special forms of loop arguments
2132 -- * These are common special cases for the three function arguments of gen
2133 -- and loop; we give them special names to make it easier to trigger RULES
2134 -- applying in the special cases represented by these arguments. The
2135 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2136 -- two simplifier phases.
2138 -- * In the case where the accumulator is not needed, it is better to always
2139 -- explicitly return a value `()', rather than just copy the input to the
2140 -- output, as the former gives GHC better local information.
2143 -- | Element function expressing a mapping only
2144 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2145 mapEFL f = \_ e -> (noAL, (Just $ f e))
2146 {-# INLINE [1] mapEFL #-}
2148 -- | Element function implementing a filter function only
2149 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2150 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2151 {-# INLINE [1] filterEFL #-}
2153 -- |Element function expressing a reduction only
2154 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2155 foldEFL f = \a e -> (f a e, Nothing)
2156 {-# INLINE [1] foldEFL #-}
2161 {-# INLINE [1] noAL #-}
2163 -- | Projection functions that are fusion friendly (as in, we determine when
2164 -- they are inlined)
2165 loopArr :: (ByteString, acc) -> ByteString
2166 loopArr (arr, _) = arr
2167 {-# INLINE [1] loopArr #-}
2169 loopAcc :: (ByteString, acc) -> acc
2170 loopAcc (_, acc) = acc
2171 {-# INLINE [1] loopAcc #-}
2173 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2174 loopSndAcc (arr, (_, acc)) = (arr, acc)
2175 {-# INLINE [1] loopSndAcc #-}
2177 ------------------------------------------------------------------------
2179 -- | Iteration over over ByteStrings
2180 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2181 -> acc -- ^ initial acc value
2182 -> ByteString -- ^ input ByteString
2183 -> (ByteString, acc)
2185 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2186 fp <- mallocByteString i
2187 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2188 (acc, i') <- go (a `plusPtr` s) p start
2190 then return (fp,i,acc) -- no realloc for map
2191 else do fp_ <- mallocByteString (i'+1) -- realloc
2192 withForeignPtr fp_ $ \p' -> do
2193 memcpy p' p (fromIntegral i') -- can't avoid this, right?
2194 poke (p' `plusPtr` i') (0::Word8)
2197 return (PS ptr 0 n, acc)
2202 trans a_off ma_off acc
2203 | a_off >= i = return (acc, ma_off)
2205 x <- peekByteOff p a_off
2206 let (acc', oe) = f acc x
2207 ma_off' <- case oe of
2208 Nothing -> return ma_off
2209 Just e -> do pokeByteOff ma ma_off e
2211 trans (a_off+1) ma_off' acc'
2213 {-# INLINE [1] loopU #-}
2217 -- |Fuse to flat loop functions
2218 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2219 -> (a2 -> Word8 -> (a2, Maybe Word8))
2222 -> ((a1, a2), Maybe Word8)
2223 fuseEFL f g (acc1, acc2) e1 =
2225 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2228 (acc2', res) -> ((acc1', acc2'), res)
2232 "Array fusion!" forall em1 em2 start1 start2 arr.
2233 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2234 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2236 "loopArr/loopSndAcc" forall x.
2237 loopArr (loopSndAcc x) = loopArr x
2239 "seq/NoAL" forall (u::NoAL) e.