1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: Word8 -> ByteString -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
74 -- * Reducing 'ByteString's
75 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
81 concat, -- :: [ByteString] -> ByteString
82 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
83 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 maximum, -- :: ByteString -> Word8
86 minimum, -- :: ByteString -> Word8
87 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
89 -- * Generating and unfolding ByteStrings
90 replicate, -- :: Int -> Word8 -> ByteString
91 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
95 -- ** Breaking strings
96 take, -- :: Int -> ByteString -> ByteString
97 drop, -- :: Int -> ByteString -> ByteString
98 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
99 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
100 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
101 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
102 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
103 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
105 -- ** Breaking and dropping on specific bytes
106 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
107 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
108 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
109 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
111 -- ** Breaking into many substrings
112 split, -- :: Word8 -> ByteString -> [ByteString]
113 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
114 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
118 -- ** Joining strings
119 join, -- :: ByteString -> [ByteString] -> ByteString
120 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
122 -- * Indexing ByteStrings
123 index, -- :: ByteString -> Int -> Word8
124 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
125 elemIndices, -- :: Word8 -> ByteString -> [Int]
126 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
127 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
128 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
129 count, -- :: Word8 -> ByteString -> Int
131 -- * Ordered ByteStrings
132 sort, -- :: ByteString -> ByteString
134 -- * Searching ByteStrings
136 -- ** Searching by equality
137 -- | These functions use memchr(3) to efficiently search the ByteString
139 elem, -- :: Word8 -> ByteString -> Bool
140 notElem, -- :: Word8 -> ByteString -> Bool
141 filterByte, -- :: Word8 -> ByteString -> ByteString
142 filterNotByte, -- :: Word8 -> ByteString -> ByteString
144 -- ** Searching with a predicate
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
148 -- ** Prefixes and suffixes
149 -- | These functions use memcmp(3) to efficiently compare substrings
150 isPrefixOf, -- :: ByteString -> ByteString -> Bool
151 isSuffixOf, -- :: ByteString -> ByteString -> Bool
153 -- ** Search for arbitrary substrings
154 isSubstringOf, -- :: ByteString -> ByteString -> Bool
155 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
156 findSubstrings, -- :: ByteString -> ByteString -> [Int]
158 -- * Zipping and unzipping ByteStrings
159 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
160 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
161 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
163 -- * Unchecked access
164 unsafeHead, -- :: ByteString -> Word8
165 unsafeTail, -- :: ByteString -> ByteString
166 unsafeIndex, -- :: ByteString -> Int -> Word8
168 -- * Low level introduction and elimination
169 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
170 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
171 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
172 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
173 skipIndex, -- :: ByteString -> Int
175 -- ** Packing CStrings and pointers
176 packCString, -- :: CString -> ByteString
177 packCStringLen, -- :: CString -> ByteString
178 packMallocCString, -- :: CString -> ByteString
180 #if defined(__GLASGOW_HASKELL__)
181 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
182 packAddress, -- :: Addr# -> ByteString
183 unsafePackAddress, -- :: Int -> Addr# -> ByteString
184 unsafeFinalize, -- :: ByteString -> IO ()
187 -- ** Using ByteStrings as CStrings
188 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
189 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
190 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
192 -- ** Copying ByteStrings
193 -- | These functions perform memcpy(3) operations
194 copy, -- :: ByteString -> ByteString
195 copyCString, -- :: CString -> ByteString
196 copyCStringLen, -- :: CStringLen -> ByteString
198 -- * I\/O with @ByteString@s
200 -- ** Standard input and output
202 #if defined(__GLASGOW_HASKELL__)
203 getLine, -- :: IO ByteString
205 getContents, -- :: IO ByteString
206 putStr, -- :: ByteString -> IO ()
207 putStrLn, -- :: ByteString -> IO ()
210 readFile, -- :: FilePath -> IO ByteString
211 writeFile, -- :: FilePath -> ByteString -> IO ()
212 -- mmapFile, -- :: FilePath -> IO ByteString
214 -- ** I\/O with Handles
215 #if defined(__GLASGOW_HASKELL__)
216 getArgs, -- :: IO [ByteString]
217 hGetLine, -- :: Handle -> IO ByteString
218 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
220 hGetContents, -- :: Handle -> IO ByteString
221 hGet, -- :: Handle -> Int -> IO ByteString
222 hPut, -- :: Handle -> ByteString -> IO ()
224 -- * Fusion utilities
225 #if defined(__GLASGOW_HASKELL__)
226 unpackList, -- eek, otherwise it gets thrown away by the simplifier
229 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
230 loopU, mapEFL, filterEFL, foldEFL,
235 import qualified Prelude as P
236 import Prelude hiding (reverse,head,tail,last,init,null
237 ,length,map,lines,foldl,foldr,unlines
238 ,concat,any,take,drop,splitAt,takeWhile
239 ,dropWhile,span,break,elem,filter,maximum
240 ,minimum,all,concatMap,foldl1,foldr1
241 ,readFile,writeFile,replicate
242 ,getContents,getLine,putStr,putStrLn
243 ,zip,zipWith,unzip,notElem)
245 import qualified Data.List as List
248 import Data.Word (Word8)
249 import Data.Maybe (listToMaybe)
250 import Data.Array (listArray)
251 import qualified Data.Array as Array ((!))
253 -- Control.Exception.bracket not available in yhc or nhc
254 import Control.Exception (bracket)
255 import Control.Monad (when)
257 import Foreign.C.String (CString, CStringLen)
258 import Foreign.C.Types (CSize, CInt)
259 import Foreign.ForeignPtr
260 import Foreign.Marshal.Array
262 import Foreign.Storable (Storable(..))
264 -- hGetBuf and hPutBuf not available in yhc or nhc
265 import System.IO (stdin,stdout,hClose,hFileSize
266 ,hGetBuf,hPutBuf,openBinaryFile
269 #if !defined(__GLASGOW_HASKELL__)
270 import System.IO.Unsafe
273 #if defined(__GLASGOW_HASKELL__)
275 import Data.Generics (Data(..), Typeable(..))
277 import System.IO (hGetBufNonBlocking)
278 import System.IO.Error (isEOFError)
280 import Foreign.Marshal (alloca)
281 import qualified Foreign.Concurrent as FC (newForeignPtr)
284 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
285 import GHC.Base (build, unsafeChr)
286 import GHC.Word hiding (Word8)
287 import GHC.Ptr (Ptr(..))
288 import GHC.ST (ST(..))
293 -- CFILES stuff is Hugs only
294 {-# CFILES cbits/fpstring.c #-}
296 -- -----------------------------------------------------------------------------
298 -- Useful macros, until we have bang patterns
301 #define STRICT1(f) f a | a `seq` False = undefined
302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
307 -- -----------------------------------------------------------------------------
309 -- | A space-efficient representation of a Word8 vector, supporting many
310 -- efficient operations. A 'ByteString' contains 8-bit characters only.
312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
318 #if defined(__GLASGOW_HASKELL__)
319 deriving (Data, Typeable)
322 instance Eq ByteString
325 instance Ord ByteString
326 where compare = compareBytes
328 instance Show ByteString where
329 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
331 instance Read ByteString where
332 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
335 instance Arbitrary PackedString where
336 arbitrary = P.pack `fmap` arbitrary
337 coarbitrary s = coarbitrary (P.unpack s)
340 -- | /O(n)/ Equality on the 'ByteString' type.
341 eq :: ByteString -> ByteString -> Bool
342 eq a@(PS p s l) b@(PS p' s' l')
343 | l /= l' = False -- short cut on length
344 | p == p' && s == s' = True -- short cut for the same string
345 | otherwise = compareBytes a b == EQ
348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
349 compareBytes :: ByteString -> ByteString -> Ordering
350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
351 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
352 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
353 | otherwise = inlinePerformIO $
354 withForeignPtr x1 $ \p1 ->
355 withForeignPtr x2 $ \p2 -> do
356 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
357 return $ case i `compare` 0 of
358 EQ -> l1 `compare` l2
360 {-# INLINE compareBytes #-}
364 -- About 4x slower over 32M
366 compareBytes :: ByteString -> ByteString -> Ordering
367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
368 withForeignPtr fp1 $ \p1 ->
369 withForeignPtr fp2 $ \p2 ->
370 cmp (p1 `plusPtr` off1)
371 (p2 `plusPtr` off2) 0 len1 len2
373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
375 cmp p1 p2 n len1 len2
376 | n == len1 = if n == len2 then return EQ else return LT
377 | n == len2 = return GT
379 (a :: Word8) <- peekByteOff p1 n
380 (b :: Word8) <- peekByteOff p2 n
381 case a `compare` b of
382 EQ -> cmp p1 p2 (n+1) len1 len2
385 {-# INLINE compareBytes #-}
388 -- -----------------------------------------------------------------------------
389 -- Introducing and eliminating 'ByteString's
391 -- | /O(1)/ The empty 'ByteString'
393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
394 {-# NOINLINE empty #-}
396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
397 packByte :: Word8 -> ByteString
398 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
399 withForeignPtr fp $ \p -> poke p c
401 {-# NOINLINE packByte #-}
403 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
405 -- For applications with large numbers of string literals, pack can be a
406 -- bottleneck. In such cases, consider using packAddress (GHC only).
407 pack :: [Word8] -> ByteString
409 #if !defined(__GLASGOW_HASKELL__)
411 pack str = create (P.length str) $ \p -> go p str
414 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
416 #else /* hack away */
418 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
420 go _ _ [] = return ()
421 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
423 writeByte p i c = ST $ \s# ->
424 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
428 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
429 unpack :: ByteString -> [Word8]
431 #if !defined(__GLASGOW_HASKELL__)
433 unpack (PS _ _ 0) = []
434 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
435 go (p `plusPtr` s) (l - 1) []
438 go p 0 acc = peek p >>= \e -> return (e : acc)
439 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
440 {-# INLINE unpack #-}
444 unpack ps = build (unpackFoldr ps)
445 {-# INLINE unpack #-}
447 unpackList :: ByteString -> [Word8]
448 unpackList (PS fp off len) = withPtr fp $ \p -> do
450 loop _ (-1) acc = return acc
453 loop q (n-1) (a : acc)
454 loop (p `plusPtr` off) (len-1) []
457 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
460 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
461 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
463 loop _ (-1) acc = return acc
466 loop q (n-1) (a `f` acc)
467 loop (p `plusPtr` off) (len-1) ch
468 {-# INLINE [0] unpackFoldr #-}
472 ------------------------------------------------------------------------
474 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
475 -- conversion function
476 packWith :: (a -> Word8) -> [a] -> ByteString
477 packWith k str = create (P.length str) $ \p -> go p str
481 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
482 {-# INLINE packWith #-}
483 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
485 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
486 unpackWith :: (Word8 -> a) -> ByteString -> [a]
487 unpackWith _ (PS _ _ 0) = []
488 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
489 go (p `plusPtr` s) (l - 1) []
492 go p 0 acc = peek p >>= \e -> return (k e : acc)
493 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
494 {-# INLINE unpackWith #-}
495 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
497 -- ---------------------------------------------------------------------
500 -- | /O(1)/ Test whether a ByteString is empty.
501 null :: ByteString -> Bool
502 null (PS _ _ l) = l == 0
505 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
506 length :: ByteString -> Int
507 length (PS _ _ l) = l
508 {-# INLINE length #-}
510 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
511 -- complexity, as it requires a memcpy.
512 cons :: Word8 -> ByteString -> ByteString
513 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
514 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
520 -- | /O(n)/ Append a byte to the end of a 'ByteString'
521 snoc :: ByteString -> Word8 -> ByteString
522 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
523 memcpy p (f `plusPtr` s) l
524 poke (p `plusPtr` l) c
529 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
530 head :: ByteString -> Word8
532 | null ps = errorEmptyList "head"
533 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
536 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
537 tail :: ByteString -> ByteString
539 | l <= 0 = errorEmptyList "tail"
540 | otherwise = PS p (s+1) (l-1)
543 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
544 last :: ByteString -> Word8
546 | null ps = errorEmptyList "last"
547 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
550 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
551 init :: ByteString -> ByteString
553 | l <= 0 = errorEmptyList "init"
554 | otherwise = PS p s (l-1)
557 -- | /O(n)/ Append two ByteStrings
558 append :: ByteString -> ByteString -> ByteString
559 append xs ys | null xs = ys
561 | otherwise = concat [xs,ys]
562 {-# INLINE append #-}
566 -- About 30% faster, but allocating in a big chunk isn't good for memory use
568 append :: ByteString -> ByteString -> ByteString
569 append xs@(PS ffp s l) ys@(PS fgp t m)
572 | otherwise = create len $ \ptr ->
573 withForeignPtr ffp $ \fp ->
574 withForeignPtr fgp $ \gp -> do
575 memcpy ptr (fp `plusPtr` s) l
576 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
577 where len = length xs + length ys
580 -- ---------------------------------------------------------------------
583 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
584 -- element of @xs@. This function is subject to array fusion.
585 map :: (Word8 -> Word8) -> ByteString -> ByteString
586 map f = loopArr . loopU (mapEFL f) noAL
589 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
590 -- slightly faster for one-shot cases.
591 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
593 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
594 np <- mallocByteString (len+1)
595 withForeignPtr np $ \p -> do
596 map_ 0 (a `plusPtr` s) p
599 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
602 | n >= len = return ()
604 x <- peekByteOff p1 n
605 pokeByteOff p2 n (f x)
609 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
610 reverse :: ByteString -> ByteString
611 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
612 c_reverse p (f `plusPtr` s) l
615 reverse = pack . P.reverse . unpack
618 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
619 -- 'ByteString' and \`intersperses\' that byte between the elements of
620 -- the 'ByteString'. It is analogous to the intersperse function on
622 intersperse :: Word8 -> ByteString -> ByteString
623 intersperse c ps@(PS x s l)
625 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
626 c_intersperse p (f `plusPtr` s) l c
629 intersperse c = pack . List.intersperse c . unpack
632 -- | The 'transpose' function transposes the rows and columns of its
633 -- 'ByteString' argument.
634 transpose :: [ByteString] -> [ByteString]
635 transpose ps = P.map pack (List.transpose (P.map unpack ps))
637 -- ---------------------------------------------------------------------
638 -- Reducing 'ByteString's
640 -- | 'foldl', applied to a binary operator, a starting value (typically
641 -- the left-identity of the operator), and a ByteString, reduces the
642 -- ByteString using the binary operator, from left to right.
643 -- This function is subject to array fusion.
644 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
645 foldl f z = loopAcc . loopU (foldEFL f) z
650 -- About twice as fast with 6.4.1, but not fuseable
651 -- A simple fold . map is enough to make it worth while.
653 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
654 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
657 lgo z p q | p == q = return z
658 | otherwise = do c <- peek p
659 lgo (f z c) (p `plusPtr` 1) q
662 -- | 'foldr', applied to a binary operator, a starting value
663 -- (typically the right-identity of the operator), and a ByteString,
664 -- reduces the ByteString using the binary operator, from right to left.
665 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
666 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
667 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
670 go p q | p == q = return z
671 | otherwise = do c <- peek p
672 ws <- go (p `plusPtr` 1) q
675 -- | 'foldl1' is a variant of 'foldl' that has no starting value
676 -- argument, and thus must be applied to non-empty 'ByteStrings'.
677 -- This function is subject to array fusion.
678 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
680 | null ps = errorEmptyList "foldl1"
681 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
683 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
684 -- and thus must be applied to non-empty 'ByteString's
685 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
687 | null ps = errorEmptyList "foldr1"
688 | otherwise = foldr f (last ps) (init ps)
690 -- ---------------------------------------------------------------------
693 -- | /O(n)/ Concatenate a list of ByteStrings.
694 concat :: [ByteString] -> ByteString
697 concat xs = inlinePerformIO $ do
698 let start_size = 1024
699 p <- mallocArray start_size
702 where f ptr len _ [] = do
703 ptr' <- reallocArray ptr (len+1)
704 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
705 fp <- newForeignFreePtr ptr'
708 f ptr len to_go pss@(PS p s l:pss')
709 | l <= to_go = do withForeignPtr p $ \pf ->
710 memcpy (ptr `plusPtr` len)
712 f ptr (len + l) (to_go - l) pss'
714 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
715 ptr' <- reallocArray ptr new_total
716 f ptr' len (new_total - len) pss
718 -- | Map a function over a 'ByteString' and concatenate the results
719 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
720 concatMap f = foldr (append . f) empty
722 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
723 -- any element of the 'ByteString' satisfies the predicate.
724 any :: (Word8 -> Bool) -> ByteString -> Bool
725 any _ (PS _ _ 0) = False
726 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
727 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
730 go p q | p == q = return False
731 | otherwise = do c <- peek p
732 if f c then return True
733 else go (p `plusPtr` 1) q
737 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
738 -- if all elements of the 'ByteString' satisfy the predicate.
739 all :: (Word8 -> Bool) -> ByteString -> Bool
740 all _ (PS _ _ 0) = True
741 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
742 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
745 go p q | p == q = return True -- end of list
746 | otherwise = do c <- peek p
748 then go (p `plusPtr` 1) q
752 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
753 maximum :: ByteString -> Word8
754 maximum xs@(PS x s l)
755 | null xs = errorEmptyList "maximum"
756 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
757 return $ c_maximum (p `plusPtr` s) l
758 {-# INLINE maximum #-}
760 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
761 minimum :: ByteString -> Word8
762 minimum xs@(PS x s l)
763 | null xs = errorEmptyList "minimum"
764 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
765 return $ c_minimum (p `plusPtr` s) l
766 {-# INLINE minimum #-}
768 -- fusion is too slow here (10x)
771 maximum xs@(PS x s l)
772 | null xs = errorEmptyList "maximum"
773 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
775 maximum_ (p `plusPtr` s) 0 l w
777 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
781 | otherwise = do w <- peekByteOff ptr n
782 maximum_ ptr (n+1) m (if w > c then w else c)
784 minimum xs@(PS x s l)
785 | null xs = errorEmptyList "minimum"
786 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
788 minimum_ (p `plusPtr` s) 0 l w
790 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
794 | otherwise = do w <- peekByteOff ptr n
795 minimum_ ptr (n+1) m (if w < c then w else c)
798 -- | /O(n)/ map Word8 functions, provided with the index at each position
799 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
800 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
801 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
803 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
805 go n f t p | f == p = return ()
806 | otherwise = do w <- peek f
808 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
810 -- ---------------------------------------------------------------------
811 -- Unfolds and replicates
813 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
814 -- the value of every element. The following holds:
816 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
818 -- This implemenation uses @memset(3)@
819 replicate :: Int -> Word8 -> ByteString
820 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
824 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
828 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
831 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
832 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
833 -- the element and returns 'Nothing' if it is done producing the
834 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
835 -- prepending to the ByteString and @b@ is used as the next element in a
838 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
839 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
840 -- final size of the ByteString as an argument. 'cons' can then be
841 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
842 -- linear complexity. The depth of the recursion is limited to this
843 -- size, but may be less. For lazy, infinite unfoldr, use
844 -- 'Data.List.unfoldr' (from 'Data.List').
848 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
850 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
852 -- > unfoldrN n == take n $ List.unfoldr
853 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
854 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
857 go q c n | n == i = return n -- stop if we reach `i'
858 | otherwise = case f c of
862 go (q `plusPtr` 1) new_c (n+1)
864 -- ---------------------------------------------------------------------
867 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
868 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
869 take :: Int -> ByteString -> ByteString
873 | otherwise = PS x s n
876 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
877 -- elements, or @[]@ if @n > 'length' xs@.
878 drop :: Int -> ByteString -> ByteString
882 | otherwise = PS x (s+n) (l-n)
885 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
886 splitAt :: Int -> ByteString -> (ByteString, ByteString)
887 splitAt n ps = (take n ps, drop n ps)
888 {-# INLINE splitAt #-}
890 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
891 -- returns the longest prefix (possibly empty) of @xs@ of elements that
893 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
894 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
895 {-# INLINE takeWhile #-}
897 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
898 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
899 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
900 {-# INLINE dropWhile #-}
902 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
903 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
904 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
907 -- | 'breakByte' breaks its ByteString argument at the first occurence
908 -- of the specified byte. It is more efficient than 'break' as it is
909 -- implemented with @memchr(3)@. I.e.
911 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
913 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
914 breakByte c p = case elemIndex c p of
916 Just n -> (take n p, drop n p)
917 {-# INLINE breakByte #-}
919 -- | 'spanByte' breaks its ByteString argument at the first
920 -- occurence of a byte other than its argument. It is more efficient
923 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
925 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
926 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
930 go p i | i >= l = return (ps, empty)
931 | otherwise = do c' <- peekByteOff p i
933 then return (take i ps, drop i ps)
935 {-# INLINE spanByte #-}
937 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
938 -- occurence of @w@. It behaves like 'break', except the delimiter is
939 -- not returned, and @Nothing@ is returned if the delimiter is not in
940 -- the ByteString. I.e.
942 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
944 -- > breakFirst c xs ==
945 -- > let (x,y) = break (== c) xs
946 -- > in if null y then Nothing else Just (x, drop 1 y))
948 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
949 breakFirst c p = case elemIndex c p of
951 Just n -> Just (take n p, drop (n+1) p)
952 {-# INLINE breakFirst #-}
954 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
957 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
959 -- and the following are equivalent:
961 -- > breakLast 'c' "abcdef"
962 -- > let (x,y) = break (=='c') (reverse "abcdef")
963 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
965 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
966 breakLast c p = case elemIndexLast c p of
968 Just n -> Just (take n p, drop (n+1) p)
969 {-# INLINE breakLast #-}
971 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
972 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
973 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
974 span p ps = break (not . p) ps
977 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
980 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
984 -- > spanEnd (not . isSpace) ps
986 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
988 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
989 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
991 -- | /O(n)/ Splits a 'ByteString' into components delimited by
992 -- separators, where the predicate returns True for a separator element.
993 -- The resulting components do not contain the separators. Two adjacent
994 -- separators result in an empty component in the output. eg.
996 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
997 -- > splitWith (=='a') [] == []
999 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1001 #if defined(__GLASGOW_HASKELL__)
1002 splitWith _pred (PS _ _ 0) = []
1003 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
1004 where pred# c# = pred_ (W8# c#)
1006 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
1007 splitLoop pred' p 0 off' len' fp'
1009 splitLoop :: (Word# -> Bool)
1011 -> Int -> Int -> Int
1015 splitLoop pred' p idx' off' len' fp'
1016 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1017 | idx' >= len' = return [PS fp' off' idx']
1019 w <- peekElemOff p (off'+idx')
1020 if pred' (case w of W8# w# -> w#)
1021 then return (PS fp' off' idx' :
1022 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
1023 else splitLoop pred' p (idx'+1) off' len' fp'
1024 {-# INLINE splitWith #-}
1027 splitWith _ (PS _ _ 0) = []
1028 splitWith p ps = splitWith' p ps
1031 splitWith' q qs = if null rest then [chunk]
1032 else chunk : splitWith' q (unsafeTail rest)
1033 where (chunk,rest) = break q qs
1036 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1037 -- argument, consuming the delimiter. I.e.
1039 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1040 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1041 -- > split 'x' "x" == ["",""]
1045 -- > join [c] . split c == id
1046 -- > split == splitWith . (==)
1048 -- As for all splitting functions in this library, this function does
1049 -- not copy the substrings, it just constructs new 'ByteStrings' that
1050 -- are slices of the original.
1052 split :: Word8 -> ByteString -> [ByteString]
1053 split _ (PS _ _ 0) = []
1054 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1055 let ptr = p `plusPtr` s
1059 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1061 then return [PS x (s+n) (l-n)]
1062 else do let i = q `minusPtr` ptr
1064 return $! PS x (s+n) (i-n) : ls
1066 {-# INLINE split #-}
1069 -- slower. but stays inside Haskell.
1070 split _ (PS _ _ 0) = []
1071 split (W8# w#) (PS fp off len) = splitWith' off len fp
1073 splitWith' off' len' fp' = withPtr fp $ \p ->
1074 splitLoop p 0 off' len' fp'
1076 splitLoop :: Ptr Word8
1077 -> Int -> Int -> Int
1082 splitLoop p idx' off' len' fp'
1083 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1084 | idx' >= len' = return [PS fp' off' idx']
1086 (W8# x#) <- peekElemOff p (off'+idx')
1087 if word2Int# w# ==# word2Int# x#
1088 then return (PS fp' off' idx' :
1089 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1090 else splitLoop p (idx'+1) off' len' fp'
1093 -- | Like 'splitWith', except that sequences of adjacent separators are
1094 -- treated as a single separator. eg.
1096 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1098 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1099 tokens f = P.filter (not.null) . splitWith f
1101 -- | The 'group' function takes a ByteString and returns a list of
1102 -- ByteStrings such that the concatenation of the result is equal to the
1103 -- argument. Moreover, each sublist in the result contains only equal
1104 -- elements. For example,
1106 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1108 -- It is a special case of 'groupBy', which allows the programmer to
1109 -- supply their own equality test. It is about 40% faster than
1111 group :: ByteString -> [ByteString]
1114 | otherwise = ys : group zs
1116 (ys, zs) = spanByte (unsafeHead xs) xs
1118 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1119 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1122 | otherwise = take n xs : groupBy k (drop n xs)
1124 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1126 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1127 -- 'ByteString's and concatenates the list after interspersing the first
1128 -- argument between each element of the list.
1129 join :: ByteString -> [ByteString] -> ByteString
1130 join filler pss = concat (splice pss)
1134 splice (x:y:xs) = x:filler:splice (y:xs)
1137 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1138 -- with a char. Around 4 times faster than the generalised join.
1140 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1141 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1142 withForeignPtr ffp $ \fp ->
1143 withForeignPtr fgp $ \gp -> do
1144 memcpy ptr (fp `plusPtr` s) l
1145 poke (ptr `plusPtr` l) c
1146 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1148 len = length f + length g + 1
1149 {-# INLINE joinWithByte #-}
1151 -- ---------------------------------------------------------------------
1152 -- Indexing ByteStrings
1154 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1155 index :: ByteString -> Int -> Word8
1157 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1158 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1159 ++ ", length = " ++ show (length ps)
1160 | otherwise = ps `unsafeIndex` n
1161 {-# INLINE index #-}
1163 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1164 -- element in the given 'ByteString' which is equal to the query
1165 -- element, or 'Nothing' if there is no such element.
1166 -- This implementation uses memchr(3).
1167 elemIndex :: Word8 -> ByteString -> Maybe Int
1168 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1169 let p' = p `plusPtr` s
1170 q = memchr p' c (fromIntegral l)
1171 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1172 {-# INLINE elemIndex #-}
1174 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1175 -- element in the given 'ByteString' which is equal to the query
1176 -- element, or 'Nothing' if there is no such element. The following
1179 -- > elemIndexLast c xs ==
1180 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1182 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1183 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1184 go (p `plusPtr` s) (l-1)
1187 go p i | i < 0 = return Nothing
1188 | otherwise = do ch' <- peekByteOff p i
1190 then return $ Just i
1192 {-# INLINE elemIndexLast #-}
1194 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1195 -- the indices of all elements equal to the query element, in ascending order.
1196 -- This implementation uses memchr(3).
1197 elemIndices :: Word8 -> ByteString -> [Int]
1198 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1199 let ptr = p `plusPtr` s
1203 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1206 else do let i = q `minusPtr` ptr
1213 elemIndices :: Word8 -> ByteString -> [Int]
1214 elemIndices c ps = loop 0 ps
1216 loop _ ps' | null ps' = []
1217 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1218 | otherwise = loop (n+1) (unsafeTail ps')
1221 -- | count returns the number of times its argument appears in the ByteString
1223 -- > count = length . elemIndices
1225 -- But more efficiently than using length on the intermediate list.
1226 count :: Word8 -> ByteString -> Int
1227 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1228 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1229 {-# INLINE count #-}
1233 -- around 30% slower
1235 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1236 go (p `plusPtr` s) (fromIntegral m) 0
1238 go :: Ptr Word8 -> CSize -> Int -> IO Int
1241 let q = memchr p w l
1244 else do let k = fromIntegral $ q `minusPtr` p
1245 go (q `plusPtr` 1) (l-k-1) (i+1)
1248 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1249 -- returns the index of the first element in the ByteString
1250 -- satisfying the predicate.
1251 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1252 findIndex = (listToMaybe .) . findIndices
1254 -- | The 'findIndices' function extends 'findIndex', by returning the
1255 -- indices of all elements satisfying the predicate, in ascending order.
1256 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1257 findIndices p ps = loop 0 ps
1260 loop _ qs | null qs = []
1261 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1262 | otherwise = loop (n+1) (unsafeTail qs)
1264 -- ---------------------------------------------------------------------
1265 -- Searching ByteStrings
1267 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1268 elem :: Word8 -> ByteString -> Bool
1269 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1272 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1273 notElem :: Word8 -> ByteString -> Bool
1274 notElem c ps = not (elem c ps)
1275 {-# INLINE notElem #-}
1278 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1279 -- case of filtering a single byte. It is more efficient to use
1280 -- /filterByte/ in this case.
1282 -- > filterByte == filter . (==)
1284 -- filterByte is around 10x faster, and uses much less space, than its
1285 -- filter equivalent
1286 filterByte :: Word8 -> ByteString -> ByteString
1287 filterByte w ps = replicate (count w ps) w
1290 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1291 -- case of filtering a single byte out of a list. It is more efficient
1292 -- to use /filterNotByte/ in this case.
1294 -- > filterNotByte == filter . (/=)
1296 -- filterNotByte is around 3x faster, and uses much less space, than its
1297 -- filter equivalent
1298 filterNotByte :: Word8 -> ByteString -> ByteString
1299 filterNotByte ch ps@(PS x s l)
1301 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1302 t <- go (f `plusPtr` s) p l
1303 return (t `minusPtr` p) -- actual length
1307 go f t e = do w <- peek f
1309 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1310 else go (f `plusPtr` 1) t (e-1)
1312 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1313 -- returns a ByteString containing those characters that satisfy the
1314 -- predicate. This function is subject to array fusion.
1315 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1316 filter p = loopArr . loopU (filterEFL p) noAL
1317 {-# INLINE filter #-}
1319 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1320 filterF k ps@(PS x s l)
1322 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1323 t <- go (f `plusPtr` s) p l
1324 return (t `minusPtr` p) -- actual length
1328 go f t e = do w <- peek f
1330 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1331 else go (f `plusPtr` 1) t (e - 1)
1332 {-# INLINE filterF #-}
1334 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1336 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1337 -- and returns the first element in matching the predicate, or 'Nothing'
1338 -- if there is no such element.
1339 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1340 find p ps = case filter p ps of
1341 q | null q -> Nothing
1342 | otherwise -> Just (unsafeHead q)
1344 -- ---------------------------------------------------------------------
1345 -- Searching for substrings
1347 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1348 -- iff the first is a prefix of the second.
1349 isPrefixOf :: ByteString -> ByteString -> Bool
1350 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1353 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1354 withForeignPtr x2 $ \p2 -> do
1355 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1358 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1359 -- iff the first is a suffix of the second.
1361 -- The following holds:
1363 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1365 -- However, the real implemenation uses memcmp to compare the end of the
1366 -- string only, with no reverse required..
1367 isSuffixOf :: ByteString -> ByteString -> Bool
1368 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1371 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1372 withForeignPtr x2 $ \p2 -> do
1373 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1376 -- | Check whether one string is a substring of another. @isSubstringOf
1377 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1378 isSubstringOf :: ByteString -- ^ String to search for.
1379 -> ByteString -- ^ String to search in.
1381 isSubstringOf p s = not $ P.null $ findSubstrings p s
1383 -- | Get the first index of a substring in another string,
1384 -- or 'Nothing' if the string is not found.
1385 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1386 findSubstring :: ByteString -- ^ String to search for.
1387 -> ByteString -- ^ String to seach in.
1389 findSubstring = (listToMaybe .) . findSubstrings
1391 -- | Find the indexes of all (possibly overlapping) occurances of a
1392 -- substring in a string. This function uses the Knuth-Morris-Pratt
1393 -- string matching algorithm.
1394 findSubstrings :: ByteString -- ^ String to search for.
1395 -> ByteString -- ^ String to seach in.
1398 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1400 patc x = pat `unsafeIndex` x
1401 strc x = str `unsafeIndex` x
1403 -- maybe we should make kmpNext a UArray before using it in search?
1404 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1405 kmpNextL p _ | null p = []
1406 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1408 x = if not (null ps) && unsafeHead ps == patc j'
1409 then kmpNext Array.! j' else j'
1411 search i j = match ++ rest -- i: position in string, j: position in pattern
1412 where match = if j == m then [(i - j)] else []
1413 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1414 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1417 -- ---------------------------------------------------------------------
1420 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1421 -- corresponding pairs of bytes. If one input ByteString is short,
1422 -- excess elements of the longer ByteString are discarded. This is
1423 -- equivalent to a pair of 'unpack' operations.
1424 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1426 | null ps || null qs = []
1427 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1429 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1430 -- the first argument, instead of a tupling function. For example,
1431 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1432 -- corresponding sums.
1433 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1435 | null ps || null qs = []
1436 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1438 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1439 -- ByteStrings. Note that this performs two 'pack' operations.
1440 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1441 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1442 {-# INLINE unzip #-}
1444 -- ---------------------------------------------------------------------
1447 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1448 inits :: ByteString -> [ByteString]
1449 inits (PS x s l) = [PS x s n | n <- [0..l]]
1451 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1452 tails :: ByteString -> [ByteString]
1453 tails p | null p = [empty]
1454 | otherwise = p : tails (unsafeTail p)
1456 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1458 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1459 elems :: ByteString -> [ByteString]
1460 elems (PS _ _ 0) = []
1461 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1462 {-# INLINE elems #-}
1464 -- ---------------------------------------------------------------------
1465 -- ** Ordered 'ByteString's
1467 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1468 sort :: ByteString -> ByteString
1469 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1471 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1472 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1475 go 256 _ = return ()
1476 go i ptr = do n <- peekElemOff arr i
1477 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1478 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1481 -- "countEach counts str l" counts the number of occurences of each Word8 in
1482 -- str, and stores the result in counts.
1483 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1485 countEach counts str l = go 0
1488 go i | i == l = return ()
1489 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1490 x <- peekElemOff counts k
1491 pokeElemOff counts k (x + 1)
1495 sort :: ByteString -> ByteString
1496 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1497 memcpy p (f `plusPtr` s) l
1498 c_qsort p l -- inplace
1502 sort = pack . List.sort . unpack
1505 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1507 -- Try some linear sorts: radix, counting
1510 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1511 -- sortBy f ps = undefined
1513 -- ---------------------------------------------------------------------
1515 -- Extensions to the basic interface
1518 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1519 -- check for the empty case, so there is an obligation on the programmer
1520 -- to provide a proof that the ByteString is non-empty.
1521 unsafeHead :: ByteString -> Word8
1522 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1523 {-# INLINE unsafeHead #-}
1525 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1526 -- check for the empty case. As with 'unsafeHead', the programmer must
1527 -- provide a separate proof that the ByteString is non-empty.
1528 unsafeTail :: ByteString -> ByteString
1529 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1530 {-# INLINE unsafeTail #-}
1532 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1533 -- This omits the bounds check, which means there is an accompanying
1534 -- obligation on the programmer to ensure the bounds are checked in some
1536 unsafeIndex :: ByteString -> Int -> Word8
1537 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1538 {-# INLINE unsafeIndex #-}
1540 -- ---------------------------------------------------------------------
1541 -- Low level constructors
1543 #if defined(__GLASGOW_HASKELL__)
1544 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1545 -- Addr\# (an arbitrary machine address assumed to point outside the
1546 -- garbage-collected heap) into a @ByteString@. A much faster way to
1547 -- create an Addr\# is with an unboxed string literal, than to pack a
1548 -- boxed string. A unboxed string literal is compiled to a static @char
1549 -- []@ by GHC. Establishing the length of the string requires a call to
1550 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1551 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1552 -- if you know the length of the string statically.
1556 -- > literalFS = packAddress "literal"#
1558 packAddress :: Addr# -> ByteString
1559 packAddress addr# = inlinePerformIO $ do
1560 p <- newForeignPtr_ cstr
1561 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1564 {-# INLINE packAddress #-}
1566 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1567 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1568 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1569 -- 'Addr\#' to the string, and the length of the string. Make sure the
1570 -- length is correct, otherwise use the safer 'packAddress' (where the
1571 -- length will be calculated once at runtime).
1572 unsafePackAddress :: Int -> Addr# -> ByteString
1573 unsafePackAddress len addr# = inlinePerformIO $ do
1574 p <- newForeignPtr_ cstr
1576 where cstr = Ptr addr#
1580 -- | /O(1)/ Build a ByteString from a ForeignPtr
1581 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1582 fromForeignPtr fp l = PS fp 0 l
1584 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1585 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1586 toForeignPtr (PS ps s l) = (ps, s, l)
1588 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1589 -- current 'ByteString' from any larger string it was created from, as
1591 skipIndex :: ByteString -> Int
1592 skipIndex (PS _ s _) = s
1593 {-# INLINE skipIndex #-}
1595 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1596 -- finalizer associated to it. The ByteString length is calculated using
1597 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1598 packCString :: CString -> ByteString
1599 packCString cstr = inlinePerformIO $ do
1600 fp <- newForeignPtr_ (castPtr cstr)
1601 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1603 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1604 -- have /no/ finalizer associated with it. This operation has /O(1)/
1605 -- complexity as we already know the final size, so no /strlen(3)/ is
1607 packCStringLen :: CStringLen -> ByteString
1608 packCStringLen (ptr,len) = inlinePerformIO $ do
1609 fp <- newForeignPtr_ (castPtr ptr)
1610 return $ PS fp 0 (fromIntegral len)
1612 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1613 -- have a @free(3)@ finalizer associated to it.
1614 packMallocCString :: CString -> ByteString
1615 packMallocCString cstr = inlinePerformIO $ do
1616 fp <- newForeignFreePtr (castPtr cstr)
1617 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1619 #if defined(__GLASGOW_HASKELL__)
1620 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1621 -- length, and an IO action representing a finalizer. This function is
1622 -- not available on Hugs.
1624 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1625 packCStringFinalizer p l f = do
1626 fp <- FC.newForeignPtr p f
1629 -- | Explicitly run the finaliser associated with a 'ByteString'.
1630 -- Further references to this value may generate invalid memory
1631 -- references. This operation is unsafe, as there may be other
1632 -- 'ByteStrings' referring to the same underlying pages. If you use
1633 -- this, you need to have a proof of some kind that all 'ByteString's
1634 -- ever generated from the underlying byte array are no longer live.
1635 unsafeFinalize :: ByteString -> IO ()
1636 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1640 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1641 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1642 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1643 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1645 alloc = withForeignPtr ps $ \p -> do
1646 buf <- c_malloc (fromIntegral l+1)
1647 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1648 poke (buf `plusPtr` l) (0::Word8)
1649 return $ castPtr buf
1651 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1652 -- Warning: modifying the @CString@ will affect the @ByteString@.
1653 -- Why is this function unsafe? It relies on the null byte at the end of
1654 -- the ByteString to be there. This is /not/ the case if your ByteString
1655 -- has been spliced from a larger string (i.e. with take or drop).
1656 -- Unless you can guarantee the null byte, you should use the safe
1657 -- version, which will copy the string first.
1659 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1660 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1662 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1663 -- This is mainly useful to allow the rest of the data pointed
1664 -- to by the 'ByteString' to be garbage collected, for example
1665 -- if a large string has been read in, and only a small part of it
1666 -- is needed in the rest of the program.
1667 copy :: ByteString -> ByteString
1668 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1670 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1671 -- CString is going to be deallocated from C land.
1672 copyCString :: CString -> ByteString
1673 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1675 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1676 copyCStringLen :: CStringLen -> ByteString
1677 copyCStringLen (cstr, len) = inlinePerformIO $ do
1678 fp <- mallocForeignPtrArray (len+1)
1679 withForeignPtr fp $ \p -> do
1680 memcpy p (castPtr cstr) len
1681 poke (p `plusPtr` len) (0 :: Word8)
1682 return $! PS fp 0 len
1684 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1685 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1686 -- This is analogous to unsafeUseAsCString, and comes with the same
1687 -- safety requirements.
1689 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1690 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1692 -- | Given the maximum size needed and a function to make the contents
1693 -- of a ByteString, generate makes the 'ByteString'. The generating
1694 -- function is required to return the actual final size (<= the maximum
1695 -- size), and the resulting byte array is realloced to this size. The
1696 -- string is padded at the end with a null byte.
1698 -- generate is the main mechanism for creating custom, efficient
1699 -- ByteString functions, using Haskell or C functions to fill the space.
1701 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1703 p <- mallocArray (i+1)
1705 p' <- reallocArray p (i'+1)
1706 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1707 fp <- newForeignFreePtr p'
1710 -- ---------------------------------------------------------------------
1713 #if defined(__GLASGOW_HASKELL__)
1715 -- | getLine, read a line from stdin.
1716 getLine :: IO ByteString
1717 getLine = hGetLine stdin
1719 -- | hGetLine. read a ByteString from a handle
1720 hGetLine :: Handle -> IO ByteString
1721 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1722 case haBufferMode handle_ of
1723 NoBuffering -> error "no buffering"
1724 _other -> hGetLineBuffered handle_
1727 hGetLineBuffered handle_ = do
1728 let ref = haBuffer handle_
1729 buf <- readIORef ref
1730 hGetLineBufferedLoop handle_ ref buf 0 []
1732 hGetLineBufferedLoop handle_ ref
1733 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1735 off <- findEOL r w raw
1736 let new_len = len + off - r
1737 xs <- mkPS raw r off
1739 -- if eol == True, then off is the offset of the '\n'
1740 -- otherwise off == w and the buffer is now empty.
1742 then do if (w == off + 1)
1743 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1744 else writeIORef ref buf{ bufRPtr = off + 1 }
1745 mkBigPS new_len (xs:xss)
1747 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1748 buf{ bufWPtr=0, bufRPtr=0 }
1750 -- Nothing indicates we caught an EOF, and we may have a
1751 -- partial line to return.
1753 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1755 then mkBigPS new_len (xs:xss)
1758 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1760 -- find the end-of-line character, if there is one
1764 (c,r') <- readCharFromBuffer raw r
1766 then return r -- NB. not r': don't include the '\n'
1767 else findEOL r' w raw
1769 maybeFillReadBuffer fd is_line is_stream buf = catch
1770 (do buf' <- fillReadBuffer fd is_line is_stream buf
1772 (\e -> if isEOFError e then return Nothing else ioError e)
1774 -- TODO, rewrite to use normal memcpy
1775 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1776 mkPS buf start end = do
1777 let len = end - start
1778 fp <- mallocByteString len
1779 withForeignPtr fp $ \p -> do
1780 memcpy_ptr_baoff p buf start (fromIntegral len)
1781 return (PS fp 0 len)
1783 mkBigPS :: Int -> [ByteString] -> IO ByteString
1784 mkBigPS _ [ps] = return ps
1785 mkBigPS _ pss = return $! concat (P.reverse pss)
1789 -- ---------------------------------------------------------------------
1792 -- | Outputs a 'ByteString' to the specified 'Handle'.
1793 hPut :: Handle -> ByteString -> IO ()
1794 hPut _ (PS _ _ 0) = return ()
1795 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1796 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1798 -- | Write a ByteString to stdout
1799 putStr :: ByteString -> IO ()
1800 putStr = hPut stdout
1802 -- | Write a ByteString to stdout, appending a newline byte
1803 putStrLn :: ByteString -> IO ()
1804 putStrLn ps = hPut stdout ps >> hPut stdout nl
1805 where nl = packByte 0x0a
1807 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1808 -- is far more efficient than reading the characters into a 'String'
1809 -- and then using 'pack'.
1810 hGet :: Handle -> Int -> IO ByteString
1811 hGet _ 0 = return empty
1812 hGet h i = do fp <- mallocByteString i
1813 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1816 #if defined(__GLASGOW_HASKELL__)
1817 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1818 -- waiting for data to become available, instead it returns only whatever data
1820 hGetNonBlocking :: Handle -> Int -> IO ByteString
1821 hGetNonBlocking _ 0 = return empty
1822 hGetNonBlocking h i = do
1823 fp <- mallocByteString i
1824 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1828 -- | Read entire handle contents into a 'ByteString'.
1830 -- As with 'hGet', the string representation in the file is assumed to
1833 hGetContents :: Handle -> IO ByteString
1835 let start_size = 1024
1836 p <- mallocArray start_size
1837 i <- hGetBuf h p start_size
1839 then do p' <- reallocArray p i
1840 fp <- newForeignFreePtr p'
1846 p' <- reallocArray p s'
1847 i <- hGetBuf h (p' `plusPtr` s) s
1849 then do let i' = s + i
1850 p'' <- reallocArray p' i'
1851 fp <- newForeignFreePtr p''
1855 -- | getContents. Equivalent to hGetContents stdin
1856 getContents :: IO ByteString
1857 getContents = hGetContents stdin
1859 -- | Read an entire file directly into a 'ByteString'. This is far more
1860 -- efficient than reading the characters into a 'String' and then using
1861 -- 'pack'. It also may be more efficient than opening the file and
1862 -- reading it using hGet.
1863 readFile :: FilePath -> IO ByteString
1865 h <- openBinaryFile f ReadMode
1867 s <- hGet h $ fromIntegral l
1871 -- | Write a 'ByteString' to a file.
1872 writeFile :: FilePath -> ByteString -> IO ()
1874 h <- openBinaryFile f WriteMode
1880 -- Disable until we can move it into a portable .hsc file
1883 -- | Like readFile, this reads an entire file directly into a
1884 -- 'ByteString', but it is even more efficient. It involves directly
1885 -- mapping the file to memory. This has the advantage that the contents
1886 -- of the file never need to be copied. Also, under memory pressure the
1887 -- page may simply be discarded, while in the case of readFile it would
1888 -- need to be written to swap. If you read many small files, mmapFile
1889 -- will be less memory-efficient than readFile, since each mmapFile
1890 -- takes up a separate page of memory. Also, you can run into bus
1891 -- errors if the file is modified. As with 'readFile', the string
1892 -- representation in the file is assumed to be ISO-8859-1.
1894 -- On systems without mmap, this is the same as a readFile.
1896 mmapFile :: FilePath -> IO ByteString
1897 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1899 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1901 h <- openBinaryFile f ReadMode
1902 l <- fromIntegral `fmap` hFileSize h
1903 -- Don't bother mmaping small files because each mmapped file takes up
1904 -- at least one full VM block.
1906 then do thefp <- mallocByteString l
1907 withForeignPtr thefp $ \p-> hGetBuf h p l
1912 fd <- fromIntegral `fmap` handleToFd h
1914 fp <- if p == nullPtr
1915 then do thefp <- mallocByteString l
1916 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1919 -- The munmap leads to crashes on OpenBSD.
1920 -- maybe there's a use after unmap in there somewhere?
1921 #if !defined(__OpenBSD__)
1922 let unmap = c_munmap p l >> return ()
1924 let unmap = return ()
1926 fp <- FC.newForeignPtr p unmap
1931 where mmap_limit = 16*1024
1934 #if defined(__GLASGOW_HASKELL__)
1936 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1938 getArgs :: IO [ByteString]
1940 alloca $ \ p_argc ->
1941 alloca $ \ p_argv -> do
1942 getProgArgv p_argc p_argv
1943 p <- fromIntegral `fmap` peek p_argc
1945 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1948 -- ---------------------------------------------------------------------
1949 -- Internal utilities
1951 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1952 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1953 -- convenience for ByteString construction.
1954 w2c :: Word8 -> Char
1955 #if !defined(__GLASGOW_HASKELL__)
1956 w2c = chr . fromIntegral
1958 w2c = unsafeChr . fromIntegral
1962 c2w :: Char -> Word8
1963 c2w = fromIntegral . ord
1966 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1967 -- is padded with a null byte.
1968 mallocByteString :: Int -> IO (ForeignPtr Word8)
1969 mallocByteString l = do
1970 fp <- mallocForeignPtrArray (l+1)
1971 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1974 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1975 -- argument gives the final size of the ByteString. Unlike 'generate'
1976 -- the ByteString is not reallocated if the final size is less than the
1977 -- estimated size. Also, unlike 'generate' ByteString's created this way
1978 -- are managed on the Haskell heap.
1979 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1980 create l write_ptr = inlinePerformIO $ do
1981 fp <- mallocByteString (l+1)
1982 withForeignPtr fp $ \p -> write_ptr p
1984 {-# INLINE create #-}
1986 -- | Perform an operation with a temporary ByteString
1987 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1988 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1989 {-# INLINE withPtr #-}
1991 -- Common up near identical calls to `error' to reduce the number
1992 -- constant strings created when compiled:
1993 errorEmptyList :: String -> a
1994 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1995 {-# INLINE errorEmptyList #-}
1997 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1998 -- of the string if no element is found, rather than Nothing.
1999 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2000 STRICT2(findIndexOrEnd)
2003 | f (unsafeHead ps) = 0
2004 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
2005 {-# INLINE findIndexOrEnd #-}
2007 -- Find from the end of the string using predicate
2008 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2009 STRICT2(findFromEndUntil)
2010 findFromEndUntil f ps@(PS x s l) =
2012 else if f (last ps) then l
2013 else findFromEndUntil f (PS x s (l-1))
2015 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2016 -- it exposes lots of things to further inlining
2018 {-# INLINE inlinePerformIO #-}
2019 inlinePerformIO :: IO a -> a
2020 #if defined(__GLASGOW_HASKELL__)
2021 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2023 inlinePerformIO = unsafePerformIO
2026 {-# INLINE newForeignFreePtr #-}
2027 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2028 #if defined(__GLASGOW_HASKELL__)
2029 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2031 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2034 -- ---------------------------------------------------------------------
2036 -- Standard C functions
2039 foreign import ccall unsafe "string.h strlen" c_strlen
2042 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2043 :: CInt -> IO (Ptr Word8)
2045 foreign import ccall unsafe "static stdlib.h free" c_free
2046 :: Ptr Word8 -> IO ()
2048 #if !defined(__GLASGOW_HASKELL__)
2049 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2050 :: FunPtr (Ptr Word8 -> IO ())
2053 foreign import ccall unsafe "string.h memset" memset
2054 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2056 foreign import ccall unsafe "string.h memchr" memchr
2057 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2059 foreign import ccall unsafe "string.h memcmp" memcmp
2060 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2062 foreign import ccall unsafe "string.h memcpy" memcpy
2063 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2065 -- ---------------------------------------------------------------------
2070 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2071 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2073 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2074 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2076 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2077 :: Ptr Word8 -> Int -> Word8
2079 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2080 :: Ptr Word8 -> Int -> Word8
2082 foreign import ccall unsafe "static fpstring.h count" c_count
2083 :: Ptr Word8 -> Int -> Word8 -> Int
2085 -- ---------------------------------------------------------------------
2089 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2090 :: Int -> Int -> IO (Ptr Word8)
2092 foreign import ccall unsafe "static unistd.h close" c_close
2095 # if !defined(__OpenBSD__)
2096 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2097 :: Ptr Word8 -> Int -> IO Int
2101 -- ---------------------------------------------------------------------
2102 -- Internal GHC Haskell magic
2104 #if defined(__GLASGOW_HASKELL__)
2105 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2106 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2108 foreign import ccall unsafe "__hscore_memcpy_src_off"
2109 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2112 -- ---------------------------------------------------------------------
2114 -- Functional array fusion for ByteStrings.
2116 -- From the Data Parallel Haskell project,
2117 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2120 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2121 -- the fact that no bottoms of this type are ever constructed; hence, we can
2122 -- assume @(_ :: NoAL) `seq` x = x@.
2126 -- | Special forms of loop arguments
2128 -- * These are common special cases for the three function arguments of gen
2129 -- and loop; we give them special names to make it easier to trigger RULES
2130 -- applying in the special cases represented by these arguments. The
2131 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2132 -- two simplifier phases.
2134 -- * In the case where the accumulator is not needed, it is better to always
2135 -- explicitly return a value `()', rather than just copy the input to the
2136 -- output, as the former gives GHC better local information.
2139 -- | Element function expressing a mapping only
2140 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2141 mapEFL f = \_ e -> (noAL, (Just $ f e))
2142 {-# INLINE [1] mapEFL #-}
2144 -- | Element function implementing a filter function only
2145 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2146 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2147 {-# INLINE [1] filterEFL #-}
2149 -- |Element function expressing a reduction only
2150 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2151 foldEFL f = \a e -> (f a e, Nothing)
2152 {-# INLINE [1] foldEFL #-}
2157 {-# INLINE [1] noAL #-}
2159 -- | Projection functions that are fusion friendly (as in, we determine when
2160 -- they are inlined)
2161 loopArr :: (ByteString, acc) -> ByteString
2162 loopArr (arr, _) = arr
2163 {-# INLINE [1] loopArr #-}
2165 loopAcc :: (ByteString, acc) -> acc
2166 loopAcc (_, acc) = acc
2167 {-# INLINE [1] loopAcc #-}
2169 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2170 loopSndAcc (arr, (_, acc)) = (arr, acc)
2171 {-# INLINE [1] loopSndAcc #-}
2173 ------------------------------------------------------------------------
2175 -- | Iteration over over ByteStrings
2176 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2177 -> acc -- ^ initial acc value
2178 -> ByteString -- ^ input ByteString
2179 -> (ByteString, acc)
2181 loopU f start (PS fp s i) = inlinePerformIO $ withForeignPtr fp $ \a -> do
2182 p <- mallocArray (i+1)
2183 (acc, i') <- go (a `plusPtr` s) p start
2184 p' <- if i == i' then return p else reallocArray p (i'+1) -- avoid realloc for maps
2185 poke (p' `plusPtr` i') (0::Word8)
2186 fp' <- newForeignFreePtr p'
2187 return (PS fp' 0 i', acc)
2192 trans a_off ma_off acc
2193 | a_off >= i = return (acc, ma_off)
2195 x <- peekByteOff p a_off
2196 let (acc', oe) = f acc x
2197 ma_off' <- case oe of
2198 Nothing -> return ma_off
2199 Just e -> do pokeByteOff ma ma_off e
2201 trans (a_off+1) ma_off' acc'
2203 {-# INLINE [1] loopU #-}
2207 "array fusion!" forall em1 em2 start1 start2 arr.
2208 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2209 let em (acc1, acc2) e =
2211 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2214 (acc2', res) -> ((acc1', acc2'), res)
2215 in loopSndAcc (loopU em (start1, start2) arr)
2217 "loopArr/loopSndAcc" forall x.
2218 loopArr (loopSndAcc x) = loopArr x
2221 -- "seq/NoAL" forall (u::NoAL) e.