1 {-# OPTIONS_GHC -cpp -fffi #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- Maintainer : dons@cse.unsw.edu.au
12 -- Stability : experimental
13 -- Portability : portable, requires ffi and cpp
14 -- Tested with : GHC 6.4.1 and Hugs March 2005
18 -- | A time and space-efficient implementation of byte vectors using
19 -- packed Word8 arrays, suitable for high performance use, both in terms
20 -- of large data quantities, or high speed requirements. Byte vectors
21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
22 -- be passed between C and Haskell with little effort.
24 -- This module is intended to be imported @qualified@, to avoid name
25 -- clashes with Prelude functions. eg.
27 -- > import qualified Data.ByteString as B
29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
30 -- UArray by Simon Marlow. Rewritten to support slices and use
31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
34 module Data.ByteString (
36 -- * The @ByteString@ type
37 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
39 -- * Introducing and eliminating 'ByteString's
40 empty, -- :: ByteString
41 packByte, -- :: Word8 -> ByteString
42 pack, -- :: [Word8] -> ByteString
43 unpack, -- :: ByteString -> [Word8]
44 packWith, -- :: (a -> Word8) -> [a] -> ByteString
45 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: Word8 -> ByteString -> ByteString
50 null, -- :: ByteString -> Bool
51 length, -- :: ByteString -> Int
52 head, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 last, -- :: ByteString -> Word8
55 init, -- :: ByteString -> ByteString
56 append, -- :: ByteString -> ByteString -> ByteString
58 -- * Special ByteStrings
59 inits, -- :: ByteString -> [ByteString]
60 tails, -- :: ByteString -> [ByteString]
61 elems, -- :: ByteString -> [ByteString]
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 reverse, -- :: ByteString -> ByteString
66 intersperse, -- :: Word8 -> ByteString -> ByteString
67 transpose, -- :: [ByteString] -> [ByteString]
69 -- * Reducing 'ByteString's
70 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
71 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
83 hash, -- :: ByteString -> Int32
85 -- * Generating and unfolding ByteStrings
86 replicate, -- :: Int -> Word8 -> ByteString
87 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
91 -- ** Breaking strings
92 take, -- :: Int -> ByteString -> ByteString
93 drop, -- :: Int -> ByteString -> ByteString
94 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
95 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
96 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
97 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
98 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
99 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
101 -- ** Breaking and dropping on specific bytes
102 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
103 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
104 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
106 -- ** Breaking into many substrings
107 split, -- :: Word8 -> ByteString -> [ByteString]
108 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
109 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
111 -- ** Joining strings
112 join, -- :: ByteString -> [ByteString] -> ByteString
113 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
115 -- * Indexing ByteStrings
116 index, -- :: ByteString -> Int -> Word8
117 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
118 elemIndices, -- :: Word8 -> ByteString -> [Int]
119 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
120 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
121 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
122 count, -- :: Word8 -> ByteString -> Int
124 -- * Ordered ByteStrings
125 sort, -- :: ByteString -> ByteString
127 -- * Searching ByteStrings
129 -- ** Searching by equality
130 -- | These functions use memchr(3) to efficiently search the ByteString
132 elem, -- :: Word8 -> ByteString -> Bool
133 notElem, -- :: Word8 -> ByteString -> Bool
134 filterByte, -- :: Word8 -> ByteString -> ByteString
135 filterNotByte, -- :: Word8 -> ByteString -> ByteString
137 -- ** Searching with a predicate
138 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
139 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
141 -- ** Prefixes and suffixes
142 -- | These functions use memcmp(3) to efficiently compare substrings
143 isPrefixOf, -- :: ByteString -> ByteString -> Bool
144 isSuffixOf, -- :: ByteString -> ByteString -> Bool
146 -- ** Search for arbitrary substrings
147 isSubstringOf, -- :: ByteString -> ByteString -> Bool
148 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
149 findSubstrings, -- :: ByteString -> ByteString -> [Int]
151 -- * Zipping and unzipping ByteStrings
152 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
153 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
154 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
156 -- * Unchecked access
157 unsafeHead, -- :: ByteString -> Word8
158 unsafeTail, -- :: ByteString -> ByteString
159 unsafeIndex, -- :: ByteString -> Int -> Word8
161 -- * Low level introduction and elimination
162 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
163 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
164 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
165 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
166 skipIndex, -- :: ByteString -> Int
168 -- ** Packing CStrings and pointers
169 packCString, -- :: CString -> ByteString
170 packCStringLen, -- :: CString -> ByteString
171 packMallocCString, -- :: CString -> ByteString
173 #if defined(__GLASGOW_HASKELL__)
174 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
175 packAddress, -- :: Addr# -> ByteString
176 unsafePackAddress, -- :: Int -> Addr# -> ByteString
177 unsafeFinalize, -- :: ByteString -> IO ()
180 -- ** Using ByteStrings as CStrings
181 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
182 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
183 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
185 -- ** Copying ByteStrings
186 -- | These functions perform memcpy(3) operations
187 copy, -- :: ByteString -> ByteString
188 copyCString, -- :: CString -> ByteString
189 copyCStringLen, -- :: CStringLen -> ByteString
191 -- * I\/O with @ByteString@s
193 -- ** Standard input and output
195 #if defined(__GLASGOW_HASKELL__)
196 getLine, -- :: IO ByteString
198 getContents, -- :: IO ByteString
199 putStr, -- :: ByteString -> IO ()
200 putStrLn, -- :: ByteString -> IO ()
203 readFile, -- :: FilePath -> IO ByteString
204 writeFile, -- :: FilePath -> ByteString -> IO ()
206 -- ** I\/O with Handles
207 #if defined(__GLASGOW_HASKELL__)
208 getArgs, -- :: IO [ByteString]
209 hGetLine, -- :: Handle -> IO ByteString
210 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
212 hGetContents, -- :: Handle -> IO ByteString
213 hGet, -- :: Handle -> Int -> IO ByteString
214 hPut, -- :: Handle -> ByteString -> IO ()
216 #if defined(__GLASGOW_HASKELL__)
218 unpackList, -- eek, otherwise it gets thrown away by the simplifier
223 import qualified Prelude as P
224 import Prelude hiding (reverse,head,tail,last,init,null
225 ,length,map,lines,foldl,foldr,unlines
226 ,concat,any,take,drop,splitAt,takeWhile
227 ,dropWhile,span,break,elem,filter,maximum
228 ,minimum,all,concatMap,foldl1,foldr1
229 ,readFile,writeFile,replicate
230 ,getContents,getLine,putStr,putStrLn
231 ,zip,zipWith,unzip,notElem)
233 import qualified Data.List as List
236 import Data.Word (Word8)
237 import Data.Int (Int32)
238 import Data.Bits (rotateL)
239 import Data.Maybe (listToMaybe)
240 import Data.Array (listArray)
241 import qualified Data.Array as Array ((!))
243 import Control.Exception (bracket)
245 import Foreign.C.Types (CSize, CInt)
246 import Foreign.C.String (CString, CStringLen)
247 import Foreign.Storable
248 import Foreign.ForeignPtr
250 import Foreign.Marshal.Array
252 import System.IO (stdin,stdout,hClose,hFileSize
253 ,hGetBuf,hPutBuf,openBinaryFile
256 #if defined(__GLASGOW_HASKELL__)
258 import System.IO (hGetBufNonBlocking)
260 import qualified Foreign.Concurrent as FC (newForeignPtr)
262 import Data.Generics (Data(..), Typeable(..))
264 import System.IO.Error (isEOFError)
265 import Foreign.Marshal (alloca)
269 import GHC.Base (build, unsafeChr)
270 import GHC.Word hiding (Word8)
271 import GHC.Ptr (Ptr(..))
272 import GHC.ST (ST(..))
277 import System.IO.Unsafe
281 -- -----------------------------------------------------------------------------
283 -- Useful macros, until we have bang patterns
286 #define STRICT1(f) f a | a `seq` False = undefined
287 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
288 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
289 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
290 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
292 -- -----------------------------------------------------------------------------
294 -- | A space-efficient representation of a Word8 vector, supporting many
295 -- efficient operations. A 'ByteString' contains 8-bit characters only.
297 -- Instances of Eq, Ord, Read, Show, Data, Typeable
299 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
303 #if defined(__GLASGOW_HASKELL__)
304 deriving (Data, Typeable)
307 instance Eq ByteString
310 instance Ord ByteString
311 where compare = compareBytes
313 instance Show ByteString where
314 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
316 instance Read ByteString where
317 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
320 instance Arbitrary PackedString where
321 arbitrary = P.pack `fmap` arbitrary
322 coarbitrary s = coarbitrary (P.unpack s)
325 -- | /O(n)/ Equality on the 'ByteString' type.
326 eq :: ByteString -> ByteString -> Bool
327 eq a b = (compareBytes a b) == EQ
330 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
331 compareBytes :: ByteString -> ByteString -> Ordering
332 compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
333 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
334 withForeignPtr x1 $ \p1 ->
335 withForeignPtr x2 $ \p2 -> do
336 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
337 return $ case i `compare` 0 of
338 EQ -> l1 `compare` l2
340 {-# INLINE compareBytes #-}
344 -- About 4x slower over 32M
346 compareBytes :: ByteString -> ByteString -> Ordering
347 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
348 withForeignPtr fp1 $ \p1 ->
349 withForeignPtr fp2 $ \p2 ->
350 cmp (p1 `plusPtr` off1)
351 (p2 `plusPtr` off2) 0 len1 len2
353 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
355 cmp p1 p2 n len1 len2
356 | n == len1 = if n == len2 then return EQ else return LT
357 | n == len2 = return GT
359 (a :: Word8) <- peekByteOff p1 n
360 (b :: Word8) <- peekByteOff p2 n
361 case a `compare` b of
362 EQ -> cmp p1 p2 (n+1) len1 len2
365 {-# INLINE compareBytes #-}
368 -- -----------------------------------------------------------------------------
369 -- Introducing and eliminating 'ByteString's
371 -- | /O(1)/ The empty 'ByteString'
373 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
374 {-# NOINLINE empty #-}
376 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
377 packByte :: Word8 -> ByteString
378 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
379 withForeignPtr fp $ \p -> poke p c
381 {-# NOINLINE packByte #-}
383 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
385 -- For applications with large numbers of string literals, pack can be a
386 -- bottleneck. In such cases, consider using packAddress (GHC only).
387 pack :: [Word8] -> ByteString
389 #if !defined(__GLASGOW_HASKELL__)
391 pack str = create (P.length str) $ \p -> go p str
394 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
396 #else /* hack away */
398 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
400 go _ _ [] = return ()
401 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
403 writeByte p i c = ST $ \s# ->
404 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
408 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
409 unpack :: ByteString -> [Word8]
411 #if !defined(__GLASGOW_HASKELL__)
413 unpack (PS _ _ 0) = []
414 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
415 go (p `plusPtr` s) (l - 1) []
418 go p 0 acc = peek p >>= \e -> return (e : acc)
419 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
420 {-# INLINE unpack #-}
424 unpack ps = build (unpackFoldr ps)
425 {-# INLINE unpack #-}
427 unpackList :: ByteString -> [Word8]
428 unpackList (PS fp off len) = withPtr fp $ \p -> do
430 loop _ (-1) acc = return acc
433 loop q (n-1) (a : acc)
434 loop (p `plusPtr` off) (len-1) []
437 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
440 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
441 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
443 loop _ (-1) acc = return acc
446 loop q (n-1) (a `f` acc)
447 loop (p `plusPtr` off) (len-1) ch
448 {-# INLINE [0] unpackFoldr #-}
452 ------------------------------------------------------------------------
454 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
455 -- conversion function
456 packWith :: (a -> Word8) -> [a] -> ByteString
457 packWith k str = create (P.length str) $ \p -> go p str
461 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
462 {-# INLINE packWith #-}
463 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
465 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
466 unpackWith :: (Word8 -> a) -> ByteString -> [a]
467 unpackWith _ (PS _ _ 0) = []
468 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
469 go (p `plusPtr` s) (l - 1) []
472 go p 0 acc = peek p >>= \e -> return (k e : acc)
473 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
474 {-# INLINE unpackWith #-}
475 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
477 -- ---------------------------------------------------------------------
480 -- | /O(1)/ Test whether a ByteString is empty.
481 null :: ByteString -> Bool
482 null (PS _ _ l) = l == 0
485 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
486 length :: ByteString -> Int
487 length (PS _ _ l) = l
488 {-# INLINE length #-}
490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
491 -- complexity, as it requires a memcpy.
492 cons :: Word8 -> ByteString -> ByteString
493 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
494 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
499 snoc :: ByteString -> Word8 -> ByteString
500 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
501 memcpy p (f `plusPtr` s) l
502 poke (p `plusPtr` l) c
505 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
506 head :: ByteString -> Word8
508 | null ps = errorEmptyList "head"
509 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
512 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
513 tail :: ByteString -> ByteString
515 | l <= 0 = errorEmptyList "tail"
516 | otherwise = PS p (s+1) (l-1)
519 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
520 last :: ByteString -> Word8
522 | null ps = errorEmptyList "last"
523 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
527 init :: ByteString -> ByteString
529 | l <= 0 = errorEmptyList "init"
530 | otherwise = PS p s (l-1)
533 -- | /O(n)/ Append two ByteStrings
534 append :: ByteString -> ByteString -> ByteString
535 append xs ys | null xs = ys
537 | otherwise = concat [xs,ys]
538 {-# INLINE append #-}
542 -- About 30% faster, but allocating in a big chunk isn't good for memory use
544 append :: ByteString -> ByteString -> ByteString
545 append xs@(PS ffp s l) ys@(PS fgp t m)
548 | otherwise = create len $ \ptr ->
549 withForeignPtr ffp $ \fp ->
550 withForeignPtr fgp $ \gp -> do
551 memcpy ptr (fp `plusPtr` s) l
552 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
553 where len = length xs + length ys
556 -- ---------------------------------------------------------------------
559 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
562 map :: (Word8 -> Word8) -> ByteString -> ByteString
563 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
564 new_fp <- mallocByteString len
565 withForeignPtr new_fp $ \new_p -> do
566 map_ f (len-1) (p `plusPtr` start) new_p
567 return (PS new_fp 0 len)
570 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
575 x <- peekByteOff p1 n
576 pokeByteOff p2 n (f x)
580 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
581 reverse :: ByteString -> ByteString
582 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
583 c_reverse p (f `plusPtr` s) l
585 -- reverse = pack . P.reverse . unpack
587 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
588 -- 'ByteString' and \`intersperses\' that byte between the elements of
589 -- the 'ByteString'. It is analogous to the intersperse function on
591 intersperse :: Word8 -> ByteString -> ByteString
592 intersperse c ps@(PS x s l)
594 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
595 c_intersperse p (f `plusPtr` s) l c
597 -- intersperse c = pack . List.intersperse c . unpack
599 -- | The 'transpose' function transposes the rows and columns of its
600 -- 'ByteString' argument.
601 transpose :: [ByteString] -> [ByteString]
602 transpose ps = P.map pack (List.transpose (P.map unpack ps))
604 -- ---------------------------------------------------------------------
605 -- Reducing 'ByteString's
607 -- | 'foldl', applied to a binary operator, a starting value (typically
608 -- the left-identity of the operator), and a ByteString, reduces the
609 -- ByteString using the binary operator, from left to right.
610 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
611 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
612 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
615 lgo z p q | p == q = return z
616 | otherwise = do c <- peek p
617 lgo (f z c) (p `plusPtr` 1) q
619 -- | 'foldr', applied to a binary operator, a starting value
620 -- (typically the right-identity of the operator), and a ByteString,
621 -- reduces the ByteString using the binary operator, from right to left.
622 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
623 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
624 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
627 go p q | p == q = return z
628 | otherwise = do c <- peek p
629 ws <- go (p `plusPtr` 1) q
632 -- | 'foldl1' is a variant of 'foldl' that has no starting value
633 -- argument, and thus must be applied to non-empty 'ByteStrings'.
634 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
636 | null ps = errorEmptyList "foldl1"
637 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
639 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
640 -- and thus must be applied to non-empty 'ByteString's
641 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
643 | null ps = errorEmptyList "foldr1"
644 | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
646 -- ---------------------------------------------------------------------
649 -- | /O(n)/ Concatenate a list of ByteStrings.
650 concat :: [ByteString] -> ByteString
653 concat xs = inlinePerformIO $ do
654 let start_size = 1024
655 p <- mallocArray start_size
658 where f ptr len _ [] = do
659 ptr' <- reallocArray ptr (len+1)
660 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
661 fp <- newForeignFreePtr ptr'
664 f ptr len to_go pss@(PS p s l:pss')
665 | l <= to_go = do withForeignPtr p $ \pf ->
666 memcpy (ptr `plusPtr` len)
668 f ptr (len + l) (to_go - l) pss'
670 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
671 ptr' <- reallocArray ptr new_total
672 f ptr' len (new_total - len) pss
674 -- | Map a function over a 'ByteString' and concatenate the results
675 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
676 concatMap f = foldr (append . f) empty
678 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
679 -- any element of the 'ByteString' satisfies the predicate.
680 any :: (Word8 -> Bool) -> ByteString -> Bool
681 any _ (PS _ _ 0) = False
682 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
683 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
686 go p q | p == q = return False
687 | otherwise = do c <- peek p
688 if f c then return True
689 else go (p `plusPtr` 1) q
691 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
692 -- if all elements of the 'ByteString' satisfy the predicate.
693 all :: (Word8 -> Bool) -> ByteString -> Bool
694 all _ (PS _ _ 0) = True
695 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
696 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
699 go p q | p == q = return True -- end of list
700 | otherwise = do c <- peek p
702 then go (p `plusPtr` 1) q
705 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
706 maximum :: ByteString -> Word8
707 maximum xs@(PS x s l)
708 | null xs = errorEmptyList "maximum"
709 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
710 return $ c_maximum (p `plusPtr` s) l
712 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
713 minimum :: ByteString -> Word8
714 minimum xs@(PS x s l)
715 | null xs = errorEmptyList "minimum"
716 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
717 return $ c_minimum (p `plusPtr` s) l
720 maximum xs@(PS x s l)
721 | null xs = errorEmptyList "maximum"
722 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
724 maximum_ (p `plusPtr` s) 0 l w
725 {-# INLINE maximum #-}
727 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
731 | otherwise = do w <- peekByteOff ptr n
732 maximum_ ptr (n+1) m (if w > c then w else c)
734 minimum xs@(PS x s l)
735 | null xs = errorEmptyList "minimum"
736 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
738 minimum_ (p `plusPtr` s) 0 l w
739 {-# INLINE minimum #-}
741 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
745 | otherwise = do w <- peekByteOff ptr n
746 minimum_ ptr (n+1) m (if w < c then w else c)
748 -- | /O(n)/ map Word8 functions, provided with the index at each position
749 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
750 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
751 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
753 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
755 go n f t p | f == p = return ()
756 | otherwise = do w <- peek f
758 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
760 -- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key.
761 hash :: ByteString -> Int32
762 hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
763 go (0 :: Int32) (p `plusPtr` s) l
765 go :: Int32 -> Ptr Word8 -> Int -> IO Int32
768 go h p n = do w <- peek p
769 go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1)
771 -- ---------------------------------------------------------------------
772 -- Unfolds and replicates
774 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
775 -- the value of every element. The following holds:
777 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
779 -- This implemenation uses @memset(3)@
780 replicate :: Int -> Word8 -> ByteString
781 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
785 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
789 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
792 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
793 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
794 -- the element and returns 'Nothing' if it is done producing the
795 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
796 -- prepending to the ByteString and @b@ is used as the next element in a
799 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
800 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
801 -- final size of the ByteString as an argument. 'cons' can then be
802 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
803 -- linear complexity. The depth of the recursion is limited to this
804 -- size, but may be less. For lazy, infinite unfoldr, use
805 -- 'Data.List.unfoldr' (from 'Data.List').
809 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
811 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
813 -- > unfoldrN n == take n $ List.unfoldr
814 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
815 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
818 go q c n | n == i = return n -- stop if we reach `i'
819 | otherwise = case f c of
823 go (q `plusPtr` 1) new_c (n+1)
825 -- ---------------------------------------------------------------------
828 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
829 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
830 take :: Int -> ByteString -> ByteString
834 | otherwise = PS x s n
837 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
838 -- elements, or @[]@ if @n > 'length' xs@.
839 drop :: Int -> ByteString -> ByteString
843 | otherwise = PS x (s+n) (l-n)
846 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
847 splitAt :: Int -> ByteString -> (ByteString, ByteString)
848 splitAt n ps = (take n ps, drop n ps)
849 {-# INLINE splitAt #-}
851 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
852 -- returns the longest prefix (possibly empty) of @xs@ of elements that
854 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
855 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
856 {-# INLINE takeWhile #-}
858 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
859 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
860 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
861 {-# INLINE dropWhile #-}
863 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
864 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
865 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
868 -- | 'breakByte' breaks its ByteString argument at the first occurence
869 -- of the specified byte. It is more efficient than 'break' as it is
870 -- implemented with @memchr(3)@. I.e.
872 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
874 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
875 breakByte c p = case elemIndex c p of
877 Just n -> (take n p, drop n p)
878 {-# INLINE breakByte #-}
880 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
881 -- occurence of @w@. It behaves like 'break', except the delimiter is
882 -- not returned, and @Nothing@ is returned if the delimiter is not in
883 -- the ByteString. I.e.
885 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
887 -- > breakFirst c xs ==
888 -- > let (x,y) = break (== c) xs
889 -- > in if null y then Nothing else Just (x, drop 1 y))
891 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
892 breakFirst c p = case elemIndex c p of
894 Just n -> Just (take n p, drop (n+1) p)
895 {-# INLINE breakFirst #-}
897 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
900 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
902 -- and the following are equivalent:
904 -- > breakLast 'c' "abcdef"
905 -- > let (x,y) = break (=='c') (reverse "abcdef")
906 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
908 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
909 breakLast c p = case elemIndexLast c p of
911 Just n -> Just (take n p, drop (n+1) p)
912 {-# INLINE breakLast #-}
914 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
915 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
916 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
917 span p ps = break (not . p) ps
920 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
923 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
927 -- > spanEnd (not . isSpace) ps
929 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
931 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
932 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
934 -- | /O(n)/ Splits a 'ByteString' into components delimited by
935 -- separators, where the predicate returns True for a separator element.
936 -- The resulting components do not contain the separators. Two adjacent
937 -- separators result in an empty component in the output. eg.
939 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
940 -- > splitWith (=='a') [] == []
942 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
944 #if defined(__GLASGOW_HASKELL__)
945 splitWith _pred (PS _ _ 0) = []
946 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
947 where pred# c# = pred_ (W8# c#)
949 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
950 splitLoop pred' p 0 off' len' fp'
952 splitLoop :: (Word# -> Bool)
958 splitLoop pred' p idx' off' len' fp'
959 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
960 | idx' >= len' = return [PS fp' off' idx']
962 w <- peekElemOff p (off'+idx')
963 if pred' (case w of W8# w# -> w#)
964 then return (PS fp' off' idx' :
965 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
966 else splitLoop pred' p (idx'+1) off' len' fp'
967 {-# INLINE splitWith #-}
970 splitWith _ (PS _ _ 0) = []
971 splitWith p ps = splitWith' p ps
974 splitWith' q qs = if null rest then [chunk]
975 else chunk : splitWith' q (unsafeTail rest)
976 where (chunk,rest) = break q qs
979 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
980 -- argument, consuming the delimiter. I.e.
982 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
983 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
984 -- > split 'x' "x" == ["",""]
988 -- > join [c] . split c == id
989 -- > split == splitWith . (==)
991 -- As for all splitting functions in this library, this function does
992 -- not copy the substrings, it just constructs new 'ByteStrings' that
993 -- are slices of the original.
995 split :: Word8 -> ByteString -> [ByteString]
996 split _ (PS _ _ 0) = []
997 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
998 let ptr = p `plusPtr` s
1002 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1004 then return [PS x (s+n) (l-n)]
1005 else do let i = q `minusPtr` ptr
1007 return $! PS x (s+n) (i-n) : ls
1009 {-# INLINE split #-}
1012 -- slower. but stays inside Haskell.
1013 split _ (PS _ _ 0) = []
1014 split (W8# w#) (PS fp off len) = splitWith' off len fp
1016 splitWith' off' len' fp' = withPtr fp $ \p ->
1017 splitLoop p 0 off' len' fp'
1019 splitLoop :: Ptr Word8
1020 -> Int -> Int -> Int
1025 splitLoop p idx' off' len' fp'
1026 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1027 | idx' >= len' = return [PS fp' off' idx']
1029 (W8# x#) <- peekElemOff p (off'+idx')
1030 if word2Int# w# ==# word2Int# x#
1031 then return (PS fp' off' idx' :
1032 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1033 else splitLoop p (idx'+1) off' len' fp'
1036 -- | Like 'splitWith', except that sequences of adjacent separators are
1037 -- treated as a single separator. eg.
1039 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1041 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1042 tokens f = P.filter (not.null) . splitWith f
1044 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1045 -- 'ByteString's and concatenates the list after interspersing the first
1046 -- argument between each element of the list.
1047 join :: ByteString -> [ByteString] -> ByteString
1048 join filler pss = concat (splice pss)
1052 splice (x:y:xs) = x:filler:splice (y:xs)
1055 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1056 -- with a char. Around 4 times faster than the generalised join.
1058 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1059 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1060 withForeignPtr ffp $ \fp ->
1061 withForeignPtr fgp $ \gp -> do
1062 memcpy ptr (fp `plusPtr` s) l
1063 poke (ptr `plusPtr` l) c
1064 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1066 len = length f + length g + 1
1067 {-# INLINE joinWithByte #-}
1069 -- ---------------------------------------------------------------------
1070 -- Indexing ByteStrings
1072 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1073 index :: ByteString -> Int -> Word8
1075 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1076 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1077 ++ ", length = " ++ show (length ps)
1078 | otherwise = ps `unsafeIndex` n
1079 {-# INLINE index #-}
1081 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1082 -- element in the given 'ByteString' which is equal to the query
1083 -- element, or 'Nothing' if there is no such element.
1084 -- This implementation uses memchr(3).
1085 elemIndex :: Word8 -> ByteString -> Maybe Int
1086 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1087 let p' = p `plusPtr` s
1088 q = memchr p' c (fromIntegral l)
1089 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1090 {-# INLINE elemIndex #-}
1092 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1093 -- element in the given 'ByteString' which is equal to the query
1094 -- element, or 'Nothing' if there is no such element. The following
1097 -- > elemIndexLast c xs ==
1098 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1100 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1101 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1102 go (p `plusPtr` s) (l-1)
1105 go p i | i < 0 = return Nothing
1106 | otherwise = do ch' <- peekByteOff p i
1108 then return $ Just i
1110 {-# INLINE elemIndexLast #-}
1112 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1113 -- the indices of all elements equal to the query element, in ascending order.
1114 -- This implementation uses memchr(3).
1115 elemIndices :: Word8 -> ByteString -> [Int]
1116 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1117 let ptr = p `plusPtr` s
1121 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1124 else do let i = q `minusPtr` ptr
1131 elemIndices :: Word8 -> ByteString -> [Int]
1132 elemIndices c ps = loop 0 ps
1134 loop _ ps' | null ps' = []
1135 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1136 | otherwise = loop (n+1) (unsafeTail ps')
1139 -- | count returns the number of times its argument appears in the ByteString
1141 -- > count = length . elemIndices
1143 -- But more efficiently than using length on the intermediate list.
1144 count :: Word8 -> ByteString -> Int
1145 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1146 go (p `plusPtr` s) (fromIntegral m) 0
1148 go :: Ptr Word8 -> CSize -> Int -> IO Int
1151 let q = memchr p w l
1154 else do let k = fromIntegral $ q `minusPtr` p
1155 go (q `plusPtr` 1) (l-k-1) (i+1)
1156 {-# INLINE count #-}
1158 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1159 -- returns the index of the first element in the ByteString
1160 -- satisfying the predicate.
1161 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1162 findIndex = (listToMaybe .) . findIndices
1164 -- | The 'findIndices' function extends 'findIndex', by returning the
1165 -- indices of all elements satisfying the predicate, in ascending order.
1166 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1167 findIndices p ps = loop 0 ps
1170 loop _ qs | null qs = []
1171 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1172 | otherwise = loop (n+1) (unsafeTail qs)
1174 -- ---------------------------------------------------------------------
1175 -- Searching ByteStrings
1177 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1178 elem :: Word8 -> ByteString -> Bool
1179 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1182 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1183 notElem :: Word8 -> ByteString -> Bool
1184 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1185 {-# INLINE notElem #-}
1188 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1189 -- case of filtering a single byte. It is more efficient to use
1190 -- /filterByte/ in this case.
1192 -- > filterByte == filter . (==)
1194 -- filterByte is around 10x faster, and uses much less space, than its
1195 -- filter equivalent
1196 filterByte :: Word8 -> ByteString -> ByteString
1197 filterByte w ps = replicate (count w ps) w
1200 -- slower than the replicate version
1202 filterByte ch ps@(PS x s l)
1204 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1205 t <- go (f `plusPtr` s) p l
1206 return (t `minusPtr` p) -- actual length
1210 go f t e = do w <- peek f
1212 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1213 else go (f `plusPtr` 1) t (e-1)
1217 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1218 -- case of filtering a single byte out of a list. It is more efficient
1219 -- to use /filterNotByte/ in this case.
1221 -- > filterNotByte == filter . (/=)
1223 -- filterNotByte is around 3x faster, and uses much less space, than its
1224 -- filter equivalent
1225 filterNotByte :: Word8 -> ByteString -> ByteString
1226 filterNotByte ch ps@(PS x s l)
1228 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1229 t <- go (f `plusPtr` s) p l
1230 return (t `minusPtr` p) -- actual length
1234 go f t e = do w <- peek f
1236 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1237 else go (f `plusPtr` 1) t (e-1)
1239 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1240 -- returns a ByteString containing those characters that satisfy the
1242 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1243 filter k ps@(PS x s l)
1245 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1246 t <- go (f `plusPtr` s) p l
1247 return (t `minusPtr` p) -- actual length
1251 go f t e = do w <- peek f
1253 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1254 else go (f `plusPtr` 1) t (e - 1)
1256 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1258 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1259 -- and returns the first element in matching the predicate, or 'Nothing'
1260 -- if there is no such element.
1261 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1262 find p ps = case filter p ps of
1263 q | null q -> Nothing
1264 | otherwise -> Just (unsafeHead q)
1266 -- ---------------------------------------------------------------------
1267 -- Searching for substrings
1269 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1270 -- iff the first is a prefix of the second.
1271 isPrefixOf :: ByteString -> ByteString -> Bool
1272 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1275 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1276 withForeignPtr x2 $ \p2 -> do
1277 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1280 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1281 -- iff the first is a suffix of the second.
1283 -- The following holds:
1285 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1287 -- However, the real implemenation uses memcmp to compare the end of the
1288 -- string only, with no reverse required..
1289 isSuffixOf :: ByteString -> ByteString -> Bool
1290 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1293 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1294 withForeignPtr x2 $ \p2 -> do
1295 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1298 -- | Check whether one string is a substring of another. @isSubstringOf
1299 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1300 isSubstringOf :: ByteString -- ^ String to search for.
1301 -> ByteString -- ^ String to search in.
1303 isSubstringOf p s = not $ P.null $ findSubstrings p s
1305 -- | Get the first index of a substring in another string,
1306 -- or 'Nothing' if the string is not found.
1307 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1308 findSubstring :: ByteString -- ^ String to search for.
1309 -> ByteString -- ^ String to seach in.
1311 findSubstring = (listToMaybe .) . findSubstrings
1313 -- | Find the indexes of all (possibly overlapping) occurances of a
1314 -- substring in a string. This function uses the Knuth-Morris-Pratt
1315 -- string matching algorithm.
1316 findSubstrings :: ByteString -- ^ String to search for.
1317 -> ByteString -- ^ String to seach in.
1320 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1322 patc x = pat `unsafeIndex` x
1323 strc x = str `unsafeIndex` x
1325 -- maybe we should make kmpNext a UArray before using it in search?
1326 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1327 kmpNextL p _ | null p = []
1328 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1330 x = if not (null ps) && unsafeHead ps == patc j'
1331 then kmpNext Array.! j' else j'
1333 search i j = match ++ rest -- i: position in string, j: position in pattern
1334 where match = if j == m then [(i - j)] else []
1335 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1336 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1339 -- ---------------------------------------------------------------------
1342 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1343 -- corresponding pairs of bytes. If one input ByteString is short,
1344 -- excess elements of the longer ByteString are discarded. This is
1345 -- equivalent to a pair of 'unpack' operations.
1346 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1348 | null ps || null qs = []
1349 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1351 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1352 -- the first argument, instead of a tupling function. For example,
1353 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1354 -- corresponding sums.
1355 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1357 | null ps || null qs = []
1358 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1360 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1361 -- ByteStrings. Note that this performs two 'pack' operations.
1362 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1363 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1364 {-# INLINE unzip #-}
1366 -- ---------------------------------------------------------------------
1369 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1370 inits :: ByteString -> [ByteString]
1371 inits (PS x s l) = [PS x s n | n <- [0..l]]
1373 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1374 tails :: ByteString -> [ByteString]
1375 tails p | null p = [empty]
1376 | otherwise = p : tails (unsafeTail p)
1378 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1380 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1381 elems :: ByteString -> [ByteString]
1382 elems (PS _ _ 0) = []
1383 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1384 {-# INLINE elems #-}
1386 -- ---------------------------------------------------------------------
1387 -- ** Ordered 'ByteString's
1389 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1390 sort :: ByteString -> ByteString
1391 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1392 memcpy p (f `plusPtr` s) l
1393 c_qsort p l -- inplace
1395 -- sort = pack . List.sort . unpack
1397 -- ---------------------------------------------------------------------
1399 -- Extensions to the basic interface
1402 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1403 -- check for the empty case, so there is an obligation on the programmer
1404 -- to provide a proof that the ByteString is non-empty.
1405 unsafeHead :: ByteString -> Word8
1406 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1407 {-# INLINE unsafeHead #-}
1409 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1410 -- check for the empty case. As with 'unsafeHead', the programmer must
1411 -- provide a separate proof that the ByteString is non-empty.
1412 unsafeTail :: ByteString -> ByteString
1413 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1414 {-# INLINE unsafeTail #-}
1416 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1417 -- This omits the bounds check, which means there is an accompanying
1418 -- obligation on the programmer to ensure the bounds are checked in some
1420 unsafeIndex :: ByteString -> Int -> Word8
1421 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1422 {-# INLINE unsafeIndex #-}
1424 -- ---------------------------------------------------------------------
1425 -- Low level constructors
1427 #if defined(__GLASGOW_HASKELL__)
1428 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1429 -- Addr\# (an arbitrary machine address assumed to point outside the
1430 -- garbage-collected heap) into a @ByteString@. A much faster way to
1431 -- create an Addr\# is with an unboxed string literal, than to pack a
1432 -- boxed string. A unboxed string literal is compiled to a static @char
1433 -- []@ by GHC. Establishing the length of the string requires a call to
1434 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1435 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1436 -- if you know the length of the string statically.
1440 -- > literalFS = packAddress "literal"#
1442 packAddress :: Addr# -> ByteString
1443 packAddress addr# = inlinePerformIO $ do
1444 p <- newForeignPtr_ cstr
1445 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1448 {-# INLINE packAddress #-}
1450 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1451 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1452 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1453 -- 'Addr\#' to the string, and the length of the string. Make sure the
1454 -- length is correct, otherwise use the safer 'packAddress' (where the
1455 -- length will be calculated once at runtime).
1456 unsafePackAddress :: Int -> Addr# -> ByteString
1457 unsafePackAddress len addr# = inlinePerformIO $ do
1458 p <- newForeignPtr_ cstr
1460 where cstr = Ptr addr#
1464 -- | /O(1)/ Build a ByteString from a ForeignPtr
1465 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1466 fromForeignPtr fp l = PS fp 0 l
1468 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1469 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1470 toForeignPtr (PS ps s l) = (ps, s, l)
1472 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1473 -- current 'ByteString' from any larger string it was created from, as
1475 skipIndex :: ByteString -> Int
1476 skipIndex (PS _ s _) = s
1477 {-# INLINE skipIndex #-}
1479 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1480 -- finalizer associated to it. The ByteString length is calculated using
1481 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1482 packCString :: CString -> ByteString
1483 packCString cstr = inlinePerformIO $ do
1484 fp <- newForeignPtr_ (castPtr cstr)
1485 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1487 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1488 -- have /no/ finalizer associated with it. This operation has /O(1)/
1489 -- complexity as we already know the final size, so no /strlen(3)/ is
1491 packCStringLen :: CStringLen -> ByteString
1492 packCStringLen (ptr,len) = inlinePerformIO $ do
1493 fp <- newForeignPtr_ (castPtr ptr)
1494 return $ PS fp 0 (fromIntegral len)
1496 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1497 -- have a @free(3)@ finalizer associated to it.
1498 packMallocCString :: CString -> ByteString
1499 packMallocCString cstr = inlinePerformIO $ do
1500 fp <- newForeignFreePtr (castPtr cstr)
1501 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1503 #if defined(__GLASGOW_HASKELL__)
1504 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1505 -- length, and an IO action representing a finalizer. This function is
1506 -- not available on Hugs.
1508 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1509 packCStringFinalizer p l f = do
1510 fp <- FC.newForeignPtr p f
1513 -- | Explicitly run the finaliser associated with a 'ByteString'.
1514 -- Further references to this value may generate invalid memory
1515 -- references. This operation is unsafe, as there may be other
1516 -- 'ByteStrings' referring to the same underlying pages. If you use
1517 -- this, you need to have a proof of some kind that all 'ByteString's
1518 -- ever generated from the underlying byte array are no longer live.
1519 unsafeFinalize :: ByteString -> IO ()
1520 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1524 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1525 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1526 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1527 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1529 alloc = withForeignPtr ps $ \p -> do
1530 buf <- c_malloc (fromIntegral l+1)
1531 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1532 poke (buf `plusPtr` l) (0::Word8)
1533 return $ castPtr buf
1535 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1536 -- Warning: modifying the @CString@ will affect the @ByteString@.
1537 -- Why is this function unsafe? It relies on the null byte at the end of
1538 -- the ByteString to be there. This is /not/ the case if your ByteString
1539 -- has been spliced from a larger string (i.e. with take or drop).
1540 -- Unless you can guarantee the null byte, you should use the safe
1541 -- version, which will copy the string first.
1543 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1544 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1546 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1547 -- This is mainly useful to allow the rest of the data pointed
1548 -- to by the 'ByteString' to be garbage collected, for example
1549 -- if a large string has been read in, and only a small part of it
1550 -- is needed in the rest of the program.
1551 copy :: ByteString -> ByteString
1552 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1554 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1555 -- CString is going to be deallocated from C land.
1556 copyCString :: CString -> ByteString
1557 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1559 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1560 copyCStringLen :: CStringLen -> ByteString
1561 copyCStringLen (cstr, len) = inlinePerformIO $ do
1562 fp <- mallocForeignPtrArray (len+1)
1563 withForeignPtr fp $ \p -> do
1564 memcpy p (castPtr cstr) len
1565 poke (p `plusPtr` len) (0 :: Word8)
1566 return $! PS fp 0 len
1568 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1569 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1570 -- This is analogous to unsafeUseAsCString, and comes with the same
1571 -- safety requirements.
1573 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1574 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1576 -- | Given the maximum size needed and a function to make the contents
1577 -- of a ByteString, generate makes the 'ByteString'. The generating
1578 -- function is required to return the actual final size (<= the maximum
1579 -- size), and the resulting byte array is realloced to this size. The
1580 -- string is padded at the end with a null byte.
1582 -- generate is the main mechanism for creating custom, efficient
1583 -- ByteString functions, using Haskell or C functions to fill the space.
1585 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1589 p' <- reallocArray p (i'+1)
1590 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1591 fp <- newForeignFreePtr p'
1594 -- ---------------------------------------------------------------------
1597 #if defined(__GLASGOW_HASKELL__)
1599 -- | getLine, read a line from stdin.
1600 getLine :: IO ByteString
1601 getLine = hGetLine stdin
1603 -- | hGetLine. read a ByteString from a handle
1604 hGetLine :: Handle -> IO ByteString
1605 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1606 case haBufferMode handle_ of
1607 NoBuffering -> error "no buffering"
1608 _other -> hGetLineBuffered handle_
1611 hGetLineBuffered handle_ = do
1612 let ref = haBuffer handle_
1613 buf <- readIORef ref
1614 hGetLineBufferedLoop handle_ ref buf 0 []
1616 hGetLineBufferedLoop handle_ ref
1617 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1619 off <- findEOL r w raw
1620 let new_len = len + off - r
1621 xs <- mkPS raw r off
1623 -- if eol == True, then off is the offset of the '\n'
1624 -- otherwise off == w and the buffer is now empty.
1626 then do if (w == off + 1)
1627 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1628 else writeIORef ref buf{ bufRPtr = off + 1 }
1629 mkBigPS new_len (xs:xss)
1631 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1632 buf{ bufWPtr=0, bufRPtr=0 }
1634 -- Nothing indicates we caught an EOF, and we may have a
1635 -- partial line to return.
1637 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1639 then mkBigPS new_len (xs:xss)
1642 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1644 -- find the end-of-line character, if there is one
1648 (c,r') <- readCharFromBuffer raw r
1650 then return r -- NB. not r': don't include the '\n'
1651 else findEOL r' w raw
1653 maybeFillReadBuffer fd is_line is_stream buf = catch
1654 (do buf' <- fillReadBuffer fd is_line is_stream buf
1656 (\e -> if isEOFError e then return Nothing else ioError e)
1658 -- TODO, rewrite to use normal memcpy
1659 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1660 mkPS buf start end = do
1661 let len = end - start
1662 fp <- mallocByteString (len `quot` 8)
1663 withForeignPtr fp $ \p -> do
1664 memcpy_ptr_baoff p buf start (fromIntegral len)
1665 return (PS fp 0 len)
1667 mkBigPS :: Int -> [ByteString] -> IO ByteString
1668 mkBigPS _ [ps] = return ps
1669 mkBigPS _ pss = return $! concat (P.reverse pss)
1673 -- ---------------------------------------------------------------------
1676 -- | Outputs a 'ByteString' to the specified 'Handle'.
1677 hPut :: Handle -> ByteString -> IO ()
1678 hPut _ (PS _ _ 0) = return ()
1679 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1680 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1682 -- | Write a ByteString to stdout
1683 putStr :: ByteString -> IO ()
1684 putStr = hPut stdout
1686 -- | Write a ByteString to stdout, appending a newline byte
1687 putStrLn :: ByteString -> IO ()
1688 putStrLn ps = hPut stdout ps >> hPut stdout nl
1689 where nl = packByte 0x0a
1691 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1692 -- is far more efficient than reading the characters into a 'String'
1693 -- and then using 'pack'.
1694 hGet :: Handle -> Int -> IO ByteString
1695 hGet _ 0 = return empty
1696 hGet h i = do fp <- mallocByteString i
1697 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1700 #if defined(__GLASGOW_HASKELL__)
1701 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1702 -- waiting for data to become available, instead it returns only whatever data
1704 hGetNonBlocking :: Handle -> Int -> IO ByteString
1705 hGetNonBlocking _ 0 = return empty
1706 hGetNonBlocking h i = do
1707 fp <- mallocByteString i
1708 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1712 -- | Read entire handle contents into a 'ByteString'.
1714 -- As with 'hGet', the string representation in the file is assumed to
1717 hGetContents :: Handle -> IO ByteString
1719 let start_size = 1024
1720 p <- mallocArray start_size
1721 i <- hGetBuf h p start_size
1723 then do p' <- reallocArray p i
1724 fp <- newForeignFreePtr p'
1730 p' <- reallocArray p s'
1731 i <- hGetBuf h (p' `plusPtr` s) s
1733 then do let i' = s + i
1734 p'' <- reallocArray p' i'
1735 fp <- newForeignFreePtr p''
1739 -- | getContents. Equivalent to hGetContents stdin
1740 getContents :: IO ByteString
1741 getContents = hGetContents stdin
1743 -- | Read an entire file directly into a 'ByteString'. This is far more
1744 -- efficient than reading the characters into a 'String' and then using
1745 -- 'pack'. It also may be more efficient than opening the file and
1746 -- reading it using hGet.
1747 readFile :: FilePath -> IO ByteString
1749 h <- openBinaryFile f ReadMode
1751 s <- hGet h $ fromIntegral l
1755 -- | Write a 'ByteString' to a file.
1756 writeFile :: FilePath -> ByteString -> IO ()
1758 h <- openBinaryFile f WriteMode
1762 #if defined(__GLASGOW_HASKELL__)
1764 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1766 getArgs :: IO [ByteString]
1768 alloca $ \ p_argc ->
1769 alloca $ \ p_argv -> do
1770 getProgArgv p_argc p_argv
1771 p <- fromIntegral `fmap` peek p_argc
1773 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1776 -- ---------------------------------------------------------------------
1777 -- Internal utilities
1779 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1780 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1781 -- convenience for ByteString construction.
1782 w2c :: Word8 -> Char
1783 #if !defined(__GLASGOW_HASKELL__)
1784 w2c = chr . fromIntegral
1786 w2c = unsafeChr . fromIntegral
1790 c2w :: Char -> Word8
1791 c2w = fromIntegral . ord
1794 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1795 -- is padded with a null byte.
1796 mallocByteString :: Int -> IO (ForeignPtr Word8)
1797 mallocByteString l = do
1798 fp <- mallocForeignPtrArray (l+1)
1799 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1802 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1803 -- argument gives the final size of the ByteString. Unlike 'generate'
1804 -- the ByteString is no reallocated if the final size is less than the
1806 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1807 create l write_ptr = inlinePerformIO $ do
1808 fp <- mallocByteString (l+1)
1809 withForeignPtr fp $ \p -> write_ptr p
1811 {-# INLINE create #-}
1813 -- | Perform an operation with a temporary ByteString
1814 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1815 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1816 {-# INLINE withPtr #-}
1818 -- Common up near identical calls to `error' to reduce the number
1819 -- constant strings created when compiled:
1820 errorEmptyList :: String -> a
1821 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1822 {-# INLINE errorEmptyList #-}
1824 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1825 -- of the string if no element is found, rather than Nothing.
1826 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1827 STRICT2(findIndexOrEnd)
1830 | f (unsafeHead ps) = 0
1831 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
1832 {-# INLINE findIndexOrEnd #-}
1834 -- Find from the end of the string using predicate
1835 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1836 STRICT2(findFromEndUntil)
1837 findFromEndUntil f ps@(PS x s l) =
1839 else if f (last ps) then l
1840 else findFromEndUntil f (PS x s (l-1))
1842 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1843 -- it exposes lots of things to further inlining
1845 {-# INLINE inlinePerformIO #-}
1846 inlinePerformIO :: IO a -> a
1847 #if defined(__GLASGOW_HASKELL__)
1848 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1850 inlinePerformIO = unsafePerformIO
1853 {-# INLINE newForeignFreePtr #-}
1854 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1855 #if defined(__GLASGOW_HASKELL__)
1856 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1858 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1861 -- ---------------------------------------------------------------------
1863 -- Standard C functions
1866 foreign import ccall unsafe "string.h strlen" c_strlen
1869 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1870 :: CInt -> IO (Ptr Word8)
1872 foreign import ccall unsafe "static stdlib.h free" c_free
1873 :: Ptr Word8 -> IO ()
1875 #if !defined(__GLASGOW_HASKELL__)
1876 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1877 :: FunPtr (Ptr Word8 -> IO ())
1880 foreign import ccall unsafe "string.h memset" memset
1881 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1883 foreign import ccall unsafe "string.h memchr" memchr
1884 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1886 foreign import ccall unsafe "string.h memcmp" memcmp
1887 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1889 foreign import ccall unsafe "string.h memcpy" memcpy
1890 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1892 -- ---------------------------------------------------------------------
1897 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1898 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1900 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1901 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1903 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1904 :: Ptr Word8 -> Int -> Word8
1906 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1907 :: Ptr Word8 -> Int -> Word8
1909 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1910 :: Ptr Word8 -> Int -> IO ()
1912 -- ---------------------------------------------------------------------
1913 -- Internal GHC Haskell magic
1915 #if defined(__GLASGOW_HASKELL__)
1916 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1917 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
1919 foreign import ccall unsafe "__hscore_memcpy_src_off"
1920 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())