1 {-# OPTIONS_GHC -cpp -fffi #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- Maintainer : dons@cse.unsw.edu.au
12 -- Stability : experimental
13 -- Portability : portable, requires ffi and cpp
14 -- Tested with : GHC 6.4.1 and Hugs March 2005
18 -- | A time and space-efficient implementation of byte vectors using
19 -- packed Word8 arrays, suitable for high performance use, both in terms
20 -- of large data quantities, or high speed requirements. Byte vectors
21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
22 -- be passed between C and Haskell with little effort.
24 -- This module is intended to be imported @qualified@, to avoid name
25 -- clashes with Prelude functions. eg.
27 -- > import qualified Data.ByteString as B
29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
30 -- UArray by Simon Marlow. Rewritten to support slices and use
31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
34 module Data.ByteString (
36 -- * The @ByteString@ type
37 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
39 -- * Introducing and eliminating 'ByteString's
40 empty, -- :: ByteString
41 packByte, -- :: Word8 -> ByteString
42 pack, -- :: [Word8] -> ByteString
43 unpack, -- :: ByteString -> [Word8]
44 packWith, -- :: (a -> Word8) -> [a] -> ByteString
45 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: Word8 -> ByteString -> ByteString
50 null, -- :: ByteString -> Bool
51 length, -- :: ByteString -> Int
52 head, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 last, -- :: ByteString -> Word8
55 init, -- :: ByteString -> ByteString
56 append, -- :: ByteString -> ByteString -> ByteString
58 -- * Special ByteStrings
59 inits, -- :: ByteString -> [ByteString]
60 tails, -- :: ByteString -> [ByteString]
61 elems, -- :: ByteString -> [ByteString]
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 reverse, -- :: ByteString -> ByteString
66 intersperse, -- :: Word8 -> ByteString -> ByteString
67 transpose, -- :: [ByteString] -> [ByteString]
69 -- * Reducing 'ByteString's
70 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
71 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
84 -- * Generating and unfolding ByteStrings
85 replicate, -- :: Int -> Word8 -> ByteString
86 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
90 -- ** Breaking strings
91 take, -- :: Int -> ByteString -> ByteString
92 drop, -- :: Int -> ByteString -> ByteString
93 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
94 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
95 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
96 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
97 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
98 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
100 -- ** Breaking and dropping on specific bytes
101 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
102 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
103 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
105 -- ** Breaking into many substrings
106 split, -- :: Word8 -> ByteString -> [ByteString]
107 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
108 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
110 -- ** Joining strings
111 join, -- :: ByteString -> [ByteString] -> ByteString
112 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
114 -- * Indexing ByteStrings
115 index, -- :: ByteString -> Int -> Word8
116 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
117 elemIndices, -- :: Word8 -> ByteString -> [Int]
118 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
119 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
120 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
121 count, -- :: Word8 -> ByteString -> Int
123 -- * Ordered ByteStrings
124 sort, -- :: ByteString -> ByteString
126 -- * Searching ByteStrings
128 -- ** Searching by equality
129 -- | These functions use memchr(3) to efficiently search the ByteString
131 elem, -- :: Word8 -> ByteString -> Bool
132 notElem, -- :: Word8 -> ByteString -> Bool
133 filterByte, -- :: Word8 -> ByteString -> ByteString
134 filterNotByte, -- :: Word8 -> ByteString -> ByteString
136 -- ** Searching with a predicate
137 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
138 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
140 -- ** Prefixes and suffixes
141 -- | These functions use memcmp(3) to efficiently compare substrings
142 isPrefixOf, -- :: ByteString -> ByteString -> Bool
143 isSuffixOf, -- :: ByteString -> ByteString -> Bool
145 -- ** Search for arbitrary substrings
146 isSubstringOf, -- :: ByteString -> ByteString -> Bool
147 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
148 findSubstrings, -- :: ByteString -> ByteString -> [Int]
150 -- * Zipping and unzipping ByteStrings
151 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
152 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
153 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
155 -- * Unchecked access
156 unsafeHead, -- :: ByteString -> Word8
157 unsafeTail, -- :: ByteString -> ByteString
158 unsafeIndex, -- :: ByteString -> Int -> Word8
160 -- * Low level introduction and elimination
161 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
162 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
163 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
164 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
165 skipIndex, -- :: ByteString -> Int
167 -- ** Packing CStrings and pointers
168 packCString, -- :: CString -> ByteString
169 packCStringLen, -- :: CString -> ByteString
170 packMallocCString, -- :: CString -> ByteString
172 #if defined(__GLASGOW_HASKELL__)
173 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
174 packAddress, -- :: Addr# -> ByteString
175 unsafePackAddress, -- :: Int -> Addr# -> ByteString
176 unsafeFinalize, -- :: ByteString -> IO ()
179 -- ** Using ByteStrings as CStrings
180 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
181 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
182 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
184 -- ** Copying ByteStrings
185 -- | These functions perform memcpy(3) operations
186 copy, -- :: ByteString -> ByteString
187 copyCString, -- :: CString -> ByteString
188 copyCStringLen, -- :: CStringLen -> ByteString
190 -- * I\/O with @ByteString@s
192 -- ** Standard input and output
194 #if defined(__GLASGOW_HASKELL__)
195 getLine, -- :: IO ByteString
197 getContents, -- :: IO ByteString
198 putStr, -- :: ByteString -> IO ()
199 putStrLn, -- :: ByteString -> IO ()
202 readFile, -- :: FilePath -> IO ByteString
203 writeFile, -- :: FilePath -> ByteString -> IO ()
204 -- mmapFile, -- :: FilePath -> IO ByteString
206 -- ** I\/O with Handles
207 #if defined(__GLASGOW_HASKELL__)
208 getArgs, -- :: IO [ByteString]
209 hGetLine, -- :: Handle -> IO ByteString
210 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
212 hGetContents, -- :: Handle -> IO ByteString
213 hGet, -- :: Handle -> Int -> IO ByteString
214 hPut, -- :: Handle -> ByteString -> IO ()
216 #if defined(__GLASGOW_HASKELL__)
218 unpackList, -- eek, otherwise it gets thrown away by the simplifier
223 import qualified Prelude as P
224 import Prelude hiding (reverse,head,tail,last,init,null
225 ,length,map,lines,foldl,foldr,unlines
226 ,concat,any,take,drop,splitAt,takeWhile
227 ,dropWhile,span,break,elem,filter,maximum
228 ,minimum,all,concatMap,foldl1,foldr1
229 ,readFile,writeFile,replicate
230 ,getContents,getLine,putStr,putStrLn
231 ,zip,zipWith,unzip,notElem)
233 import qualified Data.List as List
236 import Data.Word (Word8)
237 import Data.Maybe (listToMaybe)
238 import Data.Array (listArray)
239 import qualified Data.Array as Array ((!))
241 import Control.Exception (bracket)
243 import Foreign.C.String (CString, CStringLen)
244 import Foreign.C.Types (CSize, CInt)
245 import Foreign.ForeignPtr
246 import Foreign.Marshal.Array
248 import Foreign.Storable (Storable(..))
250 import System.IO (stdin,stdout,hClose,hFileSize
251 ,hGetBuf,hPutBuf,openBinaryFile
254 #if !defined(__GLASGOW_HASKELL__)
255 import System.IO.Unsafe
258 #if defined(__GLASGOW_HASKELL__)
260 import Data.Generics (Data(..), Typeable(..))
262 import System.IO (hGetBufNonBlocking)
263 import System.IO.Error (isEOFError)
265 import Foreign.Marshal (alloca)
266 import qualified Foreign.Concurrent as FC (newForeignPtr)
269 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
270 import GHC.Base (build, unsafeChr)
271 import GHC.Word hiding (Word8)
272 import GHC.Ptr (Ptr(..))
273 import GHC.ST (ST(..))
278 -- CFILES stuff is Hugs only
279 {-# CFILES cbits/fpstring.c #-}
281 -- -----------------------------------------------------------------------------
283 -- Useful macros, until we have bang patterns
286 #define STRICT1(f) f a | a `seq` False = undefined
287 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
288 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
289 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
290 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
292 -- -----------------------------------------------------------------------------
294 -- | A space-efficient representation of a Word8 vector, supporting many
295 -- efficient operations. A 'ByteString' contains 8-bit characters only.
297 -- Instances of Eq, Ord, Read, Show, Data, Typeable
299 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
303 #if defined(__GLASGOW_HASKELL__)
304 deriving (Data, Typeable)
307 instance Eq ByteString
310 instance Ord ByteString
311 where compare = compareBytes
313 instance Show ByteString where
314 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
316 instance Read ByteString where
317 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
320 instance Arbitrary PackedString where
321 arbitrary = P.pack `fmap` arbitrary
322 coarbitrary s = coarbitrary (P.unpack s)
325 -- | /O(n)/ Equality on the 'ByteString' type.
326 eq :: ByteString -> ByteString -> Bool
327 eq a b = (compareBytes a b) == EQ
330 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
331 compareBytes :: ByteString -> ByteString -> Ordering
332 compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
333 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
334 withForeignPtr x1 $ \p1 ->
335 withForeignPtr x2 $ \p2 -> do
336 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
337 return $ case i `compare` 0 of
338 EQ -> l1 `compare` l2
340 {-# INLINE compareBytes #-}
344 -- About 4x slower over 32M
346 compareBytes :: ByteString -> ByteString -> Ordering
347 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
348 withForeignPtr fp1 $ \p1 ->
349 withForeignPtr fp2 $ \p2 ->
350 cmp (p1 `plusPtr` off1)
351 (p2 `plusPtr` off2) 0 len1 len2
353 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
355 cmp p1 p2 n len1 len2
356 | n == len1 = if n == len2 then return EQ else return LT
357 | n == len2 = return GT
359 (a :: Word8) <- peekByteOff p1 n
360 (b :: Word8) <- peekByteOff p2 n
361 case a `compare` b of
362 EQ -> cmp p1 p2 (n+1) len1 len2
365 {-# INLINE compareBytes #-}
368 -- -----------------------------------------------------------------------------
369 -- Introducing and eliminating 'ByteString's
371 -- | /O(1)/ The empty 'ByteString'
373 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
374 {-# NOINLINE empty #-}
376 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
377 packByte :: Word8 -> ByteString
378 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
379 withForeignPtr fp $ \p -> poke p c
381 {-# NOINLINE packByte #-}
383 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
385 -- For applications with large numbers of string literals, pack can be a
386 -- bottleneck. In such cases, consider using packAddress (GHC only).
387 pack :: [Word8] -> ByteString
389 #if !defined(__GLASGOW_HASKELL__)
391 pack str = create (P.length str) $ \p -> go p str
394 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
396 #else /* hack away */
398 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
400 go _ _ [] = return ()
401 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
403 writeByte p i c = ST $ \s# ->
404 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
408 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
409 unpack :: ByteString -> [Word8]
411 #if !defined(__GLASGOW_HASKELL__)
413 unpack (PS _ _ 0) = []
414 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
415 go (p `plusPtr` s) (l - 1) []
418 go p 0 acc = peek p >>= \e -> return (e : acc)
419 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
420 {-# INLINE unpack #-}
424 unpack ps = build (unpackFoldr ps)
425 {-# INLINE unpack #-}
427 unpackList :: ByteString -> [Word8]
428 unpackList (PS fp off len) = withPtr fp $ \p -> do
430 loop _ (-1) acc = return acc
433 loop q (n-1) (a : acc)
434 loop (p `plusPtr` off) (len-1) []
437 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
440 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
441 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
443 loop _ (-1) acc = return acc
446 loop q (n-1) (a `f` acc)
447 loop (p `plusPtr` off) (len-1) ch
448 {-# INLINE [0] unpackFoldr #-}
452 ------------------------------------------------------------------------
454 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
455 -- conversion function
456 packWith :: (a -> Word8) -> [a] -> ByteString
457 packWith k str = create (P.length str) $ \p -> go p str
461 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
462 {-# INLINE packWith #-}
463 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
465 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
466 unpackWith :: (Word8 -> a) -> ByteString -> [a]
467 unpackWith _ (PS _ _ 0) = []
468 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
469 go (p `plusPtr` s) (l - 1) []
472 go p 0 acc = peek p >>= \e -> return (k e : acc)
473 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
474 {-# INLINE unpackWith #-}
475 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
477 -- ---------------------------------------------------------------------
480 -- | /O(1)/ Test whether a ByteString is empty.
481 null :: ByteString -> Bool
482 null (PS _ _ l) = l == 0
485 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
486 length :: ByteString -> Int
487 length (PS _ _ l) = l
488 {-# INLINE length #-}
490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
491 -- complexity, as it requires a memcpy.
492 cons :: Word8 -> ByteString -> ByteString
493 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
494 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
499 snoc :: ByteString -> Word8 -> ByteString
500 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
501 memcpy p (f `plusPtr` s) l
502 poke (p `plusPtr` l) c
505 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
506 head :: ByteString -> Word8
508 | null ps = errorEmptyList "head"
509 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
512 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
513 tail :: ByteString -> ByteString
515 | l <= 0 = errorEmptyList "tail"
516 | otherwise = PS p (s+1) (l-1)
519 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
520 last :: ByteString -> Word8
522 | null ps = errorEmptyList "last"
523 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
527 init :: ByteString -> ByteString
529 | l <= 0 = errorEmptyList "init"
530 | otherwise = PS p s (l-1)
533 -- | /O(n)/ Append two ByteStrings
534 append :: ByteString -> ByteString -> ByteString
535 append xs ys | null xs = ys
537 | otherwise = concat [xs,ys]
538 {-# INLINE append #-}
542 -- About 30% faster, but allocating in a big chunk isn't good for memory use
544 append :: ByteString -> ByteString -> ByteString
545 append xs@(PS ffp s l) ys@(PS fgp t m)
548 | otherwise = create len $ \ptr ->
549 withForeignPtr ffp $ \fp ->
550 withForeignPtr fgp $ \gp -> do
551 memcpy ptr (fp `plusPtr` s) l
552 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
553 where len = length xs + length ys
556 -- ---------------------------------------------------------------------
559 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
562 map :: (Word8 -> Word8) -> ByteString -> ByteString
563 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
564 new_fp <- mallocByteString len
565 withForeignPtr new_fp $ \new_p -> do
566 map_ f (len-1) (p `plusPtr` start) new_p
567 return (PS new_fp 0 len)
570 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
575 x <- peekByteOff p1 n
576 pokeByteOff p2 n (f x)
580 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
581 reverse :: ByteString -> ByteString
582 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
583 c_reverse p (f `plusPtr` s) l
586 reverse = pack . P.reverse . unpack
589 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
590 -- 'ByteString' and \`intersperses\' that byte between the elements of
591 -- the 'ByteString'. It is analogous to the intersperse function on
593 intersperse :: Word8 -> ByteString -> ByteString
594 intersperse c ps@(PS x s l)
596 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
597 c_intersperse p (f `plusPtr` s) l c
600 intersperse c = pack . List.intersperse c . unpack
603 -- | The 'transpose' function transposes the rows and columns of its
604 -- 'ByteString' argument.
605 transpose :: [ByteString] -> [ByteString]
606 transpose ps = P.map pack (List.transpose (P.map unpack ps))
608 -- ---------------------------------------------------------------------
609 -- Reducing 'ByteString's
611 -- | 'foldl', applied to a binary operator, a starting value (typically
612 -- the left-identity of the operator), and a ByteString, reduces the
613 -- ByteString using the binary operator, from left to right.
614 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
615 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
616 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
619 lgo z p q | p == q = return z
620 | otherwise = do c <- peek p
621 lgo (f z c) (p `plusPtr` 1) q
623 -- | 'foldr', applied to a binary operator, a starting value
624 -- (typically the right-identity of the operator), and a ByteString,
625 -- reduces the ByteString using the binary operator, from right to left.
626 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
627 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
628 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
631 go p q | p == q = return z
632 | otherwise = do c <- peek p
633 ws <- go (p `plusPtr` 1) q
636 -- | 'foldl1' is a variant of 'foldl' that has no starting value
637 -- argument, and thus must be applied to non-empty 'ByteStrings'.
638 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
640 | null ps = errorEmptyList "foldl1"
641 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
643 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
644 -- and thus must be applied to non-empty 'ByteString's
645 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
647 | null ps = errorEmptyList "foldr1"
648 | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
650 -- ---------------------------------------------------------------------
653 -- | /O(n)/ Concatenate a list of ByteStrings.
654 concat :: [ByteString] -> ByteString
657 concat xs = inlinePerformIO $ do
658 let start_size = 1024
659 p <- mallocArray start_size
662 where f ptr len _ [] = do
663 ptr' <- reallocArray ptr (len+1)
664 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
665 fp <- newForeignFreePtr ptr'
668 f ptr len to_go pss@(PS p s l:pss')
669 | l <= to_go = do withForeignPtr p $ \pf ->
670 memcpy (ptr `plusPtr` len)
672 f ptr (len + l) (to_go - l) pss'
674 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
675 ptr' <- reallocArray ptr new_total
676 f ptr' len (new_total - len) pss
678 -- | Map a function over a 'ByteString' and concatenate the results
679 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
680 concatMap f = foldr (append . f) empty
682 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
683 -- any element of the 'ByteString' satisfies the predicate.
684 any :: (Word8 -> Bool) -> ByteString -> Bool
685 any _ (PS _ _ 0) = False
686 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
687 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
690 go p q | p == q = return False
691 | otherwise = do c <- peek p
692 if f c then return True
693 else go (p `plusPtr` 1) q
695 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
696 -- if all elements of the 'ByteString' satisfy the predicate.
697 all :: (Word8 -> Bool) -> ByteString -> Bool
698 all _ (PS _ _ 0) = True
699 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
700 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
703 go p q | p == q = return True -- end of list
704 | otherwise = do c <- peek p
706 then go (p `plusPtr` 1) q
709 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
710 maximum :: ByteString -> Word8
711 maximum xs@(PS x s l)
712 | null xs = errorEmptyList "maximum"
713 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
714 return $ c_maximum (p `plusPtr` s) l
715 {-# INLINE maximum #-}
717 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
718 minimum :: ByteString -> Word8
719 minimum xs@(PS x s l)
720 | null xs = errorEmptyList "minimum"
721 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
722 return $ c_minimum (p `plusPtr` s) l
723 {-# INLINE minimum #-}
726 maximum xs@(PS x s l)
727 | null xs = errorEmptyList "maximum"
728 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
730 maximum_ (p `plusPtr` s) 0 l w
732 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
736 | otherwise = do w <- peekByteOff ptr n
737 maximum_ ptr (n+1) m (if w > c then w else c)
739 minimum xs@(PS x s l)
740 | null xs = errorEmptyList "minimum"
741 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
743 minimum_ (p `plusPtr` s) 0 l w
745 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
749 | otherwise = do w <- peekByteOff ptr n
750 minimum_ ptr (n+1) m (if w < c then w else c)
753 -- | /O(n)/ map Word8 functions, provided with the index at each position
754 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
755 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
756 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
758 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
760 go n f t p | f == p = return ()
761 | otherwise = do w <- peek f
763 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
765 -- ---------------------------------------------------------------------
766 -- Unfolds and replicates
768 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
769 -- the value of every element. The following holds:
771 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
773 -- This implemenation uses @memset(3)@
774 replicate :: Int -> Word8 -> ByteString
775 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
779 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
783 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
786 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
787 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
788 -- the element and returns 'Nothing' if it is done producing the
789 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
790 -- prepending to the ByteString and @b@ is used as the next element in a
793 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
794 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
795 -- final size of the ByteString as an argument. 'cons' can then be
796 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
797 -- linear complexity. The depth of the recursion is limited to this
798 -- size, but may be less. For lazy, infinite unfoldr, use
799 -- 'Data.List.unfoldr' (from 'Data.List').
803 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
805 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
807 -- > unfoldrN n == take n $ List.unfoldr
808 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
809 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
812 go q c n | n == i = return n -- stop if we reach `i'
813 | otherwise = case f c of
817 go (q `plusPtr` 1) new_c (n+1)
819 -- ---------------------------------------------------------------------
822 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
823 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
824 take :: Int -> ByteString -> ByteString
828 | otherwise = PS x s n
831 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
832 -- elements, or @[]@ if @n > 'length' xs@.
833 drop :: Int -> ByteString -> ByteString
837 | otherwise = PS x (s+n) (l-n)
840 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
841 splitAt :: Int -> ByteString -> (ByteString, ByteString)
842 splitAt n ps = (take n ps, drop n ps)
843 {-# INLINE splitAt #-}
845 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
846 -- returns the longest prefix (possibly empty) of @xs@ of elements that
848 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
849 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
850 {-# INLINE takeWhile #-}
852 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
853 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
854 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
855 {-# INLINE dropWhile #-}
857 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
858 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
859 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
862 -- | 'breakByte' breaks its ByteString argument at the first occurence
863 -- of the specified byte. It is more efficient than 'break' as it is
864 -- implemented with @memchr(3)@. I.e.
866 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
868 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
869 breakByte c p = case elemIndex c p of
871 Just n -> (take n p, drop n p)
872 {-# INLINE breakByte #-}
874 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
875 -- occurence of @w@. It behaves like 'break', except the delimiter is
876 -- not returned, and @Nothing@ is returned if the delimiter is not in
877 -- the ByteString. I.e.
879 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
881 -- > breakFirst c xs ==
882 -- > let (x,y) = break (== c) xs
883 -- > in if null y then Nothing else Just (x, drop 1 y))
885 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
886 breakFirst c p = case elemIndex c p of
888 Just n -> Just (take n p, drop (n+1) p)
889 {-# INLINE breakFirst #-}
891 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
894 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
896 -- and the following are equivalent:
898 -- > breakLast 'c' "abcdef"
899 -- > let (x,y) = break (=='c') (reverse "abcdef")
900 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
902 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
903 breakLast c p = case elemIndexLast c p of
905 Just n -> Just (take n p, drop (n+1) p)
906 {-# INLINE breakLast #-}
908 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
909 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
910 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
911 span p ps = break (not . p) ps
914 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
917 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
921 -- > spanEnd (not . isSpace) ps
923 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
925 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
926 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
928 -- | /O(n)/ Splits a 'ByteString' into components delimited by
929 -- separators, where the predicate returns True for a separator element.
930 -- The resulting components do not contain the separators. Two adjacent
931 -- separators result in an empty component in the output. eg.
933 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
934 -- > splitWith (=='a') [] == []
936 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
938 #if defined(__GLASGOW_HASKELL__)
939 splitWith _pred (PS _ _ 0) = []
940 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
941 where pred# c# = pred_ (W8# c#)
943 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
944 splitLoop pred' p 0 off' len' fp'
946 splitLoop :: (Word# -> Bool)
952 splitLoop pred' p idx' off' len' fp'
953 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
954 | idx' >= len' = return [PS fp' off' idx']
956 w <- peekElemOff p (off'+idx')
957 if pred' (case w of W8# w# -> w#)
958 then return (PS fp' off' idx' :
959 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
960 else splitLoop pred' p (idx'+1) off' len' fp'
961 {-# INLINE splitWith #-}
964 splitWith _ (PS _ _ 0) = []
965 splitWith p ps = splitWith' p ps
968 splitWith' q qs = if null rest then [chunk]
969 else chunk : splitWith' q (unsafeTail rest)
970 where (chunk,rest) = break q qs
973 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
974 -- argument, consuming the delimiter. I.e.
976 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
977 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
978 -- > split 'x' "x" == ["",""]
982 -- > join [c] . split c == id
983 -- > split == splitWith . (==)
985 -- As for all splitting functions in this library, this function does
986 -- not copy the substrings, it just constructs new 'ByteStrings' that
987 -- are slices of the original.
989 split :: Word8 -> ByteString -> [ByteString]
990 split _ (PS _ _ 0) = []
991 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
992 let ptr = p `plusPtr` s
996 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
998 then return [PS x (s+n) (l-n)]
999 else do let i = q `minusPtr` ptr
1001 return $! PS x (s+n) (i-n) : ls
1003 {-# INLINE split #-}
1006 -- slower. but stays inside Haskell.
1007 split _ (PS _ _ 0) = []
1008 split (W8# w#) (PS fp off len) = splitWith' off len fp
1010 splitWith' off' len' fp' = withPtr fp $ \p ->
1011 splitLoop p 0 off' len' fp'
1013 splitLoop :: Ptr Word8
1014 -> Int -> Int -> Int
1019 splitLoop p idx' off' len' fp'
1020 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1021 | idx' >= len' = return [PS fp' off' idx']
1023 (W8# x#) <- peekElemOff p (off'+idx')
1024 if word2Int# w# ==# word2Int# x#
1025 then return (PS fp' off' idx' :
1026 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1027 else splitLoop p (idx'+1) off' len' fp'
1030 -- | Like 'splitWith', except that sequences of adjacent separators are
1031 -- treated as a single separator. eg.
1033 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1035 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1036 tokens f = P.filter (not.null) . splitWith f
1038 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1039 -- 'ByteString's and concatenates the list after interspersing the first
1040 -- argument between each element of the list.
1041 join :: ByteString -> [ByteString] -> ByteString
1042 join filler pss = concat (splice pss)
1046 splice (x:y:xs) = x:filler:splice (y:xs)
1049 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1050 -- with a char. Around 4 times faster than the generalised join.
1052 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1053 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1054 withForeignPtr ffp $ \fp ->
1055 withForeignPtr fgp $ \gp -> do
1056 memcpy ptr (fp `plusPtr` s) l
1057 poke (ptr `plusPtr` l) c
1058 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1060 len = length f + length g + 1
1061 {-# INLINE joinWithByte #-}
1063 -- ---------------------------------------------------------------------
1064 -- Indexing ByteStrings
1066 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1067 index :: ByteString -> Int -> Word8
1069 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1070 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1071 ++ ", length = " ++ show (length ps)
1072 | otherwise = ps `unsafeIndex` n
1073 {-# INLINE index #-}
1075 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1076 -- element in the given 'ByteString' which is equal to the query
1077 -- element, or 'Nothing' if there is no such element.
1078 -- This implementation uses memchr(3).
1079 elemIndex :: Word8 -> ByteString -> Maybe Int
1080 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1081 let p' = p `plusPtr` s
1082 q = memchr p' c (fromIntegral l)
1083 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1084 {-# INLINE elemIndex #-}
1086 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1087 -- element in the given 'ByteString' which is equal to the query
1088 -- element, or 'Nothing' if there is no such element. The following
1091 -- > elemIndexLast c xs ==
1092 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1094 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1095 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1096 go (p `plusPtr` s) (l-1)
1099 go p i | i < 0 = return Nothing
1100 | otherwise = do ch' <- peekByteOff p i
1102 then return $ Just i
1104 {-# INLINE elemIndexLast #-}
1106 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1107 -- the indices of all elements equal to the query element, in ascending order.
1108 -- This implementation uses memchr(3).
1109 elemIndices :: Word8 -> ByteString -> [Int]
1110 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1111 let ptr = p `plusPtr` s
1115 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1118 else do let i = q `minusPtr` ptr
1125 elemIndices :: Word8 -> ByteString -> [Int]
1126 elemIndices c ps = loop 0 ps
1128 loop _ ps' | null ps' = []
1129 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1130 | otherwise = loop (n+1) (unsafeTail ps')
1133 -- | count returns the number of times its argument appears in the ByteString
1135 -- > count = length . elemIndices
1137 -- But more efficiently than using length on the intermediate list.
1138 count :: Word8 -> ByteString -> Int
1139 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1140 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1141 {-# INLINE count #-}
1145 -- around 30% slower
1147 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1148 go (p `plusPtr` s) (fromIntegral m) 0
1150 go :: Ptr Word8 -> CSize -> Int -> IO Int
1153 let q = memchr p w l
1156 else do let k = fromIntegral $ q `minusPtr` p
1157 go (q `plusPtr` 1) (l-k-1) (i+1)
1160 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1161 -- returns the index of the first element in the ByteString
1162 -- satisfying the predicate.
1163 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1164 findIndex = (listToMaybe .) . findIndices
1166 -- | The 'findIndices' function extends 'findIndex', by returning the
1167 -- indices of all elements satisfying the predicate, in ascending order.
1168 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1169 findIndices p ps = loop 0 ps
1172 loop _ qs | null qs = []
1173 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1174 | otherwise = loop (n+1) (unsafeTail qs)
1176 -- ---------------------------------------------------------------------
1177 -- Searching ByteStrings
1179 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1180 elem :: Word8 -> ByteString -> Bool
1181 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1184 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1185 notElem :: Word8 -> ByteString -> Bool
1186 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1187 {-# INLINE notElem #-}
1190 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1191 -- case of filtering a single byte. It is more efficient to use
1192 -- /filterByte/ in this case.
1194 -- > filterByte == filter . (==)
1196 -- filterByte is around 10x faster, and uses much less space, than its
1197 -- filter equivalent
1198 filterByte :: Word8 -> ByteString -> ByteString
1199 filterByte w ps = replicate (count w ps) w
1202 -- slower than the replicate version
1204 filterByte ch ps@(PS x s l)
1206 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1207 t <- go (f `plusPtr` s) p l
1208 return (t `minusPtr` p) -- actual length
1212 go f t e = do w <- peek f
1214 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1215 else go (f `plusPtr` 1) t (e-1)
1219 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1220 -- case of filtering a single byte out of a list. It is more efficient
1221 -- to use /filterNotByte/ in this case.
1223 -- > filterNotByte == filter . (/=)
1225 -- filterNotByte is around 3x faster, and uses much less space, than its
1226 -- filter equivalent
1227 filterNotByte :: Word8 -> ByteString -> ByteString
1228 filterNotByte ch ps@(PS x s l)
1230 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1231 t <- go (f `plusPtr` s) p l
1232 return (t `minusPtr` p) -- actual length
1236 go f t e = do w <- peek f
1238 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1239 else go (f `plusPtr` 1) t (e-1)
1241 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1242 -- returns a ByteString containing those characters that satisfy the
1244 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1245 filter k ps@(PS x s l)
1247 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1248 t <- go (f `plusPtr` s) p l
1249 return (t `minusPtr` p) -- actual length
1253 go f t e = do w <- peek f
1255 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1256 else go (f `plusPtr` 1) t (e - 1)
1258 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1260 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1261 -- and returns the first element in matching the predicate, or 'Nothing'
1262 -- if there is no such element.
1263 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1264 find p ps = case filter p ps of
1265 q | null q -> Nothing
1266 | otherwise -> Just (unsafeHead q)
1268 -- ---------------------------------------------------------------------
1269 -- Searching for substrings
1271 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1272 -- iff the first is a prefix of the second.
1273 isPrefixOf :: ByteString -> ByteString -> Bool
1274 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1277 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1278 withForeignPtr x2 $ \p2 -> do
1279 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1282 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1283 -- iff the first is a suffix of the second.
1285 -- The following holds:
1287 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1289 -- However, the real implemenation uses memcmp to compare the end of the
1290 -- string only, with no reverse required..
1291 isSuffixOf :: ByteString -> ByteString -> Bool
1292 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1295 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1296 withForeignPtr x2 $ \p2 -> do
1297 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1300 -- | Check whether one string is a substring of another. @isSubstringOf
1301 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1302 isSubstringOf :: ByteString -- ^ String to search for.
1303 -> ByteString -- ^ String to search in.
1305 isSubstringOf p s = not $ P.null $ findSubstrings p s
1307 -- | Get the first index of a substring in another string,
1308 -- or 'Nothing' if the string is not found.
1309 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1310 findSubstring :: ByteString -- ^ String to search for.
1311 -> ByteString -- ^ String to seach in.
1313 findSubstring = (listToMaybe .) . findSubstrings
1315 -- | Find the indexes of all (possibly overlapping) occurances of a
1316 -- substring in a string. This function uses the Knuth-Morris-Pratt
1317 -- string matching algorithm.
1318 findSubstrings :: ByteString -- ^ String to search for.
1319 -> ByteString -- ^ String to seach in.
1322 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1324 patc x = pat `unsafeIndex` x
1325 strc x = str `unsafeIndex` x
1327 -- maybe we should make kmpNext a UArray before using it in search?
1328 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1329 kmpNextL p _ | null p = []
1330 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1332 x = if not (null ps) && unsafeHead ps == patc j'
1333 then kmpNext Array.! j' else j'
1335 search i j = match ++ rest -- i: position in string, j: position in pattern
1336 where match = if j == m then [(i - j)] else []
1337 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1338 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1341 -- ---------------------------------------------------------------------
1344 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1345 -- corresponding pairs of bytes. If one input ByteString is short,
1346 -- excess elements of the longer ByteString are discarded. This is
1347 -- equivalent to a pair of 'unpack' operations.
1348 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1350 | null ps || null qs = []
1351 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1353 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1354 -- the first argument, instead of a tupling function. For example,
1355 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1356 -- corresponding sums.
1357 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1359 | null ps || null qs = []
1360 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1362 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1363 -- ByteStrings. Note that this performs two 'pack' operations.
1364 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1365 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1366 {-# INLINE unzip #-}
1368 -- ---------------------------------------------------------------------
1371 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1372 inits :: ByteString -> [ByteString]
1373 inits (PS x s l) = [PS x s n | n <- [0..l]]
1375 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1376 tails :: ByteString -> [ByteString]
1377 tails p | null p = [empty]
1378 | otherwise = p : tails (unsafeTail p)
1380 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1382 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1383 elems :: ByteString -> [ByteString]
1384 elems (PS _ _ 0) = []
1385 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1386 {-# INLINE elems #-}
1388 -- ---------------------------------------------------------------------
1389 -- ** Ordered 'ByteString's
1391 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1392 sort :: ByteString -> ByteString
1393 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1394 memcpy p (f `plusPtr` s) l
1395 c_qsort p l -- inplace
1398 sort = pack . List.sort . unpack
1401 -- ---------------------------------------------------------------------
1403 -- Extensions to the basic interface
1406 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1407 -- check for the empty case, so there is an obligation on the programmer
1408 -- to provide a proof that the ByteString is non-empty.
1409 unsafeHead :: ByteString -> Word8
1410 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1411 {-# INLINE unsafeHead #-}
1413 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1414 -- check for the empty case. As with 'unsafeHead', the programmer must
1415 -- provide a separate proof that the ByteString is non-empty.
1416 unsafeTail :: ByteString -> ByteString
1417 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1418 {-# INLINE unsafeTail #-}
1420 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1421 -- This omits the bounds check, which means there is an accompanying
1422 -- obligation on the programmer to ensure the bounds are checked in some
1424 unsafeIndex :: ByteString -> Int -> Word8
1425 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1426 {-# INLINE unsafeIndex #-}
1428 -- ---------------------------------------------------------------------
1429 -- Low level constructors
1431 #if defined(__GLASGOW_HASKELL__)
1432 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1433 -- Addr\# (an arbitrary machine address assumed to point outside the
1434 -- garbage-collected heap) into a @ByteString@. A much faster way to
1435 -- create an Addr\# is with an unboxed string literal, than to pack a
1436 -- boxed string. A unboxed string literal is compiled to a static @char
1437 -- []@ by GHC. Establishing the length of the string requires a call to
1438 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1439 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1440 -- if you know the length of the string statically.
1444 -- > literalFS = packAddress "literal"#
1446 packAddress :: Addr# -> ByteString
1447 packAddress addr# = inlinePerformIO $ do
1448 p <- newForeignPtr_ cstr
1449 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1452 {-# INLINE packAddress #-}
1454 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1455 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1456 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1457 -- 'Addr\#' to the string, and the length of the string. Make sure the
1458 -- length is correct, otherwise use the safer 'packAddress' (where the
1459 -- length will be calculated once at runtime).
1460 unsafePackAddress :: Int -> Addr# -> ByteString
1461 unsafePackAddress len addr# = inlinePerformIO $ do
1462 p <- newForeignPtr_ cstr
1464 where cstr = Ptr addr#
1468 -- | /O(1)/ Build a ByteString from a ForeignPtr
1469 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1470 fromForeignPtr fp l = PS fp 0 l
1472 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1473 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1474 toForeignPtr (PS ps s l) = (ps, s, l)
1476 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1477 -- current 'ByteString' from any larger string it was created from, as
1479 skipIndex :: ByteString -> Int
1480 skipIndex (PS _ s _) = s
1481 {-# INLINE skipIndex #-}
1483 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1484 -- finalizer associated to it. The ByteString length is calculated using
1485 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1486 packCString :: CString -> ByteString
1487 packCString cstr = inlinePerformIO $ do
1488 fp <- newForeignPtr_ (castPtr cstr)
1489 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1491 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1492 -- have /no/ finalizer associated with it. This operation has /O(1)/
1493 -- complexity as we already know the final size, so no /strlen(3)/ is
1495 packCStringLen :: CStringLen -> ByteString
1496 packCStringLen (ptr,len) = inlinePerformIO $ do
1497 fp <- newForeignPtr_ (castPtr ptr)
1498 return $ PS fp 0 (fromIntegral len)
1500 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1501 -- have a @free(3)@ finalizer associated to it.
1502 packMallocCString :: CString -> ByteString
1503 packMallocCString cstr = inlinePerformIO $ do
1504 fp <- newForeignFreePtr (castPtr cstr)
1505 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1507 #if defined(__GLASGOW_HASKELL__)
1508 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1509 -- length, and an IO action representing a finalizer. This function is
1510 -- not available on Hugs.
1512 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1513 packCStringFinalizer p l f = do
1514 fp <- FC.newForeignPtr p f
1517 -- | Explicitly run the finaliser associated with a 'ByteString'.
1518 -- Further references to this value may generate invalid memory
1519 -- references. This operation is unsafe, as there may be other
1520 -- 'ByteStrings' referring to the same underlying pages. If you use
1521 -- this, you need to have a proof of some kind that all 'ByteString's
1522 -- ever generated from the underlying byte array are no longer live.
1523 unsafeFinalize :: ByteString -> IO ()
1524 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1528 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1529 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1530 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1531 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1533 alloc = withForeignPtr ps $ \p -> do
1534 buf <- c_malloc (fromIntegral l+1)
1535 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1536 poke (buf `plusPtr` l) (0::Word8)
1537 return $ castPtr buf
1539 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1540 -- Warning: modifying the @CString@ will affect the @ByteString@.
1541 -- Why is this function unsafe? It relies on the null byte at the end of
1542 -- the ByteString to be there. This is /not/ the case if your ByteString
1543 -- has been spliced from a larger string (i.e. with take or drop).
1544 -- Unless you can guarantee the null byte, you should use the safe
1545 -- version, which will copy the string first.
1547 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1548 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1550 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1551 -- This is mainly useful to allow the rest of the data pointed
1552 -- to by the 'ByteString' to be garbage collected, for example
1553 -- if a large string has been read in, and only a small part of it
1554 -- is needed in the rest of the program.
1555 copy :: ByteString -> ByteString
1556 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1558 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1559 -- CString is going to be deallocated from C land.
1560 copyCString :: CString -> ByteString
1561 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1563 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1564 copyCStringLen :: CStringLen -> ByteString
1565 copyCStringLen (cstr, len) = inlinePerformIO $ do
1566 fp <- mallocForeignPtrArray (len+1)
1567 withForeignPtr fp $ \p -> do
1568 memcpy p (castPtr cstr) len
1569 poke (p `plusPtr` len) (0 :: Word8)
1570 return $! PS fp 0 len
1572 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1573 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1574 -- This is analogous to unsafeUseAsCString, and comes with the same
1575 -- safety requirements.
1577 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1578 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1580 -- | Given the maximum size needed and a function to make the contents
1581 -- of a ByteString, generate makes the 'ByteString'. The generating
1582 -- function is required to return the actual final size (<= the maximum
1583 -- size), and the resulting byte array is realloced to this size. The
1584 -- string is padded at the end with a null byte.
1586 -- generate is the main mechanism for creating custom, efficient
1587 -- ByteString functions, using Haskell or C functions to fill the space.
1589 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1593 p' <- reallocArray p (i'+1)
1594 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1595 fp <- newForeignFreePtr p'
1598 -- ---------------------------------------------------------------------
1601 #if defined(__GLASGOW_HASKELL__)
1603 -- | getLine, read a line from stdin.
1604 getLine :: IO ByteString
1605 getLine = hGetLine stdin
1607 -- | hGetLine. read a ByteString from a handle
1608 hGetLine :: Handle -> IO ByteString
1609 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1610 case haBufferMode handle_ of
1611 NoBuffering -> error "no buffering"
1612 _other -> hGetLineBuffered handle_
1615 hGetLineBuffered handle_ = do
1616 let ref = haBuffer handle_
1617 buf <- readIORef ref
1618 hGetLineBufferedLoop handle_ ref buf 0 []
1620 hGetLineBufferedLoop handle_ ref
1621 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1623 off <- findEOL r w raw
1624 let new_len = len + off - r
1625 xs <- mkPS raw r off
1627 -- if eol == True, then off is the offset of the '\n'
1628 -- otherwise off == w and the buffer is now empty.
1630 then do if (w == off + 1)
1631 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1632 else writeIORef ref buf{ bufRPtr = off + 1 }
1633 mkBigPS new_len (xs:xss)
1635 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1636 buf{ bufWPtr=0, bufRPtr=0 }
1638 -- Nothing indicates we caught an EOF, and we may have a
1639 -- partial line to return.
1641 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1643 then mkBigPS new_len (xs:xss)
1646 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1648 -- find the end-of-line character, if there is one
1652 (c,r') <- readCharFromBuffer raw r
1654 then return r -- NB. not r': don't include the '\n'
1655 else findEOL r' w raw
1657 maybeFillReadBuffer fd is_line is_stream buf = catch
1658 (do buf' <- fillReadBuffer fd is_line is_stream buf
1660 (\e -> if isEOFError e then return Nothing else ioError e)
1662 -- TODO, rewrite to use normal memcpy
1663 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1664 mkPS buf start end = do
1665 let len = end - start
1666 fp <- mallocByteString (len `quot` 8)
1667 withForeignPtr fp $ \p -> do
1668 memcpy_ptr_baoff p buf start (fromIntegral len)
1669 return (PS fp 0 len)
1671 mkBigPS :: Int -> [ByteString] -> IO ByteString
1672 mkBigPS _ [ps] = return ps
1673 mkBigPS _ pss = return $! concat (P.reverse pss)
1677 -- ---------------------------------------------------------------------
1680 -- | Outputs a 'ByteString' to the specified 'Handle'.
1681 hPut :: Handle -> ByteString -> IO ()
1682 hPut _ (PS _ _ 0) = return ()
1683 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1684 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1686 -- | Write a ByteString to stdout
1687 putStr :: ByteString -> IO ()
1688 putStr = hPut stdout
1690 -- | Write a ByteString to stdout, appending a newline byte
1691 putStrLn :: ByteString -> IO ()
1692 putStrLn ps = hPut stdout ps >> hPut stdout nl
1693 where nl = packByte 0x0a
1695 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1696 -- is far more efficient than reading the characters into a 'String'
1697 -- and then using 'pack'.
1698 hGet :: Handle -> Int -> IO ByteString
1699 hGet _ 0 = return empty
1700 hGet h i = do fp <- mallocByteString i
1701 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1704 #if defined(__GLASGOW_HASKELL__)
1705 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1706 -- waiting for data to become available, instead it returns only whatever data
1708 hGetNonBlocking :: Handle -> Int -> IO ByteString
1709 hGetNonBlocking _ 0 = return empty
1710 hGetNonBlocking h i = do
1711 fp <- mallocByteString i
1712 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1716 -- | Read entire handle contents into a 'ByteString'.
1718 -- As with 'hGet', the string representation in the file is assumed to
1721 hGetContents :: Handle -> IO ByteString
1723 let start_size = 1024
1724 p <- mallocArray start_size
1725 i <- hGetBuf h p start_size
1727 then do p' <- reallocArray p i
1728 fp <- newForeignFreePtr p'
1734 p' <- reallocArray p s'
1735 i <- hGetBuf h (p' `plusPtr` s) s
1737 then do let i' = s + i
1738 p'' <- reallocArray p' i'
1739 fp <- newForeignFreePtr p''
1743 -- | getContents. Equivalent to hGetContents stdin
1744 getContents :: IO ByteString
1745 getContents = hGetContents stdin
1747 -- | Read an entire file directly into a 'ByteString'. This is far more
1748 -- efficient than reading the characters into a 'String' and then using
1749 -- 'pack'. It also may be more efficient than opening the file and
1750 -- reading it using hGet.
1751 readFile :: FilePath -> IO ByteString
1753 h <- openBinaryFile f ReadMode
1755 s <- hGet h $ fromIntegral l
1759 -- | Write a 'ByteString' to a file.
1760 writeFile :: FilePath -> ByteString -> IO ()
1762 h <- openBinaryFile f WriteMode
1768 -- Disable until we can move it into a portable .hsc file
1771 -- | Like readFile, this reads an entire file directly into a
1772 -- 'ByteString', but it is even more efficient. It involves directly
1773 -- mapping the file to memory. This has the advantage that the contents
1774 -- of the file never need to be copied. Also, under memory pressure the
1775 -- page may simply be discarded, while in the case of readFile it would
1776 -- need to be written to swap. If you read many small files, mmapFile
1777 -- will be less memory-efficient than readFile, since each mmapFile
1778 -- takes up a separate page of memory. Also, you can run into bus
1779 -- errors if the file is modified. As with 'readFile', the string
1780 -- representation in the file is assumed to be ISO-8859-1.
1782 -- On systems without mmap, this is the same as a readFile.
1784 mmapFile :: FilePath -> IO ByteString
1785 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1787 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1789 h <- openBinaryFile f ReadMode
1790 l <- fromIntegral `fmap` hFileSize h
1791 -- Don't bother mmaping small files because each mmapped file takes up
1792 -- at least one full VM block.
1794 then do thefp <- mallocByteString l
1795 withForeignPtr thefp $ \p-> hGetBuf h p l
1800 fd <- fromIntegral `fmap` handleToFd h
1802 fp <- if p == nullPtr
1803 then do thefp <- mallocByteString l
1804 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1807 -- The munmap leads to crashes on OpenBSD.
1808 -- maybe there's a use after unmap in there somewhere?
1809 #if !defined(__OpenBSD__)
1810 let unmap = c_munmap p l >> return ()
1812 let unmap = return ()
1814 fp <- FC.newForeignPtr p unmap
1819 where mmap_limit = 16*1024
1822 #if defined(__GLASGOW_HASKELL__)
1824 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1826 getArgs :: IO [ByteString]
1828 alloca $ \ p_argc ->
1829 alloca $ \ p_argv -> do
1830 getProgArgv p_argc p_argv
1831 p <- fromIntegral `fmap` peek p_argc
1833 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1836 -- ---------------------------------------------------------------------
1837 -- Internal utilities
1839 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1840 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1841 -- convenience for ByteString construction.
1842 w2c :: Word8 -> Char
1843 #if !defined(__GLASGOW_HASKELL__)
1844 w2c = chr . fromIntegral
1846 w2c = unsafeChr . fromIntegral
1850 c2w :: Char -> Word8
1851 c2w = fromIntegral . ord
1854 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1855 -- is padded with a null byte.
1856 mallocByteString :: Int -> IO (ForeignPtr Word8)
1857 mallocByteString l = do
1858 fp <- mallocForeignPtrArray (l+1)
1859 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1862 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1863 -- argument gives the final size of the ByteString. Unlike 'generate'
1864 -- the ByteString is no reallocated if the final size is less than the
1866 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1867 create l write_ptr = inlinePerformIO $ do
1868 fp <- mallocByteString (l+1)
1869 withForeignPtr fp $ \p -> write_ptr p
1871 {-# INLINE create #-}
1873 -- | Perform an operation with a temporary ByteString
1874 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1875 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1876 {-# INLINE withPtr #-}
1878 -- Common up near identical calls to `error' to reduce the number
1879 -- constant strings created when compiled:
1880 errorEmptyList :: String -> a
1881 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1882 {-# INLINE errorEmptyList #-}
1884 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1885 -- of the string if no element is found, rather than Nothing.
1886 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1887 STRICT2(findIndexOrEnd)
1890 | f (unsafeHead ps) = 0
1891 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
1892 {-# INLINE findIndexOrEnd #-}
1894 -- Find from the end of the string using predicate
1895 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1896 STRICT2(findFromEndUntil)
1897 findFromEndUntil f ps@(PS x s l) =
1899 else if f (last ps) then l
1900 else findFromEndUntil f (PS x s (l-1))
1902 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1903 -- it exposes lots of things to further inlining
1905 {-# INLINE inlinePerformIO #-}
1906 inlinePerformIO :: IO a -> a
1907 #if defined(__GLASGOW_HASKELL__)
1908 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1910 inlinePerformIO = unsafePerformIO
1913 {-# INLINE newForeignFreePtr #-}
1914 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1915 #if defined(__GLASGOW_HASKELL__)
1916 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1918 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1921 -- ---------------------------------------------------------------------
1923 -- Standard C functions
1926 foreign import ccall unsafe "string.h strlen" c_strlen
1929 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1930 :: CInt -> IO (Ptr Word8)
1932 foreign import ccall unsafe "static stdlib.h free" c_free
1933 :: Ptr Word8 -> IO ()
1935 #if !defined(__GLASGOW_HASKELL__)
1936 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1937 :: FunPtr (Ptr Word8 -> IO ())
1940 foreign import ccall unsafe "string.h memset" memset
1941 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1943 foreign import ccall unsafe "string.h memchr" memchr
1944 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1946 foreign import ccall unsafe "string.h memcmp" memcmp
1947 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1949 foreign import ccall unsafe "string.h memcpy" memcpy
1950 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1952 -- ---------------------------------------------------------------------
1957 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1958 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1960 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1961 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1963 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1964 :: Ptr Word8 -> Int -> Word8
1966 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1967 :: Ptr Word8 -> Int -> Word8
1969 foreign import ccall unsafe "static fpstring.h count" c_count
1970 :: Ptr Word8 -> Int -> Word8 -> Int
1972 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1973 :: Ptr Word8 -> Int -> IO ()
1975 -- ---------------------------------------------------------------------
1979 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
1980 :: Int -> Int -> IO (Ptr Word8)
1982 foreign import ccall unsafe "static unistd.h close" c_close
1985 # if !defined(__OpenBSD__)
1986 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
1987 :: Ptr Word8 -> Int -> IO Int
1991 -- ---------------------------------------------------------------------
1992 -- Internal GHC Haskell magic
1994 #if defined(__GLASGOW_HASKELL__)
1995 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1996 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
1998 foreign import ccall unsafe "__hscore_memcpy_src_off"
1999 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())