1 {-# OPTIONS_GHC -cpp -fffi #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- Maintainer : dons@cse.unsw.edu.au
12 -- Stability : experimental
13 -- Portability : portable, requires ffi and cpp
14 -- Tested with : GHC 6.4.1 and Hugs March 2005
18 -- | A time and space-efficient implementation of byte vectors using
19 -- packed Word8 arrays, suitable for high performance use, both in terms
20 -- of large data quantities, or high speed requirements. Byte vectors
21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
22 -- be passed between C and Haskell with little effort.
24 -- This module is intended to be imported @qualified@, to avoid name
25 -- clashes with Prelude functions. eg.
27 -- > import qualified Data.ByteString as B
29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
30 -- UArray by Simon Marlow. Rewritten to support slices and use
31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
34 module Data.ByteString (
36 -- * The @ByteString@ type
37 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
39 -- * Introducing and eliminating 'ByteString's
40 empty, -- :: ByteString
41 packByte, -- :: Word8 -> ByteString
42 pack, -- :: [Word8] -> ByteString
43 unpack, -- :: ByteString -> [Word8]
44 packWith, -- :: (a -> Word8) -> [a] -> ByteString
45 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: Word8 -> ByteString -> ByteString
50 null, -- :: ByteString -> Bool
51 length, -- :: ByteString -> Int
52 head, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 last, -- :: ByteString -> Word8
55 init, -- :: ByteString -> ByteString
56 append, -- :: ByteString -> ByteString -> ByteString
58 -- * Special ByteStrings
59 inits, -- :: ByteString -> [ByteString]
60 tails, -- :: ByteString -> [ByteString]
61 elems, -- :: ByteString -> [ByteString]
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 reverse, -- :: ByteString -> ByteString
66 intersperse, -- :: Word8 -> ByteString -> ByteString
67 transpose, -- :: [ByteString] -> [ByteString]
69 -- * Reducing 'ByteString's
70 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
71 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
84 -- * Generating and unfolding ByteStrings
85 replicate, -- :: Int -> Word8 -> ByteString
86 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
90 -- ** Breaking strings
91 take, -- :: Int -> ByteString -> ByteString
92 drop, -- :: Int -> ByteString -> ByteString
93 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
94 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
95 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
96 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
97 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
98 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
100 -- ** Breaking and dropping on specific bytes
101 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
102 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
103 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
105 -- ** Breaking into many substrings
106 split, -- :: Word8 -> ByteString -> [ByteString]
107 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
108 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
110 -- ** Joining strings
111 join, -- :: ByteString -> [ByteString] -> ByteString
112 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
114 -- * Indexing ByteStrings
115 index, -- :: ByteString -> Int -> Word8
116 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
117 elemIndices, -- :: Word8 -> ByteString -> [Int]
118 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
119 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
120 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
121 count, -- :: Word8 -> ByteString -> Int
123 -- * Ordered ByteStrings
124 sort, -- :: ByteString -> ByteString
126 -- * Searching ByteStrings
128 -- ** Searching by equality
129 -- | These functions use memchr(3) to efficiently search the ByteString
131 elem, -- :: Word8 -> ByteString -> Bool
132 notElem, -- :: Word8 -> ByteString -> Bool
133 filterByte, -- :: Word8 -> ByteString -> ByteString
134 filterNotByte, -- :: Word8 -> ByteString -> ByteString
136 -- ** Searching with a predicate
137 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
138 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
140 -- ** Prefixes and suffixes
141 -- | These functions use memcmp(3) to efficiently compare substrings
142 isPrefixOf, -- :: ByteString -> ByteString -> Bool
143 isSuffixOf, -- :: ByteString -> ByteString -> Bool
145 -- ** Search for arbitrary substrings
146 isSubstringOf, -- :: ByteString -> ByteString -> Bool
147 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
148 findSubstrings, -- :: ByteString -> ByteString -> [Int]
150 -- * Zipping and unzipping ByteStrings
151 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
152 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
153 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
155 -- * Unchecked access
156 unsafeHead, -- :: ByteString -> Word8
157 unsafeTail, -- :: ByteString -> ByteString
158 unsafeIndex, -- :: ByteString -> Int -> Word8
160 -- * Low level introduction and elimination
161 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
162 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
163 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
164 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
165 skipIndex, -- :: ByteString -> Int
167 -- ** Packing CStrings and pointers
168 packCString, -- :: CString -> ByteString
169 packCStringLen, -- :: CString -> ByteString
170 packMallocCString, -- :: CString -> ByteString
172 #if defined(__GLASGOW_HASKELL__)
173 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
174 packAddress, -- :: Addr# -> ByteString
175 unsafePackAddress, -- :: Int -> Addr# -> ByteString
176 unsafeFinalize, -- :: ByteString -> IO ()
179 -- ** Using ByteStrings as CStrings
180 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
181 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
182 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
184 -- ** Copying ByteStrings
185 -- | These functions perform memcpy(3) operations
186 copy, -- :: ByteString -> ByteString
187 copyCString, -- :: CString -> ByteString
188 copyCStringLen, -- :: CStringLen -> ByteString
190 -- * I\/O with @ByteString@s
192 -- ** Standard input and output
194 #if defined(__GLASGOW_HASKELL__)
195 getLine, -- :: IO ByteString
197 getContents, -- :: IO ByteString
198 putStr, -- :: ByteString -> IO ()
199 putStrLn, -- :: ByteString -> IO ()
202 readFile, -- :: FilePath -> IO ByteString
203 writeFile, -- :: FilePath -> ByteString -> IO ()
204 -- mmapFile, -- :: FilePath -> IO ByteString
206 -- ** I\/O with Handles
207 #if defined(__GLASGOW_HASKELL__)
208 getArgs, -- :: IO [ByteString]
209 hGetLine, -- :: Handle -> IO ByteString
210 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
212 hGetContents, -- :: Handle -> IO ByteString
213 hGet, -- :: Handle -> Int -> IO ByteString
214 hPut, -- :: Handle -> ByteString -> IO ()
216 #if defined(__GLASGOW_HASKELL__)
218 unpackList, -- eek, otherwise it gets thrown away by the simplifier
223 import qualified Prelude as P
224 import Prelude hiding (reverse,head,tail,last,init,null
225 ,length,map,lines,foldl,foldr,unlines
226 ,concat,any,take,drop,splitAt,takeWhile
227 ,dropWhile,span,break,elem,filter,maximum
228 ,minimum,all,concatMap,foldl1,foldr1
229 ,readFile,writeFile,replicate
230 ,getContents,getLine,putStr,putStrLn
231 ,zip,zipWith,unzip,notElem)
233 import qualified Data.List as List
236 import Data.Word (Word8)
237 import Data.Maybe (listToMaybe)
238 import Data.Array (listArray)
239 import qualified Data.Array as Array ((!))
241 -- Control.Exception.bracket not available in yhc or nhc
242 import Control.Exception (bracket)
244 import Foreign.C.String (CString, CStringLen)
245 import Foreign.C.Types (CSize, CInt)
246 import Foreign.ForeignPtr
247 import Foreign.Marshal.Array
249 import Foreign.Storable (Storable(..))
251 -- hGetBuf and hPutBuf not available in yhc or nhc
252 import System.IO (stdin,stdout,hClose,hFileSize
253 ,hGetBuf,hPutBuf,openBinaryFile
256 #if !defined(__GLASGOW_HASKELL__)
257 import System.IO.Unsafe
260 #if defined(__GLASGOW_HASKELL__)
262 import Data.Generics (Data(..), Typeable(..))
264 import System.IO (hGetBufNonBlocking)
265 import System.IO.Error (isEOFError)
267 import Foreign.Marshal (alloca)
268 import qualified Foreign.Concurrent as FC (newForeignPtr)
271 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
272 import GHC.Base (build, unsafeChr)
273 import GHC.Word hiding (Word8)
274 import GHC.Ptr (Ptr(..))
275 import GHC.ST (ST(..))
280 -- CFILES stuff is Hugs only
281 {-# CFILES cbits/fpstring.c #-}
283 -- -----------------------------------------------------------------------------
285 -- Useful macros, until we have bang patterns
288 #define STRICT1(f) f a | a `seq` False = undefined
289 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
290 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
291 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
292 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
294 -- -----------------------------------------------------------------------------
296 -- | A space-efficient representation of a Word8 vector, supporting many
297 -- efficient operations. A 'ByteString' contains 8-bit characters only.
299 -- Instances of Eq, Ord, Read, Show, Data, Typeable
301 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
305 #if defined(__GLASGOW_HASKELL__)
306 deriving (Data, Typeable)
309 instance Eq ByteString
312 instance Ord ByteString
313 where compare = compareBytes
315 instance Show ByteString where
316 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
318 instance Read ByteString where
319 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
322 instance Arbitrary PackedString where
323 arbitrary = P.pack `fmap` arbitrary
324 coarbitrary s = coarbitrary (P.unpack s)
327 -- | /O(n)/ Equality on the 'ByteString' type.
328 eq :: ByteString -> ByteString -> Bool
329 eq a b = (compareBytes a b) == EQ
332 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
333 compareBytes :: ByteString -> ByteString -> Ordering
334 compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
335 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
336 withForeignPtr x1 $ \p1 ->
337 withForeignPtr x2 $ \p2 -> do
338 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
339 return $ case i `compare` 0 of
340 EQ -> l1 `compare` l2
342 {-# INLINE compareBytes #-}
346 -- About 4x slower over 32M
348 compareBytes :: ByteString -> ByteString -> Ordering
349 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
350 withForeignPtr fp1 $ \p1 ->
351 withForeignPtr fp2 $ \p2 ->
352 cmp (p1 `plusPtr` off1)
353 (p2 `plusPtr` off2) 0 len1 len2
355 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
357 cmp p1 p2 n len1 len2
358 | n == len1 = if n == len2 then return EQ else return LT
359 | n == len2 = return GT
361 (a :: Word8) <- peekByteOff p1 n
362 (b :: Word8) <- peekByteOff p2 n
363 case a `compare` b of
364 EQ -> cmp p1 p2 (n+1) len1 len2
367 {-# INLINE compareBytes #-}
370 -- -----------------------------------------------------------------------------
371 -- Introducing and eliminating 'ByteString's
373 -- | /O(1)/ The empty 'ByteString'
375 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
376 {-# NOINLINE empty #-}
378 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
379 packByte :: Word8 -> ByteString
380 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
381 withForeignPtr fp $ \p -> poke p c
383 {-# NOINLINE packByte #-}
385 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
387 -- For applications with large numbers of string literals, pack can be a
388 -- bottleneck. In such cases, consider using packAddress (GHC only).
389 pack :: [Word8] -> ByteString
391 #if !defined(__GLASGOW_HASKELL__)
393 pack str = create (P.length str) $ \p -> go p str
396 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
398 #else /* hack away */
400 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
402 go _ _ [] = return ()
403 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
405 writeByte p i c = ST $ \s# ->
406 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
410 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
411 unpack :: ByteString -> [Word8]
413 #if !defined(__GLASGOW_HASKELL__)
415 unpack (PS _ _ 0) = []
416 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
417 go (p `plusPtr` s) (l - 1) []
420 go p 0 acc = peek p >>= \e -> return (e : acc)
421 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
422 {-# INLINE unpack #-}
426 unpack ps = build (unpackFoldr ps)
427 {-# INLINE unpack #-}
429 unpackList :: ByteString -> [Word8]
430 unpackList (PS fp off len) = withPtr fp $ \p -> do
432 loop _ (-1) acc = return acc
435 loop q (n-1) (a : acc)
436 loop (p `plusPtr` off) (len-1) []
439 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
442 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
443 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
445 loop _ (-1) acc = return acc
448 loop q (n-1) (a `f` acc)
449 loop (p `plusPtr` off) (len-1) ch
450 {-# INLINE [0] unpackFoldr #-}
454 ------------------------------------------------------------------------
456 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
457 -- conversion function
458 packWith :: (a -> Word8) -> [a] -> ByteString
459 packWith k str = create (P.length str) $ \p -> go p str
463 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
464 {-# INLINE packWith #-}
465 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
467 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
468 unpackWith :: (Word8 -> a) -> ByteString -> [a]
469 unpackWith _ (PS _ _ 0) = []
470 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
471 go (p `plusPtr` s) (l - 1) []
474 go p 0 acc = peek p >>= \e -> return (k e : acc)
475 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
476 {-# INLINE unpackWith #-}
477 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
479 -- ---------------------------------------------------------------------
482 -- | /O(1)/ Test whether a ByteString is empty.
483 null :: ByteString -> Bool
484 null (PS _ _ l) = l == 0
487 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
488 length :: ByteString -> Int
489 length (PS _ _ l) = l
490 {-# INLINE length #-}
492 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
493 -- complexity, as it requires a memcpy.
494 cons :: Word8 -> ByteString -> ByteString
495 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
496 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
500 -- | /O(n)/ Append a byte to the end of a 'ByteString'
501 snoc :: ByteString -> Word8 -> ByteString
502 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
503 memcpy p (f `plusPtr` s) l
504 poke (p `plusPtr` l) c
507 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
508 head :: ByteString -> Word8
510 | null ps = errorEmptyList "head"
511 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
514 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
515 tail :: ByteString -> ByteString
517 | l <= 0 = errorEmptyList "tail"
518 | otherwise = PS p (s+1) (l-1)
521 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
522 last :: ByteString -> Word8
524 | null ps = errorEmptyList "last"
525 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
528 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
529 init :: ByteString -> ByteString
531 | l <= 0 = errorEmptyList "init"
532 | otherwise = PS p s (l-1)
535 -- | /O(n)/ Append two ByteStrings
536 append :: ByteString -> ByteString -> ByteString
537 append xs ys | null xs = ys
539 | otherwise = concat [xs,ys]
540 {-# INLINE append #-}
544 -- About 30% faster, but allocating in a big chunk isn't good for memory use
546 append :: ByteString -> ByteString -> ByteString
547 append xs@(PS ffp s l) ys@(PS fgp t m)
550 | otherwise = create len $ \ptr ->
551 withForeignPtr ffp $ \fp ->
552 withForeignPtr fgp $ \gp -> do
553 memcpy ptr (fp `plusPtr` s) l
554 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
555 where len = length xs + length ys
558 -- ---------------------------------------------------------------------
561 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
564 map :: (Word8 -> Word8) -> ByteString -> ByteString
565 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
566 new_fp <- mallocByteString len
567 withForeignPtr new_fp $ \new_p -> do
568 map_ f (len-1) (p `plusPtr` start) new_p
569 return (PS new_fp 0 len)
572 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
577 x <- peekByteOff p1 n
578 pokeByteOff p2 n (f x)
582 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
583 reverse :: ByteString -> ByteString
584 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
585 c_reverse p (f `plusPtr` s) l
588 reverse = pack . P.reverse . unpack
591 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
592 -- 'ByteString' and \`intersperses\' that byte between the elements of
593 -- the 'ByteString'. It is analogous to the intersperse function on
595 intersperse :: Word8 -> ByteString -> ByteString
596 intersperse c ps@(PS x s l)
598 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
599 c_intersperse p (f `plusPtr` s) l c
602 intersperse c = pack . List.intersperse c . unpack
605 -- | The 'transpose' function transposes the rows and columns of its
606 -- 'ByteString' argument.
607 transpose :: [ByteString] -> [ByteString]
608 transpose ps = P.map pack (List.transpose (P.map unpack ps))
610 -- ---------------------------------------------------------------------
611 -- Reducing 'ByteString's
613 -- | 'foldl', applied to a binary operator, a starting value (typically
614 -- the left-identity of the operator), and a ByteString, reduces the
615 -- ByteString using the binary operator, from left to right.
616 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
617 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
618 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
621 lgo z p q | p == q = return z
622 | otherwise = do c <- peek p
623 lgo (f z c) (p `plusPtr` 1) q
625 -- | 'foldr', applied to a binary operator, a starting value
626 -- (typically the right-identity of the operator), and a ByteString,
627 -- reduces the ByteString using the binary operator, from right to left.
628 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
629 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
630 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
633 go p q | p == q = return z
634 | otherwise = do c <- peek p
635 ws <- go (p `plusPtr` 1) q
638 -- | 'foldl1' is a variant of 'foldl' that has no starting value
639 -- argument, and thus must be applied to non-empty 'ByteStrings'.
640 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
642 | null ps = errorEmptyList "foldl1"
643 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
645 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
646 -- and thus must be applied to non-empty 'ByteString's
647 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
649 | null ps = errorEmptyList "foldr1"
650 | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
652 -- ---------------------------------------------------------------------
655 -- | /O(n)/ Concatenate a list of ByteStrings.
656 concat :: [ByteString] -> ByteString
659 concat xs = inlinePerformIO $ do
660 let start_size = 1024
661 p <- mallocArray start_size
664 where f ptr len _ [] = do
665 ptr' <- reallocArray ptr (len+1)
666 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
667 fp <- newForeignFreePtr ptr'
670 f ptr len to_go pss@(PS p s l:pss')
671 | l <= to_go = do withForeignPtr p $ \pf ->
672 memcpy (ptr `plusPtr` len)
674 f ptr (len + l) (to_go - l) pss'
676 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
677 ptr' <- reallocArray ptr new_total
678 f ptr' len (new_total - len) pss
680 -- | Map a function over a 'ByteString' and concatenate the results
681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
682 concatMap f = foldr (append . f) empty
684 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
685 -- any element of the 'ByteString' satisfies the predicate.
686 any :: (Word8 -> Bool) -> ByteString -> Bool
687 any _ (PS _ _ 0) = False
688 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
689 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
692 go p q | p == q = return False
693 | otherwise = do c <- peek p
694 if f c then return True
695 else go (p `plusPtr` 1) q
697 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
698 -- if all elements of the 'ByteString' satisfy the predicate.
699 all :: (Word8 -> Bool) -> ByteString -> Bool
700 all _ (PS _ _ 0) = True
701 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
702 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
705 go p q | p == q = return True -- end of list
706 | otherwise = do c <- peek p
708 then go (p `plusPtr` 1) q
711 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
712 maximum :: ByteString -> Word8
713 maximum xs@(PS x s l)
714 | null xs = errorEmptyList "maximum"
715 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
716 return $ c_maximum (p `plusPtr` s) l
717 {-# INLINE maximum #-}
719 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
720 minimum :: ByteString -> Word8
721 minimum xs@(PS x s l)
722 | null xs = errorEmptyList "minimum"
723 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
724 return $ c_minimum (p `plusPtr` s) l
725 {-# INLINE minimum #-}
728 maximum xs@(PS x s l)
729 | null xs = errorEmptyList "maximum"
730 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
732 maximum_ (p `plusPtr` s) 0 l w
734 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
738 | otherwise = do w <- peekByteOff ptr n
739 maximum_ ptr (n+1) m (if w > c then w else c)
741 minimum xs@(PS x s l)
742 | null xs = errorEmptyList "minimum"
743 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
745 minimum_ (p `plusPtr` s) 0 l w
747 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
751 | otherwise = do w <- peekByteOff ptr n
752 minimum_ ptr (n+1) m (if w < c then w else c)
755 -- | /O(n)/ map Word8 functions, provided with the index at each position
756 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
757 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
758 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
760 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
762 go n f t p | f == p = return ()
763 | otherwise = do w <- peek f
765 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
767 -- ---------------------------------------------------------------------
768 -- Unfolds and replicates
770 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
771 -- the value of every element. The following holds:
773 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
775 -- This implemenation uses @memset(3)@
776 replicate :: Int -> Word8 -> ByteString
777 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
781 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
785 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
788 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
789 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
790 -- the element and returns 'Nothing' if it is done producing the
791 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
792 -- prepending to the ByteString and @b@ is used as the next element in a
795 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
796 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
797 -- final size of the ByteString as an argument. 'cons' can then be
798 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
799 -- linear complexity. The depth of the recursion is limited to this
800 -- size, but may be less. For lazy, infinite unfoldr, use
801 -- 'Data.List.unfoldr' (from 'Data.List').
805 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
807 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
809 -- > unfoldrN n == take n $ List.unfoldr
810 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
811 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
814 go q c n | n == i = return n -- stop if we reach `i'
815 | otherwise = case f c of
819 go (q `plusPtr` 1) new_c (n+1)
821 -- ---------------------------------------------------------------------
824 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
825 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
826 take :: Int -> ByteString -> ByteString
830 | otherwise = PS x s n
833 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
834 -- elements, or @[]@ if @n > 'length' xs@.
835 drop :: Int -> ByteString -> ByteString
839 | otherwise = PS x (s+n) (l-n)
842 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
843 splitAt :: Int -> ByteString -> (ByteString, ByteString)
844 splitAt n ps = (take n ps, drop n ps)
845 {-# INLINE splitAt #-}
847 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
848 -- returns the longest prefix (possibly empty) of @xs@ of elements that
850 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
851 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
852 {-# INLINE takeWhile #-}
854 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
855 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
856 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
857 {-# INLINE dropWhile #-}
859 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
860 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
861 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
864 -- | 'breakByte' breaks its ByteString argument at the first occurence
865 -- of the specified byte. It is more efficient than 'break' as it is
866 -- implemented with @memchr(3)@. I.e.
868 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
870 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
871 breakByte c p = case elemIndex c p of
873 Just n -> (take n p, drop n p)
874 {-# INLINE breakByte #-}
876 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
877 -- occurence of @w@. It behaves like 'break', except the delimiter is
878 -- not returned, and @Nothing@ is returned if the delimiter is not in
879 -- the ByteString. I.e.
881 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
883 -- > breakFirst c xs ==
884 -- > let (x,y) = break (== c) xs
885 -- > in if null y then Nothing else Just (x, drop 1 y))
887 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
888 breakFirst c p = case elemIndex c p of
890 Just n -> Just (take n p, drop (n+1) p)
891 {-# INLINE breakFirst #-}
893 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
896 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
898 -- and the following are equivalent:
900 -- > breakLast 'c' "abcdef"
901 -- > let (x,y) = break (=='c') (reverse "abcdef")
902 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
904 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
905 breakLast c p = case elemIndexLast c p of
907 Just n -> Just (take n p, drop (n+1) p)
908 {-# INLINE breakLast #-}
910 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
911 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
912 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
913 span p ps = break (not . p) ps
916 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
919 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
923 -- > spanEnd (not . isSpace) ps
925 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
927 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
928 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
930 -- | /O(n)/ Splits a 'ByteString' into components delimited by
931 -- separators, where the predicate returns True for a separator element.
932 -- The resulting components do not contain the separators. Two adjacent
933 -- separators result in an empty component in the output. eg.
935 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
936 -- > splitWith (=='a') [] == []
938 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
940 #if defined(__GLASGOW_HASKELL__)
941 splitWith _pred (PS _ _ 0) = []
942 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
943 where pred# c# = pred_ (W8# c#)
945 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
946 splitLoop pred' p 0 off' len' fp'
948 splitLoop :: (Word# -> Bool)
954 splitLoop pred' p idx' off' len' fp'
955 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
956 | idx' >= len' = return [PS fp' off' idx']
958 w <- peekElemOff p (off'+idx')
959 if pred' (case w of W8# w# -> w#)
960 then return (PS fp' off' idx' :
961 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
962 else splitLoop pred' p (idx'+1) off' len' fp'
963 {-# INLINE splitWith #-}
966 splitWith _ (PS _ _ 0) = []
967 splitWith p ps = splitWith' p ps
970 splitWith' q qs = if null rest then [chunk]
971 else chunk : splitWith' q (unsafeTail rest)
972 where (chunk,rest) = break q qs
975 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
976 -- argument, consuming the delimiter. I.e.
978 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
979 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
980 -- > split 'x' "x" == ["",""]
984 -- > join [c] . split c == id
985 -- > split == splitWith . (==)
987 -- As for all splitting functions in this library, this function does
988 -- not copy the substrings, it just constructs new 'ByteStrings' that
989 -- are slices of the original.
991 split :: Word8 -> ByteString -> [ByteString]
992 split _ (PS _ _ 0) = []
993 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
994 let ptr = p `plusPtr` s
998 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1000 then return [PS x (s+n) (l-n)]
1001 else do let i = q `minusPtr` ptr
1003 return $! PS x (s+n) (i-n) : ls
1005 {-# INLINE split #-}
1008 -- slower. but stays inside Haskell.
1009 split _ (PS _ _ 0) = []
1010 split (W8# w#) (PS fp off len) = splitWith' off len fp
1012 splitWith' off' len' fp' = withPtr fp $ \p ->
1013 splitLoop p 0 off' len' fp'
1015 splitLoop :: Ptr Word8
1016 -> Int -> Int -> Int
1021 splitLoop p idx' off' len' fp'
1022 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1023 | idx' >= len' = return [PS fp' off' idx']
1025 (W8# x#) <- peekElemOff p (off'+idx')
1026 if word2Int# w# ==# word2Int# x#
1027 then return (PS fp' off' idx' :
1028 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1029 else splitLoop p (idx'+1) off' len' fp'
1032 -- | Like 'splitWith', except that sequences of adjacent separators are
1033 -- treated as a single separator. eg.
1035 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1037 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1038 tokens f = P.filter (not.null) . splitWith f
1040 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1041 -- 'ByteString's and concatenates the list after interspersing the first
1042 -- argument between each element of the list.
1043 join :: ByteString -> [ByteString] -> ByteString
1044 join filler pss = concat (splice pss)
1048 splice (x:y:xs) = x:filler:splice (y:xs)
1051 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1052 -- with a char. Around 4 times faster than the generalised join.
1054 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1055 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1056 withForeignPtr ffp $ \fp ->
1057 withForeignPtr fgp $ \gp -> do
1058 memcpy ptr (fp `plusPtr` s) l
1059 poke (ptr `plusPtr` l) c
1060 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1062 len = length f + length g + 1
1063 {-# INLINE joinWithByte #-}
1065 -- ---------------------------------------------------------------------
1066 -- Indexing ByteStrings
1068 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1069 index :: ByteString -> Int -> Word8
1071 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1072 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1073 ++ ", length = " ++ show (length ps)
1074 | otherwise = ps `unsafeIndex` n
1075 {-# INLINE index #-}
1077 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1078 -- element in the given 'ByteString' which is equal to the query
1079 -- element, or 'Nothing' if there is no such element.
1080 -- This implementation uses memchr(3).
1081 elemIndex :: Word8 -> ByteString -> Maybe Int
1082 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1083 let p' = p `plusPtr` s
1084 q = memchr p' c (fromIntegral l)
1085 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1086 {-# INLINE elemIndex #-}
1088 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1089 -- element in the given 'ByteString' which is equal to the query
1090 -- element, or 'Nothing' if there is no such element. The following
1093 -- > elemIndexLast c xs ==
1094 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1096 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1097 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1098 go (p `plusPtr` s) (l-1)
1101 go p i | i < 0 = return Nothing
1102 | otherwise = do ch' <- peekByteOff p i
1104 then return $ Just i
1106 {-# INLINE elemIndexLast #-}
1108 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1109 -- the indices of all elements equal to the query element, in ascending order.
1110 -- This implementation uses memchr(3).
1111 elemIndices :: Word8 -> ByteString -> [Int]
1112 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1113 let ptr = p `plusPtr` s
1117 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1120 else do let i = q `minusPtr` ptr
1127 elemIndices :: Word8 -> ByteString -> [Int]
1128 elemIndices c ps = loop 0 ps
1130 loop _ ps' | null ps' = []
1131 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1132 | otherwise = loop (n+1) (unsafeTail ps')
1135 -- | count returns the number of times its argument appears in the ByteString
1137 -- > count = length . elemIndices
1139 -- But more efficiently than using length on the intermediate list.
1140 count :: Word8 -> ByteString -> Int
1141 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1142 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1143 {-# INLINE count #-}
1147 -- around 30% slower
1149 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1150 go (p `plusPtr` s) (fromIntegral m) 0
1152 go :: Ptr Word8 -> CSize -> Int -> IO Int
1155 let q = memchr p w l
1158 else do let k = fromIntegral $ q `minusPtr` p
1159 go (q `plusPtr` 1) (l-k-1) (i+1)
1162 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1163 -- returns the index of the first element in the ByteString
1164 -- satisfying the predicate.
1165 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1166 findIndex = (listToMaybe .) . findIndices
1168 -- | The 'findIndices' function extends 'findIndex', by returning the
1169 -- indices of all elements satisfying the predicate, in ascending order.
1170 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1171 findIndices p ps = loop 0 ps
1174 loop _ qs | null qs = []
1175 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1176 | otherwise = loop (n+1) (unsafeTail qs)
1178 -- ---------------------------------------------------------------------
1179 -- Searching ByteStrings
1181 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1182 elem :: Word8 -> ByteString -> Bool
1183 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1186 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1187 notElem :: Word8 -> ByteString -> Bool
1188 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1189 {-# INLINE notElem #-}
1192 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1193 -- case of filtering a single byte. It is more efficient to use
1194 -- /filterByte/ in this case.
1196 -- > filterByte == filter . (==)
1198 -- filterByte is around 10x faster, and uses much less space, than its
1199 -- filter equivalent
1200 filterByte :: Word8 -> ByteString -> ByteString
1201 filterByte w ps = replicate (count w ps) w
1204 -- slower than the replicate version
1206 filterByte ch ps@(PS x s l)
1208 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1209 t <- go (f `plusPtr` s) p l
1210 return (t `minusPtr` p) -- actual length
1214 go f t e = do w <- peek f
1216 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1217 else go (f `plusPtr` 1) t (e-1)
1221 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1222 -- case of filtering a single byte out of a list. It is more efficient
1223 -- to use /filterNotByte/ in this case.
1225 -- > filterNotByte == filter . (/=)
1227 -- filterNotByte is around 3x faster, and uses much less space, than its
1228 -- filter equivalent
1229 filterNotByte :: Word8 -> ByteString -> ByteString
1230 filterNotByte ch ps@(PS x s l)
1232 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1233 t <- go (f `plusPtr` s) p l
1234 return (t `minusPtr` p) -- actual length
1238 go f t e = do w <- peek f
1240 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1241 else go (f `plusPtr` 1) t (e-1)
1243 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1244 -- returns a ByteString containing those characters that satisfy the
1246 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1247 filter k ps@(PS x s l)
1249 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1250 t <- go (f `plusPtr` s) p l
1251 return (t `minusPtr` p) -- actual length
1255 go f t e = do w <- peek f
1257 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1258 else go (f `plusPtr` 1) t (e - 1)
1260 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1262 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1263 -- and returns the first element in matching the predicate, or 'Nothing'
1264 -- if there is no such element.
1265 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1266 find p ps = case filter p ps of
1267 q | null q -> Nothing
1268 | otherwise -> Just (unsafeHead q)
1270 -- ---------------------------------------------------------------------
1271 -- Searching for substrings
1273 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1274 -- iff the first is a prefix of the second.
1275 isPrefixOf :: ByteString -> ByteString -> Bool
1276 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1279 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1280 withForeignPtr x2 $ \p2 -> do
1281 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1284 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1285 -- iff the first is a suffix of the second.
1287 -- The following holds:
1289 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1291 -- However, the real implemenation uses memcmp to compare the end of the
1292 -- string only, with no reverse required..
1293 isSuffixOf :: ByteString -> ByteString -> Bool
1294 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1297 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1298 withForeignPtr x2 $ \p2 -> do
1299 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1302 -- | Check whether one string is a substring of another. @isSubstringOf
1303 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1304 isSubstringOf :: ByteString -- ^ String to search for.
1305 -> ByteString -- ^ String to search in.
1307 isSubstringOf p s = not $ P.null $ findSubstrings p s
1309 -- | Get the first index of a substring in another string,
1310 -- or 'Nothing' if the string is not found.
1311 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1312 findSubstring :: ByteString -- ^ String to search for.
1313 -> ByteString -- ^ String to seach in.
1315 findSubstring = (listToMaybe .) . findSubstrings
1317 -- | Find the indexes of all (possibly overlapping) occurances of a
1318 -- substring in a string. This function uses the Knuth-Morris-Pratt
1319 -- string matching algorithm.
1320 findSubstrings :: ByteString -- ^ String to search for.
1321 -> ByteString -- ^ String to seach in.
1324 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1326 patc x = pat `unsafeIndex` x
1327 strc x = str `unsafeIndex` x
1329 -- maybe we should make kmpNext a UArray before using it in search?
1330 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1331 kmpNextL p _ | null p = []
1332 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1334 x = if not (null ps) && unsafeHead ps == patc j'
1335 then kmpNext Array.! j' else j'
1337 search i j = match ++ rest -- i: position in string, j: position in pattern
1338 where match = if j == m then [(i - j)] else []
1339 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1340 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1343 -- ---------------------------------------------------------------------
1346 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1347 -- corresponding pairs of bytes. If one input ByteString is short,
1348 -- excess elements of the longer ByteString are discarded. This is
1349 -- equivalent to a pair of 'unpack' operations.
1350 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1352 | null ps || null qs = []
1353 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1355 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1356 -- the first argument, instead of a tupling function. For example,
1357 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1358 -- corresponding sums.
1359 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1361 | null ps || null qs = []
1362 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1364 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1365 -- ByteStrings. Note that this performs two 'pack' operations.
1366 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1367 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1368 {-# INLINE unzip #-}
1370 -- ---------------------------------------------------------------------
1373 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1374 inits :: ByteString -> [ByteString]
1375 inits (PS x s l) = [PS x s n | n <- [0..l]]
1377 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1378 tails :: ByteString -> [ByteString]
1379 tails p | null p = [empty]
1380 | otherwise = p : tails (unsafeTail p)
1382 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1384 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1385 elems :: ByteString -> [ByteString]
1386 elems (PS _ _ 0) = []
1387 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1388 {-# INLINE elems #-}
1390 -- ---------------------------------------------------------------------
1391 -- ** Ordered 'ByteString's
1393 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1394 sort :: ByteString -> ByteString
1395 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1396 memcpy p (f `plusPtr` s) l
1397 c_qsort p l -- inplace
1400 sort = pack . List.sort . unpack
1403 -- ---------------------------------------------------------------------
1405 -- Extensions to the basic interface
1408 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1409 -- check for the empty case, so there is an obligation on the programmer
1410 -- to provide a proof that the ByteString is non-empty.
1411 unsafeHead :: ByteString -> Word8
1412 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1413 {-# INLINE unsafeHead #-}
1415 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1416 -- check for the empty case. As with 'unsafeHead', the programmer must
1417 -- provide a separate proof that the ByteString is non-empty.
1418 unsafeTail :: ByteString -> ByteString
1419 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1420 {-# INLINE unsafeTail #-}
1422 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1423 -- This omits the bounds check, which means there is an accompanying
1424 -- obligation on the programmer to ensure the bounds are checked in some
1426 unsafeIndex :: ByteString -> Int -> Word8
1427 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1428 {-# INLINE unsafeIndex #-}
1430 -- ---------------------------------------------------------------------
1431 -- Low level constructors
1433 #if defined(__GLASGOW_HASKELL__)
1434 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1435 -- Addr\# (an arbitrary machine address assumed to point outside the
1436 -- garbage-collected heap) into a @ByteString@. A much faster way to
1437 -- create an Addr\# is with an unboxed string literal, than to pack a
1438 -- boxed string. A unboxed string literal is compiled to a static @char
1439 -- []@ by GHC. Establishing the length of the string requires a call to
1440 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1441 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1442 -- if you know the length of the string statically.
1446 -- > literalFS = packAddress "literal"#
1448 packAddress :: Addr# -> ByteString
1449 packAddress addr# = inlinePerformIO $ do
1450 p <- newForeignPtr_ cstr
1451 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1454 {-# INLINE packAddress #-}
1456 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1457 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1458 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1459 -- 'Addr\#' to the string, and the length of the string. Make sure the
1460 -- length is correct, otherwise use the safer 'packAddress' (where the
1461 -- length will be calculated once at runtime).
1462 unsafePackAddress :: Int -> Addr# -> ByteString
1463 unsafePackAddress len addr# = inlinePerformIO $ do
1464 p <- newForeignPtr_ cstr
1466 where cstr = Ptr addr#
1470 -- | /O(1)/ Build a ByteString from a ForeignPtr
1471 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1472 fromForeignPtr fp l = PS fp 0 l
1474 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1475 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1476 toForeignPtr (PS ps s l) = (ps, s, l)
1478 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1479 -- current 'ByteString' from any larger string it was created from, as
1481 skipIndex :: ByteString -> Int
1482 skipIndex (PS _ s _) = s
1483 {-# INLINE skipIndex #-}
1485 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1486 -- finalizer associated to it. The ByteString length is calculated using
1487 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1488 packCString :: CString -> ByteString
1489 packCString cstr = inlinePerformIO $ do
1490 fp <- newForeignPtr_ (castPtr cstr)
1491 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1493 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1494 -- have /no/ finalizer associated with it. This operation has /O(1)/
1495 -- complexity as we already know the final size, so no /strlen(3)/ is
1497 packCStringLen :: CStringLen -> ByteString
1498 packCStringLen (ptr,len) = inlinePerformIO $ do
1499 fp <- newForeignPtr_ (castPtr ptr)
1500 return $ PS fp 0 (fromIntegral len)
1502 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1503 -- have a @free(3)@ finalizer associated to it.
1504 packMallocCString :: CString -> ByteString
1505 packMallocCString cstr = inlinePerformIO $ do
1506 fp <- newForeignFreePtr (castPtr cstr)
1507 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1509 #if defined(__GLASGOW_HASKELL__)
1510 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1511 -- length, and an IO action representing a finalizer. This function is
1512 -- not available on Hugs.
1514 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1515 packCStringFinalizer p l f = do
1516 fp <- FC.newForeignPtr p f
1519 -- | Explicitly run the finaliser associated with a 'ByteString'.
1520 -- Further references to this value may generate invalid memory
1521 -- references. This operation is unsafe, as there may be other
1522 -- 'ByteStrings' referring to the same underlying pages. If you use
1523 -- this, you need to have a proof of some kind that all 'ByteString's
1524 -- ever generated from the underlying byte array are no longer live.
1525 unsafeFinalize :: ByteString -> IO ()
1526 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1530 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1531 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1532 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1533 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1535 alloc = withForeignPtr ps $ \p -> do
1536 buf <- c_malloc (fromIntegral l+1)
1537 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1538 poke (buf `plusPtr` l) (0::Word8)
1539 return $ castPtr buf
1541 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1542 -- Warning: modifying the @CString@ will affect the @ByteString@.
1543 -- Why is this function unsafe? It relies on the null byte at the end of
1544 -- the ByteString to be there. This is /not/ the case if your ByteString
1545 -- has been spliced from a larger string (i.e. with take or drop).
1546 -- Unless you can guarantee the null byte, you should use the safe
1547 -- version, which will copy the string first.
1549 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1550 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1552 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1553 -- This is mainly useful to allow the rest of the data pointed
1554 -- to by the 'ByteString' to be garbage collected, for example
1555 -- if a large string has been read in, and only a small part of it
1556 -- is needed in the rest of the program.
1557 copy :: ByteString -> ByteString
1558 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1560 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1561 -- CString is going to be deallocated from C land.
1562 copyCString :: CString -> ByteString
1563 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1565 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1566 copyCStringLen :: CStringLen -> ByteString
1567 copyCStringLen (cstr, len) = inlinePerformIO $ do
1568 fp <- mallocForeignPtrArray (len+1)
1569 withForeignPtr fp $ \p -> do
1570 memcpy p (castPtr cstr) len
1571 poke (p `plusPtr` len) (0 :: Word8)
1572 return $! PS fp 0 len
1574 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1575 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1576 -- This is analogous to unsafeUseAsCString, and comes with the same
1577 -- safety requirements.
1579 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1580 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1582 -- | Given the maximum size needed and a function to make the contents
1583 -- of a ByteString, generate makes the 'ByteString'. The generating
1584 -- function is required to return the actual final size (<= the maximum
1585 -- size), and the resulting byte array is realloced to this size. The
1586 -- string is padded at the end with a null byte.
1588 -- generate is the main mechanism for creating custom, efficient
1589 -- ByteString functions, using Haskell or C functions to fill the space.
1591 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1595 p' <- reallocArray p (i'+1)
1596 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1597 fp <- newForeignFreePtr p'
1600 -- ---------------------------------------------------------------------
1603 #if defined(__GLASGOW_HASKELL__)
1605 -- | getLine, read a line from stdin.
1606 getLine :: IO ByteString
1607 getLine = hGetLine stdin
1609 -- | hGetLine. read a ByteString from a handle
1610 hGetLine :: Handle -> IO ByteString
1611 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1612 case haBufferMode handle_ of
1613 NoBuffering -> error "no buffering"
1614 _other -> hGetLineBuffered handle_
1617 hGetLineBuffered handle_ = do
1618 let ref = haBuffer handle_
1619 buf <- readIORef ref
1620 hGetLineBufferedLoop handle_ ref buf 0 []
1622 hGetLineBufferedLoop handle_ ref
1623 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1625 off <- findEOL r w raw
1626 let new_len = len + off - r
1627 xs <- mkPS raw r off
1629 -- if eol == True, then off is the offset of the '\n'
1630 -- otherwise off == w and the buffer is now empty.
1632 then do if (w == off + 1)
1633 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1634 else writeIORef ref buf{ bufRPtr = off + 1 }
1635 mkBigPS new_len (xs:xss)
1637 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1638 buf{ bufWPtr=0, bufRPtr=0 }
1640 -- Nothing indicates we caught an EOF, and we may have a
1641 -- partial line to return.
1643 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1645 then mkBigPS new_len (xs:xss)
1648 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1650 -- find the end-of-line character, if there is one
1654 (c,r') <- readCharFromBuffer raw r
1656 then return r -- NB. not r': don't include the '\n'
1657 else findEOL r' w raw
1659 maybeFillReadBuffer fd is_line is_stream buf = catch
1660 (do buf' <- fillReadBuffer fd is_line is_stream buf
1662 (\e -> if isEOFError e then return Nothing else ioError e)
1664 -- TODO, rewrite to use normal memcpy
1665 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1666 mkPS buf start end = do
1667 let len = end - start
1668 fp <- mallocByteString len
1669 withForeignPtr fp $ \p -> do
1670 memcpy_ptr_baoff p buf start (fromIntegral len)
1671 return (PS fp 0 len)
1673 mkBigPS :: Int -> [ByteString] -> IO ByteString
1674 mkBigPS _ [ps] = return ps
1675 mkBigPS _ pss = return $! concat (P.reverse pss)
1679 -- ---------------------------------------------------------------------
1682 -- | Outputs a 'ByteString' to the specified 'Handle'.
1683 hPut :: Handle -> ByteString -> IO ()
1684 hPut _ (PS _ _ 0) = return ()
1685 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1686 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1688 -- | Write a ByteString to stdout
1689 putStr :: ByteString -> IO ()
1690 putStr = hPut stdout
1692 -- | Write a ByteString to stdout, appending a newline byte
1693 putStrLn :: ByteString -> IO ()
1694 putStrLn ps = hPut stdout ps >> hPut stdout nl
1695 where nl = packByte 0x0a
1697 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1698 -- is far more efficient than reading the characters into a 'String'
1699 -- and then using 'pack'.
1700 hGet :: Handle -> Int -> IO ByteString
1701 hGet _ 0 = return empty
1702 hGet h i = do fp <- mallocByteString i
1703 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1706 #if defined(__GLASGOW_HASKELL__)
1707 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1708 -- waiting for data to become available, instead it returns only whatever data
1710 hGetNonBlocking :: Handle -> Int -> IO ByteString
1711 hGetNonBlocking _ 0 = return empty
1712 hGetNonBlocking h i = do
1713 fp <- mallocByteString i
1714 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1718 -- | Read entire handle contents into a 'ByteString'.
1720 -- As with 'hGet', the string representation in the file is assumed to
1723 hGetContents :: Handle -> IO ByteString
1725 let start_size = 1024
1726 p <- mallocArray start_size
1727 i <- hGetBuf h p start_size
1729 then do p' <- reallocArray p i
1730 fp <- newForeignFreePtr p'
1736 p' <- reallocArray p s'
1737 i <- hGetBuf h (p' `plusPtr` s) s
1739 then do let i' = s + i
1740 p'' <- reallocArray p' i'
1741 fp <- newForeignFreePtr p''
1745 -- | getContents. Equivalent to hGetContents stdin
1746 getContents :: IO ByteString
1747 getContents = hGetContents stdin
1749 -- | Read an entire file directly into a 'ByteString'. This is far more
1750 -- efficient than reading the characters into a 'String' and then using
1751 -- 'pack'. It also may be more efficient than opening the file and
1752 -- reading it using hGet.
1753 readFile :: FilePath -> IO ByteString
1755 h <- openBinaryFile f ReadMode
1757 s <- hGet h $ fromIntegral l
1761 -- | Write a 'ByteString' to a file.
1762 writeFile :: FilePath -> ByteString -> IO ()
1764 h <- openBinaryFile f WriteMode
1770 -- Disable until we can move it into a portable .hsc file
1773 -- | Like readFile, this reads an entire file directly into a
1774 -- 'ByteString', but it is even more efficient. It involves directly
1775 -- mapping the file to memory. This has the advantage that the contents
1776 -- of the file never need to be copied. Also, under memory pressure the
1777 -- page may simply be discarded, while in the case of readFile it would
1778 -- need to be written to swap. If you read many small files, mmapFile
1779 -- will be less memory-efficient than readFile, since each mmapFile
1780 -- takes up a separate page of memory. Also, you can run into bus
1781 -- errors if the file is modified. As with 'readFile', the string
1782 -- representation in the file is assumed to be ISO-8859-1.
1784 -- On systems without mmap, this is the same as a readFile.
1786 mmapFile :: FilePath -> IO ByteString
1787 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1789 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1791 h <- openBinaryFile f ReadMode
1792 l <- fromIntegral `fmap` hFileSize h
1793 -- Don't bother mmaping small files because each mmapped file takes up
1794 -- at least one full VM block.
1796 then do thefp <- mallocByteString l
1797 withForeignPtr thefp $ \p-> hGetBuf h p l
1802 fd <- fromIntegral `fmap` handleToFd h
1804 fp <- if p == nullPtr
1805 then do thefp <- mallocByteString l
1806 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1809 -- The munmap leads to crashes on OpenBSD.
1810 -- maybe there's a use after unmap in there somewhere?
1811 #if !defined(__OpenBSD__)
1812 let unmap = c_munmap p l >> return ()
1814 let unmap = return ()
1816 fp <- FC.newForeignPtr p unmap
1821 where mmap_limit = 16*1024
1824 #if defined(__GLASGOW_HASKELL__)
1826 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1828 getArgs :: IO [ByteString]
1830 alloca $ \ p_argc ->
1831 alloca $ \ p_argv -> do
1832 getProgArgv p_argc p_argv
1833 p <- fromIntegral `fmap` peek p_argc
1835 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1838 -- ---------------------------------------------------------------------
1839 -- Internal utilities
1841 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1842 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1843 -- convenience for ByteString construction.
1844 w2c :: Word8 -> Char
1845 #if !defined(__GLASGOW_HASKELL__)
1846 w2c = chr . fromIntegral
1848 w2c = unsafeChr . fromIntegral
1852 c2w :: Char -> Word8
1853 c2w = fromIntegral . ord
1856 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1857 -- is padded with a null byte.
1858 mallocByteString :: Int -> IO (ForeignPtr Word8)
1859 mallocByteString l = do
1860 fp <- mallocForeignPtrArray (l+1)
1861 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1864 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1865 -- argument gives the final size of the ByteString. Unlike 'generate'
1866 -- the ByteString is no reallocated if the final size is less than the
1868 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1869 create l write_ptr = inlinePerformIO $ do
1870 fp <- mallocByteString (l+1)
1871 withForeignPtr fp $ \p -> write_ptr p
1873 {-# INLINE create #-}
1875 -- | Perform an operation with a temporary ByteString
1876 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1877 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1878 {-# INLINE withPtr #-}
1880 -- Common up near identical calls to `error' to reduce the number
1881 -- constant strings created when compiled:
1882 errorEmptyList :: String -> a
1883 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1884 {-# INLINE errorEmptyList #-}
1886 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1887 -- of the string if no element is found, rather than Nothing.
1888 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1889 STRICT2(findIndexOrEnd)
1892 | f (unsafeHead ps) = 0
1893 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
1894 {-# INLINE findIndexOrEnd #-}
1896 -- Find from the end of the string using predicate
1897 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1898 STRICT2(findFromEndUntil)
1899 findFromEndUntil f ps@(PS x s l) =
1901 else if f (last ps) then l
1902 else findFromEndUntil f (PS x s (l-1))
1904 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1905 -- it exposes lots of things to further inlining
1907 {-# INLINE inlinePerformIO #-}
1908 inlinePerformIO :: IO a -> a
1909 #if defined(__GLASGOW_HASKELL__)
1910 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1912 inlinePerformIO = unsafePerformIO
1915 {-# INLINE newForeignFreePtr #-}
1916 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1917 #if defined(__GLASGOW_HASKELL__)
1918 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1920 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1923 -- ---------------------------------------------------------------------
1925 -- Standard C functions
1928 foreign import ccall unsafe "string.h strlen" c_strlen
1931 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1932 :: CInt -> IO (Ptr Word8)
1934 foreign import ccall unsafe "static stdlib.h free" c_free
1935 :: Ptr Word8 -> IO ()
1937 #if !defined(__GLASGOW_HASKELL__)
1938 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1939 :: FunPtr (Ptr Word8 -> IO ())
1942 foreign import ccall unsafe "string.h memset" memset
1943 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1945 foreign import ccall unsafe "string.h memchr" memchr
1946 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1948 foreign import ccall unsafe "string.h memcmp" memcmp
1949 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1951 foreign import ccall unsafe "string.h memcpy" memcpy
1952 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1954 -- ---------------------------------------------------------------------
1959 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1960 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1962 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1963 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1965 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1966 :: Ptr Word8 -> Int -> Word8
1968 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1969 :: Ptr Word8 -> Int -> Word8
1971 foreign import ccall unsafe "static fpstring.h count" c_count
1972 :: Ptr Word8 -> Int -> Word8 -> Int
1974 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1975 :: Ptr Word8 -> Int -> IO ()
1977 -- ---------------------------------------------------------------------
1981 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
1982 :: Int -> Int -> IO (Ptr Word8)
1984 foreign import ccall unsafe "static unistd.h close" c_close
1987 # if !defined(__OpenBSD__)
1988 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
1989 :: Ptr Word8 -> Int -> IO Int
1993 -- ---------------------------------------------------------------------
1994 -- Internal GHC Haskell magic
1996 #if defined(__GLASGOW_HASKELL__)
1997 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1998 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2000 foreign import ccall unsafe "__hscore_memcpy_src_off"
2001 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())