1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
73 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
75 -- * Reducing 'ByteString's
76 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
78 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
80 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
81 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
84 concat, -- :: [ByteString] -> ByteString
85 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
86 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
87 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
88 maximum, -- :: ByteString -> Word8
89 minimum, -- :: ByteString -> Word8
90 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
92 -- * Building ByteStrings
93 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
94 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
96 -- * Generating and unfolding ByteStrings
97 replicate, -- :: Int -> Word8 -> ByteString
98 unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
102 -- ** Breaking strings
103 take, -- :: Int -> ByteString -> ByteString
104 unsafeTake, -- :: Int -> ByteString -> ByteString
105 drop, -- :: Int -> ByteString -> ByteString
106 unsafeDrop, -- :: Int -> ByteString -> ByteString
107 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
108 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
109 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
114 -- ** Breaking and dropping on specific bytes
115 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
116 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
117 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
118 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
120 -- ** Breaking into many substrings
121 split, -- :: Word8 -> ByteString -> [ByteString]
122 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
123 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
124 group, -- :: ByteString -> [ByteString]
125 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
127 -- ** Joining strings
128 join, -- :: ByteString -> [ByteString] -> ByteString
129 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
131 -- * Indexing ByteStrings
132 index, -- :: ByteString -> Int -> Word8
133 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
134 elemIndices, -- :: Word8 -> ByteString -> [Int]
135 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
136 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
137 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
138 count, -- :: Word8 -> ByteString -> Int
139 findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int
141 -- * Ordered ByteStrings
142 sort, -- :: ByteString -> ByteString
144 -- * Searching ByteStrings
146 -- ** Searching by equality
147 -- | These functions use memchr(3) to efficiently search the ByteString
149 elem, -- :: Word8 -> ByteString -> Bool
150 notElem, -- :: Word8 -> ByteString -> Bool
151 filterByte, -- :: Word8 -> ByteString -> ByteString
152 filterNotByte, -- :: Word8 -> ByteString -> ByteString
154 -- ** Searching with a predicate
155 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
156 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
157 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
159 -- ** Prefixes and suffixes
160 -- | These functions use memcmp(3) to efficiently compare substrings
161 isPrefixOf, -- :: ByteString -> ByteString -> Bool
162 isSuffixOf, -- :: ByteString -> ByteString -> Bool
164 -- ** Search for arbitrary substrings
165 isSubstringOf, -- :: ByteString -> ByteString -> Bool
166 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
167 findSubstrings, -- :: ByteString -> ByteString -> [Int]
169 -- * Zipping and unzipping ByteStrings
170 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
171 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
172 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
174 -- * Unchecked access
175 unsafeHead, -- :: ByteString -> Word8
176 unsafeTail, -- :: ByteString -> ByteString
177 unsafeIndex, -- :: ByteString -> Int -> Word8
179 -- * Low level introduction and elimination
180 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
181 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
182 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
183 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
184 skipIndex, -- :: ByteString -> Int
186 -- ** Packing CStrings and pointers
187 packCString, -- :: CString -> ByteString
188 packCStringLen, -- :: CString -> ByteString
189 packMallocCString, -- :: CString -> ByteString
191 #if defined(__GLASGOW_HASKELL__)
192 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
193 packAddress, -- :: Addr# -> ByteString
194 unsafePackAddress, -- :: Int -> Addr# -> ByteString
195 unsafeFinalize, -- :: ByteString -> IO ()
198 -- ** Using ByteStrings as CStrings
199 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
200 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
201 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
203 -- ** Copying ByteStrings
204 -- | These functions perform memcpy(3) operations
205 copy, -- :: ByteString -> ByteString
206 copyCString, -- :: CString -> ByteString
207 copyCStringLen, -- :: CStringLen -> ByteString
209 -- * I\/O with @ByteString@s
211 -- ** Standard input and output
213 #if defined(__GLASGOW_HASKELL__)
214 getLine, -- :: IO ByteString
216 getContents, -- :: IO ByteString
217 putStr, -- :: ByteString -> IO ()
218 putStrLn, -- :: ByteString -> IO ()
221 readFile, -- :: FilePath -> IO ByteString
222 writeFile, -- :: FilePath -> ByteString -> IO ()
223 -- mmapFile, -- :: FilePath -> IO ByteString
225 -- ** I\/O with Handles
226 #if defined(__GLASGOW_HASKELL__)
227 getArgs, -- :: IO [ByteString]
228 hGetLine, -- :: Handle -> IO ByteString
229 hGetLines, -- :: Handle -> IO [ByteString]
230 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
232 hGetContents, -- :: Handle -> IO ByteString
233 hGet, -- :: Handle -> Int -> IO ByteString
234 hPut, -- :: Handle -> ByteString -> IO ()
236 -- * Fusion utilities
237 #if defined(__GLASGOW_HASKELL__)
238 unpackList, -- eek, otherwise it gets thrown away by the simplifier
241 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
242 loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL,
246 import qualified Prelude as P
247 import Prelude hiding (reverse,head,tail,last,init,null
248 ,length,map,lines,foldl,foldr,unlines
249 ,concat,any,take,drop,splitAt,takeWhile
250 ,dropWhile,span,break,elem,filter,maximum
251 ,minimum,all,concatMap,foldl1,foldr1
252 ,scanl,scanl1,readFile,writeFile,replicate
253 ,getContents,getLine,putStr,putStrLn
254 ,zip,zipWith,unzip,notElem)
256 import qualified Data.List as List
259 import Data.Word (Word8)
260 import Data.Maybe (listToMaybe)
261 import Data.Array (listArray)
262 import qualified Data.Array as Array ((!))
264 -- Control.Exception.bracket not available in yhc or nhc
265 import Control.Exception (bracket, assert)
266 import Control.Monad (when)
268 import Foreign.C.String (CString, CStringLen)
269 import Foreign.C.Types (CSize,CInt)
270 import Foreign.ForeignPtr
271 import Foreign.Marshal.Array
273 import Foreign.Storable (Storable(..))
275 -- hGetBuf and hPutBuf not available in yhc or nhc
276 import System.IO (stdin,stdout,hClose,hFileSize
277 ,hGetBuf,hPutBuf,openBinaryFile
280 #if !defined(__GLASGOW_HASKELL__)
281 import System.IO.Unsafe
284 #if defined(__GLASGOW_HASKELL__)
286 import Data.Generics (Data(..), Typeable(..))
288 import System.IO (hGetBufNonBlocking)
289 import System.IO.Error (isEOFError)
291 import Foreign.Marshal (alloca)
292 import qualified Foreign.Concurrent as FC (newForeignPtr)
295 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
296 import GHC.Base (build, unsafeChr)
297 import GHC.Word hiding (Word8)
298 import GHC.Ptr (Ptr(..))
299 import GHC.ST (ST(..))
304 -- CFILES stuff is Hugs only
305 {-# CFILES cbits/fpstring.c #-}
307 -- -----------------------------------------------------------------------------
309 -- Useful macros, until we have bang patterns
312 #define STRICT1(f) f a | a `seq` False = undefined
313 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
314 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
315 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
316 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
318 -- -----------------------------------------------------------------------------
320 -- | A space-efficient representation of a Word8 vector, supporting many
321 -- efficient operations. A 'ByteString' contains 8-bit characters only.
323 -- Instances of Eq, Ord, Read, Show, Data, Typeable
325 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
329 #if defined(__GLASGOW_HASKELL__)
330 deriving (Data, Typeable)
333 instance Eq ByteString
336 instance Ord ByteString
337 where compare = compareBytes
339 instance Show ByteString where
340 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
342 instance Read ByteString where
343 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
346 instance Arbitrary PackedString where
347 arbitrary = P.pack `fmap` arbitrary
348 coarbitrary s = coarbitrary (P.unpack s)
351 -- | /O(n)/ Equality on the 'ByteString' type.
352 eq :: ByteString -> ByteString -> Bool
353 eq a@(PS p s l) b@(PS p' s' l')
354 | l /= l' = False -- short cut on length
355 | p == p' && s == s' = True -- short cut for the same string
356 | otherwise = compareBytes a b == EQ
359 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
360 compareBytes :: ByteString -> ByteString -> Ordering
361 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
362 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
363 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
364 | otherwise = inlinePerformIO $
365 withForeignPtr x1 $ \p1 ->
366 withForeignPtr x2 $ \p2 -> do
367 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
368 return $ case i `compare` 0 of
369 EQ -> l1 `compare` l2
371 {-# INLINE compareBytes #-}
375 -- About 4x slower over 32M
377 compareBytes :: ByteString -> ByteString -> Ordering
378 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
379 withForeignPtr fp1 $ \p1 ->
380 withForeignPtr fp2 $ \p2 ->
381 cmp (p1 `plusPtr` off1)
382 (p2 `plusPtr` off2) 0 len1 len2
384 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
386 cmp p1 p2 n len1 len2
387 | n == len1 = if n == len2 then return EQ else return LT
388 | n == len2 = return GT
390 (a :: Word8) <- peekByteOff p1 n
391 (b :: Word8) <- peekByteOff p2 n
392 case a `compare` b of
393 EQ -> cmp p1 p2 (n+1) len1 len2
396 {-# INLINE compareBytes #-}
399 -- -----------------------------------------------------------------------------
400 -- Introducing and eliminating 'ByteString's
402 -- | /O(1)/ The empty 'ByteString'
404 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
405 {-# NOINLINE empty #-}
407 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
408 packByte :: Word8 -> ByteString
409 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
410 withForeignPtr fp $ \p -> poke p c
412 {-# INLINE packByte #-}
415 -- XXX The unsafePerformIO is critical!
419 -- packByte 255 `compare` packByte 127
423 -- case mallocByteString 2 of
424 -- ForeignPtr f internals ->
425 -- case writeWord8OffAddr# f 0 255 of _ ->
426 -- case writeWord8OffAddr# f 0 127 of _ ->
427 -- case eqAddr# f f of
428 -- False -> case compare (GHC.Prim.plusAddr# f 0)
429 -- (GHC.Prim.plusAddr# f 0)
433 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
435 -- For applications with large numbers of string literals, pack can be a
436 -- bottleneck. In such cases, consider using packAddress (GHC only).
437 pack :: [Word8] -> ByteString
439 #if !defined(__GLASGOW_HASKELL__)
441 pack str = create (P.length str) $ \p -> go p str
444 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
446 #else /* hack away */
448 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
450 go _ _ [] = return ()
451 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
453 writeByte p i c = ST $ \s# ->
454 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
458 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
459 unpack :: ByteString -> [Word8]
461 #if !defined(__GLASGOW_HASKELL__)
463 unpack (PS _ _ 0) = []
464 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
465 go (p `plusPtr` s) (l - 1) []
468 go p 0 acc = peek p >>= \e -> return (e : acc)
469 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
470 {-# INLINE unpack #-}
474 unpack ps = build (unpackFoldr ps)
475 {-# INLINE unpack #-}
477 unpackList :: ByteString -> [Word8]
478 unpackList (PS fp off len) = withPtr fp $ \p -> do
480 loop _ (-1) acc = return acc
483 loop q (n-1) (a : acc)
484 loop (p `plusPtr` off) (len-1) []
487 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
490 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
491 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
493 loop _ (-1) acc = return acc
496 loop q (n-1) (a `f` acc)
497 loop (p `plusPtr` off) (len-1) ch
498 {-# INLINE [0] unpackFoldr #-}
500 -- TODO just use normal foldr here.
504 ------------------------------------------------------------------------
506 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
507 -- conversion function
508 packWith :: (a -> Word8) -> [a] -> ByteString
509 packWith k str = create (P.length str) $ \p -> go p str
513 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
514 {-# INLINE packWith #-}
515 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
517 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
518 unpackWith :: (Word8 -> a) -> ByteString -> [a]
519 unpackWith _ (PS _ _ 0) = []
520 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
521 go (p `plusPtr` s) (l - 1) []
524 go p 0 acc = peek p >>= \e -> return (k e : acc)
525 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
526 {-# INLINE unpackWith #-}
527 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
529 -- ---------------------------------------------------------------------
532 -- | /O(1)/ Test whether a ByteString is empty.
533 null :: ByteString -> Bool
534 null (PS _ _ l) = l == 0
537 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
538 length :: ByteString -> Int
539 length (PS _ _ l) = l
541 #if defined(__GLASGOW_HASKELL__)
542 {-# INLINE [1] length #-}
547 -- Translate length into a loop.
548 -- Performace ok, but allocates too much, so disable for now.
550 "length/loop" forall f acc s .
551 length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
555 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
556 -- complexity, as it requires a memcpy.
557 cons :: Word8 -> ByteString -> ByteString
558 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
560 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
565 -- | /O(n)/ Append a byte to the end of a 'ByteString'
566 snoc :: ByteString -> Word8 -> ByteString
567 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
568 memcpy p (f `plusPtr` s) (fromIntegral l)
569 poke (p `plusPtr` l) c
574 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
575 head :: ByteString -> Word8
577 | null ps = errorEmptyList "head"
578 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
581 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
582 tail :: ByteString -> ByteString
584 | l <= 0 = errorEmptyList "tail"
585 | otherwise = PS p (s+1) (l-1)
588 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
589 last :: ByteString -> Word8
591 | null ps = errorEmptyList "last"
592 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
595 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
596 init :: ByteString -> ByteString
598 | l <= 0 = errorEmptyList "init"
599 | otherwise = PS p s (l-1)
602 -- | /O(n)/ Append two ByteStrings
603 append :: ByteString -> ByteString -> ByteString
604 append xs ys | null xs = ys
606 | otherwise = concat [xs,ys]
607 {-# INLINE append #-}
609 -- ---------------------------------------------------------------------
612 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
613 -- element of @xs@. This function is subject to array fusion.
614 map :: (Word8 -> Word8) -> ByteString -> ByteString
615 map f = loopArr . loopU (mapEFL f) noAL
618 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
619 -- slightly faster for one-shot cases.
620 map' :: (Word8 -> Word8) -> ByteString -> ByteString
621 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
622 np <- mallocByteString (len+1)
623 withForeignPtr np $ \p -> do
624 map_ 0 (a `plusPtr` s) p
628 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
631 | n >= len = return ()
633 x <- peekByteOff p1 n
634 pokeByteOff p2 n (f x)
638 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
639 reverse :: ByteString -> ByteString
640 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
641 c_reverse p (f `plusPtr` s) (fromIntegral l)
644 reverse = pack . P.reverse . unpack
647 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
648 -- 'ByteString' and \`intersperses\' that byte between the elements of
649 -- the 'ByteString'. It is analogous to the intersperse function on
651 intersperse :: Word8 -> ByteString -> ByteString
652 intersperse c ps@(PS x s l)
654 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
655 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
658 intersperse c = pack . List.intersperse c . unpack
661 -- | The 'transpose' function transposes the rows and columns of its
662 -- 'ByteString' argument.
663 transpose :: [ByteString] -> [ByteString]
664 transpose ps = P.map pack (List.transpose (P.map unpack ps))
666 -- ---------------------------------------------------------------------
667 -- Reducing 'ByteString's
669 -- | 'foldl', applied to a binary operator, a starting value (typically
670 -- the left-identity of the operator), and a ByteString, reduces the
671 -- ByteString using the binary operator, from left to right.
672 -- This function is subject to array fusion.
673 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
674 foldl f z = loopAcc . loopU (foldEFL f) z
679 -- About twice as fast with 6.4.1, but not fuseable
680 -- A simple fold . map is enough to make it worth while.
682 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
683 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
686 lgo z p q | p == q = return z
687 | otherwise = do c <- peek p
688 lgo (f z c) (p `plusPtr` 1) q
691 -- | 'foldl\'' is like foldl, but strict in the accumulator.
692 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
693 foldl' f z = loopAcc . loopU (foldEFL' f) z
694 {-# INLINE foldl' #-}
696 -- | 'foldr', applied to a binary operator, a starting value
697 -- (typically the right-identity of the operator), and a ByteString,
698 -- reduces the ByteString using the binary operator, from right to left.
699 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
700 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
701 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
704 go p q | p == q = return z
705 | otherwise = do c <- peek p
706 ws <- go (p `plusPtr` 1) q
709 -- | 'foldl1' is a variant of 'foldl' that has no starting value
710 -- argument, and thus must be applied to non-empty 'ByteStrings'.
711 -- This function is subject to array fusion.
712 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
714 | null ps = errorEmptyList "foldl1"
715 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
717 -- | A strict version of 'foldl1'
718 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
720 | null ps = errorEmptyList "foldl1'"
721 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
723 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
724 -- and thus must be applied to non-empty 'ByteString's
725 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
727 | null ps = errorEmptyList "foldr1"
728 | otherwise = foldr f (last ps) (init ps)
730 -- ---------------------------------------------------------------------
733 -- | /O(n)/ Concatenate a list of ByteStrings.
734 concat :: [ByteString] -> ByteString
737 concat xs = create len $ \ptr -> go xs ptr
738 where len = P.sum . P.map length $ xs
741 go (PS p s l:ps) ptr = do
742 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
743 go ps (ptr `plusPtr` l)
745 -- | Map a function over a 'ByteString' and concatenate the results
746 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
747 concatMap f = foldr (append . f) empty
748 -- A silly function for ByteStrings anyway.
750 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
751 -- any element of the 'ByteString' satisfies the predicate.
752 any :: (Word8 -> Bool) -> ByteString -> Bool
753 any _ (PS _ _ 0) = False
754 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
755 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
758 go p q | p == q = return False
759 | otherwise = do c <- peek p
760 if f c then return True
761 else go (p `plusPtr` 1) q
765 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
766 -- if all elements of the 'ByteString' satisfy the predicate.
767 all :: (Word8 -> Bool) -> ByteString -> Bool
768 all _ (PS _ _ 0) = True
769 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
770 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
773 go p q | p == q = return True -- end of list
774 | otherwise = do c <- peek p
776 then go (p `plusPtr` 1) q
780 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
781 maximum :: ByteString -> Word8
782 maximum xs@(PS x s l)
783 | null xs = errorEmptyList "maximum"
784 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
785 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
786 {-# INLINE maximum #-}
788 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
789 minimum :: ByteString -> Word8
790 minimum xs@(PS x s l)
791 | null xs = errorEmptyList "minimum"
792 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
793 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
794 {-# INLINE minimum #-}
796 -- fusion is too slow here (10x)
799 maximum xs@(PS x s l)
800 | null xs = errorEmptyList "maximum"
801 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
803 maximum_ (p `plusPtr` s) 0 l w
805 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
809 | otherwise = do w <- peekByteOff ptr n
810 maximum_ ptr (n+1) m (if w > c then w else c)
812 minimum xs@(PS x s l)
813 | null xs = errorEmptyList "minimum"
814 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
816 minimum_ (p `plusPtr` s) 0 l w
818 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
822 | otherwise = do w <- peekByteOff ptr n
823 minimum_ ptr (n+1) m (if w < c then w else c)
826 -- | /O(n)/ map Word8 functions, provided with the index at each position
827 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
828 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
829 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
831 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
833 go n f t p | f == p = return ()
834 | otherwise = do w <- peek f
836 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
838 -- ---------------------------------------------------------------------
839 -- Building ByteStrings
841 -- | 'scanl' is similar to 'foldl', but returns a list of successive
842 -- reduced values from the left. This function will fuse.
844 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
848 -- > last (scanl f z xs) == foldl f z xs.
849 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
850 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space
853 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
854 -- This function will fuse.
856 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
857 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
860 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
861 {-# INLINE scanl1 #-}
863 -- ---------------------------------------------------------------------
864 -- Unfolds and replicates
866 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
867 -- the value of every element. The following holds:
869 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
871 -- This implemenation uses @memset(3)@
872 replicate :: Int -> Word8 -> ByteString
873 replicate w c | w <= 0 = empty
874 | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
878 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
882 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
885 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
886 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
887 -- the element and returns 'Nothing' if it is done producing the
888 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
889 -- prepending to the ByteString and @b@ is used as the next element in a
892 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
893 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
894 -- final size of the ByteString as an argument. 'cons' can then be
895 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
896 -- linear complexity. The depth of the recursion is limited to this
897 -- size, but may be less. For lazy, infinite unfoldr, use
898 -- 'Data.List.unfoldr' (from 'Data.List').
902 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
904 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
906 -- > unfoldrN n == take n $ List.unfoldr
907 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
910 | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
913 go q c n | n == i = return n -- stop if we reach `i'
914 | otherwise = case f c of
918 go (q `plusPtr` 1) new_c (n+1)
920 -- ---------------------------------------------------------------------
923 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
924 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
925 take :: Int -> ByteString -> ByteString
929 | otherwise = PS x s n
932 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
933 -- elements, or @[]@ if @n > 'length' xs@.
934 drop :: Int -> ByteString -> ByteString
938 | otherwise = PS x (s+n) (l-n)
941 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
942 splitAt :: Int -> ByteString -> (ByteString, ByteString)
943 splitAt n ps@(PS x s l)
944 | n <= 0 = (empty, ps)
945 | n >= l = (ps, empty)
946 | otherwise = (PS x s n, PS x (s+n) (l-n))
947 {-# INLINE splitAt #-}
949 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
950 -- returns the longest prefix (possibly empty) of @xs@ of elements that
952 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
953 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
954 {-# INLINE takeWhile #-}
956 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
957 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
958 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
959 {-# INLINE dropWhile #-}
961 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
962 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
963 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
966 -- | 'breakByte' breaks its ByteString argument at the first occurence
967 -- of the specified byte. It is more efficient than 'break' as it is
968 -- implemented with @memchr(3)@. I.e.
970 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
972 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
973 breakByte c p = case elemIndex c p of
975 Just n -> (unsafeTake n p, unsafeDrop n p)
976 {-# INLINE breakByte #-}
978 -- | 'spanByte' breaks its ByteString argument at the first
979 -- occurence of a byte other than its argument. It is more efficient
982 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
984 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
985 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
989 go p i | i >= l = return (ps, empty)
990 | otherwise = do c' <- peekByteOff p i
992 then return (unsafeTake i ps, unsafeDrop i ps)
994 {-# INLINE spanByte #-}
996 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
997 -- occurence of @w@. It behaves like 'break', except the delimiter is
998 -- not returned, and @Nothing@ is returned if the delimiter is not in
999 -- the ByteString. I.e.
1001 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
1003 -- > breakFirst c xs ==
1004 -- > let (x,y) = break (== c) xs
1005 -- > in if null y then Nothing else Just (x, drop 1 y))
1007 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
1008 breakFirst c p = case elemIndex c p of
1010 Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p)
1011 {-# INLINE breakFirst #-}
1013 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
1016 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
1018 -- and the following are equivalent:
1020 -- > breakLast 'c' "abcdef"
1021 -- > let (x,y) = break (=='c') (reverse "abcdef")
1022 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
1024 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
1025 breakLast c p = case elemIndexLast c p of
1027 Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p)
1028 {-# INLINE breakLast #-}
1030 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1031 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1032 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1033 span p ps = break (not . p) ps
1036 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1039 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1043 -- > spanEnd (not . isSpace) ps
1045 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1047 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1048 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1050 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1051 -- separators, where the predicate returns True for a separator element.
1052 -- The resulting components do not contain the separators. Two adjacent
1053 -- separators result in an empty component in the output. eg.
1055 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1056 -- > splitWith (=='a') [] == []
1058 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1060 #if defined(__GLASGOW_HASKELL__)
1061 splitWith _pred (PS _ _ 0) = []
1062 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1063 where pred# c# = pred_ (W8# c#)
1066 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1067 splitLoop pred' p 0 off' len' fp'
1069 splitLoop :: (Word# -> Bool)
1071 -> Int -> Int -> Int
1075 splitLoop pred' p idx' off' len' fp'
1076 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1077 | idx' >= len' = return [PS fp' off' idx']
1079 w <- peekElemOff p (off'+idx')
1080 if pred' (case w of W8# w# -> w#)
1081 then return (PS fp' off' idx' :
1082 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1083 else splitLoop pred' p (idx'+1) off' len' fp'
1084 {-# INLINE splitWith #-}
1087 splitWith _ (PS _ _ 0) = []
1088 splitWith p ps = loop p ps
1091 loop q qs = if null rest then [chunk]
1092 else chunk : loop q (unsafeTail rest)
1093 where (chunk,rest) = break q qs
1096 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1097 -- argument, consuming the delimiter. I.e.
1099 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1100 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1101 -- > split 'x' "x" == ["",""]
1105 -- > join [c] . split c == id
1106 -- > split == splitWith . (==)
1108 -- As for all splitting functions in this library, this function does
1109 -- not copy the substrings, it just constructs new 'ByteStrings' that
1110 -- are slices of the original.
1112 split :: Word8 -> ByteString -> [ByteString]
1113 split _ (PS _ _ 0) = []
1114 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1115 let ptr = p `plusPtr` s
1119 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1121 then [PS x (s+n) (l-n)]
1122 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1125 {-# INLINE split #-}
1128 -- slower. but stays inside Haskell.
1129 split _ (PS _ _ 0) = []
1130 split (W8# w#) (PS fp off len) = splitWith' off len fp
1132 splitWith' off' len' fp' = withPtr fp $ \p ->
1133 splitLoop p 0 off' len' fp'
1135 splitLoop :: Ptr Word8
1136 -> Int -> Int -> Int
1141 splitLoop p idx' off' len' fp'
1142 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1143 | idx' >= len' = return [PS fp' off' idx']
1145 (W8# x#) <- peekElemOff p (off'+idx')
1146 if word2Int# w# ==# word2Int# x#
1147 then return (PS fp' off' idx' :
1148 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1149 else splitLoop p (idx'+1) off' len' fp'
1152 -- | Like 'splitWith', except that sequences of adjacent separators are
1153 -- treated as a single separator. eg.
1155 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1157 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1158 tokens f = P.filter (not.null) . splitWith f
1159 {-# INLINE tokens #-}
1161 -- | The 'group' function takes a ByteString and returns a list of
1162 -- ByteStrings such that the concatenation of the result is equal to the
1163 -- argument. Moreover, each sublist in the result contains only equal
1164 -- elements. For example,
1166 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1168 -- It is a special case of 'groupBy', which allows the programmer to
1169 -- supply their own equality test. It is about 40% faster than
1171 group :: ByteString -> [ByteString]
1174 | otherwise = ys : group zs
1176 (ys, zs) = spanByte (unsafeHead xs) xs
1178 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1179 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1182 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1184 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1186 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1187 -- 'ByteString's and concatenates the list after interspersing the first
1188 -- argument between each element of the list.
1189 join :: ByteString -> [ByteString] -> ByteString
1190 join filler pss = concat (splice pss)
1194 splice (x:y:xs) = x:filler:splice (y:xs)
1198 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1199 -- with a char. Around 4 times faster than the generalised join.
1201 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1202 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1203 withForeignPtr ffp $ \fp ->
1204 withForeignPtr fgp $ \gp -> do
1205 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1206 poke (ptr `plusPtr` l) c
1207 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1209 len = length f + length g + 1
1210 {-# INLINE joinWithByte #-}
1212 -- ---------------------------------------------------------------------
1213 -- Indexing ByteStrings
1215 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1216 index :: ByteString -> Int -> Word8
1218 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1219 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1220 ++ ", length = " ++ show (length ps))
1221 | otherwise = ps `unsafeIndex` n
1222 {-# INLINE index #-}
1224 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1225 -- element in the given 'ByteString' which is equal to the query
1226 -- element, or 'Nothing' if there is no such element.
1227 -- This implementation uses memchr(3).
1228 elemIndex :: Word8 -> ByteString -> Maybe Int
1229 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1230 let p' = p `plusPtr` s
1231 q = memchr p' c (fromIntegral l)
1232 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1233 {-# INLINE elemIndex #-}
1235 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1236 -- element in the given 'ByteString' which is equal to the query
1237 -- element, or 'Nothing' if there is no such element. The following
1240 -- > elemIndexLast c xs ==
1241 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1243 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1244 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1245 go (p `plusPtr` s) (l-1)
1248 go p i | i < 0 = return Nothing
1249 | otherwise = do ch' <- peekByteOff p i
1251 then return $ Just i
1253 {-# INLINE elemIndexLast #-}
1255 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1256 -- the indices of all elements equal to the query element, in ascending order.
1257 -- This implementation uses memchr(3).
1258 elemIndices :: Word8 -> ByteString -> [Int]
1259 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1260 let ptr = p `plusPtr` s
1263 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1266 else let i = q `minusPtr` ptr
1269 {-# INLINE elemIndices #-}
1273 elemIndices :: Word8 -> ByteString -> [Int]
1274 elemIndices c ps = loop 0 ps
1276 loop _ ps' | null ps' = []
1277 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1278 | otherwise = loop (n+1) (unsafeTail ps')
1281 -- | count returns the number of times its argument appears in the ByteString
1283 -- > count = length . elemIndices
1285 -- But more efficiently than using length on the intermediate list.
1286 count :: Word8 -> ByteString -> Int
1287 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1288 return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1289 {-# INLINE count #-}
1293 -- around 30% slower
1295 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1296 go (p `plusPtr` s) (fromIntegral m) 0
1298 go :: Ptr Word8 -> CSize -> Int -> IO Int
1301 let q = memchr p w l
1304 else do let k = fromIntegral $ q `minusPtr` p
1305 go (q `plusPtr` 1) (l-k-1) (i+1)
1308 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1309 -- returns the index of the first element in the ByteString
1310 -- satisfying the predicate.
1311 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1312 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1315 go ptr n | n >= l = return Nothing
1316 | otherwise = do w <- peek ptr
1318 then return (Just n)
1319 else go (ptr `plusPtr` 1) (n+1)
1320 {-# INLINE findIndex #-}
1322 -- | The 'findIndices' function extends 'findIndex', by returning the
1323 -- indices of all elements satisfying the predicate, in ascending order.
1324 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1325 findIndices p ps = loop 0 ps
1328 loop n qs | null qs = []
1329 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1330 | otherwise = loop (n+1) (unsafeTail qs)
1332 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1333 -- of the string if no element is found, rather than Nothing.
1334 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1335 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1338 go ptr n | n >= l = return l
1339 | otherwise = do w <- peek ptr
1342 else go (ptr `plusPtr` 1) (n+1)
1343 {-# INLINE findIndexOrEnd #-}
1345 -- ---------------------------------------------------------------------
1346 -- Searching ByteStrings
1348 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1349 elem :: Word8 -> ByteString -> Bool
1350 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1353 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1354 notElem :: Word8 -> ByteString -> Bool
1355 notElem c ps = not (elem c ps)
1356 {-# INLINE notElem #-}
1358 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1359 -- returns a ByteString containing those characters that satisfy the
1360 -- predicate. This function is subject to array fusion.
1361 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1362 filter p = loopArr . loopU (filterEFL p) noAL
1363 {-# INLINE filter #-}
1365 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1366 -- around 2x faster for some one-shot applications.
1367 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1368 filter' k ps@(PS x s l)
1370 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1371 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1372 return (t `minusPtr` p) -- actual length
1375 go f t end | f == end = return t
1379 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1380 else go (f `plusPtr` 1) t end
1381 {-# INLINE filter' #-}
1384 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1385 -- case of filtering a single byte. It is more efficient to use
1386 -- /filterByte/ in this case.
1388 -- > filterByte == filter . (==)
1390 -- filterByte is around 10x faster, and uses much less space, than its
1391 -- filter equivalent
1392 filterByte :: Word8 -> ByteString -> ByteString
1393 filterByte w ps = replicate (count w ps) w
1394 {-# INLINE filterByte #-}
1397 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1398 -- case of filtering a single byte out of a list. It is more efficient
1399 -- to use /filterNotByte/ in this case.
1401 -- > filterNotByte == filter . (/=)
1403 -- filterNotByte is around 2x faster than its filter equivalent.
1404 filterNotByte :: Word8 -> ByteString -> ByteString
1405 filterNotByte w = filter' (/= w)
1406 {-# INLINE filterNotByte #-}
1408 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1409 -- and returns the first element in matching the predicate, or 'Nothing'
1410 -- if there is no such element.
1412 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1414 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1415 find f p = case findIndex f p of
1416 Just n -> Just (p `unsafeIndex` n)
1422 -- fuseable, but we don't want to walk the whole array.
1424 find k = foldl findEFL Nothing
1425 where findEFL a@(Just _) _ = a
1426 findEFL _ c | k c = Just c
1427 | otherwise = Nothing
1430 -- ---------------------------------------------------------------------
1431 -- Searching for substrings
1433 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1434 -- iff the first is a prefix of the second.
1435 isPrefixOf :: ByteString -> ByteString -> Bool
1436 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1439 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1440 withForeignPtr x2 $ \p2 -> do
1441 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1444 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1445 -- iff the first is a suffix of the second.
1447 -- The following holds:
1449 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1451 -- However, the real implemenation uses memcmp to compare the end of the
1452 -- string only, with no reverse required..
1453 isSuffixOf :: ByteString -> ByteString -> Bool
1454 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1457 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1458 withForeignPtr x2 $ \p2 -> do
1459 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1462 -- | Check whether one string is a substring of another. @isSubstringOf
1463 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1464 isSubstringOf :: ByteString -- ^ String to search for.
1465 -> ByteString -- ^ String to search in.
1467 isSubstringOf p s = not $ P.null $ findSubstrings p s
1469 -- | Get the first index of a substring in another string,
1470 -- or 'Nothing' if the string is not found.
1471 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1472 findSubstring :: ByteString -- ^ String to search for.
1473 -> ByteString -- ^ String to seach in.
1475 findSubstring = (listToMaybe .) . findSubstrings
1477 -- | Find the indexes of all (possibly overlapping) occurances of a
1478 -- substring in a string. This function uses the Knuth-Morris-Pratt
1479 -- string matching algorithm.
1480 findSubstrings :: ByteString -- ^ String to search for.
1481 -> ByteString -- ^ String to seach in.
1484 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1486 patc x = pat `unsafeIndex` x
1487 strc x = str `unsafeIndex` x
1489 -- maybe we should make kmpNext a UArray before using it in search?
1490 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1491 kmpNextL p _ | null p = []
1492 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1494 x = if not (null ps) && unsafeHead ps == patc j'
1495 then kmpNext Array.! j' else j'
1497 search i j = match ++ rest -- i: position in string, j: position in pattern
1498 where match = if j == m then [(i - j)] else []
1499 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1500 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1503 -- ---------------------------------------------------------------------
1506 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1507 -- corresponding pairs of bytes. If one input ByteString is short,
1508 -- excess elements of the longer ByteString are discarded. This is
1509 -- equivalent to a pair of 'unpack' operations.
1510 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1512 | null ps || null qs = []
1513 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1515 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1516 -- the first argument, instead of a tupling function. For example,
1517 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1518 -- corresponding sums.
1519 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1521 | null ps || null qs = []
1522 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1524 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1525 -- ByteStrings. Note that this performs two 'pack' operations.
1526 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1527 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1528 {-# INLINE unzip #-}
1530 -- ---------------------------------------------------------------------
1533 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1534 inits :: ByteString -> [ByteString]
1535 inits (PS x s l) = [PS x s n | n <- [0..l]]
1537 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1538 tails :: ByteString -> [ByteString]
1539 tails p | null p = [empty]
1540 | otherwise = p : tails (unsafeTail p)
1542 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1544 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1545 elems :: ByteString -> [ByteString]
1546 elems (PS _ _ 0) = []
1547 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1548 {-# INLINE elems #-}
1550 -- ---------------------------------------------------------------------
1551 -- ** Ordered 'ByteString's
1553 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1554 sort :: ByteString -> ByteString
1555 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1557 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1558 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1561 go 256 _ = return ()
1562 go i ptr = do n <- peekElemOff arr i
1563 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1564 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1567 -- "countEach counts str l" counts the number of occurences of each Word8 in
1568 -- str, and stores the result in counts.
1569 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1571 countEach counts str l = go 0
1574 go i | i == l = return ()
1575 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1576 x <- peekElemOff counts k
1577 pokeElemOff counts k (x + 1)
1581 sort :: ByteString -> ByteString
1582 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1583 memcpy p (f `plusPtr` s) l
1584 c_qsort p l -- inplace
1588 sort = pack . List.sort . unpack
1591 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1593 -- Try some linear sorts: radix, counting
1596 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1597 -- sortBy f ps = undefined
1599 -- ---------------------------------------------------------------------
1601 -- Extensions to the basic interface
1604 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1605 -- check for the empty case, so there is an obligation on the programmer
1606 -- to provide a proof that the ByteString is non-empty.
1607 unsafeHead :: ByteString -> Word8
1608 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1609 {-# INLINE unsafeHead #-}
1611 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1612 -- check for the empty case. As with 'unsafeHead', the programmer must
1613 -- provide a separate proof that the ByteString is non-empty.
1614 unsafeTail :: ByteString -> ByteString
1615 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1616 {-# INLINE unsafeTail #-}
1618 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1619 -- This omits the bounds check, which means there is an accompanying
1620 -- obligation on the programmer to ensure the bounds are checked in some
1622 unsafeIndex :: ByteString -> Int -> Word8
1623 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1624 {-# INLINE unsafeIndex #-}
1626 -- | A variety of 'take' which omits the checks on @n@ so there is an
1627 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1628 unsafeTake :: Int -> ByteString -> ByteString
1629 unsafeTake n (PS x s l) =
1630 assert (0 <= n && n <= l) $ PS x s n
1631 {-# INLINE unsafeTake #-}
1633 -- | A variety of 'drop' which omits the checks on @n@ so there is an
1634 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1635 unsafeDrop :: Int -> ByteString -> ByteString
1636 unsafeDrop n (PS x s l) =
1637 assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
1638 {-# INLINE unsafeDrop #-}
1640 -- ---------------------------------------------------------------------
1641 -- Low level constructors
1643 #if defined(__GLASGOW_HASKELL__)
1644 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1645 -- Addr\# (an arbitrary machine address assumed to point outside the
1646 -- garbage-collected heap) into a @ByteString@. A much faster way to
1647 -- create an Addr\# is with an unboxed string literal, than to pack a
1648 -- boxed string. A unboxed string literal is compiled to a static @char
1649 -- []@ by GHC. Establishing the length of the string requires a call to
1650 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1651 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1652 -- if you know the length of the string statically.
1656 -- > literalFS = packAddress "literal"#
1658 packAddress :: Addr# -> ByteString
1659 packAddress addr# = inlinePerformIO $ do
1660 p <- newForeignPtr_ cstr
1661 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1664 {-# INLINE packAddress #-}
1666 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1667 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1668 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1669 -- 'Addr\#' to the string, and the length of the string. Make sure the
1670 -- length is correct, otherwise use the safer 'packAddress' (where the
1671 -- length will be calculated once at runtime).
1672 unsafePackAddress :: Int -> Addr# -> ByteString
1673 unsafePackAddress len addr# = inlinePerformIO $ do
1674 p <- newForeignPtr_ cstr
1676 where cstr = Ptr addr#
1680 -- | /O(1)/ Build a ByteString from a ForeignPtr
1681 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1682 fromForeignPtr fp l = PS fp 0 l
1684 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1685 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1686 toForeignPtr (PS ps s l) = (ps, s, l)
1688 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1689 -- current 'ByteString' from any larger string it was created from, as
1691 skipIndex :: ByteString -> Int
1692 skipIndex (PS _ s _) = s
1693 {-# INLINE skipIndex #-}
1695 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1696 -- finalizer associated to it. The ByteString length is calculated using
1697 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1698 packCString :: CString -> ByteString
1699 packCString cstr = inlinePerformIO $ do
1700 fp <- newForeignPtr_ (castPtr cstr)
1701 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1703 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1704 -- have /no/ finalizer associated with it. This operation has /O(1)/
1705 -- complexity as we already know the final size, so no /strlen(3)/ is
1707 packCStringLen :: CStringLen -> ByteString
1708 packCStringLen (ptr,len) = inlinePerformIO $ do
1709 fp <- newForeignPtr_ (castPtr ptr)
1710 return $ PS fp 0 (fromIntegral len)
1712 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1713 -- have a @free(3)@ finalizer associated to it.
1714 packMallocCString :: CString -> ByteString
1715 packMallocCString cstr = inlinePerformIO $ do
1716 fp <- newForeignFreePtr (castPtr cstr)
1717 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1719 #if defined(__GLASGOW_HASKELL__)
1720 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1721 -- length, and an IO action representing a finalizer. This function is
1722 -- not available on Hugs.
1724 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1725 packCStringFinalizer p l f = do
1726 fp <- FC.newForeignPtr p f
1729 -- | Explicitly run the finaliser associated with a 'ByteString'.
1730 -- Further references to this value may generate invalid memory
1731 -- references. This operation is unsafe, as there may be other
1732 -- 'ByteStrings' referring to the same underlying pages. If you use
1733 -- this, you need to have a proof of some kind that all 'ByteString's
1734 -- ever generated from the underlying byte array are no longer live.
1735 unsafeFinalize :: ByteString -> IO ()
1736 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1740 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1741 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1742 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1743 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1745 alloc = withForeignPtr ps $ \p -> do
1746 buf <- c_malloc (fromIntegral l+1)
1747 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1748 poke (buf `plusPtr` l) (0::Word8)
1749 return $ castPtr buf
1751 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1752 -- Warning: modifying the @CString@ will affect the @ByteString@.
1753 -- Why is this function unsafe? It relies on the null byte at the end of
1754 -- the ByteString to be there. This is /not/ the case if your ByteString
1755 -- has been spliced from a larger string (i.e. with take or drop).
1756 -- Unless you can guarantee the null byte, you should use the safe
1757 -- version, which will copy the string first.
1759 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1760 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1762 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1763 -- This is mainly useful to allow the rest of the data pointed
1764 -- to by the 'ByteString' to be garbage collected, for example
1765 -- if a large string has been read in, and only a small part of it
1766 -- is needed in the rest of the program.
1767 copy :: ByteString -> ByteString
1768 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1769 memcpy p (f `plusPtr` s) (fromIntegral l)
1771 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1772 -- CString is going to be deallocated from C land.
1773 copyCString :: CString -> IO ByteString
1774 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1776 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1777 copyCStringLen :: CStringLen -> IO ByteString
1778 copyCStringLen (cstr, len) = do
1779 fp <- mallocForeignPtrArray (len+1)
1780 withForeignPtr fp $ \p -> do
1781 memcpy p (castPtr cstr) (fromIntegral len)
1782 poke (p `plusPtr` len) (0 :: Word8)
1783 return $! PS fp 0 len
1785 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1786 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1787 -- This is analogous to unsafeUseAsCString, and comes with the same
1788 -- safety requirements.
1790 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1791 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1793 -- | Given the maximum size needed and a function to make the contents
1794 -- of a ByteString, generate makes the 'ByteString'. The generating
1795 -- function is required to return the actual final size (<= the maximum
1796 -- size), and the resulting byte array is realloced to this size. The
1797 -- string is padded at the end with a null byte.
1799 -- generate is the main mechanism for creating custom, efficient
1800 -- ByteString functions, using Haskell or C functions to fill the space.
1802 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1804 fp <- mallocByteString i
1805 (ptr,n) <- withForeignPtr fp $ \p -> do
1809 else do fp_ <- mallocByteString i' -- realloc
1810 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1816 -- On the C malloc heap. Less fun.
1819 p <- mallocArray (i+1)
1821 p' <- reallocArray p (i'+1)
1822 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1823 fp <- newForeignFreePtr p'
1827 -- ---------------------------------------------------------------------
1830 #if defined(__GLASGOW_HASKELL__)
1832 -- | getLine, read a line from stdin.
1833 getLine :: IO ByteString
1834 getLine = hGetLine stdin
1836 -- | Lazily construct a list of lines of ByteStrings. This will be much
1837 -- better on memory consumption than using lines =<< getContents.
1838 hGetLines :: Handle -> IO [ByteString]
1841 go = unsafeInterleaveIO $ do
1842 ms <- catch (hGetLine h >>= return . Just)
1843 (\_ -> return Nothing)
1845 Nothing -> return []
1846 Just s -> do ss <- go
1849 -- | hGetLine. read a ByteString from a handle
1850 hGetLine :: Handle -> IO ByteString
1851 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1852 case haBufferMode handle_ of
1853 NoBuffering -> error "no buffering"
1854 _other -> hGetLineBuffered handle_
1857 hGetLineBuffered handle_ = do
1858 let ref = haBuffer handle_
1859 buf <- readIORef ref
1860 hGetLineBufferedLoop handle_ ref buf 0 []
1862 hGetLineBufferedLoop handle_ ref
1863 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1865 off <- findEOL r w raw
1866 let new_len = len + off - r
1867 xs <- mkPS raw r off
1869 -- if eol == True, then off is the offset of the '\n'
1870 -- otherwise off == w and the buffer is now empty.
1872 then do if (w == off + 1)
1873 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1874 else writeIORef ref buf{ bufRPtr = off + 1 }
1875 mkBigPS new_len (xs:xss)
1877 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1878 buf{ bufWPtr=0, bufRPtr=0 }
1880 -- Nothing indicates we caught an EOF, and we may have a
1881 -- partial line to return.
1883 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1885 then mkBigPS new_len (xs:xss)
1888 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1890 -- find the end-of-line character, if there is one
1894 (c,r') <- readCharFromBuffer raw r
1896 then return r -- NB. not r': don't include the '\n'
1897 else findEOL r' w raw
1899 maybeFillReadBuffer fd is_line is_stream buf = catch
1900 (do buf' <- fillReadBuffer fd is_line is_stream buf
1902 (\e -> if isEOFError e then return Nothing else ioError e)
1904 -- TODO, rewrite to use normal memcpy
1905 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1906 mkPS buf start end = do
1907 let len = end - start
1908 fp <- mallocByteString len
1909 withForeignPtr fp $ \p -> do
1910 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1911 return (PS fp 0 len)
1913 mkBigPS :: Int -> [ByteString] -> IO ByteString
1914 mkBigPS _ [ps] = return ps
1915 mkBigPS _ pss = return $! concat (P.reverse pss)
1919 -- ---------------------------------------------------------------------
1922 -- | Outputs a 'ByteString' to the specified 'Handle'.
1923 hPut :: Handle -> ByteString -> IO ()
1924 hPut _ (PS _ _ 0) = return ()
1925 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1926 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1928 -- | Write a ByteString to stdout
1929 putStr :: ByteString -> IO ()
1930 putStr = hPut stdout
1932 -- | Write a ByteString to stdout, appending a newline byte
1933 putStrLn :: ByteString -> IO ()
1934 putStrLn ps = hPut stdout ps >> hPut stdout nl
1935 where nl = packByte 0x0a
1937 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1938 -- is far more efficient than reading the characters into a 'String'
1939 -- and then using 'pack'.
1940 hGet :: Handle -> Int -> IO ByteString
1941 hGet _ 0 = return empty
1942 hGet h i = do fp <- mallocByteString i
1943 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1946 #if defined(__GLASGOW_HASKELL__)
1947 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1948 -- waiting for data to become available, instead it returns only whatever data
1950 hGetNonBlocking :: Handle -> Int -> IO ByteString
1951 hGetNonBlocking _ 0 = return empty
1952 hGetNonBlocking h i = do
1953 fp <- mallocByteString i
1954 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1958 -- | Read entire handle contents into a 'ByteString'.
1959 -- This function reads chunks at a time, doubling the chunksize on each
1960 -- read. The final buffer is then realloced to the appropriate size. For
1961 -- files > half of available memory, this may lead to memory exhaustion.
1962 -- Consider using 'readFile' in this case.
1964 -- As with 'hGet', the string representation in the file is assumed to
1967 hGetContents :: Handle -> IO ByteString
1969 let start_size = 1024
1970 p <- mallocArray start_size
1971 i <- hGetBuf h p start_size
1973 then do p' <- reallocArray p i
1974 fp <- newForeignFreePtr p'
1980 p' <- reallocArray p s'
1981 i <- hGetBuf h (p' `plusPtr` s) s
1983 then do let i' = s + i
1984 p'' <- reallocArray p' i'
1985 fp <- newForeignFreePtr p''
1989 -- | getContents. Equivalent to hGetContents stdin
1990 getContents :: IO ByteString
1991 getContents = hGetContents stdin
1993 -- | Read an entire file strictly into a 'ByteString'. This is far more
1994 -- efficient than reading the characters into a 'String' and then using
1995 -- 'pack'. It also may be more efficient than opening the file and
1996 -- reading it using hGet.
1997 readFile :: FilePath -> IO ByteString
1999 h <- openBinaryFile f ReadMode
2001 s <- hGet h $ fromIntegral l
2005 -- | Write a 'ByteString' to a file.
2006 writeFile :: FilePath -> ByteString -> IO ()
2007 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
2012 -- Disable until we can move it into a portable .hsc file
2015 -- | Like readFile, this reads an entire file directly into a
2016 -- 'ByteString', but it is even more efficient. It involves directly
2017 -- mapping the file to memory. This has the advantage that the contents
2018 -- of the file never need to be copied. Also, under memory pressure the
2019 -- page may simply be discarded, while in the case of readFile it would
2020 -- need to be written to swap. If you read many small files, mmapFile
2021 -- will be less memory-efficient than readFile, since each mmapFile
2022 -- takes up a separate page of memory. Also, you can run into bus
2023 -- errors if the file is modified. As with 'readFile', the string
2024 -- representation in the file is assumed to be ISO-8859-1.
2026 -- On systems without mmap, this is the same as a readFile.
2028 mmapFile :: FilePath -> IO ByteString
2029 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
2031 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
2033 h <- openBinaryFile f ReadMode
2034 l <- fromIntegral `fmap` hFileSize h
2035 -- Don't bother mmaping small files because each mmapped file takes up
2036 -- at least one full VM block.
2038 then do thefp <- mallocByteString l
2039 withForeignPtr thefp $ \p-> hGetBuf h p l
2044 fd <- fromIntegral `fmap` handleToFd h
2046 fp <- if p == nullPtr
2047 then do thefp <- mallocByteString l
2048 withForeignPtr thefp $ \p' -> hGetBuf h p' l
2051 -- The munmap leads to crashes on OpenBSD.
2052 -- maybe there's a use after unmap in there somewhere?
2053 #if !defined(__OpenBSD__)
2054 let unmap = c_munmap p l >> return ()
2056 let unmap = return ()
2058 fp <- FC.newForeignPtr p unmap
2063 where mmap_limit = 16*1024
2066 #if defined(__GLASGOW_HASKELL__)
2068 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
2070 getArgs :: IO [ByteString]
2072 alloca $ \ p_argc ->
2073 alloca $ \ p_argv -> do
2074 getProgArgv p_argc p_argv
2075 p <- fromIntegral `fmap` peek p_argc
2077 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
2080 -- ---------------------------------------------------------------------
2081 -- Internal utilities
2083 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
2084 -- silently truncate to 8 bits Chars > '\255'. They are provided as
2085 -- convenience for ByteString construction.
2086 w2c :: Word8 -> Char
2087 #if !defined(__GLASGOW_HASKELL__)
2088 w2c = chr . fromIntegral
2090 w2c = unsafeChr . fromIntegral
2094 c2w :: Char -> Word8
2095 c2w = fromIntegral . ord
2098 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2099 -- is padded with a null byte.
2100 mallocByteString :: Int -> IO (ForeignPtr Word8)
2101 mallocByteString l = do
2102 fp <- mallocForeignPtrArray (l+1)
2103 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2106 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2107 -- argument gives the final size of the ByteString. Unlike 'generate'
2108 -- the ByteString is not reallocated if the final size is less than the
2109 -- estimated size. Also, unlike 'generate' ByteString's created this way
2110 -- are managed on the Haskell heap.
2111 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2112 create l write_ptr = inlinePerformIO $ do
2113 fp <- mallocByteString (l+1)
2114 withForeignPtr fp $ \p -> write_ptr p
2116 {-# INLINE create #-}
2118 -- | Perform an operation with a temporary ByteString
2119 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2120 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2121 {-# INLINE withPtr #-}
2123 -- Common up near identical calls to `error' to reduce the number
2124 -- constant strings created when compiled:
2125 errorEmptyList :: String -> a
2126 errorEmptyList fun = moduleError fun "empty ByteString"
2127 {-# NOINLINE errorEmptyList #-}
2129 moduleError :: String -> String -> a
2130 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2131 {-# NOINLINE moduleError #-}
2133 -- Find from the end of the string using predicate
2134 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2135 STRICT2(findFromEndUntil)
2136 findFromEndUntil f ps@(PS x s l) =
2138 else if f (last ps) then l
2139 else findFromEndUntil f (PS x s (l-1))
2141 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2142 -- it exposes lots of things to further inlining
2144 {-# INLINE inlinePerformIO #-}
2145 inlinePerformIO :: IO a -> a
2146 #if defined(__GLASGOW_HASKELL__)
2147 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2149 inlinePerformIO = unsafePerformIO
2152 {-# INLINE newForeignFreePtr #-}
2153 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2154 #if defined(__GLASGOW_HASKELL__)
2155 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2157 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2160 -- ---------------------------------------------------------------------
2162 -- Standard C functions
2165 foreign import ccall unsafe "string.h strlen" c_strlen
2168 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2169 :: CInt -> IO (Ptr Word8)
2171 foreign import ccall unsafe "static stdlib.h free" c_free
2172 :: Ptr Word8 -> IO ()
2174 #if !defined(__GLASGOW_HASKELL__)
2175 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2176 :: FunPtr (Ptr Word8 -> IO ())
2179 foreign import ccall unsafe "string.h memset" memset
2180 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2182 foreign import ccall unsafe "string.h memchr" memchr
2183 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2185 foreign import ccall unsafe "string.h memcmp" memcmp
2186 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2188 foreign import ccall unsafe "string.h memcpy" memcpy
2189 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2191 -- ---------------------------------------------------------------------
2196 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2197 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2199 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2200 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2202 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2203 :: Ptr Word8 -> CInt -> Word8
2205 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2206 :: Ptr Word8 -> CInt -> Word8
2208 foreign import ccall unsafe "static fpstring.h count" c_count
2209 :: Ptr Word8 -> CInt -> Word8 -> CInt
2211 -- ---------------------------------------------------------------------
2215 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2216 :: Int -> Int -> IO (Ptr Word8)
2218 foreign import ccall unsafe "static unistd.h close" c_close
2221 # if !defined(__OpenBSD__)
2222 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2223 :: Ptr Word8 -> Int -> IO Int
2227 -- ---------------------------------------------------------------------
2228 -- Internal GHC Haskell magic
2230 #if defined(__GLASGOW_HASKELL__)
2231 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2232 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2234 foreign import ccall unsafe "__hscore_memcpy_src_off"
2235 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2238 -- ---------------------------------------------------------------------
2240 -- Functional array fusion for ByteStrings.
2242 -- From the Data Parallel Haskell project,
2243 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2246 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2247 -- the fact that no bottoms of this type are ever constructed; hence, we can
2248 -- assume @(_ :: NoAL) `seq` x = x@.
2252 -- | Special forms of loop arguments
2254 -- * These are common special cases for the three function arguments of gen
2255 -- and loop; we give them special names to make it easier to trigger RULES
2256 -- applying in the special cases represented by these arguments. The
2257 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2258 -- two simplifier phases.
2260 -- * In the case where the accumulator is not needed, it is better to always
2261 -- explicitly return a value `()', rather than just copy the input to the
2262 -- output, as the former gives GHC better local information.
2265 -- | Element function expressing a mapping only
2266 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2267 mapEFL f = \_ e -> (noAL, (Just $ f e))
2268 #if defined(__GLASGOW_HASKELL__)
2269 {-# INLINE [1] mapEFL #-}
2272 -- | Element function implementing a filter function only
2273 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2274 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2275 #if defined(__GLASGOW_HASKELL__)
2276 {-# INLINE [1] filterEFL #-}
2279 -- |Element function expressing a reduction only
2280 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2281 foldEFL f = \a e -> (f a e, Nothing)
2282 #if defined(__GLASGOW_HASKELL__)
2283 {-# INLINE [1] foldEFL #-}
2286 -- | A strict foldEFL.
2287 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2288 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2289 #if defined(__GLASGOW_HASKELL__)
2290 {-# INLINE [1] foldEFL' #-}
2293 -- | Element function expressing a prefix reduction only
2295 scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8)
2296 scanEFL f = \a e -> (f a e, Just a)
2297 #if defined(__GLASGOW_HASKELL__)
2298 {-# INLINE [1] scanEFL #-}
2304 #if defined(__GLASGOW_HASKELL__)
2305 {-# INLINE [1] noAL #-}
2308 -- | Projection functions that are fusion friendly (as in, we determine when
2309 -- they are inlined)
2310 loopArr :: (ByteString, acc) -> ByteString
2311 loopArr (arr, _) = arr
2312 #if defined(__GLASGOW_HASKELL__)
2313 {-# INLINE [1] loopArr #-}
2316 loopAcc :: (ByteString, acc) -> acc
2317 loopAcc (_, acc) = acc
2318 #if defined(__GLASGOW_HASKELL__)
2319 {-# INLINE [1] loopAcc #-}
2322 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2323 loopSndAcc (arr, (_, acc)) = (arr, acc)
2324 #if defined(__GLASGOW_HASKELL__)
2325 {-# INLINE [1] loopSndAcc #-}
2328 ------------------------------------------------------------------------
2331 -- size, and then percentage.
2334 -- | Iteration over over ByteStrings
2335 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2336 -> acc -- ^ initial acc value
2337 -> ByteString -- ^ input ByteString
2338 -> (ByteString, acc)
2340 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2341 fp <- mallocByteString i
2342 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2343 (acc, i') <- go (a `plusPtr` s) p start
2345 then return (fp,i',acc) -- no realloc for map
2346 else do fp_ <- mallocByteString i' -- realloc
2347 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2350 return (PS ptr 0 n, acc)
2355 trans a_off ma_off acc
2356 | a_off >= i = return (acc, ma_off)
2358 x <- peekByteOff p a_off
2359 let (acc', oe) = f acc x
2360 ma_off' <- case oe of
2361 Nothing -> return ma_off
2362 Just e -> do pokeByteOff ma ma_off e
2364 trans (a_off+1) ma_off' acc'
2366 #if defined(__GLASGOW_HASKELL__)
2367 {-# INLINE [1] loopU #-}
2372 -- |Fuse to flat loop functions
2373 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2374 -> (a2 -> Word8 -> (a2, Maybe Word8))
2377 -> ((a1, a2), Maybe Word8)
2378 fuseEFL f g (acc1, acc2) e1 =
2380 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2383 (acc2', res) -> ((acc1', acc2'), res)
2387 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2388 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2389 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2391 "loopArr/loopSndAcc" forall x.
2392 loopArr (loopSndAcc x) = loopArr x
2394 "seq/NoAL" forall (u::NoAL) e.