1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
73 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
75 -- * Reducing 'ByteString's
76 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
78 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
80 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
83 concat, -- :: [ByteString] -> ByteString
84 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
85 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
86 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
87 maximum, -- :: ByteString -> Word8
88 minimum, -- :: ByteString -> Word8
89 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
91 -- * Generating and unfolding ByteStrings
92 replicate, -- :: Int -> Word8 -> ByteString
93 unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
97 -- ** Breaking strings
98 take, -- :: Int -> ByteString -> ByteString
99 drop, -- :: Int -> ByteString -> ByteString
100 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
101 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
102 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
103 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
104 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
105 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
107 -- ** Breaking and dropping on specific bytes
108 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
109 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
110 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
111 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
113 -- ** Breaking into many substrings
114 split, -- :: Word8 -> ByteString -> [ByteString]
115 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
116 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
117 group, -- :: ByteString -> [ByteString]
118 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
120 -- ** Joining strings
121 join, -- :: ByteString -> [ByteString] -> ByteString
122 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
124 -- * Indexing ByteStrings
125 index, -- :: ByteString -> Int -> Word8
126 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
127 elemIndices, -- :: Word8 -> ByteString -> [Int]
128 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
129 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
130 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
131 count, -- :: Word8 -> ByteString -> Int
133 -- * Ordered ByteStrings
134 sort, -- :: ByteString -> ByteString
136 -- * Searching ByteStrings
138 -- ** Searching by equality
139 -- | These functions use memchr(3) to efficiently search the ByteString
141 elem, -- :: Word8 -> ByteString -> Bool
142 notElem, -- :: Word8 -> ByteString -> Bool
143 filterByte, -- :: Word8 -> ByteString -> ByteString
144 filterNotByte, -- :: Word8 -> ByteString -> ByteString
146 -- ** Searching with a predicate
147 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
148 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
149 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
151 -- ** Prefixes and suffixes
152 -- | These functions use memcmp(3) to efficiently compare substrings
153 isPrefixOf, -- :: ByteString -> ByteString -> Bool
154 isSuffixOf, -- :: ByteString -> ByteString -> Bool
156 -- ** Search for arbitrary substrings
157 isSubstringOf, -- :: ByteString -> ByteString -> Bool
158 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
159 findSubstrings, -- :: ByteString -> ByteString -> [Int]
161 -- * Zipping and unzipping ByteStrings
162 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
163 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
164 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
166 -- * Unchecked access
167 unsafeHead, -- :: ByteString -> Word8
168 unsafeTail, -- :: ByteString -> ByteString
169 unsafeIndex, -- :: ByteString -> Int -> Word8
171 -- * Low level introduction and elimination
172 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
173 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
174 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
175 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
176 skipIndex, -- :: ByteString -> Int
178 -- ** Packing CStrings and pointers
179 packCString, -- :: CString -> ByteString
180 packCStringLen, -- :: CString -> ByteString
181 packMallocCString, -- :: CString -> ByteString
183 #if defined(__GLASGOW_HASKELL__)
184 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
185 packAddress, -- :: Addr# -> ByteString
186 unsafePackAddress, -- :: Int -> Addr# -> ByteString
187 unsafeFinalize, -- :: ByteString -> IO ()
190 -- ** Using ByteStrings as CStrings
191 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
192 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
193 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
195 -- ** Copying ByteStrings
196 -- | These functions perform memcpy(3) operations
197 copy, -- :: ByteString -> ByteString
198 copyCString, -- :: CString -> ByteString
199 copyCStringLen, -- :: CStringLen -> ByteString
201 -- * I\/O with @ByteString@s
203 -- ** Standard input and output
205 #if defined(__GLASGOW_HASKELL__)
206 getLine, -- :: IO ByteString
208 getContents, -- :: IO ByteString
209 putStr, -- :: ByteString -> IO ()
210 putStrLn, -- :: ByteString -> IO ()
213 readFile, -- :: FilePath -> IO ByteString
214 writeFile, -- :: FilePath -> ByteString -> IO ()
215 -- mmapFile, -- :: FilePath -> IO ByteString
217 -- ** I\/O with Handles
218 #if defined(__GLASGOW_HASKELL__)
219 getArgs, -- :: IO [ByteString]
220 hGetLine, -- :: Handle -> IO ByteString
221 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
223 hGetContents, -- :: Handle -> IO ByteString
224 hGet, -- :: Handle -> Int -> IO ByteString
225 hPut, -- :: Handle -> ByteString -> IO ()
227 -- * Fusion utilities
228 #if defined(__GLASGOW_HASKELL__)
229 unpackList, -- eek, otherwise it gets thrown away by the simplifier
232 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
233 loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL,
237 import qualified Prelude as P
238 import Prelude hiding (reverse,head,tail,last,init,null
239 ,length,map,lines,foldl,foldr,unlines
240 ,concat,any,take,drop,splitAt,takeWhile
241 ,dropWhile,span,break,elem,filter,maximum
242 ,minimum,all,concatMap,foldl1,foldr1
243 ,readFile,writeFile,replicate
244 ,getContents,getLine,putStr,putStrLn
245 ,zip,zipWith,unzip,notElem)
247 import qualified Data.List as List
250 import Data.Word (Word8)
251 import Data.Maybe (listToMaybe)
252 import Data.Array (listArray)
253 import qualified Data.Array as Array ((!))
255 -- Control.Exception.bracket not available in yhc or nhc
256 import Control.Exception (bracket)
257 import Control.Monad (when)
259 import Foreign.C.String (CString, CStringLen)
260 import Foreign.C.Types (CSize, CInt)
261 import Foreign.ForeignPtr
262 import Foreign.Marshal.Array
264 import Foreign.Storable (Storable(..))
266 -- hGetBuf and hPutBuf not available in yhc or nhc
267 import System.IO (stdin,stdout,hClose,hFileSize
268 ,hGetBuf,hPutBuf,openBinaryFile
271 #if !defined(__GLASGOW_HASKELL__)
272 import System.IO.Unsafe
275 #if defined(__GLASGOW_HASKELL__)
277 import Data.Generics (Data(..), Typeable(..))
279 import System.IO (hGetBufNonBlocking)
280 import System.IO.Error (isEOFError)
282 import Foreign.Marshal (alloca)
283 import qualified Foreign.Concurrent as FC (newForeignPtr)
286 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
287 import GHC.Base (build, unsafeChr)
288 import GHC.Word hiding (Word8)
289 import GHC.Ptr (Ptr(..))
290 import GHC.ST (ST(..))
295 -- CFILES stuff is Hugs only
296 {-# CFILES cbits/fpstring.c #-}
298 -- -----------------------------------------------------------------------------
300 -- Useful macros, until we have bang patterns
303 #define STRICT1(f) f a | a `seq` False = undefined
304 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
305 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
306 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
307 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
309 -- -----------------------------------------------------------------------------
311 -- | A space-efficient representation of a Word8 vector, supporting many
312 -- efficient operations. A 'ByteString' contains 8-bit characters only.
314 -- Instances of Eq, Ord, Read, Show, Data, Typeable
316 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
320 #if defined(__GLASGOW_HASKELL__)
321 deriving (Data, Typeable)
324 instance Eq ByteString
327 instance Ord ByteString
328 where compare = compareBytes
330 instance Show ByteString where
331 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
333 instance Read ByteString where
334 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
337 instance Arbitrary PackedString where
338 arbitrary = P.pack `fmap` arbitrary
339 coarbitrary s = coarbitrary (P.unpack s)
342 -- | /O(n)/ Equality on the 'ByteString' type.
343 eq :: ByteString -> ByteString -> Bool
344 eq a@(PS p s l) b@(PS p' s' l')
345 | l /= l' = False -- short cut on length
346 | p == p' && s == s' = True -- short cut for the same string
347 | otherwise = compareBytes a b == EQ
350 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
351 compareBytes :: ByteString -> ByteString -> Ordering
352 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
353 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
354 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
355 | otherwise = inlinePerformIO $
356 withForeignPtr x1 $ \p1 ->
357 withForeignPtr x2 $ \p2 -> do
358 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
359 return $ case i `compare` 0 of
360 EQ -> l1 `compare` l2
362 {-# INLINE compareBytes #-}
366 -- About 4x slower over 32M
368 compareBytes :: ByteString -> ByteString -> Ordering
369 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
370 withForeignPtr fp1 $ \p1 ->
371 withForeignPtr fp2 $ \p2 ->
372 cmp (p1 `plusPtr` off1)
373 (p2 `plusPtr` off2) 0 len1 len2
375 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
377 cmp p1 p2 n len1 len2
378 | n == len1 = if n == len2 then return EQ else return LT
379 | n == len2 = return GT
381 (a :: Word8) <- peekByteOff p1 n
382 (b :: Word8) <- peekByteOff p2 n
383 case a `compare` b of
384 EQ -> cmp p1 p2 (n+1) len1 len2
387 {-# INLINE compareBytes #-}
390 -- -----------------------------------------------------------------------------
391 -- Introducing and eliminating 'ByteString's
393 -- | /O(1)/ The empty 'ByteString'
395 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
396 {-# NOINLINE empty #-}
398 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
399 packByte :: Word8 -> ByteString
400 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
401 withForeignPtr fp $ \p -> poke p c
403 {-# INLINE packByte #-}
406 -- XXX The unsafePerformIO is critical!
410 -- packByte 255 `compare` packByte 127
414 -- case mallocByteString 2 of
415 -- ForeignPtr f internals ->
416 -- case writeWord8OffAddr# f 0 255 of _ ->
417 -- case writeWord8OffAddr# f 0 127 of _ ->
418 -- case eqAddr# f f of
419 -- False -> case compare (GHC.Prim.plusAddr# f 0)
420 -- (GHC.Prim.plusAddr# f 0)
424 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
426 -- For applications with large numbers of string literals, pack can be a
427 -- bottleneck. In such cases, consider using packAddress (GHC only).
428 pack :: [Word8] -> ByteString
430 #if !defined(__GLASGOW_HASKELL__)
432 pack str = create (P.length str) $ \p -> go p str
435 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
437 #else /* hack away */
439 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
441 go _ _ [] = return ()
442 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
444 writeByte p i c = ST $ \s# ->
445 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
449 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
450 unpack :: ByteString -> [Word8]
452 #if !defined(__GLASGOW_HASKELL__)
454 unpack (PS _ _ 0) = []
455 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
456 go (p `plusPtr` s) (l - 1) []
459 go p 0 acc = peek p >>= \e -> return (e : acc)
460 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
461 {-# INLINE unpack #-}
465 unpack ps = build (unpackFoldr ps)
466 {-# INLINE unpack #-}
468 unpackList :: ByteString -> [Word8]
469 unpackList (PS fp off len) = withPtr fp $ \p -> do
471 loop _ (-1) acc = return acc
474 loop q (n-1) (a : acc)
475 loop (p `plusPtr` off) (len-1) []
478 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
481 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
482 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
484 loop _ (-1) acc = return acc
487 loop q (n-1) (a `f` acc)
488 loop (p `plusPtr` off) (len-1) ch
489 {-# INLINE [0] unpackFoldr #-}
493 ------------------------------------------------------------------------
495 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
496 -- conversion function
497 packWith :: (a -> Word8) -> [a] -> ByteString
498 packWith k str = create (P.length str) $ \p -> go p str
502 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
503 {-# INLINE packWith #-}
504 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
506 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
507 unpackWith :: (Word8 -> a) -> ByteString -> [a]
508 unpackWith _ (PS _ _ 0) = []
509 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
510 go (p `plusPtr` s) (l - 1) []
513 go p 0 acc = peek p >>= \e -> return (k e : acc)
514 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
515 {-# INLINE unpackWith #-}
516 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
518 -- ---------------------------------------------------------------------
521 -- | /O(1)/ Test whether a ByteString is empty.
522 null :: ByteString -> Bool
523 null (PS _ _ l) = l == 0
526 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
527 length :: ByteString -> Int
528 length (PS _ _ l) = l
530 #if defined(__GLASGOW_HASKELL__)
531 {-# INLINE [1] length #-}
536 -- Translate length into a loop.
537 -- Performace ok, but allocates too much, so disable for now.
539 "length/loop" forall f acc s .
540 length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
544 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
545 -- complexity, as it requires a memcpy.
546 cons :: Word8 -> ByteString -> ByteString
547 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
549 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
554 -- | /O(n)/ Append a byte to the end of a 'ByteString'
555 snoc :: ByteString -> Word8 -> ByteString
556 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
557 memcpy p (f `plusPtr` s) (fromIntegral l)
558 poke (p `plusPtr` l) c
563 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
564 head :: ByteString -> Word8
566 | null ps = errorEmptyList "head"
567 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
570 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
571 tail :: ByteString -> ByteString
573 | l <= 0 = errorEmptyList "tail"
574 | otherwise = PS p (s+1) (l-1)
577 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
578 last :: ByteString -> Word8
580 | null ps = errorEmptyList "last"
581 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
584 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
585 init :: ByteString -> ByteString
587 | l <= 0 = errorEmptyList "init"
588 | otherwise = PS p s (l-1)
591 -- | /O(n)/ Append two ByteStrings
592 append :: ByteString -> ByteString -> ByteString
593 append xs ys | null xs = ys
595 | otherwise = concat [xs,ys]
596 {-# INLINE append #-}
598 -- ---------------------------------------------------------------------
601 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
602 -- element of @xs@. This function is subject to array fusion.
603 map :: (Word8 -> Word8) -> ByteString -> ByteString
604 map f = loopArr . loopU (mapEFL f) noAL
607 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
608 -- slightly faster for one-shot cases.
609 map' :: (Word8 -> Word8) -> ByteString -> ByteString
610 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
611 np <- mallocByteString (len+1)
612 withForeignPtr np $ \p -> do
613 map_ 0 (a `plusPtr` s) p
617 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
620 | n >= len = return ()
622 x <- peekByteOff p1 n
623 pokeByteOff p2 n (f x)
627 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
628 reverse :: ByteString -> ByteString
629 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
630 c_reverse p (f `plusPtr` s) (fromIntegral l)
633 reverse = pack . P.reverse . unpack
636 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
637 -- 'ByteString' and \`intersperses\' that byte between the elements of
638 -- the 'ByteString'. It is analogous to the intersperse function on
640 intersperse :: Word8 -> ByteString -> ByteString
641 intersperse c ps@(PS x s l)
643 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
644 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
647 intersperse c = pack . List.intersperse c . unpack
650 -- | The 'transpose' function transposes the rows and columns of its
651 -- 'ByteString' argument.
652 transpose :: [ByteString] -> [ByteString]
653 transpose ps = P.map pack (List.transpose (P.map unpack ps))
655 -- ---------------------------------------------------------------------
656 -- Reducing 'ByteString's
658 -- | 'foldl', applied to a binary operator, a starting value (typically
659 -- the left-identity of the operator), and a ByteString, reduces the
660 -- ByteString using the binary operator, from left to right.
661 -- This function is subject to array fusion.
662 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
663 foldl f z = loopAcc . loopU (foldEFL f) z
668 -- About twice as fast with 6.4.1, but not fuseable
669 -- A simple fold . map is enough to make it worth while.
671 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
672 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
675 lgo z p q | p == q = return z
676 | otherwise = do c <- peek p
677 lgo (f z c) (p `plusPtr` 1) q
680 -- | 'foldl\'' is like foldl, but strict in the accumulator.
681 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
682 foldl' f z = loopAcc . loopU (foldEFL' f) z
683 {-# INLINE foldl' #-}
685 -- | 'foldr', applied to a binary operator, a starting value
686 -- (typically the right-identity of the operator), and a ByteString,
687 -- reduces the ByteString using the binary operator, from right to left.
688 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
689 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
690 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
693 go p q | p == q = return z
694 | otherwise = do c <- peek p
695 ws <- go (p `plusPtr` 1) q
698 -- | 'foldl1' is a variant of 'foldl' that has no starting value
699 -- argument, and thus must be applied to non-empty 'ByteStrings'.
700 -- This function is subject to array fusion.
701 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
703 | null ps = errorEmptyList "foldl1"
704 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
706 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
707 -- and thus must be applied to non-empty 'ByteString's
708 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
710 | null ps = errorEmptyList "foldr1"
711 | otherwise = foldr f (last ps) (init ps)
713 -- ---------------------------------------------------------------------
716 -- | /O(n)/ Concatenate a list of ByteStrings.
717 concat :: [ByteString] -> ByteString
720 concat xs = create len $ \ptr -> go xs ptr
721 where len = P.sum . P.map length $ xs
724 go (PS p s l:ps) ptr = do
725 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
726 go ps (ptr `plusPtr` l)
728 -- | Map a function over a 'ByteString' and concatenate the results
729 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
730 concatMap f = foldr (append . f) empty
732 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
733 -- any element of the 'ByteString' satisfies the predicate.
734 any :: (Word8 -> Bool) -> ByteString -> Bool
735 any _ (PS _ _ 0) = False
736 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
737 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
740 go p q | p == q = return False
741 | otherwise = do c <- peek p
742 if f c then return True
743 else go (p `plusPtr` 1) q
747 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
748 -- if all elements of the 'ByteString' satisfy the predicate.
749 all :: (Word8 -> Bool) -> ByteString -> Bool
750 all _ (PS _ _ 0) = True
751 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
752 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
755 go p q | p == q = return True -- end of list
756 | otherwise = do c <- peek p
758 then go (p `plusPtr` 1) q
762 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
763 maximum :: ByteString -> Word8
764 maximum xs@(PS x s l)
765 | null xs = errorEmptyList "maximum"
766 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
767 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
768 {-# INLINE maximum #-}
770 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
771 minimum :: ByteString -> Word8
772 minimum xs@(PS x s l)
773 | null xs = errorEmptyList "minimum"
774 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
775 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
776 {-# INLINE minimum #-}
778 -- fusion is too slow here (10x)
781 maximum xs@(PS x s l)
782 | null xs = errorEmptyList "maximum"
783 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
785 maximum_ (p `plusPtr` s) 0 l w
787 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
791 | otherwise = do w <- peekByteOff ptr n
792 maximum_ ptr (n+1) m (if w > c then w else c)
794 minimum xs@(PS x s l)
795 | null xs = errorEmptyList "minimum"
796 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
798 minimum_ (p `plusPtr` s) 0 l w
800 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
804 | otherwise = do w <- peekByteOff ptr n
805 minimum_ ptr (n+1) m (if w < c then w else c)
808 -- | /O(n)/ map Word8 functions, provided with the index at each position
809 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
810 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
811 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
813 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
815 go n f t p | f == p = return ()
816 | otherwise = do w <- peek f
818 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
820 -- ---------------------------------------------------------------------
821 -- Unfolds and replicates
823 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
824 -- the value of every element. The following holds:
826 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
828 -- This implemenation uses @memset(3)@
829 replicate :: Int -> Word8 -> ByteString
830 replicate w c | w <= 0 = empty
831 | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
835 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
839 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
842 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
843 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
844 -- the element and returns 'Nothing' if it is done producing the
845 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
846 -- prepending to the ByteString and @b@ is used as the next element in a
849 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
850 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
851 -- final size of the ByteString as an argument. 'cons' can then be
852 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
853 -- linear complexity. The depth of the recursion is limited to this
854 -- size, but may be less. For lazy, infinite unfoldr, use
855 -- 'Data.List.unfoldr' (from 'Data.List').
859 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
861 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
863 -- > unfoldrN n == take n $ List.unfoldr
864 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
867 | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
870 go q c n | n == i = return n -- stop if we reach `i'
871 | otherwise = case f c of
875 go (q `plusPtr` 1) new_c (n+1)
877 -- ---------------------------------------------------------------------
880 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
881 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
882 take :: Int -> ByteString -> ByteString
886 | otherwise = PS x s n
889 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
890 -- elements, or @[]@ if @n > 'length' xs@.
891 drop :: Int -> ByteString -> ByteString
895 | otherwise = PS x (s+n) (l-n)
898 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
899 splitAt :: Int -> ByteString -> (ByteString, ByteString)
900 splitAt n ps = (take n ps, drop n ps)
901 {-# INLINE splitAt #-}
903 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
904 -- returns the longest prefix (possibly empty) of @xs@ of elements that
906 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
907 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
908 {-# INLINE takeWhile #-}
910 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
911 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
912 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
913 {-# INLINE dropWhile #-}
915 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
916 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
917 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
920 -- | 'breakByte' breaks its ByteString argument at the first occurence
921 -- of the specified byte. It is more efficient than 'break' as it is
922 -- implemented with @memchr(3)@. I.e.
924 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
926 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
927 breakByte c p = case elemIndex c p of
929 Just n -> (take n p, drop n p)
930 {-# INLINE breakByte #-}
932 -- | 'spanByte' breaks its ByteString argument at the first
933 -- occurence of a byte other than its argument. It is more efficient
936 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
938 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
939 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
943 go p i | i >= l = return (ps, empty)
944 | otherwise = do c' <- peekByteOff p i
946 then return (take i ps, drop i ps)
948 {-# INLINE spanByte #-}
950 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
951 -- occurence of @w@. It behaves like 'break', except the delimiter is
952 -- not returned, and @Nothing@ is returned if the delimiter is not in
953 -- the ByteString. I.e.
955 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
957 -- > breakFirst c xs ==
958 -- > let (x,y) = break (== c) xs
959 -- > in if null y then Nothing else Just (x, drop 1 y))
961 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
962 breakFirst c p = case elemIndex c p of
964 Just n -> Just (take n p, drop (n+1) p)
965 {-# INLINE breakFirst #-}
967 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
970 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
972 -- and the following are equivalent:
974 -- > breakLast 'c' "abcdef"
975 -- > let (x,y) = break (=='c') (reverse "abcdef")
976 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
978 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
979 breakLast c p = case elemIndexLast c p of
981 Just n -> Just (take n p, drop (n+1) p)
982 {-# INLINE breakLast #-}
984 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
985 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
986 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
987 span p ps = break (not . p) ps
990 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
993 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
997 -- > spanEnd (not . isSpace) ps
999 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1001 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1002 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1004 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1005 -- separators, where the predicate returns True for a separator element.
1006 -- The resulting components do not contain the separators. Two adjacent
1007 -- separators result in an empty component in the output. eg.
1009 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1010 -- > splitWith (=='a') [] == []
1012 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1014 #if defined(__GLASGOW_HASKELL__)
1015 splitWith _pred (PS _ _ 0) = []
1016 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1017 where pred# c# = pred_ (W8# c#)
1020 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1021 splitLoop pred' p 0 off' len' fp'
1023 splitLoop :: (Word# -> Bool)
1025 -> Int -> Int -> Int
1029 splitLoop pred' p idx' off' len' fp'
1030 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1031 | idx' >= len' = return [PS fp' off' idx']
1033 w <- peekElemOff p (off'+idx')
1034 if pred' (case w of W8# w# -> w#)
1035 then return (PS fp' off' idx' :
1036 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1037 else splitLoop pred' p (idx'+1) off' len' fp'
1038 {-# INLINE splitWith #-}
1041 splitWith _ (PS _ _ 0) = []
1042 splitWith p ps = loop p ps
1045 loop q qs = if null rest then [chunk]
1046 else chunk : loop q (unsafeTail rest)
1047 where (chunk,rest) = break q qs
1050 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1051 -- argument, consuming the delimiter. I.e.
1053 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1054 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1055 -- > split 'x' "x" == ["",""]
1059 -- > join [c] . split c == id
1060 -- > split == splitWith . (==)
1062 -- As for all splitting functions in this library, this function does
1063 -- not copy the substrings, it just constructs new 'ByteStrings' that
1064 -- are slices of the original.
1066 split :: Word8 -> ByteString -> [ByteString]
1067 split _ (PS _ _ 0) = []
1068 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1069 let ptr = p `plusPtr` s
1073 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1075 then [PS x (s+n) (l-n)]
1076 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1079 {-# INLINE split #-}
1082 -- slower. but stays inside Haskell.
1083 split _ (PS _ _ 0) = []
1084 split (W8# w#) (PS fp off len) = splitWith' off len fp
1086 splitWith' off' len' fp' = withPtr fp $ \p ->
1087 splitLoop p 0 off' len' fp'
1089 splitLoop :: Ptr Word8
1090 -> Int -> Int -> Int
1095 splitLoop p idx' off' len' fp'
1096 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1097 | idx' >= len' = return [PS fp' off' idx']
1099 (W8# x#) <- peekElemOff p (off'+idx')
1100 if word2Int# w# ==# word2Int# x#
1101 then return (PS fp' off' idx' :
1102 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1103 else splitLoop p (idx'+1) off' len' fp'
1106 -- | Like 'splitWith', except that sequences of adjacent separators are
1107 -- treated as a single separator. eg.
1109 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1111 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1112 tokens f = P.filter (not.null) . splitWith f
1113 {-# INLINE tokens #-}
1115 -- | The 'group' function takes a ByteString and returns a list of
1116 -- ByteStrings such that the concatenation of the result is equal to the
1117 -- argument. Moreover, each sublist in the result contains only equal
1118 -- elements. For example,
1120 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1122 -- It is a special case of 'groupBy', which allows the programmer to
1123 -- supply their own equality test. It is about 40% faster than
1125 group :: ByteString -> [ByteString]
1128 | otherwise = ys : group zs
1130 (ys, zs) = spanByte (unsafeHead xs) xs
1132 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1133 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1136 | otherwise = take n xs : groupBy k (drop n xs)
1138 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1140 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1141 -- 'ByteString's and concatenates the list after interspersing the first
1142 -- argument between each element of the list.
1143 join :: ByteString -> [ByteString] -> ByteString
1144 join filler pss = concat (splice pss)
1148 splice (x:y:xs) = x:filler:splice (y:xs)
1152 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1153 -- with a char. Around 4 times faster than the generalised join.
1155 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1156 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1157 withForeignPtr ffp $ \fp ->
1158 withForeignPtr fgp $ \gp -> do
1159 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1160 poke (ptr `plusPtr` l) c
1161 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1163 len = length f + length g + 1
1164 {-# INLINE joinWithByte #-}
1166 -- ---------------------------------------------------------------------
1167 -- Indexing ByteStrings
1169 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1170 index :: ByteString -> Int -> Word8
1172 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1173 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1174 ++ ", length = " ++ show (length ps))
1175 | otherwise = ps `unsafeIndex` n
1176 {-# INLINE index #-}
1178 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1179 -- element in the given 'ByteString' which is equal to the query
1180 -- element, or 'Nothing' if there is no such element.
1181 -- This implementation uses memchr(3).
1182 elemIndex :: Word8 -> ByteString -> Maybe Int
1183 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1184 let p' = p `plusPtr` s
1185 q = memchr p' c (fromIntegral l)
1186 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1187 {-# INLINE elemIndex #-}
1189 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1190 -- element in the given 'ByteString' which is equal to the query
1191 -- element, or 'Nothing' if there is no such element. The following
1194 -- > elemIndexLast c xs ==
1195 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1197 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1198 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1199 go (p `plusPtr` s) (l-1)
1202 go p i | i < 0 = return Nothing
1203 | otherwise = do ch' <- peekByteOff p i
1205 then return $ Just i
1207 {-# INLINE elemIndexLast #-}
1209 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1210 -- the indices of all elements equal to the query element, in ascending order.
1211 -- This implementation uses memchr(3).
1212 elemIndices :: Word8 -> ByteString -> [Int]
1213 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1214 let ptr = p `plusPtr` s
1217 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1220 else let i = q `minusPtr` ptr
1223 {-# INLINE elemIndices #-}
1227 elemIndices :: Word8 -> ByteString -> [Int]
1228 elemIndices c ps = loop 0 ps
1230 loop _ ps' | null ps' = []
1231 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1232 | otherwise = loop (n+1) (unsafeTail ps')
1235 -- | count returns the number of times its argument appears in the ByteString
1237 -- > count = length . elemIndices
1239 -- But more efficiently than using length on the intermediate list.
1240 count :: Word8 -> ByteString -> Int
1241 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1242 return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1243 {-# INLINE count #-}
1247 -- around 30% slower
1249 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1250 go (p `plusPtr` s) (fromIntegral m) 0
1252 go :: Ptr Word8 -> CSize -> Int -> IO Int
1255 let q = memchr p w l
1258 else do let k = fromIntegral $ q `minusPtr` p
1259 go (q `plusPtr` 1) (l-k-1) (i+1)
1262 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1263 -- returns the index of the first element in the ByteString
1264 -- satisfying the predicate.
1265 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1266 findIndex k ps@(PS x s l)
1268 | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1271 go ptr n | n >= l = return Nothing
1272 | otherwise = do w <- peek ptr
1274 then return (Just n)
1275 else go (ptr `plusPtr` 1) (n+1)
1276 {-# INLINE findIndex #-}
1278 -- | The 'findIndices' function extends 'findIndex', by returning the
1279 -- indices of all elements satisfying the predicate, in ascending order.
1280 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1281 findIndices p ps = loop 0 ps
1284 loop n qs | null qs = []
1285 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1286 | otherwise = loop (n+1) (unsafeTail qs)
1288 -- ---------------------------------------------------------------------
1289 -- Searching ByteStrings
1291 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1292 elem :: Word8 -> ByteString -> Bool
1293 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1296 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1297 notElem :: Word8 -> ByteString -> Bool
1298 notElem c ps = not (elem c ps)
1299 {-# INLINE notElem #-}
1301 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1302 -- returns a ByteString containing those characters that satisfy the
1303 -- predicate. This function is subject to array fusion.
1304 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1305 filter p = loopArr . loopU (filterEFL p) noAL
1306 {-# INLINE filter #-}
1308 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1309 -- around 2x faster for some one-shot applications.
1310 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1311 filter' k ps@(PS x s l)
1313 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1314 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1315 return (t `minusPtr` p) -- actual length
1318 go f t end | f == end = return t
1322 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1323 else go (f `plusPtr` 1) t end
1324 {-# INLINE filter' #-}
1327 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1328 -- case of filtering a single byte. It is more efficient to use
1329 -- /filterByte/ in this case.
1331 -- > filterByte == filter . (==)
1333 -- filterByte is around 10x faster, and uses much less space, than its
1334 -- filter equivalent
1335 filterByte :: Word8 -> ByteString -> ByteString
1336 filterByte w ps = replicate (count w ps) w
1337 {-# INLINE filterByte #-}
1340 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1341 -- case of filtering a single byte out of a list. It is more efficient
1342 -- to use /filterNotByte/ in this case.
1344 -- > filterNotByte == filter . (/=)
1346 -- filterNotByte is around 2x faster than its filter equivalent.
1347 filterNotByte :: Word8 -> ByteString -> ByteString
1348 filterNotByte w = filter' (/= w)
1349 {-# INLINE filterNotByte #-}
1351 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1352 -- and returns the first element in matching the predicate, or 'Nothing'
1353 -- if there is no such element.
1355 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1357 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1358 find f p = case findIndex f p of
1359 Just n -> Just (p `unsafeIndex` n)
1365 -- fuseable, but we don't want to walk the whole array.
1367 find k = foldl findEFL Nothing
1368 where findEFL a@(Just _) _ = a
1369 findEFL _ c | k c = Just c
1370 | otherwise = Nothing
1373 -- ---------------------------------------------------------------------
1374 -- Searching for substrings
1376 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1377 -- iff the first is a prefix of the second.
1378 isPrefixOf :: ByteString -> ByteString -> Bool
1379 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1382 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1383 withForeignPtr x2 $ \p2 -> do
1384 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1387 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1388 -- iff the first is a suffix of the second.
1390 -- The following holds:
1392 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1394 -- However, the real implemenation uses memcmp to compare the end of the
1395 -- string only, with no reverse required..
1396 isSuffixOf :: ByteString -> ByteString -> Bool
1397 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1400 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1401 withForeignPtr x2 $ \p2 -> do
1402 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1405 -- | Check whether one string is a substring of another. @isSubstringOf
1406 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1407 isSubstringOf :: ByteString -- ^ String to search for.
1408 -> ByteString -- ^ String to search in.
1410 isSubstringOf p s = not $ P.null $ findSubstrings p s
1412 -- | Get the first index of a substring in another string,
1413 -- or 'Nothing' if the string is not found.
1414 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1415 findSubstring :: ByteString -- ^ String to search for.
1416 -> ByteString -- ^ String to seach in.
1418 findSubstring = (listToMaybe .) . findSubstrings
1420 -- | Find the indexes of all (possibly overlapping) occurances of a
1421 -- substring in a string. This function uses the Knuth-Morris-Pratt
1422 -- string matching algorithm.
1423 findSubstrings :: ByteString -- ^ String to search for.
1424 -> ByteString -- ^ String to seach in.
1427 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1429 patc x = pat `unsafeIndex` x
1430 strc x = str `unsafeIndex` x
1432 -- maybe we should make kmpNext a UArray before using it in search?
1433 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1434 kmpNextL p _ | null p = []
1435 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1437 x = if not (null ps) && unsafeHead ps == patc j'
1438 then kmpNext Array.! j' else j'
1440 search i j = match ++ rest -- i: position in string, j: position in pattern
1441 where match = if j == m then [(i - j)] else []
1442 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1443 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1446 -- ---------------------------------------------------------------------
1449 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1450 -- corresponding pairs of bytes. If one input ByteString is short,
1451 -- excess elements of the longer ByteString are discarded. This is
1452 -- equivalent to a pair of 'unpack' operations.
1453 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1455 | null ps || null qs = []
1456 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1458 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1459 -- the first argument, instead of a tupling function. For example,
1460 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1461 -- corresponding sums.
1462 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1464 | null ps || null qs = []
1465 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1467 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1468 -- ByteStrings. Note that this performs two 'pack' operations.
1469 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1470 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1471 {-# INLINE unzip #-}
1473 -- ---------------------------------------------------------------------
1476 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1477 inits :: ByteString -> [ByteString]
1478 inits (PS x s l) = [PS x s n | n <- [0..l]]
1480 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1481 tails :: ByteString -> [ByteString]
1482 tails p | null p = [empty]
1483 | otherwise = p : tails (unsafeTail p)
1485 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1487 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1488 elems :: ByteString -> [ByteString]
1489 elems (PS _ _ 0) = []
1490 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1491 {-# INLINE elems #-}
1493 -- ---------------------------------------------------------------------
1494 -- ** Ordered 'ByteString's
1496 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1497 sort :: ByteString -> ByteString
1498 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1500 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1501 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1504 go 256 _ = return ()
1505 go i ptr = do n <- peekElemOff arr i
1506 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1507 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1510 -- "countEach counts str l" counts the number of occurences of each Word8 in
1511 -- str, and stores the result in counts.
1512 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1514 countEach counts str l = go 0
1517 go i | i == l = return ()
1518 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1519 x <- peekElemOff counts k
1520 pokeElemOff counts k (x + 1)
1524 sort :: ByteString -> ByteString
1525 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1526 memcpy p (f `plusPtr` s) l
1527 c_qsort p l -- inplace
1531 sort = pack . List.sort . unpack
1534 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1536 -- Try some linear sorts: radix, counting
1539 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1540 -- sortBy f ps = undefined
1542 -- ---------------------------------------------------------------------
1544 -- Extensions to the basic interface
1547 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1548 -- check for the empty case, so there is an obligation on the programmer
1549 -- to provide a proof that the ByteString is non-empty.
1550 unsafeHead :: ByteString -> Word8
1551 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1552 {-# INLINE unsafeHead #-}
1554 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1555 -- check for the empty case. As with 'unsafeHead', the programmer must
1556 -- provide a separate proof that the ByteString is non-empty.
1557 unsafeTail :: ByteString -> ByteString
1558 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1559 {-# INLINE unsafeTail #-}
1561 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1562 -- This omits the bounds check, which means there is an accompanying
1563 -- obligation on the programmer to ensure the bounds are checked in some
1565 unsafeIndex :: ByteString -> Int -> Word8
1566 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1567 {-# INLINE unsafeIndex #-}
1569 -- ---------------------------------------------------------------------
1570 -- Low level constructors
1572 #if defined(__GLASGOW_HASKELL__)
1573 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1574 -- Addr\# (an arbitrary machine address assumed to point outside the
1575 -- garbage-collected heap) into a @ByteString@. A much faster way to
1576 -- create an Addr\# is with an unboxed string literal, than to pack a
1577 -- boxed string. A unboxed string literal is compiled to a static @char
1578 -- []@ by GHC. Establishing the length of the string requires a call to
1579 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1580 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1581 -- if you know the length of the string statically.
1585 -- > literalFS = packAddress "literal"#
1587 packAddress :: Addr# -> ByteString
1588 packAddress addr# = inlinePerformIO $ do
1589 p <- newForeignPtr_ cstr
1590 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1593 {-# INLINE packAddress #-}
1595 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1596 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1597 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1598 -- 'Addr\#' to the string, and the length of the string. Make sure the
1599 -- length is correct, otherwise use the safer 'packAddress' (where the
1600 -- length will be calculated once at runtime).
1601 unsafePackAddress :: Int -> Addr# -> ByteString
1602 unsafePackAddress len addr# = inlinePerformIO $ do
1603 p <- newForeignPtr_ cstr
1605 where cstr = Ptr addr#
1609 -- | /O(1)/ Build a ByteString from a ForeignPtr
1610 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1611 fromForeignPtr fp l = PS fp 0 l
1613 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1614 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1615 toForeignPtr (PS ps s l) = (ps, s, l)
1617 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1618 -- current 'ByteString' from any larger string it was created from, as
1620 skipIndex :: ByteString -> Int
1621 skipIndex (PS _ s _) = s
1622 {-# INLINE skipIndex #-}
1624 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1625 -- finalizer associated to it. The ByteString length is calculated using
1626 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1627 packCString :: CString -> ByteString
1628 packCString cstr = inlinePerformIO $ do
1629 fp <- newForeignPtr_ (castPtr cstr)
1630 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1632 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1633 -- have /no/ finalizer associated with it. This operation has /O(1)/
1634 -- complexity as we already know the final size, so no /strlen(3)/ is
1636 packCStringLen :: CStringLen -> ByteString
1637 packCStringLen (ptr,len) = inlinePerformIO $ do
1638 fp <- newForeignPtr_ (castPtr ptr)
1639 return $ PS fp 0 (fromIntegral len)
1641 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1642 -- have a @free(3)@ finalizer associated to it.
1643 packMallocCString :: CString -> ByteString
1644 packMallocCString cstr = inlinePerformIO $ do
1645 fp <- newForeignFreePtr (castPtr cstr)
1646 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1648 #if defined(__GLASGOW_HASKELL__)
1649 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1650 -- length, and an IO action representing a finalizer. This function is
1651 -- not available on Hugs.
1653 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1654 packCStringFinalizer p l f = do
1655 fp <- FC.newForeignPtr p f
1658 -- | Explicitly run the finaliser associated with a 'ByteString'.
1659 -- Further references to this value may generate invalid memory
1660 -- references. This operation is unsafe, as there may be other
1661 -- 'ByteStrings' referring to the same underlying pages. If you use
1662 -- this, you need to have a proof of some kind that all 'ByteString's
1663 -- ever generated from the underlying byte array are no longer live.
1664 unsafeFinalize :: ByteString -> IO ()
1665 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1669 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1670 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1671 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1672 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1674 alloc = withForeignPtr ps $ \p -> do
1675 buf <- c_malloc (fromIntegral l+1)
1676 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1677 poke (buf `plusPtr` l) (0::Word8)
1678 return $ castPtr buf
1680 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1681 -- Warning: modifying the @CString@ will affect the @ByteString@.
1682 -- Why is this function unsafe? It relies on the null byte at the end of
1683 -- the ByteString to be there. This is /not/ the case if your ByteString
1684 -- has been spliced from a larger string (i.e. with take or drop).
1685 -- Unless you can guarantee the null byte, you should use the safe
1686 -- version, which will copy the string first.
1688 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1689 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1691 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1692 -- This is mainly useful to allow the rest of the data pointed
1693 -- to by the 'ByteString' to be garbage collected, for example
1694 -- if a large string has been read in, and only a small part of it
1695 -- is needed in the rest of the program.
1696 copy :: ByteString -> ByteString
1697 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1698 memcpy p (f `plusPtr` s) (fromIntegral l)
1700 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1701 -- CString is going to be deallocated from C land.
1702 copyCString :: CString -> ByteString
1703 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1705 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1706 copyCStringLen :: CStringLen -> ByteString
1707 copyCStringLen (cstr, len) = inlinePerformIO $ do
1708 fp <- mallocForeignPtrArray (len+1)
1709 withForeignPtr fp $ \p -> do
1710 memcpy p (castPtr cstr) (fromIntegral len)
1711 poke (p `plusPtr` len) (0 :: Word8)
1712 return $! PS fp 0 len
1714 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1715 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1716 -- This is analogous to unsafeUseAsCString, and comes with the same
1717 -- safety requirements.
1719 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1720 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1722 -- | Given the maximum size needed and a function to make the contents
1723 -- of a ByteString, generate makes the 'ByteString'. The generating
1724 -- function is required to return the actual final size (<= the maximum
1725 -- size), and the resulting byte array is realloced to this size. The
1726 -- string is padded at the end with a null byte.
1728 -- generate is the main mechanism for creating custom, efficient
1729 -- ByteString functions, using Haskell or C functions to fill the space.
1731 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1733 fp <- mallocByteString i
1734 (ptr,n) <- withForeignPtr fp $ \p -> do
1738 else do fp_ <- mallocByteString i' -- realloc
1739 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1745 -- On the C malloc heap. Less fun.
1748 p <- mallocArray (i+1)
1750 p' <- reallocArray p (i'+1)
1751 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1752 fp <- newForeignFreePtr p'
1756 -- ---------------------------------------------------------------------
1759 #if defined(__GLASGOW_HASKELL__)
1761 -- | getLine, read a line from stdin.
1762 getLine :: IO ByteString
1763 getLine = hGetLine stdin
1765 -- | hGetLine. read a ByteString from a handle
1766 hGetLine :: Handle -> IO ByteString
1767 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1768 case haBufferMode handle_ of
1769 NoBuffering -> error "no buffering"
1770 _other -> hGetLineBuffered handle_
1773 hGetLineBuffered handle_ = do
1774 let ref = haBuffer handle_
1775 buf <- readIORef ref
1776 hGetLineBufferedLoop handle_ ref buf 0 []
1778 hGetLineBufferedLoop handle_ ref
1779 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1781 off <- findEOL r w raw
1782 let new_len = len + off - r
1783 xs <- mkPS raw r off
1785 -- if eol == True, then off is the offset of the '\n'
1786 -- otherwise off == w and the buffer is now empty.
1788 then do if (w == off + 1)
1789 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1790 else writeIORef ref buf{ bufRPtr = off + 1 }
1791 mkBigPS new_len (xs:xss)
1793 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1794 buf{ bufWPtr=0, bufRPtr=0 }
1796 -- Nothing indicates we caught an EOF, and we may have a
1797 -- partial line to return.
1799 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1801 then mkBigPS new_len (xs:xss)
1804 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1806 -- find the end-of-line character, if there is one
1810 (c,r') <- readCharFromBuffer raw r
1812 then return r -- NB. not r': don't include the '\n'
1813 else findEOL r' w raw
1815 maybeFillReadBuffer fd is_line is_stream buf = catch
1816 (do buf' <- fillReadBuffer fd is_line is_stream buf
1818 (\e -> if isEOFError e then return Nothing else ioError e)
1820 -- TODO, rewrite to use normal memcpy
1821 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1822 mkPS buf start end = do
1823 let len = end - start
1824 fp <- mallocByteString len
1825 withForeignPtr fp $ \p -> do
1826 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1827 return (PS fp 0 len)
1829 mkBigPS :: Int -> [ByteString] -> IO ByteString
1830 mkBigPS _ [ps] = return ps
1831 mkBigPS _ pss = return $! concat (P.reverse pss)
1835 -- ---------------------------------------------------------------------
1838 -- | Outputs a 'ByteString' to the specified 'Handle'.
1839 hPut :: Handle -> ByteString -> IO ()
1840 hPut _ (PS _ _ 0) = return ()
1841 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1842 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1844 -- | Write a ByteString to stdout
1845 putStr :: ByteString -> IO ()
1846 putStr = hPut stdout
1848 -- | Write a ByteString to stdout, appending a newline byte
1849 putStrLn :: ByteString -> IO ()
1850 putStrLn ps = hPut stdout ps >> hPut stdout nl
1851 where nl = packByte 0x0a
1853 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1854 -- is far more efficient than reading the characters into a 'String'
1855 -- and then using 'pack'.
1856 hGet :: Handle -> Int -> IO ByteString
1857 hGet _ 0 = return empty
1858 hGet h i = do fp <- mallocByteString i
1859 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1862 #if defined(__GLASGOW_HASKELL__)
1863 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1864 -- waiting for data to become available, instead it returns only whatever data
1866 hGetNonBlocking :: Handle -> Int -> IO ByteString
1867 hGetNonBlocking _ 0 = return empty
1868 hGetNonBlocking h i = do
1869 fp <- mallocByteString i
1870 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1874 -- | Read entire handle contents into a 'ByteString'.
1876 -- As with 'hGet', the string representation in the file is assumed to
1879 hGetContents :: Handle -> IO ByteString
1881 let start_size = 1024
1882 p <- mallocArray start_size
1883 i <- hGetBuf h p start_size
1885 then do p' <- reallocArray p i
1886 fp <- newForeignFreePtr p'
1892 p' <- reallocArray p s'
1893 i <- hGetBuf h (p' `plusPtr` s) s
1895 then do let i' = s + i
1896 p'' <- reallocArray p' i'
1897 fp <- newForeignFreePtr p''
1901 -- | getContents. Equivalent to hGetContents stdin
1902 getContents :: IO ByteString
1903 getContents = hGetContents stdin
1905 -- | Read an entire file strictly into a 'ByteString'. This is far more
1906 -- efficient than reading the characters into a 'String' and then using
1907 -- 'pack'. It also may be more efficient than opening the file and
1908 -- reading it using hGet.
1909 readFile :: FilePath -> IO ByteString
1911 h <- openBinaryFile f ReadMode
1913 s <- hGet h $ fromIntegral l
1917 -- | Write a 'ByteString' to a file.
1918 writeFile :: FilePath -> ByteString -> IO ()
1919 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1924 -- Disable until we can move it into a portable .hsc file
1927 -- | Like readFile, this reads an entire file directly into a
1928 -- 'ByteString', but it is even more efficient. It involves directly
1929 -- mapping the file to memory. This has the advantage that the contents
1930 -- of the file never need to be copied. Also, under memory pressure the
1931 -- page may simply be discarded, while in the case of readFile it would
1932 -- need to be written to swap. If you read many small files, mmapFile
1933 -- will be less memory-efficient than readFile, since each mmapFile
1934 -- takes up a separate page of memory. Also, you can run into bus
1935 -- errors if the file is modified. As with 'readFile', the string
1936 -- representation in the file is assumed to be ISO-8859-1.
1938 -- On systems without mmap, this is the same as a readFile.
1940 mmapFile :: FilePath -> IO ByteString
1941 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1943 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1945 h <- openBinaryFile f ReadMode
1946 l <- fromIntegral `fmap` hFileSize h
1947 -- Don't bother mmaping small files because each mmapped file takes up
1948 -- at least one full VM block.
1950 then do thefp <- mallocByteString l
1951 withForeignPtr thefp $ \p-> hGetBuf h p l
1956 fd <- fromIntegral `fmap` handleToFd h
1958 fp <- if p == nullPtr
1959 then do thefp <- mallocByteString l
1960 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1963 -- The munmap leads to crashes on OpenBSD.
1964 -- maybe there's a use after unmap in there somewhere?
1965 #if !defined(__OpenBSD__)
1966 let unmap = c_munmap p l >> return ()
1968 let unmap = return ()
1970 fp <- FC.newForeignPtr p unmap
1975 where mmap_limit = 16*1024
1978 #if defined(__GLASGOW_HASKELL__)
1980 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1982 getArgs :: IO [ByteString]
1984 alloca $ \ p_argc ->
1985 alloca $ \ p_argv -> do
1986 getProgArgv p_argc p_argv
1987 p <- fromIntegral `fmap` peek p_argc
1989 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1992 -- ---------------------------------------------------------------------
1993 -- Internal utilities
1995 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1996 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1997 -- convenience for ByteString construction.
1998 w2c :: Word8 -> Char
1999 #if !defined(__GLASGOW_HASKELL__)
2000 w2c = chr . fromIntegral
2002 w2c = unsafeChr . fromIntegral
2006 c2w :: Char -> Word8
2007 c2w = fromIntegral . ord
2010 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2011 -- is padded with a null byte.
2012 mallocByteString :: Int -> IO (ForeignPtr Word8)
2013 mallocByteString l = do
2014 fp <- mallocForeignPtrArray (l+1)
2015 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2018 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2019 -- argument gives the final size of the ByteString. Unlike 'generate'
2020 -- the ByteString is not reallocated if the final size is less than the
2021 -- estimated size. Also, unlike 'generate' ByteString's created this way
2022 -- are managed on the Haskell heap.
2023 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2024 create l write_ptr = inlinePerformIO $ do
2025 fp <- mallocByteString (l+1)
2026 withForeignPtr fp $ \p -> write_ptr p
2028 {-# INLINE create #-}
2030 -- | Perform an operation with a temporary ByteString
2031 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2032 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2033 {-# INLINE withPtr #-}
2035 -- Common up near identical calls to `error' to reduce the number
2036 -- constant strings created when compiled:
2037 errorEmptyList :: String -> a
2038 errorEmptyList fun = moduleError fun "empty ByteString"
2039 {-# NOINLINE errorEmptyList #-}
2041 moduleError :: String -> String -> a
2042 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2043 {-# NOINLINE moduleError #-}
2045 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2046 -- of the string if no element is found, rather than Nothing.
2047 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2048 STRICT2(findIndexOrEnd)
2051 | f (unsafeHead ps) = 0
2052 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
2053 {-# INLINE findIndexOrEnd #-}
2055 -- Find from the end of the string using predicate
2056 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2057 STRICT2(findFromEndUntil)
2058 findFromEndUntil f ps@(PS x s l) =
2060 else if f (last ps) then l
2061 else findFromEndUntil f (PS x s (l-1))
2063 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2064 -- it exposes lots of things to further inlining
2066 {-# INLINE inlinePerformIO #-}
2067 inlinePerformIO :: IO a -> a
2068 #if defined(__GLASGOW_HASKELL__)
2069 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2071 inlinePerformIO = unsafePerformIO
2074 {-# INLINE newForeignFreePtr #-}
2075 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2076 #if defined(__GLASGOW_HASKELL__)
2077 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2079 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2082 -- ---------------------------------------------------------------------
2084 -- Standard C functions
2087 foreign import ccall unsafe "string.h strlen" c_strlen
2090 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2091 :: CInt -> IO (Ptr Word8)
2093 foreign import ccall unsafe "static stdlib.h free" c_free
2094 :: Ptr Word8 -> IO ()
2096 #if !defined(__GLASGOW_HASKELL__)
2097 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2098 :: FunPtr (Ptr Word8 -> IO ())
2101 foreign import ccall unsafe "string.h memset" memset
2102 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2104 foreign import ccall unsafe "string.h memchr" memchr
2105 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2107 foreign import ccall unsafe "string.h memcmp" memcmp
2108 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2110 foreign import ccall unsafe "string.h memcpy" memcpy
2111 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2113 -- ---------------------------------------------------------------------
2118 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2119 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2121 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2122 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2124 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2125 :: Ptr Word8 -> CInt -> Word8
2127 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2128 :: Ptr Word8 -> CInt -> Word8
2130 foreign import ccall unsafe "static fpstring.h count" c_count
2131 :: Ptr Word8 -> CInt -> Word8 -> CInt
2133 -- ---------------------------------------------------------------------
2137 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2138 :: Int -> Int -> IO (Ptr Word8)
2140 foreign import ccall unsafe "static unistd.h close" c_close
2143 # if !defined(__OpenBSD__)
2144 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2145 :: Ptr Word8 -> Int -> IO Int
2149 -- ---------------------------------------------------------------------
2150 -- Internal GHC Haskell magic
2152 #if defined(__GLASGOW_HASKELL__)
2153 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2154 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2156 foreign import ccall unsafe "__hscore_memcpy_src_off"
2157 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2160 -- ---------------------------------------------------------------------
2162 -- Functional array fusion for ByteStrings.
2164 -- From the Data Parallel Haskell project,
2165 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2168 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2169 -- the fact that no bottoms of this type are ever constructed; hence, we can
2170 -- assume @(_ :: NoAL) `seq` x = x@.
2174 -- | Special forms of loop arguments
2176 -- * These are common special cases for the three function arguments of gen
2177 -- and loop; we give them special names to make it easier to trigger RULES
2178 -- applying in the special cases represented by these arguments. The
2179 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2180 -- two simplifier phases.
2182 -- * In the case where the accumulator is not needed, it is better to always
2183 -- explicitly return a value `()', rather than just copy the input to the
2184 -- output, as the former gives GHC better local information.
2187 -- | Element function expressing a mapping only
2188 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2189 mapEFL f = \_ e -> (noAL, (Just $ f e))
2190 #if defined(__GLASGOW_HASKELL__)
2191 {-# INLINE [1] mapEFL #-}
2194 -- | Element function implementing a filter function only
2195 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2196 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2197 #if defined(__GLASGOW_HASKELL__)
2198 {-# INLINE [1] filterEFL #-}
2201 -- |Element function expressing a reduction only
2202 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2203 foldEFL f = \a e -> (f a e, Nothing)
2204 #if defined(__GLASGOW_HASKELL__)
2205 {-# INLINE [1] foldEFL #-}
2208 -- | A strict foldEFL.
2209 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2210 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2211 #if defined(__GLASGOW_HASKELL__)
2212 {-# INLINE [1] foldEFL' #-}
2218 #if defined(__GLASGOW_HASKELL__)
2219 {-# INLINE [1] noAL #-}
2222 -- | Projection functions that are fusion friendly (as in, we determine when
2223 -- they are inlined)
2224 loopArr :: (ByteString, acc) -> ByteString
2225 loopArr (arr, _) = arr
2226 #if defined(__GLASGOW_HASKELL__)
2227 {-# INLINE [1] loopArr #-}
2230 loopAcc :: (ByteString, acc) -> acc
2231 loopAcc (_, acc) = acc
2232 #if defined(__GLASGOW_HASKELL__)
2233 {-# INLINE [1] loopAcc #-}
2236 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2237 loopSndAcc (arr, (_, acc)) = (arr, acc)
2238 #if defined(__GLASGOW_HASKELL__)
2239 {-# INLINE [1] loopSndAcc #-}
2242 ------------------------------------------------------------------------
2245 -- size, and then percentage.
2248 -- | Iteration over over ByteStrings
2249 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2250 -> acc -- ^ initial acc value
2251 -> ByteString -- ^ input ByteString
2252 -> (ByteString, acc)
2254 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2255 fp <- mallocByteString i
2256 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2257 (acc, i') <- go (a `plusPtr` s) p start
2259 then return (fp,i',acc) -- no realloc for map
2260 else do fp_ <- mallocByteString i' -- realloc
2261 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2264 return (PS ptr 0 n, acc)
2269 trans a_off ma_off acc
2270 | a_off >= i = return (acc, ma_off)
2272 x <- peekByteOff p a_off
2273 let (acc', oe) = f acc x
2274 ma_off' <- case oe of
2275 Nothing -> return ma_off
2276 Just e -> do pokeByteOff ma ma_off e
2278 trans (a_off+1) ma_off' acc'
2280 #if defined(__GLASGOW_HASKELL__)
2281 {-# INLINE [1] loopU #-}
2286 -- |Fuse to flat loop functions
2287 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2288 -> (a2 -> Word8 -> (a2, Maybe Word8))
2291 -> ((a1, a2), Maybe Word8)
2292 fuseEFL f g (acc1, acc2) e1 =
2294 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2297 (acc2', res) -> ((acc1', acc2'), res)
2301 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2302 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2303 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2305 "loopArr/loopSndAcc" forall x.
2306 loopArr (loopSndAcc x) = loopArr x
2308 "seq/NoAL" forall (u::NoAL) e.