1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
74 -- * Reducing 'ByteString's
75 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
82 concat, -- :: [ByteString] -> ByteString
83 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
84 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
86 maximum, -- :: ByteString -> Word8
87 minimum, -- :: ByteString -> Word8
88 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
90 -- * Generating and unfolding ByteStrings
91 replicate, -- :: Int -> Word8 -> ByteString
92 unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
96 -- ** Breaking strings
97 take, -- :: Int -> ByteString -> ByteString
98 drop, -- :: Int -> ByteString -> ByteString
99 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
100 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
101 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
102 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
103 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
104 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
106 -- ** Breaking and dropping on specific bytes
107 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
108 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
109 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
110 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
112 -- ** Breaking into many substrings
113 split, -- :: Word8 -> ByteString -> [ByteString]
114 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
115 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
116 group, -- :: ByteString -> [ByteString]
117 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
119 -- ** Joining strings
120 join, -- :: ByteString -> [ByteString] -> ByteString
121 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
123 -- * Indexing ByteStrings
124 index, -- :: ByteString -> Int -> Word8
125 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
126 elemIndices, -- :: Word8 -> ByteString -> [Int]
127 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
128 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
129 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
130 count, -- :: Word8 -> ByteString -> Int
132 -- * Ordered ByteStrings
133 sort, -- :: ByteString -> ByteString
135 -- * Searching ByteStrings
137 -- ** Searching by equality
138 -- | These functions use memchr(3) to efficiently search the ByteString
140 elem, -- :: Word8 -> ByteString -> Bool
141 notElem, -- :: Word8 -> ByteString -> Bool
142 filterByte, -- :: Word8 -> ByteString -> ByteString
143 filterNotByte, -- :: Word8 -> ByteString -> ByteString
145 -- ** Searching with a predicate
146 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
147 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
149 -- ** Prefixes and suffixes
150 -- | These functions use memcmp(3) to efficiently compare substrings
151 isPrefixOf, -- :: ByteString -> ByteString -> Bool
152 isSuffixOf, -- :: ByteString -> ByteString -> Bool
154 -- ** Search for arbitrary substrings
155 isSubstringOf, -- :: ByteString -> ByteString -> Bool
156 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
157 findSubstrings, -- :: ByteString -> ByteString -> [Int]
159 -- * Zipping and unzipping ByteStrings
160 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
161 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
162 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
164 -- * Unchecked access
165 unsafeHead, -- :: ByteString -> Word8
166 unsafeTail, -- :: ByteString -> ByteString
167 unsafeIndex, -- :: ByteString -> Int -> Word8
169 -- * Low level introduction and elimination
170 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
171 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
172 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
173 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
174 skipIndex, -- :: ByteString -> Int
176 -- ** Packing CStrings and pointers
177 packCString, -- :: CString -> ByteString
178 packCStringLen, -- :: CString -> ByteString
179 packMallocCString, -- :: CString -> ByteString
181 #if defined(__GLASGOW_HASKELL__)
182 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
183 packAddress, -- :: Addr# -> ByteString
184 unsafePackAddress, -- :: Int -> Addr# -> ByteString
185 unsafeFinalize, -- :: ByteString -> IO ()
188 -- ** Using ByteStrings as CStrings
189 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
190 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
191 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
193 -- ** Copying ByteStrings
194 -- | These functions perform memcpy(3) operations
195 copy, -- :: ByteString -> ByteString
196 copyCString, -- :: CString -> ByteString
197 copyCStringLen, -- :: CStringLen -> ByteString
199 -- * I\/O with @ByteString@s
201 -- ** Standard input and output
203 #if defined(__GLASGOW_HASKELL__)
204 getLine, -- :: IO ByteString
206 getContents, -- :: IO ByteString
207 putStr, -- :: ByteString -> IO ()
208 putStrLn, -- :: ByteString -> IO ()
211 readFile, -- :: FilePath -> IO ByteString
212 writeFile, -- :: FilePath -> ByteString -> IO ()
213 -- mmapFile, -- :: FilePath -> IO ByteString
215 -- ** I\/O with Handles
216 #if defined(__GLASGOW_HASKELL__)
217 getArgs, -- :: IO [ByteString]
218 hGetLine, -- :: Handle -> IO ByteString
219 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
221 hGetContents, -- :: Handle -> IO ByteString
222 hGet, -- :: Handle -> Int -> IO ByteString
223 hPut, -- :: Handle -> ByteString -> IO ()
225 -- * Fusion utilities
226 #if defined(__GLASGOW_HASKELL__)
227 unpackList, -- eek, otherwise it gets thrown away by the simplifier
230 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
231 loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL,
236 import qualified Prelude as P
237 import Prelude hiding (reverse,head,tail,last,init,null
238 ,length,map,lines,foldl,foldr,unlines
239 ,concat,any,take,drop,splitAt,takeWhile
240 ,dropWhile,span,break,elem,filter,maximum
241 ,minimum,all,concatMap,foldl1,foldr1
242 ,readFile,writeFile,replicate
243 ,getContents,getLine,putStr,putStrLn
244 ,zip,zipWith,unzip,notElem)
246 import qualified Data.List as List
249 import Data.Word (Word8)
250 import Data.Maybe (listToMaybe)
251 import Data.Array (listArray)
252 import qualified Data.Array as Array ((!))
254 -- Control.Exception.bracket not available in yhc or nhc
255 import Control.Exception (bracket)
256 import Control.Monad (when)
258 import Foreign.C.String (CString, CStringLen)
259 import Foreign.C.Types (CSize, CInt)
260 import Foreign.ForeignPtr
261 import Foreign.Marshal.Array
263 import Foreign.Storable (Storable(..))
265 -- hGetBuf and hPutBuf not available in yhc or nhc
266 import System.IO (stdin,stdout,hClose,hFileSize
267 ,hGetBuf,hPutBuf,openBinaryFile
270 #if !defined(__GLASGOW_HASKELL__)
271 import System.IO.Unsafe
274 #if defined(__GLASGOW_HASKELL__)
276 import Data.Generics (Data(..), Typeable(..))
278 import System.IO (hGetBufNonBlocking)
279 import System.IO.Error (isEOFError)
281 import Foreign.Marshal (alloca)
282 import qualified Foreign.Concurrent as FC (newForeignPtr)
285 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
286 import GHC.Base (build, unsafeChr)
287 import GHC.Word hiding (Word8)
288 import GHC.Ptr (Ptr(..))
289 import GHC.ST (ST(..))
294 -- CFILES stuff is Hugs only
295 {-# CFILES cbits/fpstring.c #-}
297 -- -----------------------------------------------------------------------------
299 -- Useful macros, until we have bang patterns
302 #define STRICT1(f) f a | a `seq` False = undefined
303 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
304 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
305 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
306 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
308 -- -----------------------------------------------------------------------------
310 -- | A space-efficient representation of a Word8 vector, supporting many
311 -- efficient operations. A 'ByteString' contains 8-bit characters only.
313 -- Instances of Eq, Ord, Read, Show, Data, Typeable
315 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
319 #if defined(__GLASGOW_HASKELL__)
320 deriving (Data, Typeable)
323 instance Eq ByteString
326 instance Ord ByteString
327 where compare = compareBytes
329 instance Show ByteString where
330 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
332 instance Read ByteString where
333 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
336 instance Arbitrary PackedString where
337 arbitrary = P.pack `fmap` arbitrary
338 coarbitrary s = coarbitrary (P.unpack s)
341 -- | /O(n)/ Equality on the 'ByteString' type.
342 eq :: ByteString -> ByteString -> Bool
343 eq a@(PS p s l) b@(PS p' s' l')
344 | l /= l' = False -- short cut on length
345 | p == p' && s == s' = True -- short cut for the same string
346 | otherwise = compareBytes a b == EQ
349 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
350 compareBytes :: ByteString -> ByteString -> Ordering
351 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
352 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
353 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
354 | otherwise = inlinePerformIO $
355 withForeignPtr x1 $ \p1 ->
356 withForeignPtr x2 $ \p2 -> do
357 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
358 return $ case i `compare` 0 of
359 EQ -> l1 `compare` l2
361 {-# INLINE compareBytes #-}
365 -- About 4x slower over 32M
367 compareBytes :: ByteString -> ByteString -> Ordering
368 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
369 withForeignPtr fp1 $ \p1 ->
370 withForeignPtr fp2 $ \p2 ->
371 cmp (p1 `plusPtr` off1)
372 (p2 `plusPtr` off2) 0 len1 len2
374 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
376 cmp p1 p2 n len1 len2
377 | n == len1 = if n == len2 then return EQ else return LT
378 | n == len2 = return GT
380 (a :: Word8) <- peekByteOff p1 n
381 (b :: Word8) <- peekByteOff p2 n
382 case a `compare` b of
383 EQ -> cmp p1 p2 (n+1) len1 len2
386 {-# INLINE compareBytes #-}
389 -- -----------------------------------------------------------------------------
390 -- Introducing and eliminating 'ByteString's
392 -- | /O(1)/ The empty 'ByteString'
394 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
395 {-# NOINLINE empty #-}
397 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
398 packByte :: Word8 -> ByteString
399 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
400 withForeignPtr fp $ \p -> poke p c
402 {-# INLINE packByte #-}
405 -- XXX The unsafePerformIO is critical!
409 -- packByte 255 `compare` packByte 127
413 -- case mallocByteString 2 of
414 -- ForeignPtr f internals ->
415 -- case writeWord8OffAddr# f 0 255 of _ ->
416 -- case writeWord8OffAddr# f 0 127 of _ ->
417 -- case eqAddr# f f of
418 -- False -> case compare (GHC.Prim.plusAddr# f 0)
419 -- (GHC.Prim.plusAddr# f 0)
423 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
425 -- For applications with large numbers of string literals, pack can be a
426 -- bottleneck. In such cases, consider using packAddress (GHC only).
427 pack :: [Word8] -> ByteString
429 #if !defined(__GLASGOW_HASKELL__)
431 pack str = create (P.length str) $ \p -> go p str
434 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
436 #else /* hack away */
438 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
440 go _ _ [] = return ()
441 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
443 writeByte p i c = ST $ \s# ->
444 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
448 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
449 unpack :: ByteString -> [Word8]
451 #if !defined(__GLASGOW_HASKELL__)
453 unpack (PS _ _ 0) = []
454 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
455 go (p `plusPtr` s) (l - 1) []
458 go p 0 acc = peek p >>= \e -> return (e : acc)
459 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
460 {-# INLINE unpack #-}
464 unpack ps = build (unpackFoldr ps)
465 {-# INLINE unpack #-}
467 unpackList :: ByteString -> [Word8]
468 unpackList (PS fp off len) = withPtr fp $ \p -> do
470 loop _ (-1) acc = return acc
473 loop q (n-1) (a : acc)
474 loop (p `plusPtr` off) (len-1) []
477 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
480 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
481 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
483 loop _ (-1) acc = return acc
486 loop q (n-1) (a `f` acc)
487 loop (p `plusPtr` off) (len-1) ch
488 {-# INLINE [0] unpackFoldr #-}
492 ------------------------------------------------------------------------
494 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
495 -- conversion function
496 packWith :: (a -> Word8) -> [a] -> ByteString
497 packWith k str = create (P.length str) $ \p -> go p str
501 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
502 {-# INLINE packWith #-}
503 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
505 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
506 unpackWith :: (Word8 -> a) -> ByteString -> [a]
507 unpackWith _ (PS _ _ 0) = []
508 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
509 go (p `plusPtr` s) (l - 1) []
512 go p 0 acc = peek p >>= \e -> return (k e : acc)
513 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
514 {-# INLINE unpackWith #-}
515 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
517 -- ---------------------------------------------------------------------
520 -- | /O(1)/ Test whether a ByteString is empty.
521 null :: ByteString -> Bool
522 null (PS _ _ l) = l == 0
525 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
526 length :: ByteString -> Int
527 length (PS _ _ l) = l
529 #if defined(__GLASGOW_HASKELL__)
530 {-# INLINE [1] length #-}
535 -- Translate length into a loop.
536 -- Performace ok, but allocates too much, so disable for now.
538 "length/loop" forall f acc s .
539 length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
543 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
544 -- complexity, as it requires a memcpy.
545 cons :: Word8 -> ByteString -> ByteString
546 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
548 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
553 -- | /O(n)/ Append a byte to the end of a 'ByteString'
554 snoc :: ByteString -> Word8 -> ByteString
555 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
556 memcpy p (f `plusPtr` s) (fromIntegral l)
557 poke (p `plusPtr` l) c
562 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
563 head :: ByteString -> Word8
565 | null ps = errorEmptyList "head"
566 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
569 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
570 tail :: ByteString -> ByteString
572 | l <= 0 = errorEmptyList "tail"
573 | otherwise = PS p (s+1) (l-1)
576 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
577 last :: ByteString -> Word8
579 | null ps = errorEmptyList "last"
580 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
583 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
584 init :: ByteString -> ByteString
586 | l <= 0 = errorEmptyList "init"
587 | otherwise = PS p s (l-1)
590 -- | /O(n)/ Append two ByteStrings
591 append :: ByteString -> ByteString -> ByteString
592 append xs ys | null xs = ys
594 | otherwise = concat [xs,ys]
595 {-# INLINE append #-}
597 -- ---------------------------------------------------------------------
600 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
601 -- element of @xs@. This function is subject to array fusion.
602 map :: (Word8 -> Word8) -> ByteString -> ByteString
603 map f = loopArr . loopU (mapEFL f) noAL
606 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
607 -- slightly faster for one-shot cases.
608 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
610 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
611 np <- mallocByteString (len+1)
612 withForeignPtr np $ \p -> do
613 map_ 0 (a `plusPtr` s) p
616 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
619 | n >= len = return ()
621 x <- peekByteOff p1 n
622 pokeByteOff p2 n (f x)
626 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
627 reverse :: ByteString -> ByteString
628 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
629 c_reverse p (f `plusPtr` s) (fromIntegral l)
632 reverse = pack . P.reverse . unpack
635 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
636 -- 'ByteString' and \`intersperses\' that byte between the elements of
637 -- the 'ByteString'. It is analogous to the intersperse function on
639 intersperse :: Word8 -> ByteString -> ByteString
640 intersperse c ps@(PS x s l)
642 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
643 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
646 intersperse c = pack . List.intersperse c . unpack
649 -- | The 'transpose' function transposes the rows and columns of its
650 -- 'ByteString' argument.
651 transpose :: [ByteString] -> [ByteString]
652 transpose ps = P.map pack (List.transpose (P.map unpack ps))
654 -- ---------------------------------------------------------------------
655 -- Reducing 'ByteString's
657 -- | 'foldl', applied to a binary operator, a starting value (typically
658 -- the left-identity of the operator), and a ByteString, reduces the
659 -- ByteString using the binary operator, from left to right.
660 -- This function is subject to array fusion.
661 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
662 foldl f z = loopAcc . loopU (foldEFL f) z
667 -- About twice as fast with 6.4.1, but not fuseable
668 -- A simple fold . map is enough to make it worth while.
670 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
671 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
674 lgo z p q | p == q = return z
675 | otherwise = do c <- peek p
676 lgo (f z c) (p `plusPtr` 1) q
679 -- | 'foldl\'' is like foldl, but strict in the accumulator.
680 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
681 foldl' f z = loopAcc . loopU (foldEFL' f) z
682 {-# INLINE foldl' #-}
684 -- | 'foldr', applied to a binary operator, a starting value
685 -- (typically the right-identity of the operator), and a ByteString,
686 -- reduces the ByteString using the binary operator, from right to left.
687 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
688 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
689 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
692 go p q | p == q = return z
693 | otherwise = do c <- peek p
694 ws <- go (p `plusPtr` 1) q
697 -- | 'foldl1' is a variant of 'foldl' that has no starting value
698 -- argument, and thus must be applied to non-empty 'ByteStrings'.
699 -- This function is subject to array fusion.
700 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
702 | null ps = errorEmptyList "foldl1"
703 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
705 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
706 -- and thus must be applied to non-empty 'ByteString's
707 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
709 | null ps = errorEmptyList "foldr1"
710 | otherwise = foldr f (last ps) (init ps)
712 -- ---------------------------------------------------------------------
715 -- | /O(n)/ Concatenate a list of ByteStrings.
716 concat :: [ByteString] -> ByteString
719 concat xs = create len $ \ptr -> go xs ptr
720 where len = P.sum . P.map length $ xs
723 go (PS p s l:ps) ptr = do
724 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
725 go ps (ptr `plusPtr` l)
727 -- | Map a function over a 'ByteString' and concatenate the results
728 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
729 concatMap f = foldr (append . f) empty
731 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
732 -- any element of the 'ByteString' satisfies the predicate.
733 any :: (Word8 -> Bool) -> ByteString -> Bool
734 any _ (PS _ _ 0) = False
735 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
736 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
739 go p q | p == q = return False
740 | otherwise = do c <- peek p
741 if f c then return True
742 else go (p `plusPtr` 1) q
746 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
747 -- if all elements of the 'ByteString' satisfy the predicate.
748 all :: (Word8 -> Bool) -> ByteString -> Bool
749 all _ (PS _ _ 0) = True
750 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
751 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
754 go p q | p == q = return True -- end of list
755 | otherwise = do c <- peek p
757 then go (p `plusPtr` 1) q
761 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
762 maximum :: ByteString -> Word8
763 maximum xs@(PS x s l)
764 | null xs = errorEmptyList "maximum"
765 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
766 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
767 {-# INLINE maximum #-}
769 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
770 minimum :: ByteString -> Word8
771 minimum xs@(PS x s l)
772 | null xs = errorEmptyList "minimum"
773 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
774 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
775 {-# INLINE minimum #-}
777 -- fusion is too slow here (10x)
780 maximum xs@(PS x s l)
781 | null xs = errorEmptyList "maximum"
782 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
784 maximum_ (p `plusPtr` s) 0 l w
786 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
790 | otherwise = do w <- peekByteOff ptr n
791 maximum_ ptr (n+1) m (if w > c then w else c)
793 minimum xs@(PS x s l)
794 | null xs = errorEmptyList "minimum"
795 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
797 minimum_ (p `plusPtr` s) 0 l w
799 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
803 | otherwise = do w <- peekByteOff ptr n
804 minimum_ ptr (n+1) m (if w < c then w else c)
807 -- | /O(n)/ map Word8 functions, provided with the index at each position
808 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
809 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
810 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
812 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
814 go n f t p | f == p = return ()
815 | otherwise = do w <- peek f
817 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
819 -- ---------------------------------------------------------------------
820 -- Unfolds and replicates
822 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
823 -- the value of every element. The following holds:
825 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
827 -- This implemenation uses @memset(3)@
828 replicate :: Int -> Word8 -> ByteString
829 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
833 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
837 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
840 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
841 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
842 -- the element and returns 'Nothing' if it is done producing the
843 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
844 -- prepending to the ByteString and @b@ is used as the next element in a
847 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
848 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
849 -- final size of the ByteString as an argument. 'cons' can then be
850 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
851 -- linear complexity. The depth of the recursion is limited to this
852 -- size, but may be less. For lazy, infinite unfoldr, use
853 -- 'Data.List.unfoldr' (from 'Data.List').
857 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
859 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
861 -- > unfoldrN n == take n $ List.unfoldr
862 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
863 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
866 go q c n | n == i = return n -- stop if we reach `i'
867 | otherwise = case f c of
871 go (q `plusPtr` 1) new_c (n+1)
873 -- ---------------------------------------------------------------------
876 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
877 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
878 take :: Int -> ByteString -> ByteString
882 | otherwise = PS x s n
885 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
886 -- elements, or @[]@ if @n > 'length' xs@.
887 drop :: Int -> ByteString -> ByteString
891 | otherwise = PS x (s+n) (l-n)
894 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
895 splitAt :: Int -> ByteString -> (ByteString, ByteString)
896 splitAt n ps = (take n ps, drop n ps)
897 {-# INLINE splitAt #-}
899 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
900 -- returns the longest prefix (possibly empty) of @xs@ of elements that
902 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
903 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
904 {-# INLINE takeWhile #-}
906 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
907 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
908 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
909 {-# INLINE dropWhile #-}
911 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
912 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
913 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
916 -- | 'breakByte' breaks its ByteString argument at the first occurence
917 -- of the specified byte. It is more efficient than 'break' as it is
918 -- implemented with @memchr(3)@. I.e.
920 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
922 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
923 breakByte c p = case elemIndex c p of
925 Just n -> (take n p, drop n p)
926 {-# INLINE breakByte #-}
928 -- | 'spanByte' breaks its ByteString argument at the first
929 -- occurence of a byte other than its argument. It is more efficient
932 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
934 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
935 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
939 go p i | i >= l = return (ps, empty)
940 | otherwise = do c' <- peekByteOff p i
942 then return (take i ps, drop i ps)
944 {-# INLINE spanByte #-}
946 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
947 -- occurence of @w@. It behaves like 'break', except the delimiter is
948 -- not returned, and @Nothing@ is returned if the delimiter is not in
949 -- the ByteString. I.e.
951 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
953 -- > breakFirst c xs ==
954 -- > let (x,y) = break (== c) xs
955 -- > in if null y then Nothing else Just (x, drop 1 y))
957 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
958 breakFirst c p = case elemIndex c p of
960 Just n -> Just (take n p, drop (n+1) p)
961 {-# INLINE breakFirst #-}
963 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
966 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
968 -- and the following are equivalent:
970 -- > breakLast 'c' "abcdef"
971 -- > let (x,y) = break (=='c') (reverse "abcdef")
972 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
974 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
975 breakLast c p = case elemIndexLast c p of
977 Just n -> Just (take n p, drop (n+1) p)
978 {-# INLINE breakLast #-}
980 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
981 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
982 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
983 span p ps = break (not . p) ps
986 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
989 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
993 -- > spanEnd (not . isSpace) ps
995 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
997 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
998 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1000 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1001 -- separators, where the predicate returns True for a separator element.
1002 -- The resulting components do not contain the separators. Two adjacent
1003 -- separators result in an empty component in the output. eg.
1005 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1006 -- > splitWith (=='a') [] == []
1008 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1010 #if defined(__GLASGOW_HASKELL__)
1011 splitWith _pred (PS _ _ 0) = []
1012 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1013 where pred# c# = pred_ (W8# c#)
1016 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1017 splitLoop pred' p 0 off' len' fp'
1019 splitLoop :: (Word# -> Bool)
1021 -> Int -> Int -> Int
1025 splitLoop pred' p idx' off' len' fp'
1026 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1027 | idx' >= len' = return [PS fp' off' idx']
1029 w <- peekElemOff p (off'+idx')
1030 if pred' (case w of W8# w# -> w#)
1031 then return (PS fp' off' idx' :
1032 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1033 else splitLoop pred' p (idx'+1) off' len' fp'
1034 {-# INLINE splitWith #-}
1037 splitWith _ (PS _ _ 0) = []
1038 splitWith p ps = loop p ps
1041 loop q qs = if null rest then [chunk]
1042 else chunk : loop q (unsafeTail rest)
1043 where (chunk,rest) = break q qs
1046 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1047 -- argument, consuming the delimiter. I.e.
1049 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1050 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1051 -- > split 'x' "x" == ["",""]
1055 -- > join [c] . split c == id
1056 -- > split == splitWith . (==)
1058 -- As for all splitting functions in this library, this function does
1059 -- not copy the substrings, it just constructs new 'ByteStrings' that
1060 -- are slices of the original.
1062 split :: Word8 -> ByteString -> [ByteString]
1063 split _ (PS _ _ 0) = []
1064 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1065 let ptr = p `plusPtr` s
1069 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1071 then [PS x (s+n) (l-n)]
1072 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1075 {-# INLINE split #-}
1078 -- slower. but stays inside Haskell.
1079 split _ (PS _ _ 0) = []
1080 split (W8# w#) (PS fp off len) = splitWith' off len fp
1082 splitWith' off' len' fp' = withPtr fp $ \p ->
1083 splitLoop p 0 off' len' fp'
1085 splitLoop :: Ptr Word8
1086 -> Int -> Int -> Int
1091 splitLoop p idx' off' len' fp'
1092 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1093 | idx' >= len' = return [PS fp' off' idx']
1095 (W8# x#) <- peekElemOff p (off'+idx')
1096 if word2Int# w# ==# word2Int# x#
1097 then return (PS fp' off' idx' :
1098 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1099 else splitLoop p (idx'+1) off' len' fp'
1102 -- | Like 'splitWith', except that sequences of adjacent separators are
1103 -- treated as a single separator. eg.
1105 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1107 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1108 tokens f = P.filter (not.null) . splitWith f
1109 {-# INLINE tokens #-}
1111 -- | The 'group' function takes a ByteString and returns a list of
1112 -- ByteStrings such that the concatenation of the result is equal to the
1113 -- argument. Moreover, each sublist in the result contains only equal
1114 -- elements. For example,
1116 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1118 -- It is a special case of 'groupBy', which allows the programmer to
1119 -- supply their own equality test. It is about 40% faster than
1121 group :: ByteString -> [ByteString]
1124 | otherwise = ys : group zs
1126 (ys, zs) = spanByte (unsafeHead xs) xs
1128 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1129 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1132 | otherwise = take n xs : groupBy k (drop n xs)
1134 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1136 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1137 -- 'ByteString's and concatenates the list after interspersing the first
1138 -- argument between each element of the list.
1139 join :: ByteString -> [ByteString] -> ByteString
1140 join filler pss = concat (splice pss)
1144 splice (x:y:xs) = x:filler:splice (y:xs)
1148 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1149 -- with a char. Around 4 times faster than the generalised join.
1151 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1152 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1153 withForeignPtr ffp $ \fp ->
1154 withForeignPtr fgp $ \gp -> do
1155 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1156 poke (ptr `plusPtr` l) c
1157 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1159 len = length f + length g + 1
1160 {-# INLINE joinWithByte #-}
1162 -- ---------------------------------------------------------------------
1163 -- Indexing ByteStrings
1165 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1166 index :: ByteString -> Int -> Word8
1168 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1169 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1170 ++ ", length = " ++ show (length ps))
1171 | otherwise = ps `unsafeIndex` n
1172 {-# INLINE index #-}
1174 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1175 -- element in the given 'ByteString' which is equal to the query
1176 -- element, or 'Nothing' if there is no such element.
1177 -- This implementation uses memchr(3).
1178 elemIndex :: Word8 -> ByteString -> Maybe Int
1179 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1180 let p' = p `plusPtr` s
1181 q = memchr p' c (fromIntegral l)
1182 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1183 {-# INLINE elemIndex #-}
1185 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1186 -- element in the given 'ByteString' which is equal to the query
1187 -- element, or 'Nothing' if there is no such element. The following
1190 -- > elemIndexLast c xs ==
1191 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1193 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1194 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1195 go (p `plusPtr` s) (l-1)
1198 go p i | i < 0 = return Nothing
1199 | otherwise = do ch' <- peekByteOff p i
1201 then return $ Just i
1203 {-# INLINE elemIndexLast #-}
1205 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1206 -- the indices of all elements equal to the query element, in ascending order.
1207 -- This implementation uses memchr(3).
1208 elemIndices :: Word8 -> ByteString -> [Int]
1209 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1210 let ptr = p `plusPtr` s
1213 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1216 else let i = q `minusPtr` ptr
1219 {-# INLINE elemIndices #-}
1223 elemIndices :: Word8 -> ByteString -> [Int]
1224 elemIndices c ps = loop 0 ps
1226 loop _ ps' | null ps' = []
1227 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1228 | otherwise = loop (n+1) (unsafeTail ps')
1231 -- | count returns the number of times its argument appears in the ByteString
1233 -- > count = length . elemIndices
1235 -- But more efficiently than using length on the intermediate list.
1236 count :: Word8 -> ByteString -> Int
1237 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1238 return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1239 {-# INLINE count #-}
1243 -- around 30% slower
1245 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1246 go (p `plusPtr` s) (fromIntegral m) 0
1248 go :: Ptr Word8 -> CSize -> Int -> IO Int
1251 let q = memchr p w l
1254 else do let k = fromIntegral $ q `minusPtr` p
1255 go (q `plusPtr` 1) (l-k-1) (i+1)
1258 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1259 -- returns the index of the first element in the ByteString
1260 -- satisfying the predicate.
1261 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1262 findIndex k ps@(PS x s l)
1264 | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1267 go ptr n | n >= l = return Nothing
1268 | otherwise = do w <- peek ptr
1270 then return (Just n)
1271 else go (ptr `plusPtr` 1) (n+1)
1272 {-# INLINE findIndex #-}
1274 -- | The 'findIndices' function extends 'findIndex', by returning the
1275 -- indices of all elements satisfying the predicate, in ascending order.
1276 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1277 findIndices p ps = loop 0 ps
1280 loop n qs | null qs = []
1281 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1282 | otherwise = loop (n+1) (unsafeTail qs)
1284 -- ---------------------------------------------------------------------
1285 -- Searching ByteStrings
1287 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1288 elem :: Word8 -> ByteString -> Bool
1289 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1292 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1293 notElem :: Word8 -> ByteString -> Bool
1294 notElem c ps = not (elem c ps)
1295 {-# INLINE notElem #-}
1297 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1298 -- returns a ByteString containing those characters that satisfy the
1299 -- predicate. This function is subject to array fusion.
1300 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1301 filter p = loopArr . loopU (filterEFL p) noAL
1302 {-# INLINE filter #-}
1304 -- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
1305 -- around 2x faster for some one-shot applications.
1306 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1307 filterF k ps@(PS x s l)
1309 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1310 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1311 return (t `minusPtr` p) -- actual length
1314 go f t end | f == end = return t
1318 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1319 else go (f `plusPtr` 1) t end
1320 {-# INLINE filterF #-}
1323 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1324 -- case of filtering a single byte. It is more efficient to use
1325 -- /filterByte/ in this case.
1327 -- > filterByte == filter . (==)
1329 -- filterByte is around 10x faster, and uses much less space, than its
1330 -- filter equivalent
1331 filterByte :: Word8 -> ByteString -> ByteString
1332 filterByte w ps = replicate (count w ps) w
1333 {-# INLINE filterByte #-}
1336 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1337 -- case of filtering a single byte out of a list. It is more efficient
1338 -- to use /filterNotByte/ in this case.
1340 -- > filterNotByte == filter . (/=)
1342 -- filterNotByte is around 2x faster than its filter equivalent.
1343 filterNotByte :: Word8 -> ByteString -> ByteString
1344 filterNotByte w = filterF (/= w)
1345 {-# INLINE filterNotByte #-}
1347 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1348 -- and returns the first element in matching the predicate, or 'Nothing'
1349 -- if there is no such element.
1351 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1353 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1354 find f p = case findIndex f p of
1355 Just n -> Just (p `unsafeIndex` n)
1361 -- fuseable, but we don't want to walk the whole array.
1363 find k = foldl findEFL Nothing
1364 where findEFL a@(Just _) _ = a
1365 findEFL _ c | k c = Just c
1366 | otherwise = Nothing
1369 -- ---------------------------------------------------------------------
1370 -- Searching for substrings
1372 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1373 -- iff the first is a prefix of the second.
1374 isPrefixOf :: ByteString -> ByteString -> Bool
1375 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1378 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1379 withForeignPtr x2 $ \p2 -> do
1380 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1383 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1384 -- iff the first is a suffix of the second.
1386 -- The following holds:
1388 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1390 -- However, the real implemenation uses memcmp to compare the end of the
1391 -- string only, with no reverse required..
1392 isSuffixOf :: ByteString -> ByteString -> Bool
1393 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1396 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1397 withForeignPtr x2 $ \p2 -> do
1398 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1401 -- | Check whether one string is a substring of another. @isSubstringOf
1402 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1403 isSubstringOf :: ByteString -- ^ String to search for.
1404 -> ByteString -- ^ String to search in.
1406 isSubstringOf p s = not $ P.null $ findSubstrings p s
1408 -- | Get the first index of a substring in another string,
1409 -- or 'Nothing' if the string is not found.
1410 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1411 findSubstring :: ByteString -- ^ String to search for.
1412 -> ByteString -- ^ String to seach in.
1414 findSubstring = (listToMaybe .) . findSubstrings
1416 -- | Find the indexes of all (possibly overlapping) occurances of a
1417 -- substring in a string. This function uses the Knuth-Morris-Pratt
1418 -- string matching algorithm.
1419 findSubstrings :: ByteString -- ^ String to search for.
1420 -> ByteString -- ^ String to seach in.
1423 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1425 patc x = pat `unsafeIndex` x
1426 strc x = str `unsafeIndex` x
1428 -- maybe we should make kmpNext a UArray before using it in search?
1429 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1430 kmpNextL p _ | null p = []
1431 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1433 x = if not (null ps) && unsafeHead ps == patc j'
1434 then kmpNext Array.! j' else j'
1436 search i j = match ++ rest -- i: position in string, j: position in pattern
1437 where match = if j == m then [(i - j)] else []
1438 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1439 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1442 -- ---------------------------------------------------------------------
1445 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1446 -- corresponding pairs of bytes. If one input ByteString is short,
1447 -- excess elements of the longer ByteString are discarded. This is
1448 -- equivalent to a pair of 'unpack' operations.
1449 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1451 | null ps || null qs = []
1452 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1454 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1455 -- the first argument, instead of a tupling function. For example,
1456 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1457 -- corresponding sums.
1458 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1460 | null ps || null qs = []
1461 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1463 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1464 -- ByteStrings. Note that this performs two 'pack' operations.
1465 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1466 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1467 {-# INLINE unzip #-}
1469 -- ---------------------------------------------------------------------
1472 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1473 inits :: ByteString -> [ByteString]
1474 inits (PS x s l) = [PS x s n | n <- [0..l]]
1476 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1477 tails :: ByteString -> [ByteString]
1478 tails p | null p = [empty]
1479 | otherwise = p : tails (unsafeTail p)
1481 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1483 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1484 elems :: ByteString -> [ByteString]
1485 elems (PS _ _ 0) = []
1486 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1487 {-# INLINE elems #-}
1489 -- ---------------------------------------------------------------------
1490 -- ** Ordered 'ByteString's
1492 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1493 sort :: ByteString -> ByteString
1494 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1496 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1497 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1500 go 256 _ = return ()
1501 go i ptr = do n <- peekElemOff arr i
1502 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1503 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1506 -- "countEach counts str l" counts the number of occurences of each Word8 in
1507 -- str, and stores the result in counts.
1508 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1510 countEach counts str l = go 0
1513 go i | i == l = return ()
1514 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1515 x <- peekElemOff counts k
1516 pokeElemOff counts k (x + 1)
1520 sort :: ByteString -> ByteString
1521 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1522 memcpy p (f `plusPtr` s) l
1523 c_qsort p l -- inplace
1527 sort = pack . List.sort . unpack
1530 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1532 -- Try some linear sorts: radix, counting
1535 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1536 -- sortBy f ps = undefined
1538 -- ---------------------------------------------------------------------
1540 -- Extensions to the basic interface
1543 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1544 -- check for the empty case, so there is an obligation on the programmer
1545 -- to provide a proof that the ByteString is non-empty.
1546 unsafeHead :: ByteString -> Word8
1547 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1548 {-# INLINE unsafeHead #-}
1550 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1551 -- check for the empty case. As with 'unsafeHead', the programmer must
1552 -- provide a separate proof that the ByteString is non-empty.
1553 unsafeTail :: ByteString -> ByteString
1554 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1555 {-# INLINE unsafeTail #-}
1557 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1558 -- This omits the bounds check, which means there is an accompanying
1559 -- obligation on the programmer to ensure the bounds are checked in some
1561 unsafeIndex :: ByteString -> Int -> Word8
1562 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1563 {-# INLINE unsafeIndex #-}
1565 -- ---------------------------------------------------------------------
1566 -- Low level constructors
1568 #if defined(__GLASGOW_HASKELL__)
1569 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1570 -- Addr\# (an arbitrary machine address assumed to point outside the
1571 -- garbage-collected heap) into a @ByteString@. A much faster way to
1572 -- create an Addr\# is with an unboxed string literal, than to pack a
1573 -- boxed string. A unboxed string literal is compiled to a static @char
1574 -- []@ by GHC. Establishing the length of the string requires a call to
1575 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1576 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1577 -- if you know the length of the string statically.
1581 -- > literalFS = packAddress "literal"#
1583 packAddress :: Addr# -> ByteString
1584 packAddress addr# = inlinePerformIO $ do
1585 p <- newForeignPtr_ cstr
1586 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1589 {-# INLINE packAddress #-}
1591 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1592 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1593 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1594 -- 'Addr\#' to the string, and the length of the string. Make sure the
1595 -- length is correct, otherwise use the safer 'packAddress' (where the
1596 -- length will be calculated once at runtime).
1597 unsafePackAddress :: Int -> Addr# -> ByteString
1598 unsafePackAddress len addr# = inlinePerformIO $ do
1599 p <- newForeignPtr_ cstr
1601 where cstr = Ptr addr#
1605 -- | /O(1)/ Build a ByteString from a ForeignPtr
1606 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1607 fromForeignPtr fp l = PS fp 0 l
1609 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1610 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1611 toForeignPtr (PS ps s l) = (ps, s, l)
1613 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1614 -- current 'ByteString' from any larger string it was created from, as
1616 skipIndex :: ByteString -> Int
1617 skipIndex (PS _ s _) = s
1618 {-# INLINE skipIndex #-}
1620 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1621 -- finalizer associated to it. The ByteString length is calculated using
1622 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1623 packCString :: CString -> ByteString
1624 packCString cstr = inlinePerformIO $ do
1625 fp <- newForeignPtr_ (castPtr cstr)
1626 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1628 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1629 -- have /no/ finalizer associated with it. This operation has /O(1)/
1630 -- complexity as we already know the final size, so no /strlen(3)/ is
1632 packCStringLen :: CStringLen -> ByteString
1633 packCStringLen (ptr,len) = inlinePerformIO $ do
1634 fp <- newForeignPtr_ (castPtr ptr)
1635 return $ PS fp 0 (fromIntegral len)
1637 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1638 -- have a @free(3)@ finalizer associated to it.
1639 packMallocCString :: CString -> ByteString
1640 packMallocCString cstr = inlinePerformIO $ do
1641 fp <- newForeignFreePtr (castPtr cstr)
1642 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1644 #if defined(__GLASGOW_HASKELL__)
1645 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1646 -- length, and an IO action representing a finalizer. This function is
1647 -- not available on Hugs.
1649 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1650 packCStringFinalizer p l f = do
1651 fp <- FC.newForeignPtr p f
1654 -- | Explicitly run the finaliser associated with a 'ByteString'.
1655 -- Further references to this value may generate invalid memory
1656 -- references. This operation is unsafe, as there may be other
1657 -- 'ByteStrings' referring to the same underlying pages. If you use
1658 -- this, you need to have a proof of some kind that all 'ByteString's
1659 -- ever generated from the underlying byte array are no longer live.
1660 unsafeFinalize :: ByteString -> IO ()
1661 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1665 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1666 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1667 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1668 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1670 alloc = withForeignPtr ps $ \p -> do
1671 buf <- c_malloc (fromIntegral l+1)
1672 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1673 poke (buf `plusPtr` l) (0::Word8)
1674 return $ castPtr buf
1676 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1677 -- Warning: modifying the @CString@ will affect the @ByteString@.
1678 -- Why is this function unsafe? It relies on the null byte at the end of
1679 -- the ByteString to be there. This is /not/ the case if your ByteString
1680 -- has been spliced from a larger string (i.e. with take or drop).
1681 -- Unless you can guarantee the null byte, you should use the safe
1682 -- version, which will copy the string first.
1684 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1685 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1687 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1688 -- This is mainly useful to allow the rest of the data pointed
1689 -- to by the 'ByteString' to be garbage collected, for example
1690 -- if a large string has been read in, and only a small part of it
1691 -- is needed in the rest of the program.
1692 copy :: ByteString -> ByteString
1693 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1694 memcpy p (f `plusPtr` s) (fromIntegral l)
1696 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1697 -- CString is going to be deallocated from C land.
1698 copyCString :: CString -> ByteString
1699 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1701 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1702 copyCStringLen :: CStringLen -> ByteString
1703 copyCStringLen (cstr, len) = inlinePerformIO $ do
1704 fp <- mallocForeignPtrArray (len+1)
1705 withForeignPtr fp $ \p -> do
1706 memcpy p (castPtr cstr) (fromIntegral len)
1707 poke (p `plusPtr` len) (0 :: Word8)
1708 return $! PS fp 0 len
1710 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1711 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1712 -- This is analogous to unsafeUseAsCString, and comes with the same
1713 -- safety requirements.
1715 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1716 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1718 -- | Given the maximum size needed and a function to make the contents
1719 -- of a ByteString, generate makes the 'ByteString'. The generating
1720 -- function is required to return the actual final size (<= the maximum
1721 -- size), and the resulting byte array is realloced to this size. The
1722 -- string is padded at the end with a null byte.
1724 -- generate is the main mechanism for creating custom, efficient
1725 -- ByteString functions, using Haskell or C functions to fill the space.
1727 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1729 fp <- mallocByteString i
1730 (ptr,n) <- withForeignPtr fp $ \p -> do
1734 else do fp_ <- mallocByteString i' -- realloc
1735 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1741 -- On the C malloc heap. Less fun.
1744 p <- mallocArray (i+1)
1746 p' <- reallocArray p (i'+1)
1747 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1748 fp <- newForeignFreePtr p'
1752 -- ---------------------------------------------------------------------
1755 #if defined(__GLASGOW_HASKELL__)
1757 -- | getLine, read a line from stdin.
1758 getLine :: IO ByteString
1759 getLine = hGetLine stdin
1761 -- | hGetLine. read a ByteString from a handle
1762 hGetLine :: Handle -> IO ByteString
1763 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1764 case haBufferMode handle_ of
1765 NoBuffering -> error "no buffering"
1766 _other -> hGetLineBuffered handle_
1769 hGetLineBuffered handle_ = do
1770 let ref = haBuffer handle_
1771 buf <- readIORef ref
1772 hGetLineBufferedLoop handle_ ref buf 0 []
1774 hGetLineBufferedLoop handle_ ref
1775 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1777 off <- findEOL r w raw
1778 let new_len = len + off - r
1779 xs <- mkPS raw r off
1781 -- if eol == True, then off is the offset of the '\n'
1782 -- otherwise off == w and the buffer is now empty.
1784 then do if (w == off + 1)
1785 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1786 else writeIORef ref buf{ bufRPtr = off + 1 }
1787 mkBigPS new_len (xs:xss)
1789 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1790 buf{ bufWPtr=0, bufRPtr=0 }
1792 -- Nothing indicates we caught an EOF, and we may have a
1793 -- partial line to return.
1795 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1797 then mkBigPS new_len (xs:xss)
1800 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1802 -- find the end-of-line character, if there is one
1806 (c,r') <- readCharFromBuffer raw r
1808 then return r -- NB. not r': don't include the '\n'
1809 else findEOL r' w raw
1811 maybeFillReadBuffer fd is_line is_stream buf = catch
1812 (do buf' <- fillReadBuffer fd is_line is_stream buf
1814 (\e -> if isEOFError e then return Nothing else ioError e)
1816 -- TODO, rewrite to use normal memcpy
1817 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1818 mkPS buf start end = do
1819 let len = end - start
1820 fp <- mallocByteString len
1821 withForeignPtr fp $ \p -> do
1822 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1823 return (PS fp 0 len)
1825 mkBigPS :: Int -> [ByteString] -> IO ByteString
1826 mkBigPS _ [ps] = return ps
1827 mkBigPS _ pss = return $! concat (P.reverse pss)
1831 -- ---------------------------------------------------------------------
1834 -- | Outputs a 'ByteString' to the specified 'Handle'.
1835 hPut :: Handle -> ByteString -> IO ()
1836 hPut _ (PS _ _ 0) = return ()
1837 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1838 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1840 -- | Write a ByteString to stdout
1841 putStr :: ByteString -> IO ()
1842 putStr = hPut stdout
1844 -- | Write a ByteString to stdout, appending a newline byte
1845 putStrLn :: ByteString -> IO ()
1846 putStrLn ps = hPut stdout ps >> hPut stdout nl
1847 where nl = packByte 0x0a
1849 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1850 -- is far more efficient than reading the characters into a 'String'
1851 -- and then using 'pack'.
1852 hGet :: Handle -> Int -> IO ByteString
1853 hGet _ 0 = return empty
1854 hGet h i = do fp <- mallocByteString i
1855 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1858 #if defined(__GLASGOW_HASKELL__)
1859 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1860 -- waiting for data to become available, instead it returns only whatever data
1862 hGetNonBlocking :: Handle -> Int -> IO ByteString
1863 hGetNonBlocking _ 0 = return empty
1864 hGetNonBlocking h i = do
1865 fp <- mallocByteString i
1866 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1870 -- | Read entire handle contents into a 'ByteString'.
1872 -- As with 'hGet', the string representation in the file is assumed to
1875 hGetContents :: Handle -> IO ByteString
1877 let start_size = 1024
1878 p <- mallocArray start_size
1879 i <- hGetBuf h p start_size
1881 then do p' <- reallocArray p i
1882 fp <- newForeignFreePtr p'
1888 p' <- reallocArray p s'
1889 i <- hGetBuf h (p' `plusPtr` s) s
1891 then do let i' = s + i
1892 p'' <- reallocArray p' i'
1893 fp <- newForeignFreePtr p''
1897 -- | getContents. Equivalent to hGetContents stdin
1898 getContents :: IO ByteString
1899 getContents = hGetContents stdin
1901 -- | Read an entire file strictly into a 'ByteString'. This is far more
1902 -- efficient than reading the characters into a 'String' and then using
1903 -- 'pack'. It also may be more efficient than opening the file and
1904 -- reading it using hGet.
1905 readFile :: FilePath -> IO ByteString
1907 h <- openBinaryFile f ReadMode
1909 s <- hGet h $ fromIntegral l
1913 -- | Write a 'ByteString' to a file.
1914 writeFile :: FilePath -> ByteString -> IO ()
1915 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1920 -- Disable until we can move it into a portable .hsc file
1923 -- | Like readFile, this reads an entire file directly into a
1924 -- 'ByteString', but it is even more efficient. It involves directly
1925 -- mapping the file to memory. This has the advantage that the contents
1926 -- of the file never need to be copied. Also, under memory pressure the
1927 -- page may simply be discarded, while in the case of readFile it would
1928 -- need to be written to swap. If you read many small files, mmapFile
1929 -- will be less memory-efficient than readFile, since each mmapFile
1930 -- takes up a separate page of memory. Also, you can run into bus
1931 -- errors if the file is modified. As with 'readFile', the string
1932 -- representation in the file is assumed to be ISO-8859-1.
1934 -- On systems without mmap, this is the same as a readFile.
1936 mmapFile :: FilePath -> IO ByteString
1937 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1939 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1941 h <- openBinaryFile f ReadMode
1942 l <- fromIntegral `fmap` hFileSize h
1943 -- Don't bother mmaping small files because each mmapped file takes up
1944 -- at least one full VM block.
1946 then do thefp <- mallocByteString l
1947 withForeignPtr thefp $ \p-> hGetBuf h p l
1952 fd <- fromIntegral `fmap` handleToFd h
1954 fp <- if p == nullPtr
1955 then do thefp <- mallocByteString l
1956 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1959 -- The munmap leads to crashes on OpenBSD.
1960 -- maybe there's a use after unmap in there somewhere?
1961 #if !defined(__OpenBSD__)
1962 let unmap = c_munmap p l >> return ()
1964 let unmap = return ()
1966 fp <- FC.newForeignPtr p unmap
1971 where mmap_limit = 16*1024
1974 #if defined(__GLASGOW_HASKELL__)
1976 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1978 getArgs :: IO [ByteString]
1980 alloca $ \ p_argc ->
1981 alloca $ \ p_argv -> do
1982 getProgArgv p_argc p_argv
1983 p <- fromIntegral `fmap` peek p_argc
1985 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1988 -- ---------------------------------------------------------------------
1989 -- Internal utilities
1991 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1992 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1993 -- convenience for ByteString construction.
1994 w2c :: Word8 -> Char
1995 #if !defined(__GLASGOW_HASKELL__)
1996 w2c = chr . fromIntegral
1998 w2c = unsafeChr . fromIntegral
2002 c2w :: Char -> Word8
2003 c2w = fromIntegral . ord
2006 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2007 -- is padded with a null byte.
2008 mallocByteString :: Int -> IO (ForeignPtr Word8)
2009 mallocByteString l = do
2010 fp <- mallocForeignPtrArray (l+1)
2011 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2014 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2015 -- argument gives the final size of the ByteString. Unlike 'generate'
2016 -- the ByteString is not reallocated if the final size is less than the
2017 -- estimated size. Also, unlike 'generate' ByteString's created this way
2018 -- are managed on the Haskell heap.
2019 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2020 create l write_ptr = inlinePerformIO $ do
2021 fp <- mallocByteString (l+1)
2022 withForeignPtr fp $ \p -> write_ptr p
2024 {-# INLINE create #-}
2026 -- | Perform an operation with a temporary ByteString
2027 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2028 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2029 {-# INLINE withPtr #-}
2031 -- Common up near identical calls to `error' to reduce the number
2032 -- constant strings created when compiled:
2033 errorEmptyList :: String -> a
2034 errorEmptyList fun = moduleError fun "empty ByteString"
2035 {-# NOINLINE errorEmptyList #-}
2037 moduleError :: String -> String -> a
2038 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2039 {-# NOINLINE moduleError #-}
2041 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2042 -- of the string if no element is found, rather than Nothing.
2043 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2044 STRICT2(findIndexOrEnd)
2047 | f (unsafeHead ps) = 0
2048 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
2049 {-# INLINE findIndexOrEnd #-}
2051 -- Find from the end of the string using predicate
2052 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2053 STRICT2(findFromEndUntil)
2054 findFromEndUntil f ps@(PS x s l) =
2056 else if f (last ps) then l
2057 else findFromEndUntil f (PS x s (l-1))
2059 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2060 -- it exposes lots of things to further inlining
2062 {-# INLINE inlinePerformIO #-}
2063 inlinePerformIO :: IO a -> a
2064 #if defined(__GLASGOW_HASKELL__)
2065 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2067 inlinePerformIO = unsafePerformIO
2070 {-# INLINE newForeignFreePtr #-}
2071 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2072 #if defined(__GLASGOW_HASKELL__)
2073 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2075 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2078 -- ---------------------------------------------------------------------
2080 -- Standard C functions
2083 foreign import ccall unsafe "string.h strlen" c_strlen
2086 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2087 :: CInt -> IO (Ptr Word8)
2089 foreign import ccall unsafe "static stdlib.h free" c_free
2090 :: Ptr Word8 -> IO ()
2092 #if !defined(__GLASGOW_HASKELL__)
2093 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2094 :: FunPtr (Ptr Word8 -> IO ())
2097 foreign import ccall unsafe "string.h memset" memset
2098 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2100 foreign import ccall unsafe "string.h memchr" memchr
2101 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2103 foreign import ccall unsafe "string.h memcmp" memcmp
2104 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2106 foreign import ccall unsafe "string.h memcpy" memcpy
2107 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2109 -- ---------------------------------------------------------------------
2114 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2115 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2117 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2118 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2120 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2121 :: Ptr Word8 -> CInt -> Word8
2123 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2124 :: Ptr Word8 -> CInt -> Word8
2126 foreign import ccall unsafe "static fpstring.h count" c_count
2127 :: Ptr Word8 -> CInt -> Word8 -> CInt
2129 -- ---------------------------------------------------------------------
2133 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2134 :: Int -> Int -> IO (Ptr Word8)
2136 foreign import ccall unsafe "static unistd.h close" c_close
2139 # if !defined(__OpenBSD__)
2140 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2141 :: Ptr Word8 -> Int -> IO Int
2145 -- ---------------------------------------------------------------------
2146 -- Internal GHC Haskell magic
2148 #if defined(__GLASGOW_HASKELL__)
2149 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2150 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2152 foreign import ccall unsafe "__hscore_memcpy_src_off"
2153 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2156 -- ---------------------------------------------------------------------
2158 -- Functional array fusion for ByteStrings.
2160 -- From the Data Parallel Haskell project,
2161 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2164 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2165 -- the fact that no bottoms of this type are ever constructed; hence, we can
2166 -- assume @(_ :: NoAL) `seq` x = x@.
2170 -- | Special forms of loop arguments
2172 -- * These are common special cases for the three function arguments of gen
2173 -- and loop; we give them special names to make it easier to trigger RULES
2174 -- applying in the special cases represented by these arguments. The
2175 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2176 -- two simplifier phases.
2178 -- * In the case where the accumulator is not needed, it is better to always
2179 -- explicitly return a value `()', rather than just copy the input to the
2180 -- output, as the former gives GHC better local information.
2183 -- | Element function expressing a mapping only
2184 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2185 mapEFL f = \_ e -> (noAL, (Just $ f e))
2186 #if defined(__GLASGOW_HASKELL__)
2187 {-# INLINE [1] mapEFL #-}
2190 -- | Element function implementing a filter function only
2191 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2192 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2193 #if defined(__GLASGOW_HASKELL__)
2194 {-# INLINE [1] filterEFL #-}
2197 -- |Element function expressing a reduction only
2198 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2199 foldEFL f = \a e -> (f a e, Nothing)
2200 #if defined(__GLASGOW_HASKELL__)
2201 {-# INLINE [1] foldEFL #-}
2204 -- | A strict foldEFL.
2205 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2206 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2207 #if defined(__GLASGOW_HASKELL__)
2208 {-# INLINE [1] foldEFL' #-}
2214 #if defined(__GLASGOW_HASKELL__)
2215 {-# INLINE [1] noAL #-}
2218 -- | Projection functions that are fusion friendly (as in, we determine when
2219 -- they are inlined)
2220 loopArr :: (ByteString, acc) -> ByteString
2221 loopArr (arr, _) = arr
2222 #if defined(__GLASGOW_HASKELL__)
2223 {-# INLINE [1] loopArr #-}
2226 loopAcc :: (ByteString, acc) -> acc
2227 loopAcc (_, acc) = acc
2228 #if defined(__GLASGOW_HASKELL__)
2229 {-# INLINE [1] loopAcc #-}
2232 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2233 loopSndAcc (arr, (_, acc)) = (arr, acc)
2234 #if defined(__GLASGOW_HASKELL__)
2235 {-# INLINE [1] loopSndAcc #-}
2238 ------------------------------------------------------------------------
2241 -- size, and then percentage.
2244 -- | Iteration over over ByteStrings
2245 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2246 -> acc -- ^ initial acc value
2247 -> ByteString -- ^ input ByteString
2248 -> (ByteString, acc)
2250 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2251 fp <- mallocByteString i
2252 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2253 (acc, i') <- go (a `plusPtr` s) p start
2255 then return (fp,i',acc) -- no realloc for map
2256 else do fp_ <- mallocByteString i' -- realloc
2257 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2260 return (PS ptr 0 n, acc)
2265 trans a_off ma_off acc
2266 | a_off >= i = return (acc, ma_off)
2268 x <- peekByteOff p a_off
2269 let (acc', oe) = f acc x
2270 ma_off' <- case oe of
2271 Nothing -> return ma_off
2272 Just e -> do pokeByteOff ma ma_off e
2274 trans (a_off+1) ma_off' acc'
2276 #if defined(__GLASGOW_HASKELL__)
2277 {-# INLINE [1] loopU #-}
2282 -- |Fuse to flat loop functions
2283 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2284 -> (a2 -> Word8 -> (a2, Maybe Word8))
2287 -> ((a1, a2), Maybe Word8)
2288 fuseEFL f g (acc1, acc2) e1 =
2290 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2293 (acc2', res) -> ((acc1', acc2'), res)
2297 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2298 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2299 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2301 "loopArr/loopSndAcc" forall x.
2302 loopArr (loopSndAcc x) = loopArr x
2304 "seq/NoAL" forall (u::NoAL) e.