1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
14 -- License : BSD-style
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27 -- and can be passed between C and Haskell with little effort.
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
32 -- > import qualified Data.ByteString as B
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
39 module Data.ByteString (
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 singleton, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 append, -- :: ByteString -> ByteString -> ByteString
56 head, -- :: ByteString -> Word8
57 last, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 init, -- :: ByteString -> ByteString
60 null, -- :: ByteString -> Bool
61 length, -- :: ByteString -> Int
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
66 reverse, -- :: ByteString -> ByteString
67 intersperse, -- :: Word8 -> ByteString -> ByteString
68 transpose, -- :: [ByteString] -> [ByteString]
70 -- * Reducing 'ByteString's (folds)
71 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
72 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
73 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
74 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
75 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
76 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 concat, -- :: [ByteString] -> ByteString
80 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
81 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
82 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 maximum, -- :: ByteString -> Word8
84 minimum, -- :: ByteString -> Word8
86 -- * Building ByteStrings
88 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
89 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
91 -- ** Accumulating maps
92 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
93 -- mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
94 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
96 -- ** Unfolding ByteStrings
97 replicate, -- :: Int -> Word8 -> ByteString
98 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
99 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
103 -- ** Breaking strings
104 take, -- :: Int -> ByteString -> ByteString
105 drop, -- :: Int -> ByteString -> ByteString
106 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
107 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
108 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
109 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
110 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
111 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 group, -- :: ByteString -> [ByteString]
113 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
114 inits, -- :: ByteString -> [ByteString]
115 tails, -- :: ByteString -> [ByteString]
117 -- ** Breaking and dropping on specific bytes
118 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
119 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
121 -- ** Breaking into many substrings
122 split, -- :: Word8 -> ByteString -> [ByteString]
123 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
124 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
126 -- ** Joining strings
127 join, -- :: ByteString -> [ByteString] -> ByteString
128 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
131 isPrefixOf, -- :: ByteString -> ByteString -> Bool
132 isSuffixOf, -- :: ByteString -> ByteString -> Bool
134 -- ** Search for arbitrary substrings
135 isSubstringOf, -- :: ByteString -> ByteString -> Bool
136 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
137 findSubstrings, -- :: ByteString -> ByteString -> [Int]
139 -- * Searching ByteStrings
141 -- ** Searching by equality
142 -- | These functions use memchr(3) to efficiently search the ByteString
143 elem, -- :: Word8 -> ByteString -> Bool
144 notElem, -- :: Word8 -> ByteString -> Bool
145 filterByte, -- :: Word8 -> ByteString -> ByteString
146 filterNotByte, -- :: Word8 -> ByteString -> ByteString
148 -- ** Searching with a predicate
149 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
150 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
151 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
152 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
154 -- * Indexing ByteStrings
155 index, -- :: ByteString -> Int -> Word8
156 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
157 elemIndices, -- :: Word8 -> ByteString -> [Int]
158 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
159 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
160 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
161 count, -- :: Word8 -> ByteString -> Int
162 findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int
164 -- * Zipping and unzipping ByteStrings
165 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
166 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
167 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
169 -- * Ordered ByteStrings
170 sort, -- :: ByteString -> ByteString
172 -- * Unchecked access
173 unsafeHead, -- :: ByteString -> Word8
174 unsafeTail, -- :: ByteString -> ByteString
175 unsafeIndex, -- :: ByteString -> Int -> Word8
176 unsafeTake, -- :: Int -> ByteString -> ByteString
177 unsafeDrop, -- :: Int -> ByteString -> ByteString
179 -- * Low level introduction and elimination
180 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
181 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
182 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
183 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
184 skipIndex, -- :: ByteString -> Int
186 -- ** Packing CStrings and pointers
187 packCString, -- :: CString -> ByteString
188 packCStringLen, -- :: CString -> ByteString
189 packMallocCString, -- :: CString -> ByteString
191 #if defined(__GLASGOW_HASKELL__)
192 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
193 packAddress, -- :: Addr# -> ByteString
194 unsafePackAddress, -- :: Int -> Addr# -> ByteString
195 unsafeFinalize, -- :: ByteString -> IO ()
198 -- ** Using ByteStrings as CStrings
199 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
200 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
201 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
203 -- ** Copying ByteStrings
204 -- | These functions perform memcpy(3) operations
205 copy, -- :: ByteString -> ByteString
206 copyCString, -- :: CString -> ByteString
207 copyCStringLen, -- :: CStringLen -> ByteString
209 -- * I\/O with 'ByteString's
211 -- ** Standard input and output
213 #if defined(__GLASGOW_HASKELL__)
214 getLine, -- :: IO ByteString
216 getContents, -- :: IO ByteString
217 putStr, -- :: ByteString -> IO ()
218 putStrLn, -- :: ByteString -> IO ()
221 readFile, -- :: FilePath -> IO ByteString
222 writeFile, -- :: FilePath -> ByteString -> IO ()
223 -- mmapFile, -- :: FilePath -> IO ByteString
225 -- ** I\/O with Handles
226 #if defined(__GLASGOW_HASKELL__)
227 getArgs, -- :: IO [ByteString]
228 hGetLine, -- :: Handle -> IO ByteString
229 hGetLines, -- :: Handle -> IO [ByteString]
230 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
232 hGetContents, -- :: Handle -> IO ByteString
233 hGet, -- :: Handle -> Int -> IO ByteString
234 hPut, -- :: Handle -> ByteString -> IO ()
236 -- * Fusion utilities
237 #if defined(__GLASGOW_HASKELL__)
238 unpackList, -- eek, otherwise it gets thrown away by the simplifier
241 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
242 loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL,
243 mapAccumEFL, mapIndexEFL,
247 import qualified Prelude as P
248 import Prelude hiding (reverse,head,tail,last,init,null
249 ,length,map,lines,foldl,foldr,unlines
250 ,concat,any,take,drop,splitAt,takeWhile
251 ,dropWhile,span,break,elem,filter,maximum
252 ,minimum,all,concatMap,foldl1,foldr1
253 ,scanl,scanl1,readFile,writeFile,replicate
254 ,getContents,getLine,putStr,putStrLn
255 ,zip,zipWith,unzip,notElem)
257 import qualified Data.List as List
260 import Data.Word (Word8)
261 import Data.Maybe (listToMaybe)
262 import Data.Array (listArray)
263 import qualified Data.Array as Array ((!))
265 -- Control.Exception.bracket not available in yhc or nhc
266 import Control.Exception (bracket, assert)
267 import Control.Monad (when)
269 import Foreign.C.String (CString, CStringLen)
270 import Foreign.C.Types (CSize,CInt)
271 import Foreign.ForeignPtr
272 import Foreign.Marshal.Array
274 import Foreign.Storable (Storable(..))
276 -- hGetBuf and hPutBuf not available in yhc or nhc
277 import System.IO (stdin,stdout,hClose,hFileSize
278 ,hGetBuf,hPutBuf,openBinaryFile
281 import Data.Monoid (Monoid, mempty, mappend, mconcat)
283 #if !defined(__GLASGOW_HASKELL__)
284 import System.IO.Unsafe
287 #if defined(__GLASGOW_HASKELL__)
289 import Data.Generics (Data(..), Typeable(..))
291 import System.IO (hGetBufNonBlocking)
292 import System.IO.Error (isEOFError)
294 import Foreign.Marshal (alloca)
295 import qualified Foreign.Concurrent as FC (newForeignPtr)
298 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
299 import GHC.Base (build, unsafeChr)
300 import GHC.Word hiding (Word8)
301 import GHC.Ptr (Ptr(..))
302 import GHC.ST (ST(..))
307 -- CFILES stuff is Hugs only
308 {-# CFILES cbits/fpstring.c #-}
310 -- -----------------------------------------------------------------------------
312 -- Useful macros, until we have bang patterns
315 #define STRICT1(f) f a | a `seq` False = undefined
316 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
317 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
318 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
319 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
321 -- -----------------------------------------------------------------------------
323 -- | A space-efficient representation of a Word8 vector, supporting many
324 -- efficient operations. A 'ByteString' contains 8-bit characters only.
326 -- Instances of Eq, Ord, Read, Show, Data, Typeable
328 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
332 #if defined(__GLASGOW_HASKELL__)
333 deriving (Data, Typeable)
336 instance Eq ByteString
339 instance Ord ByteString
340 where compare = compareBytes
342 instance Show ByteString where
343 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
345 instance Read ByteString where
346 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
348 instance Monoid ByteString where
354 instance Arbitrary PackedString where
355 arbitrary = P.pack `fmap` arbitrary
356 coarbitrary s = coarbitrary (P.unpack s)
359 -- | /O(n)/ Equality on the 'ByteString' type.
360 eq :: ByteString -> ByteString -> Bool
361 eq a@(PS p s l) b@(PS p' s' l')
362 | l /= l' = False -- short cut on length
363 | p == p' && s == s' = True -- short cut for the same string
364 | otherwise = compareBytes a b == EQ
367 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
368 compareBytes :: ByteString -> ByteString -> Ordering
369 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
370 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
371 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
372 | otherwise = inlinePerformIO $
373 withForeignPtr x1 $ \p1 ->
374 withForeignPtr x2 $ \p2 -> do
375 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
376 return $ case i `compare` 0 of
377 EQ -> l1 `compare` l2
379 {-# INLINE compareBytes #-}
383 -- About 4x slower over 32M
385 compareBytes :: ByteString -> ByteString -> Ordering
386 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
387 withForeignPtr fp1 $ \p1 ->
388 withForeignPtr fp2 $ \p2 ->
389 cmp (p1 `plusPtr` off1)
390 (p2 `plusPtr` off2) 0 len1 len2
392 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
394 cmp p1 p2 n len1 len2
395 | n == len1 = if n == len2 then return EQ else return LT
396 | n == len2 = return GT
398 (a :: Word8) <- peekByteOff p1 n
399 (b :: Word8) <- peekByteOff p2 n
400 case a `compare` b of
401 EQ -> cmp p1 p2 (n+1) len1 len2
404 {-# INLINE compareBytes #-}
407 -- -----------------------------------------------------------------------------
408 -- Introducing and eliminating 'ByteString's
410 -- | /O(1)/ The empty 'ByteString'
412 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
413 {-# NOINLINE empty #-}
415 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
416 singleton :: Word8 -> ByteString
417 singleton c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
418 withForeignPtr fp $ \p -> poke p c
420 {-# INLINE singleton #-}
423 -- XXX The unsafePerformIO is critical!
427 -- singleton 255 `compare` singleton 127
431 -- case mallocByteString 2 of
432 -- ForeignPtr f internals ->
433 -- case writeWord8OffAddr# f 0 255 of _ ->
434 -- case writeWord8OffAddr# f 0 127 of _ ->
435 -- case eqAddr# f f of
436 -- False -> case compare (GHC.Prim.plusAddr# f 0)
437 -- (GHC.Prim.plusAddr# f 0)
441 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
443 -- For applications with large numbers of string literals, pack can be a
444 -- bottleneck. In such cases, consider using packAddress (GHC only).
445 pack :: [Word8] -> ByteString
447 #if !defined(__GLASGOW_HASKELL__)
449 pack str = create (P.length str) $ \p -> go p str
452 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
454 #else /* hack away */
456 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
458 go _ _ [] = return ()
459 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
461 writeByte p i c = ST $ \s# ->
462 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
466 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
467 unpack :: ByteString -> [Word8]
469 #if !defined(__GLASGOW_HASKELL__)
471 unpack (PS _ _ 0) = []
472 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
473 go (p `plusPtr` s) (l - 1) []
476 go p 0 acc = peek p >>= \e -> return (e : acc)
477 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
478 {-# INLINE unpack #-}
482 unpack ps = build (unpackFoldr ps)
483 {-# INLINE unpack #-}
485 unpackList :: ByteString -> [Word8]
486 unpackList (PS fp off len) = withPtr fp $ \p -> do
488 loop _ (-1) acc = return acc
491 loop q (n-1) (a : acc)
492 loop (p `plusPtr` off) (len-1) []
495 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
498 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
499 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
501 loop _ (-1) acc = return acc
504 loop q (n-1) (a `f` acc)
505 loop (p `plusPtr` off) (len-1) ch
506 {-# INLINE [0] unpackFoldr #-}
508 -- TODO just use normal foldr here.
512 ------------------------------------------------------------------------
514 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
515 -- conversion function
516 packWith :: (a -> Word8) -> [a] -> ByteString
517 packWith k str = create (P.length str) $ \p -> go p str
521 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
522 {-# INLINE packWith #-}
523 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
525 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
526 unpackWith :: (Word8 -> a) -> ByteString -> [a]
527 unpackWith _ (PS _ _ 0) = []
528 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
529 go (p `plusPtr` s) (l - 1) []
532 go p 0 acc = peek p >>= \e -> return (k e : acc)
533 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
534 {-# INLINE unpackWith #-}
535 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
537 -- ---------------------------------------------------------------------
540 -- | /O(1)/ Test whether a ByteString is empty.
541 null :: ByteString -> Bool
542 null (PS _ _ l) = l == 0
545 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
546 length :: ByteString -> Int
547 length (PS _ _ l) = l
549 #if defined(__GLASGOW_HASKELL__)
550 {-# INLINE [1] length #-}
555 -- Translate length into a loop.
556 -- Performace ok, but allocates too much, so disable for now.
558 "length/loop" forall f acc s .
559 length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
563 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
564 -- complexity, as it requires a memcpy.
565 cons :: Word8 -> ByteString -> ByteString
566 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
568 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
573 -- | /O(n)/ Append a byte to the end of a 'ByteString'
574 snoc :: ByteString -> Word8 -> ByteString
575 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
576 memcpy p (f `plusPtr` s) (fromIntegral l)
577 poke (p `plusPtr` l) c
582 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
583 head :: ByteString -> Word8
585 | null ps = errorEmptyList "head"
586 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
589 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
590 tail :: ByteString -> ByteString
592 | l <= 0 = errorEmptyList "tail"
593 | otherwise = PS p (s+1) (l-1)
596 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
597 last :: ByteString -> Word8
599 | null ps = errorEmptyList "last"
600 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
603 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
604 init :: ByteString -> ByteString
606 | l <= 0 = errorEmptyList "init"
607 | otherwise = PS p s (l-1)
610 -- | /O(n)/ Append two ByteStrings
611 append :: ByteString -> ByteString -> ByteString
612 append xs ys | null xs = ys
614 | otherwise = concat [xs,ys]
615 {-# INLINE append #-}
617 -- ---------------------------------------------------------------------
620 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
621 -- element of @xs@. This function is subject to array fusion.
622 map :: (Word8 -> Word8) -> ByteString -> ByteString
623 map f = loopArr . loopU (mapEFL f) noAL
626 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
627 -- slightly faster for one-shot cases.
628 map' :: (Word8 -> Word8) -> ByteString -> ByteString
629 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
630 np <- mallocByteString (len+1)
631 withForeignPtr np $ \p -> do
632 map_ 0 (a `plusPtr` s) p
636 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
639 | n >= len = return ()
641 x <- peekByteOff p1 n
642 pokeByteOff p2 n (f x)
646 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
647 reverse :: ByteString -> ByteString
648 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
649 c_reverse p (f `plusPtr` s) (fromIntegral l)
652 reverse = pack . P.reverse . unpack
655 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
656 -- 'ByteString' and \`intersperses\' that byte between the elements of
657 -- the 'ByteString'. It is analogous to the intersperse function on
659 intersperse :: Word8 -> ByteString -> ByteString
660 intersperse c ps@(PS x s l)
662 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
663 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
666 intersperse c = pack . List.intersperse c . unpack
669 -- | The 'transpose' function transposes the rows and columns of its
670 -- 'ByteString' argument.
671 transpose :: [ByteString] -> [ByteString]
672 transpose ps = P.map pack (List.transpose (P.map unpack ps))
674 -- ---------------------------------------------------------------------
675 -- Reducing 'ByteString's
677 -- | 'foldl', applied to a binary operator, a starting value (typically
678 -- the left-identity of the operator), and a ByteString, reduces the
679 -- ByteString using the binary operator, from left to right.
680 -- This function is subject to array fusion.
681 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
682 foldl f z = loopAcc . loopU (foldEFL f) z
687 -- About twice as fast with 6.4.1, but not fuseable
688 -- A simple fold . map is enough to make it worth while.
690 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
691 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
694 lgo z p q | p == q = return z
695 | otherwise = do c <- peek p
696 lgo (f z c) (p `plusPtr` 1) q
699 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
700 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
701 foldl' f z = loopAcc . loopU (foldEFL' f) z
702 {-# INLINE foldl' #-}
704 -- | 'foldr', applied to a binary operator, a starting value
705 -- (typically the right-identity of the operator), and a ByteString,
706 -- reduces the ByteString using the binary operator, from right to left.
707 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
708 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
709 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
712 go p q | p == q = return z
713 | otherwise = do c <- peek p
714 ws <- go (p `plusPtr` 1) q
717 -- | 'foldl1' is a variant of 'foldl' that has no starting value
718 -- argument, and thus must be applied to non-empty 'ByteStrings'.
719 -- This function is subject to array fusion.
720 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
722 | null ps = errorEmptyList "foldl1"
723 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
725 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
726 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
728 | null ps = errorEmptyList "foldl1'"
729 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
731 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
732 -- and thus must be applied to non-empty 'ByteString's
733 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
735 | null ps = errorEmptyList "foldr1"
736 | otherwise = foldr f (last ps) (init ps)
738 -- ---------------------------------------------------------------------
741 -- | /O(n)/ Concatenate a list of ByteStrings.
742 concat :: [ByteString] -> ByteString
745 concat xs = create len $ \ptr -> go xs ptr
746 where len = P.sum . P.map length $ xs
749 go (PS p s l:ps) ptr = do
750 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
751 go ps (ptr `plusPtr` l)
753 -- | Map a function over a 'ByteString' and concatenate the results
754 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
755 concatMap f = foldr (append . f) empty
756 -- A silly function for ByteStrings anyway.
758 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
759 -- any element of the 'ByteString' satisfies the predicate.
760 any :: (Word8 -> Bool) -> ByteString -> Bool
761 any _ (PS _ _ 0) = False
762 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
763 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
766 go p q | p == q = return False
767 | otherwise = do c <- peek p
768 if f c then return True
769 else go (p `plusPtr` 1) q
773 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
774 -- if all elements of the 'ByteString' satisfy the predicate.
775 all :: (Word8 -> Bool) -> ByteString -> Bool
776 all _ (PS _ _ 0) = True
777 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
778 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
781 go p q | p == q = return True -- end of list
782 | otherwise = do c <- peek p
784 then go (p `plusPtr` 1) q
788 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
789 maximum :: ByteString -> Word8
790 maximum xs@(PS x s l)
791 | null xs = errorEmptyList "maximum"
792 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
793 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
794 {-# INLINE maximum #-}
796 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
797 minimum :: ByteString -> Word8
798 minimum xs@(PS x s l)
799 | null xs = errorEmptyList "minimum"
800 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
801 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
802 {-# INLINE minimum #-}
804 -- fusion is too slow here (10x)
807 maximum xs@(PS x s l)
808 | null xs = errorEmptyList "maximum"
809 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
811 maximum_ (p `plusPtr` s) 0 l w
813 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
817 | otherwise = do w <- peekByteOff ptr n
818 maximum_ ptr (n+1) m (if w > c then w else c)
820 minimum xs@(PS x s l)
821 | null xs = errorEmptyList "minimum"
822 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
824 minimum_ (p `plusPtr` s) 0 l w
826 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
830 | otherwise = do w <- peekByteOff ptr n
831 minimum_ ptr (n+1) m (if w < c then w else c)
834 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
835 mapAccumL f z = loopU (mapAccumEFL f) z
837 --mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
839 -- | /O(n)/ map Word8 functions, provided with the index at each position
840 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
841 mapIndexed f = loopArr . loopU (mapIndexEFL f) 0
843 -- ---------------------------------------------------------------------
844 -- Building ByteStrings
846 -- | 'scanl' is similar to 'foldl', but returns a list of successive
847 -- reduced values from the left. This function will fuse.
849 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
853 -- > last (scanl f z xs) == foldl f z xs.
854 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
855 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space
858 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
859 -- This function will fuse.
861 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
862 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
865 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
866 {-# INLINE scanl1 #-}
868 -- ---------------------------------------------------------------------
869 -- Unfolds and replicates
871 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
872 -- the value of every element. The following holds:
874 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
876 -- This implemenation uses @memset(3)@
877 replicate :: Int -> Word8 -> ByteString
878 replicate w c | w <= 0 = empty
879 | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
883 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
887 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
890 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
891 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
892 -- ByteString from a seed value. The function takes the element and
893 -- returns 'Nothing' if it is done producing the ByteString or returns
894 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
895 -- and @b@ is the seed value for further production.
899 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
900 -- > == pack [0, 1, 2, 3, 4, 5]
902 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
903 unfoldr f = concat . unfoldChunk 32 64
904 where unfoldChunk n n' x =
905 case unfoldrN n f x of
906 (s, Nothing) -> s : []
907 (s, Just x') -> s : unfoldChunk n' (n+n') x'
909 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
910 -- value. However, the length of the result is limited by the first
911 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
912 -- when the maximum length of the result is known.
914 -- The following equation relates 'unfoldrN' and 'unfoldr':
916 -- > unfoldrN n f s == take n (unfoldr f s)
918 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
920 | i < 0 = (empty, Just x0)
921 | otherwise = inlinePerformIO $ do
922 fp <- mallocByteString i
923 withForeignPtr fp (\p -> go fp p x0 0)
927 Nothing -> let s = copy (PS fp 0 n)
928 in s `seq` return (s, Nothing)
930 | n == i -> return (PS fp 0 i, Just x)
931 | otherwise -> do poke p w
932 go fp (p `plusPtr` 1) x' (n+1)
934 -- ---------------------------------------------------------------------
937 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
938 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
939 take :: Int -> ByteString -> ByteString
943 | otherwise = PS x s n
946 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
947 -- elements, or @[]@ if @n > 'length' xs@.
948 drop :: Int -> ByteString -> ByteString
952 | otherwise = PS x (s+n) (l-n)
955 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
956 splitAt :: Int -> ByteString -> (ByteString, ByteString)
957 splitAt n ps@(PS x s l)
958 | n <= 0 = (empty, ps)
959 | n >= l = (ps, empty)
960 | otherwise = (PS x s n, PS x (s+n) (l-n))
961 {-# INLINE splitAt #-}
963 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
964 -- returns the longest prefix (possibly empty) of @xs@ of elements that
966 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
967 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
968 {-# INLINE takeWhile #-}
970 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
971 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
972 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
973 {-# INLINE dropWhile #-}
975 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
976 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
977 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
980 -- | 'breakByte' breaks its ByteString argument at the first occurence
981 -- of the specified byte. It is more efficient than 'break' as it is
982 -- implemented with @memchr(3)@. I.e.
984 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
986 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
987 breakByte c p = case elemIndex c p of
989 Just n -> (unsafeTake n p, unsafeDrop n p)
990 {-# INLINE breakByte #-}
992 -- | 'spanByte' breaks its ByteString argument at the first
993 -- occurence of a byte other than its argument. It is more efficient
996 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
998 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
999 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1000 go (p `plusPtr` s) 0
1003 go p i | i >= l = return (ps, empty)
1004 | otherwise = do c' <- peekByteOff p i
1006 then return (unsafeTake i ps, unsafeDrop i ps)
1008 {-# INLINE spanByte #-}
1010 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1011 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1012 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1013 span p ps = break (not . p) ps
1016 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1019 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1023 -- > spanEnd (not . isSpace) ps
1025 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1027 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1028 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1030 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1031 -- separators, where the predicate returns True for a separator element.
1032 -- The resulting components do not contain the separators. Two adjacent
1033 -- separators result in an empty component in the output. eg.
1035 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1036 -- > splitWith (=='a') [] == []
1038 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1040 #if defined(__GLASGOW_HASKELL__)
1041 splitWith _pred (PS _ _ 0) = []
1042 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1043 where pred# c# = pred_ (W8# c#)
1046 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1047 splitLoop pred' p 0 off' len' fp'
1049 splitLoop :: (Word# -> Bool)
1051 -> Int -> Int -> Int
1055 splitLoop pred' p idx' off' len' fp'
1056 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1057 | idx' >= len' = return [PS fp' off' idx']
1059 w <- peekElemOff p (off'+idx')
1060 if pred' (case w of W8# w# -> w#)
1061 then return (PS fp' off' idx' :
1062 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1063 else splitLoop pred' p (idx'+1) off' len' fp'
1064 {-# INLINE splitWith #-}
1067 splitWith _ (PS _ _ 0) = []
1068 splitWith p ps = loop p ps
1071 loop q qs = if null rest then [chunk]
1072 else chunk : loop q (unsafeTail rest)
1073 where (chunk,rest) = break q qs
1076 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1077 -- argument, consuming the delimiter. I.e.
1079 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1080 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1081 -- > split 'x' "x" == ["",""]
1085 -- > join [c] . split c == id
1086 -- > split == splitWith . (==)
1088 -- As for all splitting functions in this library, this function does
1089 -- not copy the substrings, it just constructs new 'ByteStrings' that
1090 -- are slices of the original.
1092 split :: Word8 -> ByteString -> [ByteString]
1093 split _ (PS _ _ 0) = []
1094 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1095 let ptr = p `plusPtr` s
1099 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1101 then [PS x (s+n) (l-n)]
1102 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1105 {-# INLINE split #-}
1108 -- slower. but stays inside Haskell.
1109 split _ (PS _ _ 0) = []
1110 split (W8# w#) (PS fp off len) = splitWith' off len fp
1112 splitWith' off' len' fp' = withPtr fp $ \p ->
1113 splitLoop p 0 off' len' fp'
1115 splitLoop :: Ptr Word8
1116 -> Int -> Int -> Int
1121 splitLoop p idx' off' len' fp'
1122 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1123 | idx' >= len' = return [PS fp' off' idx']
1125 (W8# x#) <- peekElemOff p (off'+idx')
1126 if word2Int# w# ==# word2Int# x#
1127 then return (PS fp' off' idx' :
1128 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1129 else splitLoop p (idx'+1) off' len' fp'
1132 -- | Like 'splitWith', except that sequences of adjacent separators are
1133 -- treated as a single separator. eg.
1135 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1137 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1138 tokens f = P.filter (not.null) . splitWith f
1139 {-# INLINE tokens #-}
1141 -- | The 'group' function takes a ByteString and returns a list of
1142 -- ByteStrings such that the concatenation of the result is equal to the
1143 -- argument. Moreover, each sublist in the result contains only equal
1144 -- elements. For example,
1146 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1148 -- It is a special case of 'groupBy', which allows the programmer to
1149 -- supply their own equality test. It is about 40% faster than
1151 group :: ByteString -> [ByteString]
1154 | otherwise = ys : group zs
1156 (ys, zs) = spanByte (unsafeHead xs) xs
1158 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1159 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1162 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1164 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1166 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1167 -- 'ByteString's and concatenates the list after interspersing the first
1168 -- argument between each element of the list.
1169 join :: ByteString -> [ByteString] -> ByteString
1170 join s = concat . (List.intersperse s)
1174 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1175 -- with a char. Around 4 times faster than the generalised join.
1177 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1178 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1179 withForeignPtr ffp $ \fp ->
1180 withForeignPtr fgp $ \gp -> do
1181 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1182 poke (ptr `plusPtr` l) c
1183 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1185 len = length f + length g + 1
1186 {-# INLINE joinWithByte #-}
1188 -- ---------------------------------------------------------------------
1189 -- Indexing ByteStrings
1191 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1192 index :: ByteString -> Int -> Word8
1194 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1195 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1196 ++ ", length = " ++ show (length ps))
1197 | otherwise = ps `unsafeIndex` n
1198 {-# INLINE index #-}
1200 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1201 -- element in the given 'ByteString' which is equal to the query
1202 -- element, or 'Nothing' if there is no such element.
1203 -- This implementation uses memchr(3).
1204 elemIndex :: Word8 -> ByteString -> Maybe Int
1205 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1206 let p' = p `plusPtr` s
1207 q = memchr p' c (fromIntegral l)
1208 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1209 {-# INLINE elemIndex #-}
1211 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1212 -- element in the given 'ByteString' which is equal to the query
1213 -- element, or 'Nothing' if there is no such element. The following
1216 -- > elemIndexEnd c xs ==
1217 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1219 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1220 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1221 go (p `plusPtr` s) (l-1)
1224 go p i | i < 0 = return Nothing
1225 | otherwise = do ch' <- peekByteOff p i
1227 then return $ Just i
1229 {-# INLINE elemIndexEnd #-}
1231 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1232 -- the indices of all elements equal to the query element, in ascending order.
1233 -- This implementation uses memchr(3).
1234 elemIndices :: Word8 -> ByteString -> [Int]
1235 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1236 let ptr = p `plusPtr` s
1239 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1242 else let i = q `minusPtr` ptr
1245 {-# INLINE elemIndices #-}
1249 elemIndices :: Word8 -> ByteString -> [Int]
1250 elemIndices c ps = loop 0 ps
1252 loop _ ps' | null ps' = []
1253 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1254 | otherwise = loop (n+1) (unsafeTail ps')
1257 -- | count returns the number of times its argument appears in the ByteString
1259 -- > count = length . elemIndices
1261 -- But more efficiently than using length on the intermediate list.
1262 count :: Word8 -> ByteString -> Int
1263 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1264 return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1265 {-# INLINE count #-}
1269 -- around 30% slower
1271 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1272 go (p `plusPtr` s) (fromIntegral m) 0
1274 go :: Ptr Word8 -> CSize -> Int -> IO Int
1277 let q = memchr p w l
1280 else do let k = fromIntegral $ q `minusPtr` p
1281 go (q `plusPtr` 1) (l-k-1) (i+1)
1284 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1285 -- returns the index of the first element in the ByteString
1286 -- satisfying the predicate.
1287 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1288 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1291 go ptr n | n >= l = return Nothing
1292 | otherwise = do w <- peek ptr
1294 then return (Just n)
1295 else go (ptr `plusPtr` 1) (n+1)
1296 {-# INLINE findIndex #-}
1298 -- | The 'findIndices' function extends 'findIndex', by returning the
1299 -- indices of all elements satisfying the predicate, in ascending order.
1300 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1301 findIndices p ps = loop 0 ps
1304 loop n qs | null qs = []
1305 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1306 | otherwise = loop (n+1) (unsafeTail qs)
1308 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1309 -- of the string if no element is found, rather than Nothing.
1310 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1311 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1314 go ptr n | n >= l = return l
1315 | otherwise = do w <- peek ptr
1318 else go (ptr `plusPtr` 1) (n+1)
1319 {-# INLINE findIndexOrEnd #-}
1321 -- ---------------------------------------------------------------------
1322 -- Searching ByteStrings
1324 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1325 elem :: Word8 -> ByteString -> Bool
1326 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1329 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1330 notElem :: Word8 -> ByteString -> Bool
1331 notElem c ps = not (elem c ps)
1332 {-# INLINE notElem #-}
1334 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1335 -- returns a ByteString containing those characters that satisfy the
1336 -- predicate. This function is subject to array fusion.
1337 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1338 filter p = loopArr . loopU (filterEFL p) noAL
1339 {-# INLINE filter #-}
1341 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1342 -- around 2x faster for some one-shot applications.
1343 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1344 filter' k ps@(PS x s l)
1346 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1347 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1348 return (t `minusPtr` p) -- actual length
1351 go f t end | f == end = return t
1355 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1356 else go (f `plusPtr` 1) t end
1357 {-# INLINE filter' #-}
1360 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1361 -- case of filtering a single byte. It is more efficient to use
1362 -- /filterByte/ in this case.
1364 -- > filterByte == filter . (==)
1366 -- filterByte is around 10x faster, and uses much less space, than its
1367 -- filter equivalent
1368 filterByte :: Word8 -> ByteString -> ByteString
1369 filterByte w ps = replicate (count w ps) w
1370 {-# INLINE filterByte #-}
1373 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1374 -- case of filtering a single byte out of a list. It is more efficient
1375 -- to use /filterNotByte/ in this case.
1377 -- > filterNotByte == filter . (/=)
1379 -- filterNotByte is around 2x faster than its filter equivalent.
1380 filterNotByte :: Word8 -> ByteString -> ByteString
1381 filterNotByte w = filter' (/= w)
1382 {-# INLINE filterNotByte #-}
1384 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1385 -- and returns the first element in matching the predicate, or 'Nothing'
1386 -- if there is no such element.
1388 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1390 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1391 find f p = case findIndex f p of
1392 Just n -> Just (p `unsafeIndex` n)
1398 -- fuseable, but we don't want to walk the whole array.
1400 find k = foldl findEFL Nothing
1401 where findEFL a@(Just _) _ = a
1402 findEFL _ c | k c = Just c
1403 | otherwise = Nothing
1406 -- ---------------------------------------------------------------------
1407 -- Searching for substrings
1409 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1410 -- iff the first is a prefix of the second.
1411 isPrefixOf :: ByteString -> ByteString -> Bool
1412 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1415 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1416 withForeignPtr x2 $ \p2 -> do
1417 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1420 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1421 -- iff the first is a suffix of the second.
1423 -- The following holds:
1425 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1427 -- However, the real implemenation uses memcmp to compare the end of the
1428 -- string only, with no reverse required..
1429 isSuffixOf :: ByteString -> ByteString -> Bool
1430 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1433 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1434 withForeignPtr x2 $ \p2 -> do
1435 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1438 -- | Check whether one string is a substring of another. @isSubstringOf
1439 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1440 isSubstringOf :: ByteString -- ^ String to search for.
1441 -> ByteString -- ^ String to search in.
1443 isSubstringOf p s = not $ P.null $ findSubstrings p s
1445 -- | Get the first index of a substring in another string,
1446 -- or 'Nothing' if the string is not found.
1447 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1448 findSubstring :: ByteString -- ^ String to search for.
1449 -> ByteString -- ^ String to seach in.
1451 findSubstring = (listToMaybe .) . findSubstrings
1453 -- | Find the indexes of all (possibly overlapping) occurances of a
1454 -- substring in a string. This function uses the Knuth-Morris-Pratt
1455 -- string matching algorithm.
1456 findSubstrings :: ByteString -- ^ String to search for.
1457 -> ByteString -- ^ String to seach in.
1460 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1462 patc x = pat `unsafeIndex` x
1463 strc x = str `unsafeIndex` x
1465 -- maybe we should make kmpNext a UArray before using it in search?
1466 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1467 kmpNextL p _ | null p = []
1468 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1470 x = if not (null ps) && unsafeHead ps == patc j'
1471 then kmpNext Array.! j' else j'
1473 search i j = match ++ rest -- i: position in string, j: position in pattern
1474 where match = if j == m then [(i - j)] else []
1475 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1476 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1479 -- ---------------------------------------------------------------------
1482 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1483 -- corresponding pairs of bytes. If one input ByteString is short,
1484 -- excess elements of the longer ByteString are discarded. This is
1485 -- equivalent to a pair of 'unpack' operations.
1486 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1488 | null ps || null qs = []
1489 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1491 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1492 -- the first argument, instead of a tupling function. For example,
1493 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1494 -- corresponding sums.
1495 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1497 | null ps || null qs = []
1498 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1500 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1501 -- ByteStrings. Note that this performs two 'pack' operations.
1502 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1503 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1504 {-# INLINE unzip #-}
1506 -- ---------------------------------------------------------------------
1509 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1510 inits :: ByteString -> [ByteString]
1511 inits (PS x s l) = [PS x s n | n <- [0..l]]
1513 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1514 tails :: ByteString -> [ByteString]
1515 tails p | null p = [empty]
1516 | otherwise = p : tails (unsafeTail p)
1518 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1520 -- ---------------------------------------------------------------------
1521 -- ** Ordered 'ByteString's
1523 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1524 sort :: ByteString -> ByteString
1525 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1527 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1528 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1531 go 256 _ = return ()
1532 go i ptr = do n <- peekElemOff arr i
1533 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1534 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1537 -- "countEach counts str l" counts the number of occurences of each Word8 in
1538 -- str, and stores the result in counts.
1539 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1541 countEach counts str l = go 0
1544 go i | i == l = return ()
1545 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1546 x <- peekElemOff counts k
1547 pokeElemOff counts k (x + 1)
1551 sort :: ByteString -> ByteString
1552 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1553 memcpy p (f `plusPtr` s) l
1554 c_qsort p l -- inplace
1558 sort = pack . List.sort . unpack
1561 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1563 -- Try some linear sorts: radix, counting
1566 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1567 -- sortBy f ps = undefined
1569 -- ---------------------------------------------------------------------
1571 -- Extensions to the basic interface
1574 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1575 -- check for the empty case, so there is an obligation on the programmer
1576 -- to provide a proof that the ByteString is non-empty.
1577 unsafeHead :: ByteString -> Word8
1578 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1579 {-# INLINE unsafeHead #-}
1581 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1582 -- check for the empty case. As with 'unsafeHead', the programmer must
1583 -- provide a separate proof that the ByteString is non-empty.
1584 unsafeTail :: ByteString -> ByteString
1585 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1586 {-# INLINE unsafeTail #-}
1588 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1589 -- This omits the bounds check, which means there is an accompanying
1590 -- obligation on the programmer to ensure the bounds are checked in some
1592 unsafeIndex :: ByteString -> Int -> Word8
1593 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1594 {-# INLINE unsafeIndex #-}
1596 -- | A variety of 'take' which omits the checks on @n@ so there is an
1597 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1598 unsafeTake :: Int -> ByteString -> ByteString
1599 unsafeTake n (PS x s l) =
1600 assert (0 <= n && n <= l) $ PS x s n
1601 {-# INLINE unsafeTake #-}
1603 -- | A variety of 'drop' which omits the checks on @n@ so there is an
1604 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1605 unsafeDrop :: Int -> ByteString -> ByteString
1606 unsafeDrop n (PS x s l) =
1607 assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
1608 {-# INLINE unsafeDrop #-}
1610 -- ---------------------------------------------------------------------
1611 -- Low level constructors
1613 #if defined(__GLASGOW_HASKELL__)
1614 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1615 -- Addr\# (an arbitrary machine address assumed to point outside the
1616 -- garbage-collected heap) into a @ByteString@. A much faster way to
1617 -- create an Addr\# is with an unboxed string literal, than to pack a
1618 -- boxed string. A unboxed string literal is compiled to a static @char
1619 -- []@ by GHC. Establishing the length of the string requires a call to
1620 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1621 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1622 -- if you know the length of the string statically.
1626 -- > literalFS = packAddress "literal"#
1628 packAddress :: Addr# -> ByteString
1629 packAddress addr# = inlinePerformIO $ do
1630 p <- newForeignPtr_ cstr
1631 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1634 {-# INLINE packAddress #-}
1636 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1637 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1638 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1639 -- 'Addr\#' to the string, and the length of the string. Make sure the
1640 -- length is correct, otherwise use the safer 'packAddress' (where the
1641 -- length will be calculated once at runtime).
1642 unsafePackAddress :: Int -> Addr# -> ByteString
1643 unsafePackAddress len addr# = inlinePerformIO $ do
1644 p <- newForeignPtr_ cstr
1646 where cstr = Ptr addr#
1650 -- | /O(1)/ Build a ByteString from a ForeignPtr
1651 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1652 fromForeignPtr fp l = PS fp 0 l
1654 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1655 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1656 toForeignPtr (PS ps s l) = (ps, s, l)
1658 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1659 -- current 'ByteString' from any larger string it was created from, as
1661 skipIndex :: ByteString -> Int
1662 skipIndex (PS _ s _) = s
1663 {-# INLINE skipIndex #-}
1665 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1666 -- finalizer associated to it. The ByteString length is calculated using
1667 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1668 packCString :: CString -> ByteString
1669 packCString cstr = inlinePerformIO $ do
1670 fp <- newForeignPtr_ (castPtr cstr)
1671 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1673 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1674 -- have /no/ finalizer associated with it. This operation has /O(1)/
1675 -- complexity as we already know the final size, so no /strlen(3)/ is
1677 packCStringLen :: CStringLen -> ByteString
1678 packCStringLen (ptr,len) = inlinePerformIO $ do
1679 fp <- newForeignPtr_ (castPtr ptr)
1680 return $ PS fp 0 (fromIntegral len)
1682 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1683 -- have a @free(3)@ finalizer associated to it.
1684 packMallocCString :: CString -> ByteString
1685 packMallocCString cstr = inlinePerformIO $ do
1686 fp <- newForeignFreePtr (castPtr cstr)
1687 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1689 #if defined(__GLASGOW_HASKELL__)
1690 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1691 -- length, and an IO action representing a finalizer. This function is
1692 -- not available on Hugs.
1694 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1695 packCStringFinalizer p l f = do
1696 fp <- FC.newForeignPtr p f
1699 -- | Explicitly run the finaliser associated with a 'ByteString'.
1700 -- Further references to this value may generate invalid memory
1701 -- references. This operation is unsafe, as there may be other
1702 -- 'ByteStrings' referring to the same underlying pages. If you use
1703 -- this, you need to have a proof of some kind that all 'ByteString's
1704 -- ever generated from the underlying byte array are no longer live.
1705 unsafeFinalize :: ByteString -> IO ()
1706 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1710 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1711 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1712 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1713 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1715 alloc = withForeignPtr ps $ \p -> do
1716 buf <- c_malloc (fromIntegral l+1)
1717 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1718 poke (buf `plusPtr` l) (0::Word8)
1719 return $ castPtr buf
1721 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1722 -- Warning: modifying the @CString@ will affect the @ByteString@.
1723 -- Why is this function unsafe? It relies on the null byte at the end of
1724 -- the ByteString to be there. This is /not/ the case if your ByteString
1725 -- has been spliced from a larger string (i.e. with take or drop).
1726 -- Unless you can guarantee the null byte, you should use the safe
1727 -- version, which will copy the string first.
1729 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1730 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1732 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1733 -- This is mainly useful to allow the rest of the data pointed
1734 -- to by the 'ByteString' to be garbage collected, for example
1735 -- if a large string has been read in, and only a small part of it
1736 -- is needed in the rest of the program.
1737 copy :: ByteString -> ByteString
1738 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1739 memcpy p (f `plusPtr` s) (fromIntegral l)
1741 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1742 -- CString is going to be deallocated from C land.
1743 copyCString :: CString -> IO ByteString
1744 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1746 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1747 copyCStringLen :: CStringLen -> IO ByteString
1748 copyCStringLen (cstr, len) = do
1749 fp <- mallocForeignPtrArray (len+1)
1750 withForeignPtr fp $ \p -> do
1751 memcpy p (castPtr cstr) (fromIntegral len)
1752 poke (p `plusPtr` len) (0 :: Word8)
1753 return $! PS fp 0 len
1755 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1756 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1757 -- This is analogous to unsafeUseAsCString, and comes with the same
1758 -- safety requirements.
1760 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1761 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1763 -- | Given the maximum size needed and a function to make the contents
1764 -- of a ByteString, generate makes the 'ByteString'. The generating
1765 -- function is required to return the actual final size (<= the maximum
1766 -- size), and the resulting byte array is realloced to this size. The
1767 -- string is padded at the end with a null byte.
1769 -- generate is the main mechanism for creating custom, efficient
1770 -- ByteString functions, using Haskell or C functions to fill the space.
1772 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1774 fp <- mallocByteString i
1775 (ptr,n) <- withForeignPtr fp $ \p -> do
1779 else do fp_ <- mallocByteString i' -- realloc
1780 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1786 -- On the C malloc heap. Less fun.
1789 p <- mallocArray (i+1)
1791 p' <- reallocArray p (i'+1)
1792 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1793 fp <- newForeignFreePtr p'
1797 -- ---------------------------------------------------------------------
1800 #if defined(__GLASGOW_HASKELL__)
1802 -- | getLine, read a line from stdin.
1803 getLine :: IO ByteString
1804 getLine = hGetLine stdin
1806 -- | Lazily construct a list of lines of ByteStrings. This will be much
1807 -- better on memory consumption than using lines =<< getContents.
1808 hGetLines :: Handle -> IO [ByteString]
1811 go = unsafeInterleaveIO $ do
1812 ms <- catch (hGetLine h >>= return . Just)
1813 (\_ -> return Nothing)
1815 Nothing -> return []
1816 Just s -> do ss <- go
1819 -- | hGetLine. read a ByteString from a handle
1820 hGetLine :: Handle -> IO ByteString
1821 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1822 case haBufferMode handle_ of
1823 NoBuffering -> error "no buffering"
1824 _other -> hGetLineBuffered handle_
1827 hGetLineBuffered handle_ = do
1828 let ref = haBuffer handle_
1829 buf <- readIORef ref
1830 hGetLineBufferedLoop handle_ ref buf 0 []
1832 hGetLineBufferedLoop handle_ ref
1833 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1835 off <- findEOL r w raw
1836 let new_len = len + off - r
1837 xs <- mkPS raw r off
1839 -- if eol == True, then off is the offset of the '\n'
1840 -- otherwise off == w and the buffer is now empty.
1842 then do if (w == off + 1)
1843 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1844 else writeIORef ref buf{ bufRPtr = off + 1 }
1845 mkBigPS new_len (xs:xss)
1847 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1848 buf{ bufWPtr=0, bufRPtr=0 }
1850 -- Nothing indicates we caught an EOF, and we may have a
1851 -- partial line to return.
1853 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1855 then mkBigPS new_len (xs:xss)
1858 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1860 -- find the end-of-line character, if there is one
1864 (c,r') <- readCharFromBuffer raw r
1866 then return r -- NB. not r': don't include the '\n'
1867 else findEOL r' w raw
1869 maybeFillReadBuffer fd is_line is_stream buf = catch
1870 (do buf' <- fillReadBuffer fd is_line is_stream buf
1872 (\e -> if isEOFError e then return Nothing else ioError e)
1874 -- TODO, rewrite to use normal memcpy
1875 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1876 mkPS buf start end = do
1877 let len = end - start
1878 fp <- mallocByteString len
1879 withForeignPtr fp $ \p -> do
1880 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1881 return (PS fp 0 len)
1883 mkBigPS :: Int -> [ByteString] -> IO ByteString
1884 mkBigPS _ [ps] = return ps
1885 mkBigPS _ pss = return $! concat (P.reverse pss)
1889 -- ---------------------------------------------------------------------
1892 -- | Outputs a 'ByteString' to the specified 'Handle'.
1893 hPut :: Handle -> ByteString -> IO ()
1894 hPut _ (PS _ _ 0) = return ()
1895 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1896 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1898 -- | Write a ByteString to stdout
1899 putStr :: ByteString -> IO ()
1900 putStr = hPut stdout
1902 -- | Write a ByteString to stdout, appending a newline byte
1903 putStrLn :: ByteString -> IO ()
1904 putStrLn ps = hPut stdout ps >> hPut stdout nl
1905 where nl = singleton 0x0a
1907 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1908 -- is far more efficient than reading the characters into a 'String'
1909 -- and then using 'pack'.
1910 hGet :: Handle -> Int -> IO ByteString
1911 hGet _ 0 = return empty
1912 hGet h i = do fp <- mallocByteString i
1913 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1916 #if defined(__GLASGOW_HASKELL__)
1917 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1918 -- waiting for data to become available, instead it returns only whatever data
1920 hGetNonBlocking :: Handle -> Int -> IO ByteString
1921 hGetNonBlocking _ 0 = return empty
1922 hGetNonBlocking h i = do
1923 fp <- mallocByteString i
1924 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1928 -- | Read entire handle contents into a 'ByteString'.
1929 -- This function reads chunks at a time, doubling the chunksize on each
1930 -- read. The final buffer is then realloced to the appropriate size. For
1931 -- files > half of available memory, this may lead to memory exhaustion.
1932 -- Consider using 'readFile' in this case.
1934 -- As with 'hGet', the string representation in the file is assumed to
1937 hGetContents :: Handle -> IO ByteString
1939 let start_size = 1024
1940 p <- mallocArray start_size
1941 i <- hGetBuf h p start_size
1943 then do p' <- reallocArray p i
1944 fp <- newForeignFreePtr p'
1950 p' <- reallocArray p s'
1951 i <- hGetBuf h (p' `plusPtr` s) s
1953 then do let i' = s + i
1954 p'' <- reallocArray p' i'
1955 fp <- newForeignFreePtr p''
1959 -- | getContents. Equivalent to hGetContents stdin
1960 getContents :: IO ByteString
1961 getContents = hGetContents stdin
1963 -- | Read an entire file strictly into a 'ByteString'. This is far more
1964 -- efficient than reading the characters into a 'String' and then using
1965 -- 'pack'. It also may be more efficient than opening the file and
1966 -- reading it using hGet.
1967 readFile :: FilePath -> IO ByteString
1969 h <- openBinaryFile f ReadMode
1971 s <- hGet h $ fromIntegral l
1975 -- | Write a 'ByteString' to a file.
1976 writeFile :: FilePath -> ByteString -> IO ()
1977 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1982 -- Disable until we can move it into a portable .hsc file
1985 -- | Like readFile, this reads an entire file directly into a
1986 -- 'ByteString', but it is even more efficient. It involves directly
1987 -- mapping the file to memory. This has the advantage that the contents
1988 -- of the file never need to be copied. Also, under memory pressure the
1989 -- page may simply be discarded, while in the case of readFile it would
1990 -- need to be written to swap. If you read many small files, mmapFile
1991 -- will be less memory-efficient than readFile, since each mmapFile
1992 -- takes up a separate page of memory. Also, you can run into bus
1993 -- errors if the file is modified. As with 'readFile', the string
1994 -- representation in the file is assumed to be ISO-8859-1.
1996 -- On systems without mmap, this is the same as a readFile.
1998 mmapFile :: FilePath -> IO ByteString
1999 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
2001 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
2003 h <- openBinaryFile f ReadMode
2004 l <- fromIntegral `fmap` hFileSize h
2005 -- Don't bother mmaping small files because each mmapped file takes up
2006 -- at least one full VM block.
2008 then do thefp <- mallocByteString l
2009 withForeignPtr thefp $ \p-> hGetBuf h p l
2014 fd <- fromIntegral `fmap` handleToFd h
2016 fp <- if p == nullPtr
2017 then do thefp <- mallocByteString l
2018 withForeignPtr thefp $ \p' -> hGetBuf h p' l
2021 -- The munmap leads to crashes on OpenBSD.
2022 -- maybe there's a use after unmap in there somewhere?
2023 #if !defined(__OpenBSD__)
2024 let unmap = c_munmap p l >> return ()
2026 let unmap = return ()
2028 fp <- FC.newForeignPtr p unmap
2033 where mmap_limit = 16*1024
2036 #if defined(__GLASGOW_HASKELL__)
2038 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
2040 getArgs :: IO [ByteString]
2042 alloca $ \ p_argc ->
2043 alloca $ \ p_argv -> do
2044 getProgArgv p_argc p_argv
2045 p <- fromIntegral `fmap` peek p_argc
2047 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
2050 -- ---------------------------------------------------------------------
2051 -- Internal utilities
2053 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
2054 -- silently truncate to 8 bits Chars > '\255'. They are provided as
2055 -- convenience for ByteString construction.
2056 w2c :: Word8 -> Char
2057 #if !defined(__GLASGOW_HASKELL__)
2058 w2c = chr . fromIntegral
2060 w2c = unsafeChr . fromIntegral
2064 c2w :: Char -> Word8
2065 c2w = fromIntegral . ord
2068 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2069 -- is padded with a null byte.
2070 mallocByteString :: Int -> IO (ForeignPtr Word8)
2071 mallocByteString l = do
2072 fp <- mallocForeignPtrArray (l+1)
2073 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2076 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2077 -- argument gives the final size of the ByteString. Unlike 'generate'
2078 -- the ByteString is not reallocated if the final size is less than the
2079 -- estimated size. Also, unlike 'generate' ByteString's created this way
2080 -- are managed on the Haskell heap.
2081 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2082 create l write_ptr = inlinePerformIO $ do
2083 fp <- mallocByteString (l+1)
2084 withForeignPtr fp $ \p -> write_ptr p
2086 {-# INLINE create #-}
2088 -- | Perform an operation with a temporary ByteString
2089 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2090 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2091 {-# INLINE withPtr #-}
2093 -- Common up near identical calls to `error' to reduce the number
2094 -- constant strings created when compiled:
2095 errorEmptyList :: String -> a
2096 errorEmptyList fun = moduleError fun "empty ByteString"
2097 {-# NOINLINE errorEmptyList #-}
2099 moduleError :: String -> String -> a
2100 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2101 {-# NOINLINE moduleError #-}
2103 -- Find from the end of the string using predicate
2104 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2105 STRICT2(findFromEndUntil)
2106 findFromEndUntil f ps@(PS x s l) =
2108 else if f (last ps) then l
2109 else findFromEndUntil f (PS x s (l-1))
2111 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2112 -- it exposes lots of things to further inlining
2114 {-# INLINE inlinePerformIO #-}
2115 inlinePerformIO :: IO a -> a
2116 #if defined(__GLASGOW_HASKELL__)
2117 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2119 inlinePerformIO = unsafePerformIO
2122 {-# INLINE newForeignFreePtr #-}
2123 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2124 #if defined(__GLASGOW_HASKELL__)
2125 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2127 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2130 -- ---------------------------------------------------------------------
2132 -- Standard C functions
2135 foreign import ccall unsafe "string.h strlen" c_strlen
2138 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2139 :: CInt -> IO (Ptr Word8)
2141 foreign import ccall unsafe "static stdlib.h free" c_free
2142 :: Ptr Word8 -> IO ()
2144 #if !defined(__GLASGOW_HASKELL__)
2145 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2146 :: FunPtr (Ptr Word8 -> IO ())
2149 foreign import ccall unsafe "string.h memset" memset
2150 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2152 foreign import ccall unsafe "string.h memchr" memchr
2153 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2155 foreign import ccall unsafe "string.h memcmp" memcmp
2156 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2158 foreign import ccall unsafe "string.h memcpy" memcpy
2159 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2161 -- ---------------------------------------------------------------------
2166 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2167 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2169 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2170 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2172 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2173 :: Ptr Word8 -> CInt -> Word8
2175 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2176 :: Ptr Word8 -> CInt -> Word8
2178 foreign import ccall unsafe "static fpstring.h count" c_count
2179 :: Ptr Word8 -> CInt -> Word8 -> CInt
2181 -- ---------------------------------------------------------------------
2185 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2186 :: Int -> Int -> IO (Ptr Word8)
2188 foreign import ccall unsafe "static unistd.h close" c_close
2191 # if !defined(__OpenBSD__)
2192 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2193 :: Ptr Word8 -> Int -> IO Int
2197 -- ---------------------------------------------------------------------
2198 -- Internal GHC Haskell magic
2200 #if defined(__GLASGOW_HASKELL__)
2201 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2202 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2204 foreign import ccall unsafe "__hscore_memcpy_src_off"
2205 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2208 -- ---------------------------------------------------------------------
2210 -- Functional array fusion for ByteStrings.
2212 -- From the Data Parallel Haskell project,
2213 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2216 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2217 -- the fact that no bottoms of this type are ever constructed; hence, we can
2218 -- assume @(_ :: NoAL) `seq` x = x@.
2222 -- | Special forms of loop arguments
2224 -- * These are common special cases for the three function arguments of gen
2225 -- and loop; we give them special names to make it easier to trigger RULES
2226 -- applying in the special cases represented by these arguments. The
2227 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2228 -- two simplifier phases.
2230 -- * In the case where the accumulator is not needed, it is better to always
2231 -- explicitly return a value `()', rather than just copy the input to the
2232 -- output, as the former gives GHC better local information.
2235 -- | Element function expressing a mapping only
2236 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2237 mapEFL f = \_ e -> (noAL, (Just $ f e))
2238 #if defined(__GLASGOW_HASKELL__)
2239 {-# INLINE [1] mapEFL #-}
2242 -- | Element function implementing a filter function only
2243 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2244 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2245 #if defined(__GLASGOW_HASKELL__)
2246 {-# INLINE [1] filterEFL #-}
2249 -- |Element function expressing a reduction only
2250 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2251 foldEFL f = \a e -> (f a e, Nothing)
2252 #if defined(__GLASGOW_HASKELL__)
2253 {-# INLINE [1] foldEFL #-}
2256 -- | A strict foldEFL.
2257 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2258 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2259 #if defined(__GLASGOW_HASKELL__)
2260 {-# INLINE [1] foldEFL' #-}
2263 -- | Element function expressing a prefix reduction only
2265 scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8)
2266 scanEFL f = \a e -> (f a e, Just a)
2267 #if defined(__GLASGOW_HASKELL__)
2268 {-# INLINE [1] scanEFL #-}
2271 -- | Element function implementing a map and fold
2273 mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> acc -> Word8 -> (acc, Maybe Word8)
2274 mapAccumEFL f = \a e -> case f a e of (a', e') -> (a', Just e')
2275 #if defined(__GLASGOW_HASKELL__)
2276 {-# INLINE [1] mapAccumEFL #-}
2279 -- | Element function implementing a map with index
2281 mapIndexEFL :: (Int -> Word8 -> Word8) -> Int -> Word8 -> (Int, Maybe Word8)
2282 mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i', Just $ f i e)
2283 #if defined(__GLASGOW_HASKELL__)
2284 {-# INLINE [1] mapIndexEFL #-}
2290 #if defined(__GLASGOW_HASKELL__)
2291 {-# INLINE [1] noAL #-}
2294 -- | Projection functions that are fusion friendly (as in, we determine when
2295 -- they are inlined)
2296 loopArr :: (acc, byteString) -> byteString
2297 loopArr (_, arr) = arr
2298 #if defined(__GLASGOW_HASKELL__)
2299 {-# INLINE [1] loopArr #-}
2302 loopAcc :: (acc, byteString) -> acc
2303 loopAcc (acc, _) = acc
2304 #if defined(__GLASGOW_HASKELL__)
2305 {-# INLINE [1] loopAcc #-}
2308 loopSndAcc :: ((acc1, acc2), byteString) -> (acc2, byteString)
2309 loopSndAcc ((_, acc), arr) = (acc, arr)
2310 #if defined(__GLASGOW_HASKELL__)
2311 {-# INLINE [1] loopSndAcc #-}
2314 ------------------------------------------------------------------------
2317 -- size, and then percentage.
2320 -- | Iteration over over ByteStrings
2321 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2322 -> acc -- ^ initial acc value
2323 -> ByteString -- ^ input ByteString
2324 -> (acc, ByteString)
2326 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2327 fp <- mallocByteString i
2328 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2329 (acc, i') <- go (a `plusPtr` s) p start
2331 then return (fp,i',acc) -- no realloc for map
2332 else do fp_ <- mallocByteString i' -- realloc
2333 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2336 return (acc, PS ptr 0 n)
2341 trans a_off ma_off acc
2342 | a_off >= i = return (acc, ma_off)
2344 x <- peekByteOff p a_off
2345 let (acc', oe) = f acc x
2346 ma_off' <- case oe of
2347 Nothing -> return ma_off
2348 Just e -> do pokeByteOff ma ma_off e
2350 trans (a_off+1) ma_off' acc'
2352 #if defined(__GLASGOW_HASKELL__)
2353 {-# INLINE [1] loopU #-}
2358 -- |Fuse to flat loop functions
2359 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2360 -> (a2 -> Word8 -> (a2, Maybe Word8))
2363 -> ((a1, a2), Maybe Word8)
2364 fuseEFL f g (acc1, acc2) e1 =
2366 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2369 (acc2', res) -> ((acc1', acc2'), res)
2373 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2374 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2375 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2377 "loopArr/loopSndAcc" forall x.
2378 loopArr (loopSndAcc x) = loopArr x
2380 "seq/NoAL" forall (u::NoAL) e.