Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
   2 -- |
   3 -- Module      : Data.ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --               Array fusion code:
  10 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  11 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  12 --
  13 -- License     : BSD-style
  14 --
  15 -- Maintainer  : dons@cse.unsw.edu.au
  16 -- Stability   : experimental
  17 -- Portability : portable
  18 --
  19 -- A time and space-efficient implementation of byte vectors using
  20 -- packed Word8 arrays, suitable for high performance use, both in terms
  21 -- of large data quantities, or high speed requirements. Byte vectors
  22 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  23 -- and can be passed between C and Haskell with little effort.
  24 --
  25 -- This module is intended to be imported @qualified@, to avoid name
  26 -- clashes with "Prelude" functions.  eg.
  27 --
  28 -- > import qualified Data.ByteString as B
  29 --
  30 -- Original GHC implementation by Bryan O\'Sullivan.
  31 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
  32 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
  33 -- Polished and extended by Don Stewart.
  34 --
  35
  36 module Data.ByteString (
  37
  38         -- * The @ByteString@ type
  39         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
  40
  41         -- * Introducing and eliminating 'ByteString's
  42         empty,                  -- :: ByteString
  43         singleton,              -- :: Word8   -> ByteString
  44         pack,                   -- :: [Word8] -> ByteString
  45         unpack,                 -- :: ByteString -> [Word8]
  46
  47         -- * Basic interface
  48         cons,                   -- :: Word8 -> ByteString -> ByteString
  49         snoc,                   -- :: ByteString -> Word8 -> ByteString
  50         append,                 -- :: ByteString -> ByteString -> ByteString
  51         head,                   -- :: ByteString -> Word8
  52         last,                   -- :: ByteString -> Word8
  53         tail,                   -- :: ByteString -> ByteString
  54         init,                   -- :: ByteString -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57
  58         -- * Transformating ByteStrings
  59         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  60         reverse,                -- :: ByteString -> ByteString
  61         intersperse,            -- :: Word8 -> ByteString -> ByteString
  62         transpose,              -- :: [ByteString] -> [ByteString]
  63
  64         -- * Reducing 'ByteString's (folds)
  65         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  66         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  67         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  68         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  69
  70         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  71         foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  72         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  73         foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74
  75         -- ** Special folds
  76         concat,                 -- :: [ByteString] -> ByteString
  77         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  78         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  79         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  80         maximum,                -- :: ByteString -> Word8
  81         minimum,                -- :: ByteString -> Word8
  82
  83         -- * Building ByteStrings
  84         -- ** Scans
  85         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  86         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  87         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  88         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  89
  90         -- ** Accumulating maps
  91         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  92         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  93         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  94
  95         -- ** Unfolding ByteStrings
  96         replicate,              -- :: Int -> Word8 -> ByteString
  97         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  98         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  99
 100         -- * Substrings
 101
 102         -- ** Breaking strings
 103         take,                   -- :: Int -> ByteString -> ByteString
 104         drop,                   -- :: Int -> ByteString -> ByteString
 105         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 106         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 107         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 108         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 109         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 110         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 111         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         group,                  -- :: ByteString -> [ByteString]
 113         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 114         inits,                  -- :: ByteString -> [ByteString]
 115         tails,                  -- :: ByteString -> [ByteString]
 116
 117         -- ** Breaking into many substrings
 118         split,                  -- :: Word8 -> ByteString -> [ByteString]
 119         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 120
 121         -- ** Joining strings
 122         join,                   -- :: ByteString -> [ByteString] -> ByteString
 123
 124         -- * Predicates
 125         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 126         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 127
 128         -- ** Search for arbitrary substrings
 129         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 130         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 131         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 132
 133         -- * Searching ByteStrings
 134
 135         -- ** Searching by equality
 136         -- | These functions use memchr(3) to efficiently search the ByteString
 137         elem,                   -- :: Word8 -> ByteString -> Bool
 138         notElem,                -- :: Word8 -> ByteString -> Bool
 139
 140         -- ** Searching with a predicate
 141         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 142         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 143 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 144
 145         -- * Indexing ByteStrings
 146         index,                  -- :: ByteString -> Int -> Word8
 147         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 148         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 149         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
 150         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 151         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 152         count,                  -- :: Word8 -> ByteString -> Int
 153
 154         -- * Zipping and unzipping ByteStrings
 155         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 156         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 157         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 158
 159         -- * Ordered ByteStrings
 160         sort,                   -- :: ByteString -> ByteString
 161
 162         -- * Low level CString conversions
 163
 164         -- ** Packing CStrings and pointers
 165         packCString,            -- :: CString -> ByteString
 166         packCStringLen,         -- :: CString -> ByteString
 167         packMallocCString,      -- :: CString -> ByteString
 168
 169         -- ** Using ByteStrings as CStrings
 170         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 171         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
 172
 173         -- ** Copying ByteStrings
 174         -- | These functions perform memcpy(3) operations
 175         copy,                   -- :: ByteString -> ByteString
 176         copyCString,            -- :: CString -> IO ByteString
 177         copyCStringLen,         -- :: CStringLen -> IO ByteString
 178
 179         -- * I\/O with 'ByteString's
 180
 181         -- ** Standard input and output
 182         getLine,                -- :: IO ByteString
 183         getContents,            -- :: IO ByteString
 184         putStr,                 -- :: ByteString -> IO ()
 185         putStrLn,               -- :: ByteString -> IO ()
 186         interact,               -- :: (ByteString -> ByteString) -> IO ()
 187
 188         -- ** Files
 189         readFile,               -- :: FilePath -> IO ByteString
 190         writeFile,              -- :: FilePath -> ByteString -> IO ()
 191         appendFile,             -- :: FilePath -> ByteString -> IO ()
 192 --      mmapFile,               -- :: FilePath -> IO ByteString
 193
 194         -- ** I\/O with Handles
 195         hGetLine,               -- :: Handle -> IO ByteString
 196         hGetContents,           -- :: Handle -> IO ByteString
 197         hGet,                   -- :: Handle -> Int -> IO ByteString
 198         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 199         hPut,                   -- :: Handle -> ByteString -> IO ()
 200         hPutStr,                -- :: Handle -> ByteString -> IO ()
 201         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
 202
 203 #if defined(__GLASGOW_HASKELL__)
 204         -- * Fusion utilities
 205         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 206         lengthU, maximumU, minimumU
 207 #endif
 208
 209   ) where
 210
 211 import qualified Prelude as P
 212 import Prelude hiding           (reverse,head,tail,last,init,null
 213                                 ,length,map,lines,foldl,foldr,unlines
 214                                 ,concat,any,take,drop,splitAt,takeWhile
 215                                 ,dropWhile,span,break,elem,filter,maximum
 216                                 ,minimum,all,concatMap,foldl1,foldr1
 217                                 ,scanl,scanl1,scanr,scanr1
 218                                 ,readFile,writeFile,appendFile,replicate
 219                                 ,getContents,getLine,putStr,putStrLn,interact
 220                                 ,zip,zipWith,unzip,notElem)
 221
 222 import Data.ByteString.Base
 223 import Data.ByteString.Fusion
 224
 225 import qualified Data.List as List
 226
 227 import Data.Word                (Word8)
 228 import Data.Maybe               (listToMaybe)
 229 import Data.Array               (listArray)
 230 import qualified Data.Array as Array ((!))
 231
 232 -- Control.Exception.bracket not available in yhc or nhc
 233 import Control.Exception        (bracket, assert)
 234 import qualified Control.Exception as Exception
 235 import Control.Monad            (when)
 236
 237 import Foreign.C.String         (CString, CStringLen)
 238 import Foreign.C.Types          (CSize)
 239 import Foreign.ForeignPtr
 240 import Foreign.Marshal.Array
 241 import Foreign.Ptr
 242 import Foreign.Storable         (Storable(..))
 243
 244 -- hGetBuf and hPutBuf not available in yhc or nhc
 245 import System.IO                (stdin,stdout,hClose,hFileSize
 246                                 ,hGetBuf,hPutBuf,openBinaryFile
 247                                 ,Handle,IOMode(..))
 248
 249 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
 250
 251 #if !defined(__GLASGOW_HASKELL__)
 252 import System.IO.Unsafe
 253 import qualified System.Environment
 254 import qualified System.IO      (hGetLine)
 255 #endif
 256
 257 #if defined(__GLASGOW_HASKELL__)
 258
 259 import System.IO                (hGetBufNonBlocking)
 260 import System.IO.Error          (isEOFError)
 261
 262 import GHC.Handle
 263 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
 264 import GHC.Base                 (build)
 265 import GHC.Word hiding (Word8)
 266 import GHC.Ptr                  (Ptr(..))
 267 import GHC.ST                   (ST(..))
 268 import GHC.IOBase
 269
 270 #endif
 271
 272 -- -----------------------------------------------------------------------------
 273 --
 274 -- Useful macros, until we have bang patterns
 275 --
 276
 277 #define STRICT1(f) f a | a `seq` False = undefined
 278 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 279 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 280 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 281 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 282
 283 -- -----------------------------------------------------------------------------
 284
 285 instance Eq  ByteString
 286     where (==)    = eq
 287
 288 instance Ord ByteString
 289     where compare = compareBytes
 290
 291 instance Monoid ByteString where
 292     mempty  = empty
 293     mappend = append
 294     mconcat = concat
 295
 296 {-
 297 instance Arbitrary PackedString where
 298     arbitrary = P.pack `fmap` arbitrary
 299     coarbitrary s = coarbitrary (P.unpack s)
 300 -}
 301
 302 -- | /O(n)/ Equality on the 'ByteString' type.
 303 eq :: ByteString -> ByteString -> Bool
 304 eq a@(PS p s l) b@(PS p' s' l')
 305     | l /= l'            = False    -- short cut on length
 306     | p == p' && s == s' = True     -- short cut for the same string
 307     | otherwise          = compareBytes a b == EQ
 308 {-# INLINE eq #-}
 309
 310 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 311 compareBytes :: ByteString -> ByteString -> Ordering
 312 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 313     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 314     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 315     | otherwise                         = inlinePerformIO $
 316         withForeignPtr x1 $ \p1 ->
 317         withForeignPtr x2 $ \p2 -> do
 318             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 319             return $! case i `compare` 0 of
 320                         EQ  -> l1 `compare` l2
 321                         x   -> x
 322 {-# INLINE compareBytes #-}
 323
 324 {-
 325 --
 326 -- About 4x slower over 32M
 327 --
 328 compareBytes :: ByteString -> ByteString -> Ordering
 329 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 330     withForeignPtr fp1 $ \p1 ->
 331         withForeignPtr fp2 $ \p2 ->
 332             cmp (p1 `plusPtr` off1)
 333                 (p2 `plusPtr` off2) 0 len1 len2
 334
 335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 336 STRICT5(cmp)
 337 cmp p1 p2 n len1 len2
 338       | n == len1 = if n == len2 then return EQ else return LT
 339       | n == len2 = return GT
 340       | otherwise = do
 341           (a :: Word8) <- peekByteOff p1 n
 342           (b :: Word8) <- peekByteOff p2 n
 343           case a `compare` b of
 344                 EQ -> cmp p1 p2 (n+1) len1 len2
 345                 LT -> return LT
 346                 GT -> return GT
 347 {-# INLINE compareBytes #-}
 348 -}
 349
 350 -- -----------------------------------------------------------------------------
 351 -- Introducing and eliminating 'ByteString's
 352
 353 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 354 singleton :: Word8 -> ByteString
 355 singleton c = unsafeCreate 1 $ \p -> poke p c
 356 {-# INLINE [1] singleton #-}
 357
 358 --
 359 -- XXX The unsafePerformIO is critical!
 360 --
 361 -- Otherwise:
 362 --
 363 --  singleton 255 `compare` singleton 127
 364 --
 365 -- is compiled to:
 366 --
 367 --  case mallocByteString 2 of
 368 --      ForeignPtr f internals ->
 369 --           case writeWord8OffAddr# f 0 255 of _ ->
 370 --           case writeWord8OffAddr# f 0 127 of _ ->
 371 --           case eqAddr# f f of
 372 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 373 --                                        (GHC.Prim.plusAddr# f 0)
 374 --
 375 --
 376
 377 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 378 --
 379 -- For applications with large numbers of string literals, pack can be a
 380 -- bottleneck. In such cases, consider using packAddress (GHC only).
 381 pack :: [Word8] -> ByteString
 382
 383 #if !defined(__GLASGOW_HASKELL__)
 384
 385 pack str = unsafeCreate (P.length str) $ \p -> go p str
 386     where
 387         go _ []     = return ()
 388         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 389
 390 #else /* hack away */
 391
 392 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 393     where
 394         go _ _ []        = return ()
 395         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 396
 397         writeByte p i c = ST $ \s# ->
 398             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 399
 400 #endif
 401
 402 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 403 unpack :: ByteString -> [Word8]
 404
 405 #if !defined(__GLASGOW_HASKELL__)
 406
 407 unpack (PS _  _ 0) = []
 408 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 409         go (p `plusPtr` s) (l - 1) []
 410     where
 411         STRICT3(go)
 412         go p 0 acc = peek p          >>= \e -> return (e : acc)
 413         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 414 {-# INLINE unpack #-}
 415
 416 #else
 417
 418 unpack ps = build (unpackFoldr ps)
 419 {-# INLINE unpack #-}
 420
 421 --
 422 -- critical this isn't strict in the acc
 423 -- as it will break in the presence of list fusion. this is a known
 424 -- issue with seq and build/foldr rewrite rules, which rely on lazy
 425 -- demanding to avoid bottoms in the list.
 426 --
 427 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 428 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 429     let loop q n    _   | q `seq` n `seq` False = undefined -- n.b.
 430         loop _ (-1) acc = return acc
 431         loop q n    acc = do
 432            a <- peekByteOff q n
 433            loop q (n-1) (a `f` acc)
 434     loop (p `plusPtr` off) (len-1) ch
 435 {-# INLINE [0] unpackFoldr #-}
 436
 437 unpackList :: ByteString -> [Word8]
 438 unpackList (PS fp off len) = withPtr fp $ \p -> do
 439     let STRICT3(loop)
 440         loop _ (-1) acc = return acc
 441         loop q n acc = do
 442            a <- peekByteOff q n
 443            loop q (n-1) (a : acc)
 444     loop (p `plusPtr` off) (len-1) []
 445
 446 {-# RULES
 447     "FPS unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 448  #-}
 449
 450 #endif
 451
 452 -- ---------------------------------------------------------------------
 453 -- Basic interface
 454
 455 -- | /O(1)/ Test whether a ByteString is empty.
 456 null :: ByteString -> Bool
 457 null (PS _ _ l) = assert (l >= 0) $ l <= 0
 458 {-# INLINE null #-}
 459
 460 -- ---------------------------------------------------------------------
 461 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 462 length :: ByteString -> Int
 463 length (PS _ _ l) = assert (l >= 0) $ l
 464
 465 --
 466 -- length/loop fusion. When taking the length of any fuseable loop,
 467 -- rewrite it as a foldl', and thus avoid allocating the result buffer
 468 -- worth around 10% in speed testing.
 469 --
 470
 471 #if defined(__GLASGOW_HASKELL__)
 472 {-# INLINE [1] length #-}
 473 #endif
 474
 475 lengthU :: ByteString -> Int
 476 lengthU = foldl' (const . (+1)) (0::Int)
 477 {-# INLINE lengthU #-}
 478
 479 {-# RULES
 480
 481 -- v2 fusion
 482 "FPS length/loop" forall loop s .
 483   length  (loopArr (loopWrapper loop s)) =
 484   lengthU (loopArr (loopWrapper loop s))
 485
 486   #-}
 487
 488 ------------------------------------------------------------------------
 489
 490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 491 -- complexity, as it requires a memcpy.
 492 cons :: Word8 -> ByteString -> ByteString
 493 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 494         poke p c
 495         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 496 {-# INLINE cons #-}
 497
 498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 499 snoc :: ByteString -> Word8 -> ByteString
 500 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 501         memcpy p (f `plusPtr` s) (fromIntegral l)
 502         poke (p `plusPtr` l) c
 503 {-# INLINE snoc #-}
 504
 505 -- todo fuse
 506
 507 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 508 -- An exception will be thrown in the case of an empty ByteString.
 509 head :: ByteString -> Word8
 510 head (PS x s l)
 511     | l <= 0    = errorEmptyList "head"
 512     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 513 {-# INLINE head #-}
 514
 515 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 516 -- An exception will be thrown in the case of an empty ByteString.
 517 tail :: ByteString -> ByteString
 518 tail (PS p s l)
 519     | l <= 0    = errorEmptyList "tail"
 520     | otherwise = PS p (s+1) (l-1)
 521 {-# INLINE tail #-}
 522
 523 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 524 -- An exception will be thrown in the case of an empty ByteString.
 525 last :: ByteString -> Word8
 526 last ps@(PS x s l)
 527     | null ps   = errorEmptyList "last"
 528     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 529 {-# INLINE last #-}
 530
 531 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 532 -- An exception will be thrown in the case of an empty ByteString.
 533 init :: ByteString -> ByteString
 534 init ps@(PS p s l)
 535     | null ps   = errorEmptyList "init"
 536     | otherwise = PS p s (l-1)
 537 {-# INLINE init #-}
 538
 539 -- | /O(n)/ Append two ByteStrings
 540 append :: ByteString -> ByteString -> ByteString
 541 append xs ys | null xs   = ys
 542              | null ys   = xs
 543              | otherwise = concat [xs,ys]
 544 {-# INLINE append #-}
 545
 546 -- ---------------------------------------------------------------------
 547 -- Transformations
 548
 549 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 550 -- element of @xs@. This function is subject to array fusion.
 551 map :: (Word8 -> Word8) -> ByteString -> ByteString
 552 #if defined(LOOPU_FUSION)
 553 map f = loopArr . loopU (mapEFL f) NoAcc
 554 #elif defined(LOOPUP_FUSION)
 555 map f = loopArr . loopUp (mapEFL f) NoAcc
 556 #elif defined(LOOPNOACC_FUSION)
 557 map f = loopArr . loopNoAcc (mapEFL f)
 558 #else
 559 map f = loopArr . loopMap f
 560 #endif
 561 {-# INLINE map #-}
 562
 563 {-
 564 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 565 -- slightly faster for one-shot cases.
 566 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 567 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
 568     create len $ map_ 0 (a `plusPtr` s)
 569   where
 570     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 571     STRICT3(map_)
 572     map_ n p1 p2
 573        | n >= len = return ()
 574        | otherwise = do
 575             x <- peekByteOff p1 n
 576             pokeByteOff p2 n (f x)
 577             map_ (n+1) p1 p2
 578 {-# INLINE map' #-}
 579 -}
 580
 581 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 582 reverse :: ByteString -> ByteString
 583 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 584         c_reverse p (f `plusPtr` s) (fromIntegral l)
 585
 586 -- todo, fuseable version
 587
 588 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 589 -- 'ByteString' and \`intersperses\' that byte between the elements of
 590 -- the 'ByteString'.  It is analogous to the intersperse function on
 591 -- Lists.
 592 intersperse :: Word8 -> ByteString -> ByteString
 593 intersperse c ps@(PS x s l)
 594     | length ps < 2  = ps
 595     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
 596         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 597
 598 {-
 599 intersperse c = pack . List.intersperse c . unpack
 600 -}
 601
 602 -- | The 'transpose' function transposes the rows and columns of its
 603 -- 'ByteString' argument.
 604 transpose :: [ByteString] -> [ByteString]
 605 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 606
 607 -- ---------------------------------------------------------------------
 608 -- Reducing 'ByteString's
 609
 610 -- | 'foldl', applied to a binary operator, a starting value (typically
 611 -- the left-identity of the operator), and a ByteString, reduces the
 612 -- ByteString using the binary operator, from left to right.
 613 -- This function is subject to array fusion.
 614 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 615 #if !defined(LOOPU_FUSION)
 616 foldl f z = loopAcc . loopUp (foldEFL f) z
 617 #else
 618 foldl f z = loopAcc . loopU (foldEFL f) z
 619 #endif
 620 {-# INLINE foldl #-}
 621
 622 {-
 623 --
 624 -- About twice as fast with 6.4.1, but not fuseable
 625 -- A simple fold . map is enough to make it worth while.
 626 --
 627 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 628         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 629     where
 630         STRICT3(lgo)
 631         lgo z p q | p == q    = return z
 632                   | otherwise = do c <- peek p
 633                                    lgo (f z c) (p `plusPtr` 1) q
 634 -}
 635
 636 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 637 -- Though actually foldl is also strict in the accumulator.
 638 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 639 foldl' = foldl
 640 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
 641 {-# INLINE foldl' #-}
 642
 643 -- | 'foldr', applied to a binary operator, a starting value
 644 -- (typically the right-identity of the operator), and a ByteString,
 645 -- reduces the ByteString using the binary operator, from right to left.
 646 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 647 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
 648 {-# INLINE foldr #-}
 649
 650 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
 651 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
 652 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 653         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
 654     where
 655         STRICT3(go)
 656         go z p q | p == q    = return z
 657                  | otherwise = do c  <- peek p
 658                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
 659 {-# INLINE [1] foldr' #-}
 660
 661 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 662 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 663 -- This function is subject to array fusion.
 664 -- An exception will be thrown in the case of an empty ByteString.
 665 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 666 foldl1 f ps
 667     | null ps   = errorEmptyList "foldl1"
 668     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 669 {-# INLINE foldl1 #-}
 670
 671 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 672 -- An exception will be thrown in the case of an empty ByteString.
 673 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 674 foldl1' f ps
 675     | null ps   = errorEmptyList "foldl1'"
 676     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 677 {-# INLINE foldl1' #-}
 678
 679 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 680 -- and thus must be applied to non-empty 'ByteString's
 681 -- An exception will be thrown in the case of an empty ByteString.
 682 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 683 foldr1 f ps
 684     | null ps        = errorEmptyList "foldr1"
 685     | otherwise      = foldr f (last ps) (init ps)
 686 {-# INLINE foldr1 #-}
 687
 688 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
 689 -- accumulator.
 690 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 691 foldr1' f ps
 692     | null ps        = errorEmptyList "foldr1"
 693     | otherwise      = foldr' f (last ps) (init ps)
 694 {-# INLINE [1] foldr1' #-}
 695
 696 -- ---------------------------------------------------------------------
 697 -- Special folds
 698
 699 -- | /O(n)/ Concatenate a list of ByteStrings.
 700 concat :: [ByteString] -> ByteString
 701 concat []     = empty
 702 concat [ps]   = ps
 703 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
 704   where len = P.sum . P.map length $ xs
 705         STRICT2(go)
 706         go []            _   = return ()
 707         go (PS p s l:ps) ptr = do
 708                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 709                 go ps (ptr `plusPtr` l)
 710
 711 -- | Map a function over a 'ByteString' and concatenate the results
 712 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 713 concatMap f = concat . foldr ((:) . f) []
 714
 715 -- foldr (append . f) empty
 716
 717 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 718 -- any element of the 'ByteString' satisfies the predicate.
 719 any :: (Word8 -> Bool) -> ByteString -> Bool
 720 any _ (PS _ _ 0) = False
 721 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 722         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 723     where
 724         STRICT2(go)
 725         go p q | p == q    = return False
 726                | otherwise = do c <- peek p
 727                                 if f c then return True
 728                                        else go (p `plusPtr` 1) q
 729
 730 -- todo fuse
 731
 732 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 733 -- if all elements of the 'ByteString' satisfy the predicate.
 734 all :: (Word8 -> Bool) -> ByteString -> Bool
 735 all _ (PS _ _ 0) = True
 736 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 737         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 738     where
 739         STRICT2(go)
 740         go p q | p == q     = return True  -- end of list
 741                | otherwise  = do c <- peek p
 742                                  if f c
 743                                     then go (p `plusPtr` 1) q
 744                                     else return False
 745
 746 ------------------------------------------------------------------------
 747
 748 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 749 -- This function will fuse.
 750 -- An exception will be thrown in the case of an empty ByteString.
 751 maximum :: ByteString -> Word8
 752 maximum xs@(PS x s l)
 753     | null xs   = errorEmptyList "maximum"
 754     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 755                       c_maximum (p `plusPtr` s) (fromIntegral l)
 756
 757 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 758 -- This function will fuse.
 759 -- An exception will be thrown in the case of an empty ByteString.
 760 minimum :: ByteString -> Word8
 761 minimum xs@(PS x s l)
 762     | null xs   = errorEmptyList "minimum"
 763     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 764                       c_minimum (p `plusPtr` s) (fromIntegral l)
 765
 766 --
 767 -- minimum/maximum/loop fusion. As for length (and other folds), when we
 768 -- see we're applied after a fuseable op, switch from using the C
 769 -- version, to the fuseable version. The result should then avoid
 770 -- allocating a buffer.
 771 --
 772
 773 #if defined(__GLASGOW_HASKELL__)
 774 {-# INLINE [1] minimum #-}
 775 {-# INLINE [1] maximum #-}
 776 #endif
 777
 778 maximumU :: ByteString -> Word8
 779 maximumU = foldl1' max
 780 {-# INLINE maximumU #-}
 781
 782 minimumU :: ByteString -> Word8
 783 minimumU = foldl1' min
 784 {-# INLINE minimumU #-}
 785
 786 {-# RULES
 787
 788 "FPS minimum/loop" forall loop s .
 789   minimum  (loopArr (loopWrapper loop s)) =
 790   minimumU (loopArr (loopWrapper loop s))
 791
 792 "FPS maximum/loop" forall loop s .
 793   maximum  (loopArr (loopWrapper loop s)) =
 794   maximumU (loopArr (loopWrapper loop s))
 795
 796   #-}
 797
 798 ------------------------------------------------------------------------
 799
 800 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 801 -- 'foldl'; it applies a function to each element of a ByteString,
 802 -- passing an accumulating parameter from left to right, and returning a
 803 -- final value of this accumulator together with the new list.
 804 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 805 #if !defined(LOOPU_FUSION)
 806 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
 807 #else
 808 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
 809 #endif
 810 {-# INLINE mapAccumL #-}
 811
 812 -- | The 'mapAccumR' function behaves like a combination of 'map' and
 813 -- 'foldr'; it applies a function to each element of a ByteString,
 814 -- passing an accumulating parameter from right to left, and returning a
 815 -- final value of this accumulator together with the new ByteString.
 816 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 817 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
 818 {-# INLINE mapAccumR #-}
 819
 820 -- | /O(n)/ map Word8 functions, provided with the index at each position
 821 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 822 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
 823 {-# INLINE mapIndexed #-}
 824
 825 -- ---------------------------------------------------------------------
 826 -- Building ByteStrings
 827
 828 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 829 -- reduced values from the left. This function will fuse.
 830 --
 831 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 832 --
 833 -- Note that
 834 --
 835 -- > last (scanl f z xs) == foldl f z xs.
 836 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 837 #if !defined(LOOPU_FUSION)
 838 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
 839 #else
 840 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
 841 #endif
 842
 843     -- n.b. haskell's List scan returns a list one bigger than the
 844     -- input, so we need to snoc here to get some extra space, however,
 845     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
 846 {-# INLINE scanl #-}
 847
 848 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 849 -- This function will fuse.
 850 --
 851 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 852 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 853 scanl1 f ps
 854     | null ps   = empty
 855     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 856 {-# INLINE scanl1 #-}
 857
 858 -- | scanr is the right-to-left dual of scanl.
 859 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 860 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
 861 {-# INLINE scanr #-}
 862
 863 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
 864 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 865 scanr1 f ps
 866     | null ps   = empty
 867     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
 868 {-# INLINE scanr1 #-}
 869
 870 -- ---------------------------------------------------------------------
 871 -- Unfolds and replicates
 872
 873 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 874 -- the value of every element. The following holds:
 875 --
 876 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 877 --
 878 -- This implemenation uses @memset(3)@
 879 replicate :: Int -> Word8 -> ByteString
 880 replicate w c
 881     | w <= 0    = empty
 882     | otherwise = unsafeCreate w $ \ptr ->
 883                       memset ptr c (fromIntegral w) >> return ()
 884
 885 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
 886 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
 887 -- ByteString from a seed value.  The function takes the element and
 888 -- returns 'Nothing' if it is done producing the ByteString or returns
 889 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
 890 -- and @b@ is the seed value for further production.
 891 --
 892 -- Examples:
 893 --
 894 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
 895 -- > == pack [0, 1, 2, 3, 4, 5]
 896 --
 897 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 898 unfoldr f = concat . unfoldChunk 32 64
 899   where unfoldChunk n n' x =
 900           case unfoldrN n f x of
 901             (s, Nothing) -> s : []
 902             (s, Just x') -> s : unfoldChunk n' (n+n') x'
 903
 904 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
 905 -- value.  However, the length of the result is limited by the first
 906 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
 907 -- when the maximum length of the result is known.
 908 --
 909 -- The following equation relates 'unfoldrN' and 'unfoldr':
 910 --
 911 -- > unfoldrN n f s == take n (unfoldr f s)
 912 --
 913 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 914 unfoldrN i f x0
 915     | i < 0     = (empty, Just x0)
 916     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
 917   where STRICT3(go)
 918         go p x n =
 919           case f x of
 920             Nothing      -> return (0, n, Nothing)
 921             Just (w,x')
 922              | n == i    -> return (0, n, Just x)
 923              | otherwise -> do poke p w
 924                                go (p `plusPtr` 1) x' (n+1)
 925
 926 -- ---------------------------------------------------------------------
 927 -- Substrings
 928
 929 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 930 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 931 take :: Int -> ByteString -> ByteString
 932 take n ps@(PS x s l)
 933     | n <= 0    = empty
 934     | n >= l    = ps
 935     | otherwise = PS x s n
 936 {-# INLINE take #-}
 937
 938 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 939 -- elements, or @[]@ if @n > 'length' xs@.
 940 drop  :: Int -> ByteString -> ByteString
 941 drop n ps@(PS x s l)
 942     | n <= 0    = ps
 943     | n >= l    = empty
 944     | otherwise = PS x (s+n) (l-n)
 945 {-# INLINE drop #-}
 946
 947 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 948 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 949 splitAt n ps@(PS x s l)
 950     | n <= 0    = (empty, ps)
 951     | n >= l    = (ps, empty)
 952     | otherwise = (PS x s n, PS x (s+n) (l-n))
 953 {-# INLINE splitAt #-}
 954
 955 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 956 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 957 -- satisfy @p@.
 958 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 959 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 960 {-# INLINE takeWhile #-}
 961
 962 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 963 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 964 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
 965 {-# INLINE dropWhile #-}
 966
 967 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 968 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 969 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
 970 {-# INLINE [1] break #-}
 971
 972 {-# RULES
 973 "FPS specialise break (x==)" forall x.
 974     break ((==) x) = breakByte x
 975   #-}
 976
 977 #if __GLASGOW_HASKELL__ >= 605
 978 {-# RULES
 979 "FPS specialise break (==x)" forall x.
 980     break (==x) = breakByte x
 981   #-}
 982 #endif
 983
 984 -- | 'breakByte' breaks its ByteString argument at the first occurence
 985 -- of the specified byte. It is more efficient than 'break' as it is
 986 -- implemented with @memchr(3)@. I.e.
 987 --
 988 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 989 --
 990 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 991 breakByte c p = case elemIndex c p of
 992     Nothing -> (p,empty)
 993     Just n  -> (unsafeTake n p, unsafeDrop n p)
 994 {-# INLINE breakByte #-}
 995
 996 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
 997 --
 998 -- breakEnd p == spanEnd (not.p)
 999 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1000 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
1001
1002 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1003 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1004 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1005 span p ps = break (not . p) ps
1006 {-# INLINE [1] span #-}
1007
1008 -- | 'spanByte' breaks its ByteString argument at the first
1009 -- occurence of a byte other than its argument. It is more efficient
1010 -- than 'span (==)'
1011 --
1012 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
1013 --
1014 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1015 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1016     go (p `plusPtr` s) 0
1017   where
1018     STRICT2(go)
1019     go p i | i >= l    = return (ps, empty)
1020            | otherwise = do c' <- peekByteOff p i
1021                             if c /= c'
1022                                 then return (unsafeTake i ps, unsafeDrop i ps)
1023                                 else go p (i+1)
1024 {-# INLINE spanByte #-}
1025
1026 {-# RULES
1027 "FPS specialise span (x==)" forall x.
1028     span ((==) x) = spanByte x
1029   #-}
1030
1031 #if __GLASGOW_HASKELL__ >= 605
1032 {-# RULES
1033 "FPS specialise span (==x)" forall x.
1034     span (==x) = spanByte x
1035   #-}
1036 #endif
1037
1038 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1039 -- We have
1040 --
1041 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1042 --
1043 -- and
1044 --
1045 -- > spanEnd (not . isSpace) ps
1046 -- >    ==
1047 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1048 --
1049 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1050 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1051
1052 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1053 -- separators, where the predicate returns True for a separator element.
1054 -- The resulting components do not contain the separators.  Two adjacent
1055 -- separators result in an empty component in the output.  eg.
1056 --
1057 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1058 -- > splitWith (=='a') []        == []
1059 --
1060 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1061
1062 #if defined(__GLASGOW_HASKELL__)
1063 splitWith _pred (PS _  _   0) = []
1064 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1065   where pred# c# = pred_ (W8# c#)
1066
1067         STRICT4(splitWith0)
1068         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1069             splitLoop pred' p 0 off' len' fp'
1070
1071         splitLoop :: (Word# -> Bool)
1072                   -> Ptr Word8
1073                   -> Int -> Int -> Int
1074                   -> ForeignPtr Word8
1075                   -> IO [ByteString]
1076
1077         splitLoop pred' p idx' off' len' fp'
1078             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1079             | idx' >= len'  = return [PS fp' off' idx']
1080             | otherwise = do
1081                 w <- peekElemOff p (off'+idx')
1082                 if pred' (case w of W8# w# -> w#)
1083                    then return (PS fp' off' idx' :
1084                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1085                    else splitLoop pred' p (idx'+1) off' len' fp'
1086 {-# INLINE splitWith #-}
1087
1088 #else
1089 splitWith _ (PS _ _ 0) = []
1090 splitWith p ps = loop p ps
1091     where
1092         STRICT2(loop)
1093         loop q qs = if null rest then [chunk]
1094                                  else chunk : loop q (unsafeTail rest)
1095             where (chunk,rest) = break q qs
1096 #endif
1097
1098 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1099 -- argument, consuming the delimiter. I.e.
1100 --
1101 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1102 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
1103 -- > split 'x'  "x"          == ["",""]
1104 --
1105 -- and
1106 --
1107 -- > join [c] . split c == id
1108 -- > split == splitWith . (==)
1109 --
1110 -- As for all splitting functions in this library, this function does
1111 -- not copy the substrings, it just constructs new 'ByteStrings' that
1112 -- are slices of the original.
1113 --
1114 split :: Word8 -> ByteString -> [ByteString]
1115 split _ (PS _ _ 0) = []
1116 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1117     let ptr = p `plusPtr` s
1118
1119         STRICT1(loop)
1120         loop n =
1121             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1122                                            w (fromIntegral (l-n))
1123             in if q == nullPtr
1124                 then [PS x (s+n) (l-n)]
1125                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1126
1127     return (loop 0)
1128 {-# INLINE split #-}
1129
1130 {-
1131 -- slower. but stays inside Haskell.
1132 split _ (PS _  _   0) = []
1133 split (W8# w#) (PS fp off len) = splitWith' off len fp
1134     where
1135         splitWith' off' len' fp' = withPtr fp $ \p ->
1136             splitLoop p 0 off' len' fp'
1137
1138         splitLoop :: Ptr Word8
1139                   -> Int -> Int -> Int
1140                   -> ForeignPtr Word8
1141                   -> IO [ByteString]
1142
1143         STRICT5(splitLoop)
1144         splitLoop p idx' off' len' fp'
1145             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1146             | idx' >= len'  = return [PS fp' off' idx']
1147             | otherwise = do
1148                 (W8# x#) <- peekElemOff p (off'+idx')
1149                 if word2Int# w# ==# word2Int# x#
1150                    then return (PS fp' off' idx' :
1151                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1152                    else splitLoop p (idx'+1) off' len' fp'
1153 -}
1154
1155 {-
1156 -- | Like 'splitWith', except that sequences of adjacent separators are
1157 -- treated as a single separator. eg.
1158 --
1159 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1160 --
1161 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1162 tokens f = P.filter (not.null) . splitWith f
1163 {-# INLINE tokens #-}
1164 -}
1165
1166 -- | The 'group' function takes a ByteString and returns a list of
1167 -- ByteStrings such that the concatenation of the result is equal to the
1168 -- argument.  Moreover, each sublist in the result contains only equal
1169 -- elements.  For example,
1170 --
1171 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1172 --
1173 -- It is a special case of 'groupBy', which allows the programmer to
1174 -- supply their own equality test. It is about 40% faster than
1175 -- /groupBy (==)/
1176 group :: ByteString -> [ByteString]
1177 group xs
1178     | null xs   = []
1179     | otherwise = ys : group zs
1180     where
1181         (ys, zs) = spanByte (unsafeHead xs) xs
1182
1183 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1184 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1185 groupBy k xs
1186     | null xs   = []
1187     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1188     where
1189         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1190
1191 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1192 -- 'ByteString's and concatenates the list after interspersing the first
1193 -- argument between each element of the list.
1194 join :: ByteString -> [ByteString] -> ByteString
1195 join s = concat . (List.intersperse s)
1196 {-# INLINE [1] join #-}
1197
1198 {-# RULES
1199 "FPS specialise join c -> joinByte" forall c s1 s2 .
1200     join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1201   #-}
1202
1203 --
1204 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1205 -- with a char. Around 4 times faster than the generalised join.
1206 --
1207 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1208 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1209     withForeignPtr ffp $ \fp ->
1210     withForeignPtr fgp $ \gp -> do
1211         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1212         poke (ptr `plusPtr` l) c
1213         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1214     where
1215       len = length f + length g + 1
1216 {-# INLINE joinWithByte #-}
1217
1218 -- ---------------------------------------------------------------------
1219 -- Indexing ByteStrings
1220
1221 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1222 index :: ByteString -> Int -> Word8
1223 index ps n
1224     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1225     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1226                                          ++ ", length = " ++ show (length ps))
1227     | otherwise      = ps `unsafeIndex` n
1228 {-# INLINE index #-}
1229
1230 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1231 -- element in the given 'ByteString' which is equal to the query
1232 -- element, or 'Nothing' if there is no such element.
1233 -- This implementation uses memchr(3).
1234 elemIndex :: Word8 -> ByteString -> Maybe Int
1235 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1236     let p' = p `plusPtr` s
1237     q <- memchr p' c (fromIntegral l)
1238     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1239 {-# INLINE elemIndex #-}
1240
1241 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1242 -- element in the given 'ByteString' which is equal to the query
1243 -- element, or 'Nothing' if there is no such element. The following
1244 -- holds:
1245 --
1246 -- > elemIndexEnd c xs ==
1247 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1248 --
1249 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1250 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1251     go (p `plusPtr` s) (l-1)
1252   where
1253     STRICT2(go)
1254     go p i | i < 0     = return Nothing
1255            | otherwise = do ch' <- peekByteOff p i
1256                             if ch == ch'
1257                                 then return $ Just i
1258                                 else go p (i-1)
1259 {-# INLINE elemIndexEnd #-}
1260
1261 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1262 -- the indices of all elements equal to the query element, in ascending order.
1263 -- This implementation uses memchr(3).
1264 elemIndices :: Word8 -> ByteString -> [Int]
1265 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1266     let ptr = p `plusPtr` s
1267
1268         STRICT1(loop)
1269         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1270                                                 w (fromIntegral (l - n))
1271                  in if q == nullPtr
1272                         then []
1273                         else let i = q `minusPtr` ptr
1274                              in i : loop (i+1)
1275     return $! loop 0
1276 {-# INLINE elemIndices #-}
1277
1278 {-
1279 -- much slower
1280 elemIndices :: Word8 -> ByteString -> [Int]
1281 elemIndices c ps = loop 0 ps
1282    where STRICT2(loop)
1283          loop _ ps' | null ps'            = []
1284          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1285                     | otherwise           = loop (n+1) (unsafeTail ps')
1286 -}
1287
1288 -- | count returns the number of times its argument appears in the ByteString
1289 --
1290 -- > count = length . elemIndices
1291 --
1292 -- But more efficiently than using length on the intermediate list.
1293 count :: Word8 -> ByteString -> Int
1294 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1295     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1296 {-# INLINE count #-}
1297
1298 {-
1299 --
1300 -- around 30% slower
1301 --
1302 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1303      go (p `plusPtr` s) (fromIntegral m) 0
1304     where
1305         go :: Ptr Word8 -> CSize -> Int -> IO Int
1306         STRICT3(go)
1307         go p l i = do
1308             q <- memchr p w l
1309             if q == nullPtr
1310                 then return i
1311                 else do let k = fromIntegral $ q `minusPtr` p
1312                         go (q `plusPtr` 1) (l-k-1) (i+1)
1313 -}
1314
1315 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1316 -- returns the index of the first element in the ByteString
1317 -- satisfying the predicate.
1318 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1319 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1320   where
1321     STRICT2(go)
1322     go ptr n | n >= l    = return Nothing
1323              | otherwise = do w <- peek ptr
1324                               if k w
1325                                 then return (Just n)
1326                                 else go (ptr `plusPtr` 1) (n+1)
1327 {-# INLINE findIndex #-}
1328
1329 -- | The 'findIndices' function extends 'findIndex', by returning the
1330 -- indices of all elements satisfying the predicate, in ascending order.
1331 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1332 findIndices p ps = loop 0 ps
1333    where
1334      STRICT2(loop)
1335      loop n qs | null qs           = []
1336                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1337                | otherwise         =     loop (n+1) (unsafeTail qs)
1338
1339 -- ---------------------------------------------------------------------
1340 -- Searching ByteStrings
1341
1342 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1343 elem :: Word8 -> ByteString -> Bool
1344 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1345 {-# INLINE elem #-}
1346
1347 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1348 notElem :: Word8 -> ByteString -> Bool
1349 notElem c ps = not (elem c ps)
1350 {-# INLINE notElem #-}
1351
1352 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1353 -- returns a ByteString containing those characters that satisfy the
1354 -- predicate. This function is subject to array fusion.
1355 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1356 #if defined(LOOPU_FUSION)
1357 filter p  = loopArr . loopU (filterEFL p) NoAcc
1358 #elif defined(LOOPUP_FUSION)
1359 filter p  = loopArr . loopUp (filterEFL p) NoAcc
1360 #elif defined(LOOPNOACC_FUSION)
1361 filter p  = loopArr . loopNoAcc (filterEFL p)
1362 #else
1363 filter f = loopArr . loopFilter f
1364 #endif
1365 {-# INLINE filter #-}
1366
1367 {-
1368 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1369 -- around 2x faster for some one-shot applications.
1370 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1371 filter' k ps@(PS x s l)
1372     | null ps   = ps
1373     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1374         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1375         return $! t `minusPtr` p -- actual length
1376     where
1377         STRICT3(go)
1378         go f t end | f == end  = return t
1379                    | otherwise = do
1380                         w <- peek f
1381                         if k w
1382                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1383                             else             go (f `plusPtr` 1) t               end
1384 {-# INLINE filter' #-}
1385 -}
1386
1387 --
1388 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1389 -- case of filtering a single byte. It is more efficient to use
1390 -- /filterByte/ in this case.
1391 --
1392 -- > filterByte == filter . (==)
1393 --
1394 -- filterByte is around 10x faster, and uses much less space, than its
1395 -- filter equivalent
1396 filterByte :: Word8 -> ByteString -> ByteString
1397 filterByte w ps = replicate (count w ps) w
1398 {-# INLINE filterByte #-}
1399
1400 {-# RULES
1401   "FPS specialise filter (== x)" forall x.
1402       filter ((==) x) = filterByte x
1403   #-}
1404
1405 #if __GLASGOW_HASKELL__ >= 605
1406 {-# RULES
1407   "FPS specialise filter (== x)" forall x.
1408      filter (== x) = filterByte x
1409   #-}
1410 #endif
1411
1412 --
1413 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1414 -- case of filtering a single byte out of a list. It is more efficient
1415 -- to use /filterNotByte/ in this case.
1416 --
1417 -- > filterNotByte == filter . (/=)
1418 --
1419 -- filterNotByte is around 2x faster than its filter equivalent.
1420 filterNotByte :: Word8 -> ByteString -> ByteString
1421 filterNotByte w = filter (/= w)
1422 {-# INLINE filterNotByte #-}
1423
1424 {-# RULES
1425 "FPS specialise filter (x /=)" forall x.
1426     filter ((/=) x) = filterNotByte x
1427   #-}
1428
1429 #if __GLASGOW_HASKELL__ >= 605
1430 {-# RULES
1431 "FPS specialise filter (/= x)" forall x.
1432     filter (/= x) = filterNotByte x
1433   #-}
1434 #endif
1435
1436 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1437 -- and returns the first element in matching the predicate, or 'Nothing'
1438 -- if there is no such element.
1439 --
1440 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1441 --
1442 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1443 find f p = case findIndex f p of
1444                     Just n -> Just (p `unsafeIndex` n)
1445                     _      -> Nothing
1446 {-# INLINE find #-}
1447
1448 {-
1449 --
1450 -- fuseable, but we don't want to walk the whole array.
1451 --
1452 find k = foldl findEFL Nothing
1453     where findEFL a@(Just _) _ = a
1454           findEFL _          c | k c       = Just c
1455                                | otherwise = Nothing
1456 -}
1457
1458 -- ---------------------------------------------------------------------
1459 -- Searching for substrings
1460
1461 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1462 -- iff the first is a prefix of the second.
1463 isPrefixOf :: ByteString -> ByteString -> Bool
1464 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1465     | l1 == 0   = True
1466     | l2 < l1   = False
1467     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1468         withForeignPtr x2 $ \p2 -> do
1469             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1470             return $! i == 0
1471
1472 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1473 -- iff the first is a suffix of the second.
1474 --
1475 -- The following holds:
1476 --
1477 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1478 --
1479 -- However, the real implemenation uses memcmp to compare the end of the
1480 -- string only, with no reverse required..
1481 isSuffixOf :: ByteString -> ByteString -> Bool
1482 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1483     | l1 == 0   = True
1484     | l2 < l1   = False
1485     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1486         withForeignPtr x2 $ \p2 -> do
1487             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1488             return $! i == 0
1489
1490 -- | Check whether one string is a substring of another. @isSubstringOf
1491 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1492 isSubstringOf :: ByteString -- ^ String to search for.
1493               -> ByteString -- ^ String to search in.
1494               -> Bool
1495 isSubstringOf p s = not $ P.null $ findSubstrings p s
1496
1497 -- | Get the first index of a substring in another string,
1498 --   or 'Nothing' if the string is not found.
1499 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1500 findSubstring :: ByteString -- ^ String to search for.
1501               -> ByteString -- ^ String to seach in.
1502               -> Maybe Int
1503 findSubstring = (listToMaybe .) . findSubstrings
1504
1505 -- | Find the indexes of all (possibly overlapping) occurances of a
1506 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1507 -- string matching algorithm.
1508 findSubstrings :: ByteString -- ^ String to search for.
1509                -> ByteString -- ^ String to seach in.
1510                -> [Int]
1511
1512 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1513   where
1514       patc x = pat `unsafeIndex` x
1515       strc x = str `unsafeIndex` x
1516
1517       -- maybe we should make kmpNext a UArray before using it in search?
1518       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1519       kmpNextL p _ | null p = []
1520       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1521                          ps = unsafeTail p
1522                          x = if not (null ps) && unsafeHead ps == patc j'
1523                                 then kmpNext Array.! j' else j'
1524                         in x:kmpNextL ps j'
1525       search i j = match ++ rest -- i: position in string, j: position in pattern
1526         where match = if j == m then [(i - j)] else []
1527               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1528       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1529                | otherwise = j
1530
1531 -- ---------------------------------------------------------------------
1532 -- Zipping
1533
1534 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1535 -- corresponding pairs of bytes. If one input ByteString is short,
1536 -- excess elements of the longer ByteString are discarded. This is
1537 -- equivalent to a pair of 'unpack' operations.
1538 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1539 zip ps qs
1540     | null ps || null qs = []
1541     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1542
1543 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1544 -- the first argument, instead of a tupling function.  For example,
1545 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1546 -- corresponding sums.
1547 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1548 zipWith f ps qs
1549     | null ps || null qs = []
1550     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1551 #if defined(__GLASGOW_HASKELL__)
1552 {-# INLINE [1] zipWith #-}
1553 #endif
1554
1555 --
1556 -- | A specialised version of zipWith for the common case of a
1557 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1558 -- are used to automatically covert zipWith into zipWith' when a pack is
1559 -- performed on the result of zipWith, but we also export it for
1560 -- convenience.
1561 --
1562 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1563 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1564     withForeignPtr fp $ \a ->
1565     withForeignPtr fq $ \b ->
1566     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1567   where
1568     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1569     STRICT4(zipWith_)
1570     zipWith_ n p1 p2 r
1571        | n >= len = return ()
1572        | otherwise = do
1573             x <- peekByteOff p1 n
1574             y <- peekByteOff p2 n
1575             pokeByteOff r n (f x y)
1576             zipWith_ (n+1) p1 p2 r
1577
1578     len = min l m
1579 {-# INLINE zipWith' #-}
1580
1581 {-# RULES
1582
1583 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1584     zipWith f p q = unpack (zipWith' f p q)
1585
1586   #-}
1587
1588 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1589 -- ByteStrings. Note that this performs two 'pack' operations.
1590 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1591 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1592 {-# INLINE unzip #-}
1593
1594 -- ---------------------------------------------------------------------
1595 -- Special lists
1596
1597 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1598 inits :: ByteString -> [ByteString]
1599 inits (PS x s l) = [PS x s n | n <- [0..l]]
1600
1601 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1602 tails :: ByteString -> [ByteString]
1603 tails p | null p    = [empty]
1604         | otherwise = p : tails (unsafeTail p)
1605
1606 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1607
1608 -- ---------------------------------------------------------------------
1609 -- ** Ordered 'ByteString's
1610
1611 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1612 sort :: ByteString -> ByteString
1613 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1614
1615     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1616     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1617
1618     let STRICT2(go)
1619         go 256 _   = return ()
1620         go i   ptr = do n <- peekElemOff arr i
1621                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1622                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1623     go 0 p
1624
1625 {-
1626 sort :: ByteString -> ByteString
1627 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1628         memcpy p (f `plusPtr` s) l
1629         c_qsort p l -- inplace
1630 -}
1631
1632 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1633 --
1634 -- Try some linear sorts: radix, counting
1635 -- Or mergesort.
1636 --
1637 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1638 -- sortBy f ps = undefined
1639
1640 -- ---------------------------------------------------------------------
1641 -- Low level constructors
1642
1643 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1644 -- finalizer associated to it. The ByteString length is calculated using
1645 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1646 packCString :: CString -> ByteString
1647 packCString cstr = unsafePerformIO $ do
1648     fp <- newForeignPtr_ (castPtr cstr)
1649     l <- c_strlen cstr
1650     return $! PS fp 0 (fromIntegral l)
1651
1652 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1653 -- have /no/ finalizer associated with it. This operation has /O(1)/
1654 -- complexity as we already know the final size, so no /strlen(3)/ is
1655 -- required.
1656 packCStringLen :: CStringLen -> ByteString
1657 packCStringLen (ptr,len) = unsafePerformIO $ do
1658     fp <- newForeignPtr_ (castPtr ptr)
1659     return $! PS fp 0 (fromIntegral len)
1660
1661 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1662 -- have a @free(3)@ finalizer associated to it.
1663 packMallocCString :: CString -> ByteString
1664 packMallocCString cstr = unsafePerformIO $ do
1665     fp <- newForeignFreePtr (castPtr cstr)
1666     len <- c_strlen cstr
1667     return $! PS fp 0 (fromIntegral len)
1668
1669 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1670 -- null-terminated @CString@.  The @CString@ will be freed
1671 -- automatically. This is a memcpy(3).
1672 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1673 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1674     where alloc = withForeignPtr ps $ \p -> do
1675             buf <- c_malloc (fromIntegral l+1)
1676             memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1677             poke (buf `plusPtr` l) (0::Word8) -- n.b.
1678             return (castPtr buf)
1679
1680 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1681 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1682 useAsCStringLen = unsafeUseAsCStringLen
1683
1684 --
1685 -- why were we doing this?
1686 --
1687 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1688 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1689 --     where
1690 --       alloc = withForeignPtr ps $ \p -> do
1691 --                 buf <- c_malloc (fromIntegral l+1)
1692 --                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1693 --                 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1694 --                 return $! (castPtr buf, l)
1695 --
1696
1697 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1698 --   This is mainly useful to allow the rest of the data pointed
1699 --   to by the 'ByteString' to be garbage collected, for example
1700 --   if a large string has been read in, and only a small part of it
1701 --   is needed in the rest of the program.
1702 copy :: ByteString -> ByteString
1703 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1704     memcpy p (f `plusPtr` s) (fromIntegral l)
1705
1706 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1707 -- CString is going to be deallocated from C land.
1708 copyCString :: CString -> IO ByteString
1709 copyCString cstr = do
1710     len <- c_strlen cstr
1711     copyCStringLen (cstr, fromIntegral len)
1712
1713 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1714 copyCStringLen :: CStringLen -> IO ByteString
1715 copyCStringLen (cstr, len) = create len $ \p ->
1716     memcpy p (castPtr cstr) (fromIntegral len)
1717
1718 -- ---------------------------------------------------------------------
1719 -- line IO
1720
1721 -- | Read a line from stdin.
1722 getLine :: IO ByteString
1723 getLine = hGetLine stdin
1724
1725 {-
1726 -- | Lazily construct a list of lines of ByteStrings. This will be much
1727 -- better on memory consumption than using 'hGetContents >>= lines'
1728 -- If you're considering this, a better choice might be to use
1729 -- Data.ByteString.Lazy
1730 hGetLines :: Handle -> IO [ByteString]
1731 hGetLines h = go
1732     where
1733         go = unsafeInterleaveIO $ do
1734                 e <- hIsEOF h
1735                 if e
1736                   then return []
1737                   else do
1738                 x  <- hGetLine h
1739                 xs <- go
1740                 return (x:xs)
1741 -}
1742
1743 -- | Read a line from a handle
1744
1745 hGetLine :: Handle -> IO ByteString
1746 #if !defined(__GLASGOW_HASKELL__)
1747 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
1748 #else
1749 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1750     case haBufferMode handle_ of
1751        NoBuffering -> error "no buffering"
1752        _other      -> hGetLineBuffered handle_
1753
1754  where
1755     hGetLineBuffered handle_ = do
1756         let ref = haBuffer handle_
1757         buf <- readIORef ref
1758         hGetLineBufferedLoop handle_ ref buf 0 []
1759
1760     hGetLineBufferedLoop handle_ ref
1761             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1762         len `seq` do
1763         off <- findEOL r w raw
1764         let new_len = len + off - r
1765         xs <- mkPS raw r off
1766
1767       -- if eol == True, then off is the offset of the '\n'
1768       -- otherwise off == w and the buffer is now empty.
1769         if off /= w
1770             then do if (w == off + 1)
1771                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1772                             else writeIORef ref buf{ bufRPtr = off + 1 }
1773                     mkBigPS new_len (xs:xss)
1774             else do
1775                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1776                                     buf{ bufWPtr=0, bufRPtr=0 }
1777                  case maybe_buf of
1778                     -- Nothing indicates we caught an EOF, and we may have a
1779                     -- partial line to return.
1780                     Nothing -> do
1781                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1782                          if new_len > 0
1783                             then mkBigPS new_len (xs:xss)
1784                             else ioe_EOF
1785                     Just new_buf ->
1786                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1787
1788     -- find the end-of-line character, if there is one
1789     findEOL r w raw
1790         | r == w = return w
1791         | otherwise =  do
1792             (c,r') <- readCharFromBuffer raw r
1793             if c == '\n'
1794                 then return r -- NB. not r': don't include the '\n'
1795                 else findEOL r' w raw
1796
1797     maybeFillReadBuffer fd is_line is_stream buf = catch
1798         (do buf' <- fillReadBuffer fd is_line is_stream buf
1799             return (Just buf'))
1800         (\e -> if isEOFError e then return Nothing else ioError e)
1801
1802 -- TODO, rewrite to use normal memcpy
1803 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1804 mkPS buf start end =
1805     let len = end - start
1806     in create len $ \p -> do
1807         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1808         return ()
1809
1810 mkBigPS :: Int -> [ByteString] -> IO ByteString
1811 mkBigPS _ [ps] = return ps
1812 mkBigPS _ pss = return $! concat (P.reverse pss)
1813
1814 #endif
1815
1816 -- ---------------------------------------------------------------------
1817 -- Block IO
1818
1819 -- | Outputs a 'ByteString' to the specified 'Handle'.
1820 hPut :: Handle -> ByteString -> IO ()
1821 hPut _ (PS _  _ 0) = return ()
1822 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1823
1824 -- | A synonym for @hPut@, for compatibility
1825 hPutStr :: Handle -> ByteString -> IO ()
1826 hPutStr = hPut
1827
1828 -- | Write a ByteString to a handle, appending a newline byte
1829 hPutStrLn :: Handle -> ByteString -> IO ()
1830 hPutStrLn h ps
1831     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1832     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1833
1834 -- | Write a ByteString to stdout
1835 putStr :: ByteString -> IO ()
1836 putStr = hPut stdout
1837
1838 -- | Write a ByteString to stdout, appending a newline byte
1839 putStrLn :: ByteString -> IO ()
1840 putStrLn = hPutStrLn stdout
1841
1842 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1843 -- is far more efficient than reading the characters into a 'String'
1844 -- and then using 'pack'.
1845 hGet :: Handle -> Int -> IO ByteString
1846 hGet _ 0 = return empty
1847 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1848
1849 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1850 -- waiting for data to become available, instead it returns only whatever data
1851 -- is available.
1852 hGetNonBlocking :: Handle -> Int -> IO ByteString
1853 #if defined(__GLASGOW_HASKELL__)
1854 hGetNonBlocking _ 0 = return empty
1855 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1856 #else
1857 hGetNonBlocking = hGet
1858 #endif
1859
1860 -- | Read entire handle contents into a 'ByteString'.
1861 -- This function reads chunks at a time, doubling the chunksize on each
1862 -- read. The final buffer is then realloced to the appropriate size. For
1863 -- files > half of available memory, this may lead to memory exhaustion.
1864 -- Consider using 'readFile' in this case.
1865 --
1866 -- As with 'hGet', the string representation in the file is assumed to
1867 -- be ISO-8859-1.
1868 --
1869 hGetContents :: Handle -> IO ByteString
1870 hGetContents h = do
1871     let start_size = 1024
1872     p <- mallocArray start_size
1873     i <- hGetBuf h p start_size
1874     if i < start_size
1875         then do p' <- reallocArray p i
1876                 fp <- newForeignFreePtr p'
1877                 return $! PS fp 0 i
1878         else f p start_size
1879     where
1880         f p s = do
1881             let s' = 2 * s
1882             p' <- reallocArray p s'
1883             i  <- hGetBuf h (p' `plusPtr` s) s
1884             if i < s
1885                 then do let i' = s + i
1886                         p'' <- reallocArray p' i'
1887                         fp  <- newForeignFreePtr p''
1888                         return $! PS fp 0 i'
1889                 else f p' s'
1890
1891 -- | getContents. Equivalent to hGetContents stdin
1892 getContents :: IO ByteString
1893 getContents = hGetContents stdin
1894
1895 -- | The interact function takes a function of type @ByteString -> ByteString@
1896 -- as its argument. The entire input from the standard input device is passed
1897 -- to this function as its argument, and the resulting string is output on the
1898 -- standard output device. It's great for writing one line programs!
1899 interact :: (ByteString -> ByteString) -> IO ()
1900 interact transformer = putStr . transformer =<< getContents
1901
1902 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1903 -- efficient than reading the characters into a 'String' and then using
1904 -- 'pack'.  It also may be more efficient than opening the file and
1905 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1906 -- for 'text mode' use the Char8 version of this function.
1907 readFile :: FilePath -> IO ByteString
1908 readFile f = bracket (openBinaryFile f ReadMode) hClose
1909     (\h -> hFileSize h >>= hGet h . fromIntegral)
1910
1911 -- | Write a 'ByteString' to a file.
1912 writeFile :: FilePath -> ByteString -> IO ()
1913 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1914     (\h -> hPut h txt)
1915
1916 -- | Append a 'ByteString' to a file.
1917 appendFile :: FilePath -> ByteString -> IO ()
1918 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1919     (\h -> hPut h txt)
1920
1921 {-
1922 --
1923 -- Disable until we can move it into a portable .hsc file
1924 --
1925
1926 -- | Like readFile, this reads an entire file directly into a
1927 -- 'ByteString', but it is even more efficient.  It involves directly
1928 -- mapping the file to memory.  This has the advantage that the contents
1929 -- of the file never need to be copied.  Also, under memory pressure the
1930 -- page may simply be discarded, while in the case of readFile it would
1931 -- need to be written to swap.  If you read many small files, mmapFile
1932 -- will be less memory-efficient than readFile, since each mmapFile
1933 -- takes up a separate page of memory.  Also, you can run into bus
1934 -- errors if the file is modified.  As with 'readFile', the string
1935 -- representation in the file is assumed to be ISO-8859-1.
1936 --
1937 -- On systems without mmap, this is the same as a readFile.
1938 --
1939 mmapFile :: FilePath -> IO ByteString
1940 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1941
1942 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1943 mmap f = do
1944     h <- openBinaryFile f ReadMode
1945     l <- fromIntegral `fmap` hFileSize h
1946     -- Don't bother mmaping small files because each mmapped file takes up
1947     -- at least one full VM block.
1948     if l < mmap_limit
1949        then do thefp <- mallocByteString l
1950                withForeignPtr thefp $ \p-> hGetBuf h p l
1951                hClose h
1952                return (thefp, l)
1953        else do
1954                -- unix only :(
1955                fd <- fromIntegral `fmap` handleToFd h
1956                p  <- my_mmap l fd
1957                fp <- if p == nullPtr
1958                      then do thefp <- mallocByteString l
1959                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1960                              return thefp
1961                      else do
1962                           -- The munmap leads to crashes on OpenBSD.
1963                           -- maybe there's a use after unmap in there somewhere?
1964                           -- Bulat suggests adding the hClose to the
1965                           -- finalizer, excellent idea.
1966 #if !defined(__OpenBSD__)
1967                              let unmap = c_munmap p l >> return ()
1968 #else
1969                              let unmap = return ()
1970 #endif
1971                              fp <- newForeignPtr p unmap
1972                              return fp
1973                c_close fd
1974                hClose h
1975                return (fp, l)
1976     where mmap_limit = 16*1024
1977 -}
1978
1979 -- ---------------------------------------------------------------------
1980 -- Internal utilities
1981
1982 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1983 -- of the string if no element is found, rather than Nothing.
1984 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1985 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1986   where
1987     STRICT2(go)
1988     go ptr n | n >= l    = return l
1989              | otherwise = do w <- peek ptr
1990                               if k w
1991                                 then return n
1992                                 else go (ptr `plusPtr` 1) (n+1)
1993 {-# INLINE findIndexOrEnd #-}
1994
1995 -- | Perform an operation with a temporary ByteString
1996 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1997 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1998 {-# INLINE withPtr #-}
1999
2000 -- Common up near identical calls to `error' to reduce the number
2001 -- constant strings created when compiled:
2002 errorEmptyList :: String -> a
2003 errorEmptyList fun = moduleError fun "empty ByteString"
2004 {-# NOINLINE errorEmptyList #-}
2005
2006 moduleError :: String -> String -> a
2007 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2008 {-# NOINLINE moduleError #-}
2009
2010 -- Find from the end of the string using predicate
2011 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2012 STRICT2(findFromEndUntil)
2013 findFromEndUntil f ps@(PS x s l) =
2014     if null ps then 0
2015     else if f (last ps) then l
2016          else findFromEndUntil f (PS x s (l-1))
2017
2018 {-# INLINE newForeignFreePtr #-}
2019 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2020 newForeignFreePtr p = newForeignPtr c_free_finalizer p