Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with "Prelude" functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         singleton,              -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49
  50         -- * Basic interface
  51         cons,                   -- :: Word8 -> ByteString -> ByteString
  52         snoc,                   -- :: ByteString -> Word8 -> ByteString
  53         append,                 -- :: ByteString -> ByteString -> ByteString
  54         head,                   -- :: ByteString -> Word8
  55         last,                   -- :: ByteString -> Word8
  56         tail,                   -- :: ByteString -> ByteString
  57         init,                   -- :: ByteString -> ByteString
  58         null,                   -- :: ByteString -> Bool
  59         length,                 -- :: ByteString -> Int
  60
  61         -- * Transformating ByteStrings
  62         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  63         reverse,                -- :: ByteString -> ByteString
  64         intersperse,            -- :: Word8 -> ByteString -> ByteString
  65         transpose,              -- :: [ByteString] -> [ByteString]
  66
  67         -- * Reducing 'ByteString's (folds)
  68         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  69         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  70         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  71         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  72
  73         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  74         foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  75         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  76         foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77
  78         -- ** Special folds
  79         concat,                 -- :: [ByteString] -> ByteString
  80         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  81         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  82         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  83         maximum,                -- :: ByteString -> Word8
  84         minimum,                -- :: ByteString -> Word8
  85
  86         -- * Building ByteStrings
  87         -- ** Scans
  88         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  89         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  90         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  91         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  92
  93         -- ** Accumulating maps
  94         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  95         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  96         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  97
  98         -- ** Unfolding ByteStrings
  99         replicate,              -- :: Int -> Word8 -> ByteString
 100         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 101         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 102
 103         -- * Substrings
 104
 105         -- ** Breaking strings
 106         take,                   -- :: Int -> ByteString -> ByteString
 107         drop,                   -- :: Int -> ByteString -> ByteString
 108         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 109         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 110         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 111         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 113         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 114         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117         inits,                  -- :: ByteString -> [ByteString]
 118         tails,                  -- :: ByteString -> [ByteString]
 119
 120         -- ** Breaking into many substrings
 121         split,                  -- :: Word8 -> ByteString -> [ByteString]
 122         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 123
 124         -- ** Joining strings
 125         join,                   -- :: ByteString -> [ByteString] -> ByteString
 126
 127         -- * Predicates
 128         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 129         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 130
 131         -- ** Search for arbitrary substrings
 132         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 133         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 134         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 135
 136         -- * Searching ByteStrings
 137
 138         -- ** Searching by equality
 139         -- | These functions use memchr(3) to efficiently search the ByteString
 140         elem,                   -- :: Word8 -> ByteString -> Bool
 141         notElem,                -- :: Word8 -> ByteString -> Bool
 142
 143         -- ** Searching with a predicate
 144         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 146 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 147
 148         -- * Indexing ByteStrings
 149         index,                  -- :: ByteString -> Int -> Word8
 150         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 151         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 152         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
 153         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 154         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 155         count,                  -- :: Word8 -> ByteString -> Int
 156
 157         -- * Zipping and unzipping ByteStrings
 158         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 159         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 160         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 161
 162         -- * Ordered ByteStrings
 163         sort,                   -- :: ByteString -> ByteString
 164
 165         -- * Low level CString conversions
 166
 167         -- ** Packing CStrings and pointers
 168         packCString,            -- :: CString -> ByteString
 169         packCStringLen,         -- :: CString -> ByteString
 170         packMallocCString,      -- :: CString -> ByteString
 171
 172         -- ** Using ByteStrings as CStrings
 173         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 174         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
 175
 176         -- ** Copying ByteStrings
 177         -- | These functions perform memcpy(3) operations
 178         copy,                   -- :: ByteString -> ByteString
 179         copyCString,            -- :: CString -> IO ByteString
 180         copyCStringLen,         -- :: CStringLen -> IO ByteString
 181
 182         -- * I\/O with 'ByteString's
 183
 184         -- ** Standard input and output
 185         getLine,                -- :: IO ByteString
 186         getContents,            -- :: IO ByteString
 187         putStr,                 -- :: ByteString -> IO ()
 188         putStrLn,               -- :: ByteString -> IO ()
 189         interact,               -- :: (ByteString -> ByteString) -> IO ()
 190
 191         -- ** Files
 192         readFile,               -- :: FilePath -> IO ByteString
 193         writeFile,              -- :: FilePath -> ByteString -> IO ()
 194         appendFile,             -- :: FilePath -> ByteString -> IO ()
 195 --      mmapFile,               -- :: FilePath -> IO ByteString
 196
 197         -- ** I\/O with Handles
 198         hGetLine,               -- :: Handle -> IO ByteString
 199         hGetContents,           -- :: Handle -> IO ByteString
 200         hGet,                   -- :: Handle -> Int -> IO ByteString
 201         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 202         hPut,                   -- :: Handle -> ByteString -> IO ()
 203         hPutStr,                -- :: Handle -> ByteString -> IO ()
 204         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
 205
 206 #if defined(__GLASGOW_HASKELL__)
 207         -- * Fusion utilities
 208         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 209         lengthU, maximumU, minimumU
 210 #endif
 211
 212   ) where
 213
 214 import qualified Prelude as P
 215 import Prelude hiding           (reverse,head,tail,last,init,null
 216                                 ,length,map,lines,foldl,foldr,unlines
 217                                 ,concat,any,take,drop,splitAt,takeWhile
 218                                 ,dropWhile,span,break,elem,filter,maximum
 219                                 ,minimum,all,concatMap,foldl1,foldr1
 220                                 ,scanl,scanl1,scanr,scanr1
 221                                 ,readFile,writeFile,appendFile,replicate
 222                                 ,getContents,getLine,putStr,putStrLn,interact
 223                                 ,zip,zipWith,unzip,notElem)
 224
 225 import Data.ByteString.Base
 226 import Data.ByteString.Fusion
 227
 228 import qualified Data.List as List
 229
 230 import Data.Word                (Word8)
 231 import Data.Maybe               (listToMaybe)
 232 import Data.Array               (listArray)
 233 import qualified Data.Array as Array ((!))
 234
 235 -- Control.Exception.bracket not available in yhc or nhc
 236 import Control.Exception        (bracket, assert)
 237 import qualified Control.Exception as Exception
 238 import Control.Monad            (when)
 239
 240 import Foreign.C.String         (CString, CStringLen)
 241 import Foreign.C.Types          (CSize)
 242 import Foreign.ForeignPtr
 243 import Foreign.Marshal.Array
 244 import Foreign.Ptr
 245 import Foreign.Storable         (Storable(..))
 246
 247 -- hGetBuf and hPutBuf not available in yhc or nhc
 248 import System.IO                (stdin,stdout,hClose,hFileSize
 249                                 ,hGetBuf,hPutBuf,openBinaryFile
 250                                 ,Handle,IOMode(..))
 251
 252 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
 253
 254 #if !defined(__GLASGOW_HASKELL__)
 255 import System.IO.Unsafe
 256 import qualified System.Environment
 257 import qualified System.IO      (hGetLine)
 258 #endif
 259
 260 #if defined(__GLASGOW_HASKELL__)
 261
 262 import System.IO                (hGetBufNonBlocking)
 263 import System.IO.Error          (isEOFError)
 264
 265 import GHC.Handle
 266 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
 267 import GHC.Base                 (build)
 268 import GHC.Word hiding (Word8)
 269 import GHC.Ptr                  (Ptr(..))
 270 import GHC.ST                   (ST(..))
 271 import GHC.IOBase
 272
 273 #endif
 274
 275 -- -----------------------------------------------------------------------------
 276 --
 277 -- Useful macros, until we have bang patterns
 278 --
 279
 280 #define STRICT1(f) f a | a `seq` False = undefined
 281 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 282 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 283 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 284 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 285
 286 -- -----------------------------------------------------------------------------
 287
 288 instance Eq  ByteString
 289     where (==)    = eq
 290
 291 instance Ord ByteString
 292     where compare = compareBytes
 293
 294 instance Monoid ByteString where
 295     mempty  = empty
 296     mappend = append
 297     mconcat = concat
 298
 299 {-
 300 instance Arbitrary PackedString where
 301     arbitrary = P.pack `fmap` arbitrary
 302     coarbitrary s = coarbitrary (P.unpack s)
 303 -}
 304
 305 -- | /O(n)/ Equality on the 'ByteString' type.
 306 eq :: ByteString -> ByteString -> Bool
 307 eq a@(PS p s l) b@(PS p' s' l')
 308     | l /= l'            = False    -- short cut on length
 309     | p == p' && s == s' = True     -- short cut for the same string
 310     | otherwise          = compareBytes a b == EQ
 311 {-# INLINE eq #-}
 312
 313 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 314 compareBytes :: ByteString -> ByteString -> Ordering
 315 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 316     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 317     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 318     | otherwise                         = inlinePerformIO $
 319         withForeignPtr x1 $ \p1 ->
 320         withForeignPtr x2 $ \p2 -> do
 321             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 322             return $! case i `compare` 0 of
 323                         EQ  -> l1 `compare` l2
 324                         x   -> x
 325 {-# INLINE compareBytes #-}
 326
 327 {-
 328 --
 329 -- About 4x slower over 32M
 330 --
 331 compareBytes :: ByteString -> ByteString -> Ordering
 332 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 333     withForeignPtr fp1 $ \p1 ->
 334         withForeignPtr fp2 $ \p2 ->
 335             cmp (p1 `plusPtr` off1)
 336                 (p2 `plusPtr` off2) 0 len1 len2
 337
 338 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 339 STRICT5(cmp)
 340 cmp p1 p2 n len1 len2
 341       | n == len1 = if n == len2 then return EQ else return LT
 342       | n == len2 = return GT
 343       | otherwise = do
 344           (a :: Word8) <- peekByteOff p1 n
 345           (b :: Word8) <- peekByteOff p2 n
 346           case a `compare` b of
 347                 EQ -> cmp p1 p2 (n+1) len1 len2
 348                 LT -> return LT
 349                 GT -> return GT
 350 {-# INLINE compareBytes #-}
 351 -}
 352
 353 -- -----------------------------------------------------------------------------
 354 -- Introducing and eliminating 'ByteString's
 355
 356 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 357 singleton :: Word8 -> ByteString
 358 singleton c = unsafeCreate 1 $ \p -> poke p c
 359 {-# INLINE [1] singleton #-}
 360
 361 --
 362 -- XXX The unsafePerformIO is critical!
 363 --
 364 -- Otherwise:
 365 --
 366 --  singleton 255 `compare` singleton 127
 367 --
 368 -- is compiled to:
 369 --
 370 --  case mallocByteString 2 of
 371 --      ForeignPtr f internals ->
 372 --           case writeWord8OffAddr# f 0 255 of _ ->
 373 --           case writeWord8OffAddr# f 0 127 of _ ->
 374 --           case eqAddr# f f of
 375 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 376 --                                        (GHC.Prim.plusAddr# f 0)
 377 --
 378 --
 379
 380 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 381 --
 382 -- For applications with large numbers of string literals, pack can be a
 383 -- bottleneck. In such cases, consider using packAddress (GHC only).
 384 pack :: [Word8] -> ByteString
 385
 386 #if !defined(__GLASGOW_HASKELL__)
 387
 388 pack str = unsafeCreate (P.length str) $ \p -> go p str
 389     where
 390         go _ []     = return ()
 391         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 392
 393 #else /* hack away */
 394
 395 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 396     where
 397         go _ _ []        = return ()
 398         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 399
 400         writeByte p i c = ST $ \s# ->
 401             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 402
 403 #endif
 404
 405 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 406 unpack :: ByteString -> [Word8]
 407
 408 #if !defined(__GLASGOW_HASKELL__)
 409
 410 unpack (PS _  _ 0) = []
 411 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 412         go (p `plusPtr` s) (l - 1) []
 413     where
 414         STRICT3(go)
 415         go p 0 acc = peek p          >>= \e -> return (e : acc)
 416         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 417 {-# INLINE unpack #-}
 418
 419 #else
 420
 421 unpack ps = build (unpackFoldr ps)
 422 {-# INLINE unpack #-}
 423
 424 --
 425 -- critical this isn't strict in the acc
 426 -- as it will break in the presence of list fusion. this is a known
 427 -- issue with seq and build/foldr rewrite rules, which rely on lazy
 428 -- demanding to avoid bottoms in the list.
 429 --
 430 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 431 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 432     let loop q n    _   | q `seq` n `seq` False = undefined -- n.b.
 433         loop _ (-1) acc = return acc
 434         loop q n    acc = do
 435            a <- peekByteOff q n
 436            loop q (n-1) (a `f` acc)
 437     loop (p `plusPtr` off) (len-1) ch
 438 {-# INLINE [0] unpackFoldr #-}
 439
 440 unpackList :: ByteString -> [Word8]
 441 unpackList (PS fp off len) = withPtr fp $ \p -> do
 442     let STRICT3(loop)
 443         loop _ (-1) acc = return acc
 444         loop q n acc = do
 445            a <- peekByteOff q n
 446            loop q (n-1) (a : acc)
 447     loop (p `plusPtr` off) (len-1) []
 448
 449 {-# RULES
 450     "FPS unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 451  #-}
 452
 453 #endif
 454
 455 -- ---------------------------------------------------------------------
 456 -- Basic interface
 457
 458 -- | /O(1)/ Test whether a ByteString is empty.
 459 null :: ByteString -> Bool
 460 null (PS _ _ l) = assert (l >= 0) $ l <= 0
 461 {-# INLINE null #-}
 462
 463 -- ---------------------------------------------------------------------
 464 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 465 length :: ByteString -> Int
 466 length (PS _ _ l) = assert (l >= 0) $ l
 467
 468 --
 469 -- length/loop fusion. When taking the length of any fuseable loop,
 470 -- rewrite it as a foldl', and thus avoid allocating the result buffer
 471 -- worth around 10% in speed testing.
 472 --
 473
 474 #if defined(__GLASGOW_HASKELL__)
 475 {-# INLINE [1] length #-}
 476 #endif
 477
 478 lengthU :: ByteString -> Int
 479 lengthU = foldl' (const . (+1)) (0::Int)
 480 {-# INLINE lengthU #-}
 481
 482 {-# RULES
 483
 484 -- v2 fusion
 485 "FPS length/loop" forall loop s .
 486   length  (loopArr (loopWrapper loop s)) =
 487   lengthU (loopArr (loopWrapper loop s))
 488
 489   #-}
 490
 491 ------------------------------------------------------------------------
 492
 493 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 494 -- complexity, as it requires a memcpy.
 495 cons :: Word8 -> ByteString -> ByteString
 496 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 497         poke p c
 498         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 499 {-# INLINE cons #-}
 500
 501 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 502 snoc :: ByteString -> Word8 -> ByteString
 503 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 504         memcpy p (f `plusPtr` s) (fromIntegral l)
 505         poke (p `plusPtr` l) c
 506 {-# INLINE snoc #-}
 507
 508 -- todo fuse
 509
 510 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 511 -- An exception will be thrown in the case of an empty ByteString.
 512 head :: ByteString -> Word8
 513 head (PS x s l)
 514     | l <= 0    = errorEmptyList "head"
 515     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 516 {-# INLINE head #-}
 517
 518 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 519 -- An exception will be thrown in the case of an empty ByteString.
 520 tail :: ByteString -> ByteString
 521 tail (PS p s l)
 522     | l <= 0    = errorEmptyList "tail"
 523     | otherwise = PS p (s+1) (l-1)
 524 {-# INLINE tail #-}
 525
 526 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 527 -- An exception will be thrown in the case of an empty ByteString.
 528 last :: ByteString -> Word8
 529 last ps@(PS x s l)
 530     | null ps   = errorEmptyList "last"
 531     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 532 {-# INLINE last #-}
 533
 534 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 535 -- An exception will be thrown in the case of an empty ByteString.
 536 init :: ByteString -> ByteString
 537 init ps@(PS p s l)
 538     | null ps   = errorEmptyList "init"
 539     | otherwise = PS p s (l-1)
 540 {-# INLINE init #-}
 541
 542 -- | /O(n)/ Append two ByteStrings
 543 append :: ByteString -> ByteString -> ByteString
 544 append xs ys | null xs   = ys
 545              | null ys   = xs
 546              | otherwise = concat [xs,ys]
 547 {-# INLINE append #-}
 548
 549 -- ---------------------------------------------------------------------
 550 -- Transformations
 551
 552 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 553 -- element of @xs@. This function is subject to array fusion.
 554 map :: (Word8 -> Word8) -> ByteString -> ByteString
 555 #if defined(LOOPU_FUSION)
 556 map f = loopArr . loopU (mapEFL f) NoAcc
 557 #elif defined(LOOPUP_FUSION)
 558 map f = loopArr . loopUp (mapEFL f) NoAcc
 559 #elif defined(LOOPNOACC_FUSION)
 560 map f = loopArr . loopNoAcc (mapEFL f)
 561 #else
 562 map f = loopArr . loopMap f
 563 #endif
 564 {-# INLINE map #-}
 565
 566 {-
 567 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 568 -- slightly faster for one-shot cases.
 569 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 570 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
 571     create len $ map_ 0 (a `plusPtr` s)
 572   where
 573     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 574     STRICT3(map_)
 575     map_ n p1 p2
 576        | n >= len = return ()
 577        | otherwise = do
 578             x <- peekByteOff p1 n
 579             pokeByteOff p2 n (f x)
 580             map_ (n+1) p1 p2
 581 {-# INLINE map' #-}
 582 -}
 583
 584 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 585 reverse :: ByteString -> ByteString
 586 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 587         c_reverse p (f `plusPtr` s) (fromIntegral l)
 588
 589 -- todo, fuseable version
 590
 591 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 592 -- 'ByteString' and \`intersperses\' that byte between the elements of
 593 -- the 'ByteString'.  It is analogous to the intersperse function on
 594 -- Lists.
 595 intersperse :: Word8 -> ByteString -> ByteString
 596 intersperse c ps@(PS x s l)
 597     | length ps < 2  = ps
 598     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
 599         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 600
 601 {-
 602 intersperse c = pack . List.intersperse c . unpack
 603 -}
 604
 605 -- | The 'transpose' function transposes the rows and columns of its
 606 -- 'ByteString' argument.
 607 transpose :: [ByteString] -> [ByteString]
 608 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 609
 610 -- ---------------------------------------------------------------------
 611 -- Reducing 'ByteString's
 612
 613 -- | 'foldl', applied to a binary operator, a starting value (typically
 614 -- the left-identity of the operator), and a ByteString, reduces the
 615 -- ByteString using the binary operator, from left to right.
 616 -- This function is subject to array fusion.
 617 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 618 #if !defined(LOOPU_FUSION)
 619 foldl f z = loopAcc . loopUp (foldEFL f) z
 620 #else
 621 foldl f z = loopAcc . loopU (foldEFL f) z
 622 #endif
 623 {-# INLINE foldl #-}
 624
 625 {-
 626 --
 627 -- About twice as fast with 6.4.1, but not fuseable
 628 -- A simple fold . map is enough to make it worth while.
 629 --
 630 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 631         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 632     where
 633         STRICT3(lgo)
 634         lgo z p q | p == q    = return z
 635                   | otherwise = do c <- peek p
 636                                    lgo (f z c) (p `plusPtr` 1) q
 637 -}
 638
 639 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 640 -- Though actually foldl is also strict in the accumulator.
 641 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 642 foldl' = foldl
 643 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
 644 {-# INLINE foldl' #-}
 645
 646 -- | 'foldr', applied to a binary operator, a starting value
 647 -- (typically the right-identity of the operator), and a ByteString,
 648 -- reduces the ByteString using the binary operator, from right to left.
 649 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 650 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
 651 {-# INLINE foldr #-}
 652
 653 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
 654 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
 655 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 656         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
 657     where
 658         STRICT3(go)
 659         go z p q | p == q    = return z
 660                  | otherwise = do c  <- peek p
 661                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
 662 {-# INLINE [1] foldr' #-}
 663
 664 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 665 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 666 -- This function is subject to array fusion.
 667 -- An exception will be thrown in the case of an empty ByteString.
 668 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 669 foldl1 f ps
 670     | null ps   = errorEmptyList "foldl1"
 671     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 672 {-# INLINE foldl1 #-}
 673
 674 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 675 -- An exception will be thrown in the case of an empty ByteString.
 676 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 677 foldl1' f ps
 678     | null ps   = errorEmptyList "foldl1'"
 679     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 680 {-# INLINE foldl1' #-}
 681
 682 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 683 -- and thus must be applied to non-empty 'ByteString's
 684 -- An exception will be thrown in the case of an empty ByteString.
 685 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 686 foldr1 f ps
 687     | null ps        = errorEmptyList "foldr1"
 688     | otherwise      = foldr f (last ps) (init ps)
 689 {-# INLINE foldr1 #-}
 690
 691 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
 692 -- accumulator.
 693 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 694 foldr1' f ps
 695     | null ps        = errorEmptyList "foldr1"
 696     | otherwise      = foldr' f (last ps) (init ps)
 697 {-# INLINE [1] foldr1' #-}
 698
 699 -- ---------------------------------------------------------------------
 700 -- Special folds
 701
 702 -- | /O(n)/ Concatenate a list of ByteStrings.
 703 concat :: [ByteString] -> ByteString
 704 concat []     = empty
 705 concat [ps]   = ps
 706 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
 707   where len = P.sum . P.map length $ xs
 708         STRICT2(go)
 709         go []            _   = return ()
 710         go (PS p s l:ps) ptr = do
 711                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 712                 go ps (ptr `plusPtr` l)
 713
 714 -- | Map a function over a 'ByteString' and concatenate the results
 715 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 716 concatMap f = concat . foldr ((:) . f) []
 717
 718 -- foldr (append . f) empty
 719
 720 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 721 -- any element of the 'ByteString' satisfies the predicate.
 722 any :: (Word8 -> Bool) -> ByteString -> Bool
 723 any _ (PS _ _ 0) = False
 724 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 725         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 726     where
 727         STRICT2(go)
 728         go p q | p == q    = return False
 729                | otherwise = do c <- peek p
 730                                 if f c then return True
 731                                        else go (p `plusPtr` 1) q
 732
 733 -- todo fuse
 734
 735 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 736 -- if all elements of the 'ByteString' satisfy the predicate.
 737 all :: (Word8 -> Bool) -> ByteString -> Bool
 738 all _ (PS _ _ 0) = True
 739 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 740         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 741     where
 742         STRICT2(go)
 743         go p q | p == q     = return True  -- end of list
 744                | otherwise  = do c <- peek p
 745                                  if f c
 746                                     then go (p `plusPtr` 1) q
 747                                     else return False
 748
 749 ------------------------------------------------------------------------
 750
 751 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 752 -- This function will fuse.
 753 -- An exception will be thrown in the case of an empty ByteString.
 754 maximum :: ByteString -> Word8
 755 maximum xs@(PS x s l)
 756     | null xs   = errorEmptyList "maximum"
 757     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 758                       c_maximum (p `plusPtr` s) (fromIntegral l)
 759
 760 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 761 -- This function will fuse.
 762 -- An exception will be thrown in the case of an empty ByteString.
 763 minimum :: ByteString -> Word8
 764 minimum xs@(PS x s l)
 765     | null xs   = errorEmptyList "minimum"
 766     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 767                       c_minimum (p `plusPtr` s) (fromIntegral l)
 768
 769 --
 770 -- minimum/maximum/loop fusion. As for length (and other folds), when we
 771 -- see we're applied after a fuseable op, switch from using the C
 772 -- version, to the fuseable version. The result should then avoid
 773 -- allocating a buffer.
 774 --
 775
 776 #if defined(__GLASGOW_HASKELL__)
 777 {-# INLINE [1] minimum #-}
 778 {-# INLINE [1] maximum #-}
 779 #endif
 780
 781 maximumU :: ByteString -> Word8
 782 maximumU = foldl1' max
 783 {-# INLINE maximumU #-}
 784
 785 minimumU :: ByteString -> Word8
 786 minimumU = foldl1' min
 787 {-# INLINE minimumU #-}
 788
 789 {-# RULES
 790
 791 "FPS minimum/loop" forall loop s .
 792   minimum  (loopArr (loopWrapper loop s)) =
 793   minimumU (loopArr (loopWrapper loop s))
 794
 795 "FPS maximum/loop" forall loop s .
 796   maximum  (loopArr (loopWrapper loop s)) =
 797   maximumU (loopArr (loopWrapper loop s))
 798
 799   #-}
 800
 801 ------------------------------------------------------------------------
 802
 803 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 804 -- 'foldl'; it applies a function to each element of a ByteString,
 805 -- passing an accumulating parameter from left to right, and returning a
 806 -- final value of this accumulator together with the new list.
 807 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 808 #if !defined(LOOPU_FUSION)
 809 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
 810 #else
 811 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
 812 #endif
 813 {-# INLINE mapAccumL #-}
 814
 815 -- | The 'mapAccumR' function behaves like a combination of 'map' and
 816 -- 'foldr'; it applies a function to each element of a ByteString,
 817 -- passing an accumulating parameter from right to left, and returning a
 818 -- final value of this accumulator together with the new ByteString.
 819 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 820 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
 821 {-# INLINE mapAccumR #-}
 822
 823 -- | /O(n)/ map Word8 functions, provided with the index at each position
 824 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 825 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
 826 {-# INLINE mapIndexed #-}
 827
 828 -- ---------------------------------------------------------------------
 829 -- Building ByteStrings
 830
 831 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 832 -- reduced values from the left. This function will fuse.
 833 --
 834 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 835 --
 836 -- Note that
 837 --
 838 -- > last (scanl f z xs) == foldl f z xs.
 839 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 840 #if !defined(LOOPU_FUSION)
 841 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
 842 #else
 843 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
 844 #endif
 845
 846     -- n.b. haskell's List scan returns a list one bigger than the
 847     -- input, so we need to snoc here to get some extra space, however,
 848     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
 849 {-# INLINE scanl #-}
 850
 851 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 852 -- This function will fuse.
 853 --
 854 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 855 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 856 scanl1 f ps
 857     | null ps   = empty
 858     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 859 {-# INLINE scanl1 #-}
 860
 861 -- | scanr is the right-to-left dual of scanl.
 862 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 863 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
 864 {-# INLINE scanr #-}
 865
 866 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
 867 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 868 scanr1 f ps
 869     | null ps   = empty
 870     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
 871 {-# INLINE scanr1 #-}
 872
 873 -- ---------------------------------------------------------------------
 874 -- Unfolds and replicates
 875
 876 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 877 -- the value of every element. The following holds:
 878 --
 879 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 880 --
 881 -- This implemenation uses @memset(3)@
 882 replicate :: Int -> Word8 -> ByteString
 883 replicate w c
 884     | w <= 0    = empty
 885     | otherwise = unsafeCreate w $ \ptr ->
 886                       memset ptr c (fromIntegral w) >> return ()
 887
 888 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
 889 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
 890 -- ByteString from a seed value.  The function takes the element and
 891 -- returns 'Nothing' if it is done producing the ByteString or returns
 892 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
 893 -- and @b@ is the seed value for further production.
 894 --
 895 -- Examples:
 896 --
 897 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
 898 -- > == pack [0, 1, 2, 3, 4, 5]
 899 --
 900 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 901 unfoldr f = concat . unfoldChunk 32 64
 902   where unfoldChunk n n' x =
 903           case unfoldrN n f x of
 904             (s, Nothing) -> s : []
 905             (s, Just x') -> s : unfoldChunk n' (n+n') x'
 906
 907 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
 908 -- value.  However, the length of the result is limited by the first
 909 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
 910 -- when the maximum length of the result is known.
 911 --
 912 -- The following equation relates 'unfoldrN' and 'unfoldr':
 913 --
 914 -- > unfoldrN n f s == take n (unfoldr f s)
 915 --
 916 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 917 unfoldrN i f x0
 918     | i < 0     = (empty, Just x0)
 919     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
 920   where STRICT3(go)
 921         go p x n =
 922           case f x of
 923             Nothing      -> return (0, n, Nothing)
 924             Just (w,x')
 925              | n == i    -> return (0, n, Just x)
 926              | otherwise -> do poke p w
 927                                go (p `plusPtr` 1) x' (n+1)
 928
 929 -- ---------------------------------------------------------------------
 930 -- Substrings
 931
 932 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 933 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 934 take :: Int -> ByteString -> ByteString
 935 take n ps@(PS x s l)
 936     | n <= 0    = empty
 937     | n >= l    = ps
 938     | otherwise = PS x s n
 939 {-# INLINE take #-}
 940
 941 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 942 -- elements, or @[]@ if @n > 'length' xs@.
 943 drop  :: Int -> ByteString -> ByteString
 944 drop n ps@(PS x s l)
 945     | n <= 0    = ps
 946     | n >= l    = empty
 947     | otherwise = PS x (s+n) (l-n)
 948 {-# INLINE drop #-}
 949
 950 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 951 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 952 splitAt n ps@(PS x s l)
 953     | n <= 0    = (empty, ps)
 954     | n >= l    = (ps, empty)
 955     | otherwise = (PS x s n, PS x (s+n) (l-n))
 956 {-# INLINE splitAt #-}
 957
 958 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 959 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 960 -- satisfy @p@.
 961 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 962 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 963 {-# INLINE takeWhile #-}
 964
 965 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 966 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 967 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
 968 {-# INLINE dropWhile #-}
 969
 970 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 971 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 972 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
 973 {-# INLINE [1] break #-}
 974
 975 {-# RULES
 976 "FPS specialise break (x==)" forall x.
 977     break ((==) x) = breakByte x
 978   #-}
 979
 980 #if __GLASGOW_HASKELL__ >= 605
 981 {-# RULES
 982 "FPS specialise break (==x)" forall x.
 983     break (==x) = breakByte x
 984   #-}
 985 #endif
 986
 987 -- | 'breakByte' breaks its ByteString argument at the first occurence
 988 -- of the specified byte. It is more efficient than 'break' as it is
 989 -- implemented with @memchr(3)@. I.e.
 990 --
 991 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 992 --
 993 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 994 breakByte c p = case elemIndex c p of
 995     Nothing -> (p,empty)
 996     Just n  -> (unsafeTake n p, unsafeDrop n p)
 997 {-# INLINE breakByte #-}
 998
 999 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1000 --
1001 -- breakEnd p == spanEnd (not.p)
1002 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1003 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
1004
1005 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1006 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1007 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1008 span p ps = break (not . p) ps
1009 {-# INLINE [1] span #-}
1010
1011 -- | 'spanByte' breaks its ByteString argument at the first
1012 -- occurence of a byte other than its argument. It is more efficient
1013 -- than 'span (==)'
1014 --
1015 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
1016 --
1017 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1018 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1019     go (p `plusPtr` s) 0
1020   where
1021     STRICT2(go)
1022     go p i | i >= l    = return (ps, empty)
1023            | otherwise = do c' <- peekByteOff p i
1024                             if c /= c'
1025                                 then return (unsafeTake i ps, unsafeDrop i ps)
1026                                 else go p (i+1)
1027 {-# INLINE spanByte #-}
1028
1029 {-# RULES
1030 "FPS specialise span (x==)" forall x.
1031     span ((==) x) = spanByte x
1032   #-}
1033
1034 #if __GLASGOW_HASKELL__ >= 605
1035 {-# RULES
1036 "FPS specialise span (==x)" forall x.
1037     span (==x) = spanByte x
1038   #-}
1039 #endif
1040
1041 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1042 -- We have
1043 --
1044 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1045 --
1046 -- and
1047 --
1048 -- > spanEnd (not . isSpace) ps
1049 -- >    ==
1050 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1051 --
1052 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1053 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1054
1055 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1056 -- separators, where the predicate returns True for a separator element.
1057 -- The resulting components do not contain the separators.  Two adjacent
1058 -- separators result in an empty component in the output.  eg.
1059 --
1060 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1061 -- > splitWith (=='a') []        == []
1062 --
1063 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1064
1065 #if defined(__GLASGOW_HASKELL__)
1066 splitWith _pred (PS _  _   0) = []
1067 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1068   where pred# c# = pred_ (W8# c#)
1069
1070         STRICT4(splitWith0)
1071         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1072             splitLoop pred' p 0 off' len' fp'
1073
1074         splitLoop :: (Word# -> Bool)
1075                   -> Ptr Word8
1076                   -> Int -> Int -> Int
1077                   -> ForeignPtr Word8
1078                   -> IO [ByteString]
1079
1080         splitLoop pred' p idx' off' len' fp'
1081             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1082             | idx' >= len'  = return [PS fp' off' idx']
1083             | otherwise = do
1084                 w <- peekElemOff p (off'+idx')
1085                 if pred' (case w of W8# w# -> w#)
1086                    then return (PS fp' off' idx' :
1087                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1088                    else splitLoop pred' p (idx'+1) off' len' fp'
1089 {-# INLINE splitWith #-}
1090
1091 #else
1092 splitWith _ (PS _ _ 0) = []
1093 splitWith p ps = loop p ps
1094     where
1095         STRICT2(loop)
1096         loop q qs = if null rest then [chunk]
1097                                  else chunk : loop q (unsafeTail rest)
1098             where (chunk,rest) = break q qs
1099 #endif
1100
1101 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1102 -- argument, consuming the delimiter. I.e.
1103 --
1104 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1105 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1106 -- > split 'x'  "x"          == ["",""]
1107 --
1108 -- and
1109 --
1110 -- > join [c] . split c == id
1111 -- > split == splitWith . (==)
1112 --
1113 -- As for all splitting functions in this library, this function does
1114 -- not copy the substrings, it just constructs new 'ByteStrings' that
1115 -- are slices of the original.
1116 --
1117 split :: Word8 -> ByteString -> [ByteString]
1118 split _ (PS _ _ 0) = []
1119 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1120     let ptr = p `plusPtr` s
1121
1122         STRICT1(loop)
1123         loop n =
1124             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1125                                            w (fromIntegral (l-n))
1126             in if q == nullPtr
1127                 then [PS x (s+n) (l-n)]
1128                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1129
1130     return (loop 0)
1131 {-# INLINE split #-}
1132
1133 {-
1134 -- slower. but stays inside Haskell.
1135 split _ (PS _  _   0) = []
1136 split (W8# w#) (PS fp off len) = splitWith' off len fp
1137     where
1138         splitWith' off' len' fp' = withPtr fp $ \p ->
1139             splitLoop p 0 off' len' fp'
1140
1141         splitLoop :: Ptr Word8
1142                   -> Int -> Int -> Int
1143                   -> ForeignPtr Word8
1144                   -> IO [ByteString]
1145
1146         STRICT5(splitLoop)
1147         splitLoop p idx' off' len' fp'
1148             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1149             | idx' >= len'  = return [PS fp' off' idx']
1150             | otherwise = do
1151                 (W8# x#) <- peekElemOff p (off'+idx')
1152                 if word2Int# w# ==# word2Int# x#
1153                    then return (PS fp' off' idx' :
1154                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1155                    else splitLoop p (idx'+1) off' len' fp'
1156 -}
1157
1158 {-
1159 -- | Like 'splitWith', except that sequences of adjacent separators are
1160 -- treated as a single separator. eg.
1161 --
1162 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1163 --
1164 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1165 tokens f = P.filter (not.null) . splitWith f
1166 {-# INLINE tokens #-}
1167 -}
1168
1169 -- | The 'group' function takes a ByteString and returns a list of
1170 -- ByteStrings such that the concatenation of the result is equal to the
1171 -- argument.  Moreover, each sublist in the result contains only equal
1172 -- elements.  For example,
1173 --
1174 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1175 --
1176 -- It is a special case of 'groupBy', which allows the programmer to
1177 -- supply their own equality test. It is about 40% faster than
1178 -- /groupBy (==)/
1179 group :: ByteString -> [ByteString]
1180 group xs
1181     | null xs   = []
1182     | otherwise = ys : group zs
1183     where
1184         (ys, zs) = spanByte (unsafeHead xs) xs
1185
1186 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1187 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1188 groupBy k xs
1189     | null xs   = []
1190     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1191     where
1192         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1193
1194 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1195 -- 'ByteString's and concatenates the list after interspersing the first
1196 -- argument between each element of the list.
1197 join :: ByteString -> [ByteString] -> ByteString
1198 join s = concat . (List.intersperse s)
1199 {-# INLINE [1] join #-}
1200
1201 {-# RULES
1202 "FPS specialise join c -> joinByte" forall c s1 s2 .
1203     join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1204   #-}
1205
1206 --
1207 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1208 -- with a char. Around 4 times faster than the generalised join.
1209 --
1210 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1211 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1212     withForeignPtr ffp $ \fp ->
1213     withForeignPtr fgp $ \gp -> do
1214         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1215         poke (ptr `plusPtr` l) c
1216         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1217     where
1218       len = length f + length g + 1
1219 {-# INLINE joinWithByte #-}
1220
1221 -- ---------------------------------------------------------------------
1222 -- Indexing ByteStrings
1223
1224 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1225 index :: ByteString -> Int -> Word8
1226 index ps n
1227     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1228     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1229                                          ++ ", length = " ++ show (length ps))
1230     | otherwise      = ps `unsafeIndex` n
1231 {-# INLINE index #-}
1232
1233 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1234 -- element in the given 'ByteString' which is equal to the query
1235 -- element, or 'Nothing' if there is no such element.
1236 -- This implementation uses memchr(3).
1237 elemIndex :: Word8 -> ByteString -> Maybe Int
1238 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1239     let p' = p `plusPtr` s
1240     q <- memchr p' c (fromIntegral l)
1241     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1242 {-# INLINE elemIndex #-}
1243
1244 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1245 -- element in the given 'ByteString' which is equal to the query
1246 -- element, or 'Nothing' if there is no such element. The following
1247 -- holds:
1248 --
1249 -- > elemIndexEnd c xs ==
1250 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1251 --
1252 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1253 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1254     go (p `plusPtr` s) (l-1)
1255   where
1256     STRICT2(go)
1257     go p i | i < 0     = return Nothing
1258            | otherwise = do ch' <- peekByteOff p i
1259                             if ch == ch'
1260                                 then return $ Just i
1261                                 else go p (i-1)
1262 {-# INLINE elemIndexEnd #-}
1263
1264 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1265 -- the indices of all elements equal to the query element, in ascending order.
1266 -- This implementation uses memchr(3).
1267 elemIndices :: Word8 -> ByteString -> [Int]
1268 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1269     let ptr = p `plusPtr` s
1270
1271         STRICT1(loop)
1272         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1273                                                 w (fromIntegral (l - n))
1274                  in if q == nullPtr
1275                         then []
1276                         else let i = q `minusPtr` ptr
1277                              in i : loop (i+1)
1278     return $! loop 0
1279 {-# INLINE elemIndices #-}
1280
1281 {-
1282 -- much slower
1283 elemIndices :: Word8 -> ByteString -> [Int]
1284 elemIndices c ps = loop 0 ps
1285    where STRICT2(loop)
1286          loop _ ps' | null ps'            = []
1287          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1288                     | otherwise           = loop (n+1) (unsafeTail ps')
1289 -}
1290
1291 -- | count returns the number of times its argument appears in the ByteString
1292 --
1293 -- > count = length . elemIndices
1294 --
1295 -- But more efficiently than using length on the intermediate list.
1296 count :: Word8 -> ByteString -> Int
1297 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1298     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1299 {-# INLINE count #-}
1300
1301 {-
1302 --
1303 -- around 30% slower
1304 --
1305 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1306      go (p `plusPtr` s) (fromIntegral m) 0
1307     where
1308         go :: Ptr Word8 -> CSize -> Int -> IO Int
1309         STRICT3(go)
1310         go p l i = do
1311             q <- memchr p w l
1312             if q == nullPtr
1313                 then return i
1314                 else do let k = fromIntegral $ q `minusPtr` p
1315                         go (q `plusPtr` 1) (l-k-1) (i+1)
1316 -}
1317
1318 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1319 -- returns the index of the first element in the ByteString
1320 -- satisfying the predicate.
1321 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1322 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1323   where
1324     STRICT2(go)
1325     go ptr n | n >= l    = return Nothing
1326              | otherwise = do w <- peek ptr
1327                               if k w
1328                                 then return (Just n)
1329                                 else go (ptr `plusPtr` 1) (n+1)
1330 {-# INLINE findIndex #-}
1331
1332 -- | The 'findIndices' function extends 'findIndex', by returning the
1333 -- indices of all elements satisfying the predicate, in ascending order.
1334 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1335 findIndices p ps = loop 0 ps
1336    where
1337      STRICT2(loop)
1338      loop n qs | null qs           = []
1339                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1340                | otherwise         =     loop (n+1) (unsafeTail qs)
1341
1342 -- ---------------------------------------------------------------------
1343 -- Searching ByteStrings
1344
1345 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1346 elem :: Word8 -> ByteString -> Bool
1347 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1348 {-# INLINE elem #-}
1349
1350 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1351 notElem :: Word8 -> ByteString -> Bool
1352 notElem c ps = not (elem c ps)
1353 {-# INLINE notElem #-}
1354
1355 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1356 -- returns a ByteString containing those characters that satisfy the
1357 -- predicate. This function is subject to array fusion.
1358 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1359 #if defined(LOOPU_FUSION)
1360 filter p  = loopArr . loopU (filterEFL p) NoAcc
1361 #elif defined(LOOPUP_FUSION)
1362 filter p  = loopArr . loopUp (filterEFL p) NoAcc
1363 #elif defined(LOOPNOACC_FUSION)
1364 filter p  = loopArr . loopNoAcc (filterEFL p)
1365 #else
1366 filter f = loopArr . loopFilter f
1367 #endif
1368 {-# INLINE filter #-}
1369
1370 {-
1371 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1372 -- around 2x faster for some one-shot applications.
1373 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1374 filter' k ps@(PS x s l)
1375     | null ps   = ps
1376     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1377         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1378         return $! t `minusPtr` p -- actual length
1379     where
1380         STRICT3(go)
1381         go f t end | f == end  = return t
1382                    | otherwise = do
1383                         w <- peek f
1384                         if k w
1385                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1386                             else             go (f `plusPtr` 1) t               end
1387 {-# INLINE filter' #-}
1388 -}
1389
1390 --
1391 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1392 -- case of filtering a single byte. It is more efficient to use
1393 -- /filterByte/ in this case.
1394 --
1395 -- > filterByte == filter . (==)
1396 --
1397 -- filterByte is around 10x faster, and uses much less space, than its
1398 -- filter equivalent
1399 filterByte :: Word8 -> ByteString -> ByteString
1400 filterByte w ps = replicate (count w ps) w
1401 {-# INLINE filterByte #-}
1402
1403 {-# RULES
1404   "FPS specialise filter (== x)" forall x.
1405       filter ((==) x) = filterByte x
1406   #-}
1407
1408 #if __GLASGOW_HASKELL__ >= 605
1409 {-# RULES
1410   "FPS specialise filter (== x)" forall x.
1411      filter (== x) = filterByte x
1412   #-}
1413 #endif
1414
1415 --
1416 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1417 -- case of filtering a single byte out of a list. It is more efficient
1418 -- to use /filterNotByte/ in this case.
1419 --
1420 -- > filterNotByte == filter . (/=)
1421 --
1422 -- filterNotByte is around 2x faster than its filter equivalent.
1423 filterNotByte :: Word8 -> ByteString -> ByteString
1424 filterNotByte w = filter (/= w)
1425 {-# INLINE filterNotByte #-}
1426
1427 {-# RULES
1428 "FPS specialise filter (x /=)" forall x.
1429     filter ((/=) x) = filterNotByte x
1430   #-}
1431
1432 #if __GLASGOW_HASKELL__ >= 605
1433 {-# RULES
1434 "FPS specialise filter (/= x)" forall x.
1435     filter (/= x) = filterNotByte x
1436   #-}
1437 #endif
1438
1439 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1440 -- and returns the first element in matching the predicate, or 'Nothing'
1441 -- if there is no such element.
1442 --
1443 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1444 --
1445 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1446 find f p = case findIndex f p of
1447                     Just n -> Just (p `unsafeIndex` n)
1448                     _      -> Nothing
1449 {-# INLINE find #-}
1450
1451 {-
1452 --
1453 -- fuseable, but we don't want to walk the whole array.
1454 --
1455 find k = foldl findEFL Nothing
1456     where findEFL a@(Just _) _ = a
1457           findEFL _          c | k c       = Just c
1458                                | otherwise = Nothing
1459 -}
1460
1461 -- ---------------------------------------------------------------------
1462 -- Searching for substrings
1463
1464 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1465 -- iff the first is a prefix of the second.
1466 isPrefixOf :: ByteString -> ByteString -> Bool
1467 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1468     | l1 == 0   = True
1469     | l2 < l1   = False
1470     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1471         withForeignPtr x2 $ \p2 -> do
1472             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1473             return $! i == 0
1474
1475 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1476 -- iff the first is a suffix of the second.
1477 --
1478 -- The following holds:
1479 --
1480 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1481 --
1482 -- However, the real implemenation uses memcmp to compare the end of the
1483 -- string only, with no reverse required..
1484 isSuffixOf :: ByteString -> ByteString -> Bool
1485 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1486     | l1 == 0   = True
1487     | l2 < l1   = False
1488     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1489         withForeignPtr x2 $ \p2 -> do
1490             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1491             return $! i == 0
1492
1493 -- | Check whether one string is a substring of another. @isSubstringOf
1494 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1495 isSubstringOf :: ByteString -- ^ String to search for.
1496               -> ByteString -- ^ String to search in.
1497               -> Bool
1498 isSubstringOf p s = not $ P.null $ findSubstrings p s
1499
1500 -- | Get the first index of a substring in another string,
1501 --   or 'Nothing' if the string is not found.
1502 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1503 findSubstring :: ByteString -- ^ String to search for.
1504               -> ByteString -- ^ String to seach in.
1505               -> Maybe Int
1506 findSubstring = (listToMaybe .) . findSubstrings
1507
1508 -- | Find the indexes of all (possibly overlapping) occurances of a
1509 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1510 -- string matching algorithm.
1511 findSubstrings :: ByteString -- ^ String to search for.
1512                -> ByteString -- ^ String to seach in.
1513                -> [Int]
1514
1515 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1516   where
1517       patc x = pat `unsafeIndex` x
1518       strc x = str `unsafeIndex` x
1519
1520       -- maybe we should make kmpNext a UArray before using it in search?
1521       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1522       kmpNextL p _ | null p = []
1523       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1524                          ps = unsafeTail p
1525                          x = if not (null ps) && unsafeHead ps == patc j'
1526                                 then kmpNext Array.! j' else j'
1527                         in x:kmpNextL ps j'
1528       search i j = match ++ rest -- i: position in string, j: position in pattern
1529         where match = if j == m then [(i - j)] else []
1530               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1531       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1532                | otherwise = j
1533
1534 -- ---------------------------------------------------------------------
1535 -- Zipping
1536
1537 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1538 -- corresponding pairs of bytes. If one input ByteString is short,
1539 -- excess elements of the longer ByteString are discarded. This is
1540 -- equivalent to a pair of 'unpack' operations.
1541 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1542 zip ps qs
1543     | null ps || null qs = []
1544     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1545
1546 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1547 -- the first argument, instead of a tupling function.  For example,
1548 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1549 -- corresponding sums.
1550 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1551 zipWith f ps qs
1552     | null ps || null qs = []
1553     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1554 #if defined(__GLASGOW_HASKELL__)
1555 {-# INLINE [1] zipWith #-}
1556 #endif
1557
1558 --
1559 -- | A specialised version of zipWith for the common case of a
1560 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1561 -- are used to automatically covert zipWith into zipWith' when a pack is
1562 -- performed on the result of zipWith, but we also export it for
1563 -- convenience.
1564 --
1565 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1566 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1567     withForeignPtr fp $ \a ->
1568     withForeignPtr fq $ \b ->
1569     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1570   where
1571     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1572     STRICT4(zipWith_)
1573     zipWith_ n p1 p2 r
1574        | n >= len = return ()
1575        | otherwise = do
1576             x <- peekByteOff p1 n
1577             y <- peekByteOff p2 n
1578             pokeByteOff r n (f x y)
1579             zipWith_ (n+1) p1 p2 r
1580
1581     len = min l m
1582 {-# INLINE zipWith' #-}
1583
1584 {-# RULES
1585
1586 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1587     zipWith f p q = unpack (zipWith' f p q)
1588
1589   #-}
1590
1591 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1592 -- ByteStrings. Note that this performs two 'pack' operations.
1593 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1594 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1595 {-# INLINE unzip #-}
1596
1597 -- ---------------------------------------------------------------------
1598 -- Special lists
1599
1600 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1601 inits :: ByteString -> [ByteString]
1602 inits (PS x s l) = [PS x s n | n <- [0..l]]
1603
1604 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1605 tails :: ByteString -> [ByteString]
1606 tails p | null p    = [empty]
1607         | otherwise = p : tails (unsafeTail p)
1608
1609 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1610
1611 -- ---------------------------------------------------------------------
1612 -- ** Ordered 'ByteString's
1613
1614 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1615 sort :: ByteString -> ByteString
1616 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1617
1618     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1619     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1620
1621     let STRICT2(go)
1622         go 256 _   = return ()
1623         go i   ptr = do n <- peekElemOff arr i
1624                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1625                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1626     go 0 p
1627
1628 {-
1629 sort :: ByteString -> ByteString
1630 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1631         memcpy p (f `plusPtr` s) l
1632         c_qsort p l -- inplace
1633 -}
1634
1635 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1636 --
1637 -- Try some linear sorts: radix, counting
1638 -- Or mergesort.
1639 --
1640 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1641 -- sortBy f ps = undefined
1642
1643 -- ---------------------------------------------------------------------
1644 -- Low level constructors
1645
1646 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1647 -- finalizer associated to it. The ByteString length is calculated using
1648 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1649 packCString :: CString -> ByteString
1650 packCString cstr = unsafePerformIO $ do
1651     fp <- newForeignPtr_ (castPtr cstr)
1652     l <- c_strlen cstr
1653     return $! PS fp 0 (fromIntegral l)
1654
1655 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1656 -- have /no/ finalizer associated with it. This operation has /O(1)/
1657 -- complexity as we already know the final size, so no /strlen(3)/ is
1658 -- required.
1659 packCStringLen :: CStringLen -> ByteString
1660 packCStringLen (ptr,len) = unsafePerformIO $ do
1661     fp <- newForeignPtr_ (castPtr ptr)
1662     return $! PS fp 0 (fromIntegral len)
1663
1664 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1665 -- have a @free(3)@ finalizer associated to it.
1666 packMallocCString :: CString -> ByteString
1667 packMallocCString cstr = unsafePerformIO $ do
1668     fp <- newForeignFreePtr (castPtr cstr)
1669     len <- c_strlen cstr
1670     return $! PS fp 0 (fromIntegral len)
1671
1672 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1673 -- null-terminated @CString@.  The @CString@ will be freed
1674 -- automatically. This is a memcpy(3).
1675 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1676 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1677     where alloc = withForeignPtr ps $ \p -> do
1678             buf <- c_malloc (fromIntegral l+1)
1679             memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1680             poke (buf `plusPtr` l) (0::Word8) -- n.b.
1681             return (castPtr buf)
1682
1683 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1684 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1685 useAsCStringLen = unsafeUseAsCStringLen
1686
1687 --
1688 -- why were we doing this?
1689 --
1690 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1691 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1692 --     where
1693 --       alloc = withForeignPtr ps $ \p -> do
1694 --                 buf <- c_malloc (fromIntegral l+1)
1695 --                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1696 --                 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1697 --                 return $! (castPtr buf, l)
1698 --
1699
1700 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1701 --   This is mainly useful to allow the rest of the data pointed
1702 --   to by the 'ByteString' to be garbage collected, for example
1703 --   if a large string has been read in, and only a small part of it
1704 --   is needed in the rest of the program.
1705 copy :: ByteString -> ByteString
1706 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1707     memcpy p (f `plusPtr` s) (fromIntegral l)
1708
1709 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1710 -- CString is going to be deallocated from C land.
1711 copyCString :: CString -> IO ByteString
1712 copyCString cstr = do
1713     len <- c_strlen cstr
1714     copyCStringLen (cstr, fromIntegral len)
1715
1716 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1717 copyCStringLen :: CStringLen -> IO ByteString
1718 copyCStringLen (cstr, len) = create len $ \p ->
1719     memcpy p (castPtr cstr) (fromIntegral len)
1720
1721 -- ---------------------------------------------------------------------
1722 -- line IO
1723
1724 -- | Read a line from stdin.
1725 getLine :: IO ByteString
1726 getLine = hGetLine stdin
1727
1728 {-
1729 -- | Lazily construct a list of lines of ByteStrings. This will be much
1730 -- better on memory consumption than using 'hGetContents >>= lines'
1731 -- If you're considering this, a better choice might be to use
1732 -- Data.ByteString.Lazy
1733 hGetLines :: Handle -> IO [ByteString]
1734 hGetLines h = go
1735     where
1736         go = unsafeInterleaveIO $ do
1737                 e <- hIsEOF h
1738                 if e
1739                   then return []
1740                   else do
1741                 x  <- hGetLine h
1742                 xs <- go
1743                 return (x:xs)
1744 -}
1745
1746 -- | Read a line from a handle
1747
1748 hGetLine :: Handle -> IO ByteString
1749 #if !defined(__GLASGOW_HASKELL__)
1750 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
1751 #else
1752 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1753     case haBufferMode handle_ of
1754        NoBuffering -> error "no buffering"
1755        _other      -> hGetLineBuffered handle_
1756
1757  where
1758     hGetLineBuffered handle_ = do
1759         let ref = haBuffer handle_
1760         buf <- readIORef ref
1761         hGetLineBufferedLoop handle_ ref buf 0 []
1762
1763     hGetLineBufferedLoop handle_ ref
1764             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1765         len `seq` do
1766         off <- findEOL r w raw
1767         let new_len = len + off - r
1768         xs <- mkPS raw r off
1769
1770       -- if eol == True, then off is the offset of the '\n'
1771       -- otherwise off == w and the buffer is now empty.
1772         if off /= w
1773             then do if (w == off + 1)
1774                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1775                             else writeIORef ref buf{ bufRPtr = off + 1 }
1776                     mkBigPS new_len (xs:xss)
1777             else do
1778                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1779                                     buf{ bufWPtr=0, bufRPtr=0 }
1780                  case maybe_buf of
1781                     -- Nothing indicates we caught an EOF, and we may have a
1782                     -- partial line to return.
1783                     Nothing -> do
1784                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1785                          if new_len > 0
1786                             then mkBigPS new_len (xs:xss)
1787                             else ioe_EOF
1788                     Just new_buf ->
1789                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1790
1791     -- find the end-of-line character, if there is one
1792     findEOL r w raw
1793         | r == w = return w
1794         | otherwise =  do
1795             (c,r') <- readCharFromBuffer raw r
1796             if c == '\n'
1797                 then return r -- NB. not r': don't include the '\n'
1798                 else findEOL r' w raw
1799
1800     maybeFillReadBuffer fd is_line is_stream buf = catch
1801         (do buf' <- fillReadBuffer fd is_line is_stream buf
1802             return (Just buf'))
1803         (\e -> if isEOFError e then return Nothing else ioError e)
1804
1805 -- TODO, rewrite to use normal memcpy
1806 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1807 mkPS buf start end =
1808     let len = end - start
1809     in create len $ \p -> do
1810         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1811         return ()
1812
1813 mkBigPS :: Int -> [ByteString] -> IO ByteString
1814 mkBigPS _ [ps] = return ps
1815 mkBigPS _ pss = return $! concat (P.reverse pss)
1816
1817 #endif
1818
1819 -- ---------------------------------------------------------------------
1820 -- Block IO
1821
1822 -- | Outputs a 'ByteString' to the specified 'Handle'.
1823 hPut :: Handle -> ByteString -> IO ()
1824 hPut _ (PS _  _ 0) = return ()
1825 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1826
1827 -- | A synonym for @hPut@, for compatibility
1828 hPutStr :: Handle -> ByteString -> IO ()
1829 hPutStr = hPut
1830
1831 -- | Write a ByteString to a handle, appending a newline byte
1832 hPutStrLn :: Handle -> ByteString -> IO ()
1833 hPutStrLn h ps
1834     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1835     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1836
1837 -- | Write a ByteString to stdout
1838 putStr :: ByteString -> IO ()
1839 putStr = hPut stdout
1840
1841 -- | Write a ByteString to stdout, appending a newline byte
1842 putStrLn :: ByteString -> IO ()
1843 putStrLn = hPutStrLn stdout
1844
1845 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1846 -- is far more efficient than reading the characters into a 'String'
1847 -- and then using 'pack'.
1848 hGet :: Handle -> Int -> IO ByteString
1849 hGet _ 0 = return empty
1850 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1851
1852 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1853 -- waiting for data to become available, instead it returns only whatever data
1854 -- is available.
1855 hGetNonBlocking :: Handle -> Int -> IO ByteString
1856 #if defined(__GLASGOW_HASKELL__)
1857 hGetNonBlocking _ 0 = return empty
1858 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1859 #else
1860 hGetNonBlocking = hGet
1861 #endif
1862
1863 -- | Read entire handle contents into a 'ByteString'.
1864 -- This function reads chunks at a time, doubling the chunksize on each
1865 -- read. The final buffer is then realloced to the appropriate size. For
1866 -- files > half of available memory, this may lead to memory exhaustion.
1867 -- Consider using 'readFile' in this case.
1868 --
1869 -- As with 'hGet', the string representation in the file is assumed to
1870 -- be ISO-8859-1.
1871 --
1872 hGetContents :: Handle -> IO ByteString
1873 hGetContents h = do
1874     let start_size = 1024
1875     p <- mallocArray start_size
1876     i <- hGetBuf h p start_size
1877     if i < start_size
1878         then do p' <- reallocArray p i
1879                 fp <- newForeignFreePtr p'
1880                 return $! PS fp 0 i
1881         else f p start_size
1882     where
1883         f p s = do
1884             let s' = 2 * s
1885             p' <- reallocArray p s'
1886             i  <- hGetBuf h (p' `plusPtr` s) s
1887             if i < s
1888                 then do let i' = s + i
1889                         p'' <- reallocArray p' i'
1890                         fp  <- newForeignFreePtr p''
1891                         return $! PS fp 0 i'
1892                 else f p' s'
1893
1894 -- | getContents. Equivalent to hGetContents stdin
1895 getContents :: IO ByteString
1896 getContents = hGetContents stdin
1897
1898 -- | The interact function takes a function of type @ByteString -> ByteString@
1899 -- as its argument. The entire input from the standard input device is passed
1900 -- to this function as its argument, and the resulting string is output on the
1901 -- standard output device. It's great for writing one line programs!
1902 interact :: (ByteString -> ByteString) -> IO ()
1903 interact transformer = putStr . transformer =<< getContents
1904
1905 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1906 -- efficient than reading the characters into a 'String' and then using
1907 -- 'pack'.  It also may be more efficient than opening the file and
1908 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1909 -- for 'text mode' use the Char8 version of this function.
1910 readFile :: FilePath -> IO ByteString
1911 readFile f = bracket (openBinaryFile f ReadMode) hClose
1912     (\h -> hFileSize h >>= hGet h . fromIntegral)
1913
1914 -- | Write a 'ByteString' to a file.
1915 writeFile :: FilePath -> ByteString -> IO ()
1916 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1917     (\h -> hPut h txt)
1918
1919 -- | Append a 'ByteString' to a file.
1920 appendFile :: FilePath -> ByteString -> IO ()
1921 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1922     (\h -> hPut h txt)
1923
1924 {-
1925 --
1926 -- Disable until we can move it into a portable .hsc file
1927 --
1928
1929 -- | Like readFile, this reads an entire file directly into a
1930 -- 'ByteString', but it is even more efficient.  It involves directly
1931 -- mapping the file to memory.  This has the advantage that the contents
1932 -- of the file never need to be copied.  Also, under memory pressure the
1933 -- page may simply be discarded, while in the case of readFile it would
1934 -- need to be written to swap.  If you read many small files, mmapFile
1935 -- will be less memory-efficient than readFile, since each mmapFile
1936 -- takes up a separate page of memory.  Also, you can run into bus
1937 -- errors if the file is modified.  As with 'readFile', the string
1938 -- representation in the file is assumed to be ISO-8859-1.
1939 --
1940 -- On systems without mmap, this is the same as a readFile.
1941 --
1942 mmapFile :: FilePath -> IO ByteString
1943 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1944
1945 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1946 mmap f = do
1947     h <- openBinaryFile f ReadMode
1948     l <- fromIntegral `fmap` hFileSize h
1949     -- Don't bother mmaping small files because each mmapped file takes up
1950     -- at least one full VM block.
1951     if l < mmap_limit
1952        then do thefp <- mallocByteString l
1953                withForeignPtr thefp $ \p-> hGetBuf h p l
1954                hClose h
1955                return (thefp, l)
1956        else do
1957                -- unix only :(
1958                fd <- fromIntegral `fmap` handleToFd h
1959                p  <- my_mmap l fd
1960                fp <- if p == nullPtr
1961                      then do thefp <- mallocByteString l
1962                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1963                              return thefp
1964                      else do
1965                           -- The munmap leads to crashes on OpenBSD.
1966                           -- maybe there's a use after unmap in there somewhere?
1967                           -- Bulat suggests adding the hClose to the
1968                           -- finalizer, excellent idea.
1969 #if !defined(__OpenBSD__)
1970                              let unmap = c_munmap p l >> return ()
1971 #else
1972                              let unmap = return ()
1973 #endif
1974                              fp <- newForeignPtr p unmap
1975                              return fp
1976                c_close fd
1977                hClose h
1978                return (fp, l)
1979     where mmap_limit = 16*1024
1980 -}
1981
1982 -- ---------------------------------------------------------------------
1983 -- Internal utilities
1984
1985 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1986 -- of the string if no element is found, rather than Nothing.
1987 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1988 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1989   where
1990     STRICT2(go)
1991     go ptr n | n >= l    = return l
1992              | otherwise = do w <- peek ptr
1993                               if k w
1994                                 then return n
1995                                 else go (ptr `plusPtr` 1) (n+1)
1996 {-# INLINE findIndexOrEnd #-}
1997
1998 -- | Perform an operation with a temporary ByteString
1999 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2000 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2001 {-# INLINE withPtr #-}
2002
2003 -- Common up near identical calls to `error' to reduce the number
2004 -- constant strings created when compiled:
2005 errorEmptyList :: String -> a
2006 errorEmptyList fun = moduleError fun "empty ByteString"
2007 {-# NOINLINE errorEmptyList #-}
2008
2009 moduleError :: String -> String -> a
2010 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2011 {-# NOINLINE moduleError #-}
2012
2013 -- Find from the end of the string using predicate
2014 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2015 STRICT2(findFromEndUntil)
2016 findFromEndUntil f ps@(PS x s l) =
2017     if null ps then 0
2018     else if f (last ps) then l
2019          else findFromEndUntil f (PS x s (l-1))
2020
2021 {-# INLINE newForeignFreePtr #-}
2022 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2023 newForeignFreePtr p = newForeignPtr c_free_finalizer p