Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with "Prelude" functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         singleton,              -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49
  50         -- * Basic interface
  51         cons,                   -- :: Word8 -> ByteString -> ByteString
  52         snoc,                   -- :: ByteString -> Word8 -> ByteString
  53         append,                 -- :: ByteString -> ByteString -> ByteString
  54         head,                   -- :: ByteString -> Word8
  55         last,                   -- :: ByteString -> Word8
  56         tail,                   -- :: ByteString -> ByteString
  57         init,                   -- :: ByteString -> ByteString
  58         null,                   -- :: ByteString -> Bool
  59         length,                 -- :: ByteString -> Int
  60
  61         -- * Transformating ByteStrings
  62         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  63         reverse,                -- :: ByteString -> ByteString
  64         intersperse,            -- :: Word8 -> ByteString -> ByteString
  65         transpose,              -- :: [ByteString] -> [ByteString]
  66
  67         -- * Reducing 'ByteString's (folds)
  68         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  69         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  70         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  71         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  72
  73         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  74         foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  75         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  76         foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77
  78         -- ** Special folds
  79         concat,                 -- :: [ByteString] -> ByteString
  80         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  81         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  82         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  83         maximum,                -- :: ByteString -> Word8
  84         minimum,                -- :: ByteString -> Word8
  85
  86         -- * Building ByteStrings
  87         -- ** Scans
  88         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  89         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  90         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  91         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  92
  93         -- ** Accumulating maps
  94         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  95         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  96         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  97
  98         -- ** Unfolding ByteStrings
  99         replicate,              -- :: Int -> Word8 -> ByteString
 100         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 101         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 102
 103         -- * Substrings
 104
 105         -- ** Breaking strings
 106         take,                   -- :: Int -> ByteString -> ByteString
 107         drop,                   -- :: Int -> ByteString -> ByteString
 108         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 109         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 110         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 111         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 113         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 114         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117         inits,                  -- :: ByteString -> [ByteString]
 118         tails,                  -- :: ByteString -> [ByteString]
 119
 120         -- ** Breaking into many substrings
 121         split,                  -- :: Word8 -> ByteString -> [ByteString]
 122         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 123
 124         -- ** Joining strings
 125         join,                   -- :: ByteString -> [ByteString] -> ByteString
 126
 127         -- * Predicates
 128         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 129         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 130
 131         -- ** Search for arbitrary substrings
 132         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 133         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 134         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 135
 136         -- * Searching ByteStrings
 137
 138         -- ** Searching by equality
 139         -- | These functions use memchr(3) to efficiently search the ByteString
 140         elem,                   -- :: Word8 -> ByteString -> Bool
 141         notElem,                -- :: Word8 -> ByteString -> Bool
 142
 143         -- ** Searching with a predicate
 144         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 146 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 147
 148         -- * Indexing ByteStrings
 149         index,                  -- :: ByteString -> Int -> Word8
 150         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 151         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 152         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
 153         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 154         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 155         count,                  -- :: Word8 -> ByteString -> Int
 156
 157         -- * Zipping and unzipping ByteStrings
 158         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 159         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 160         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 161
 162         -- * Ordered ByteStrings
 163         sort,                   -- :: ByteString -> ByteString
 164
 165         -- * Low level CString conversions
 166
 167         -- ** Packing CStrings and pointers
 168         packCString,            -- :: CString -> ByteString
 169         packCStringLen,         -- :: CString -> ByteString
 170         packMallocCString,      -- :: CString -> ByteString
 171
 172         -- ** Using ByteStrings as CStrings
 173         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 174         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
 175
 176         -- ** Copying ByteStrings
 177         -- | These functions perform memcpy(3) operations
 178         copy,                   -- :: ByteString -> ByteString
 179         copyCString,            -- :: CString -> IO ByteString
 180         copyCStringLen,         -- :: CStringLen -> IO ByteString
 181
 182         -- * I\/O with 'ByteString's
 183
 184         -- ** Standard input and output
 185         getLine,                -- :: IO ByteString
 186         getContents,            -- :: IO ByteString
 187         putStr,                 -- :: ByteString -> IO ()
 188         putStrLn,               -- :: ByteString -> IO ()
 189         interact,               -- :: (ByteString -> ByteString) -> IO ()
 190
 191         -- ** Files
 192         readFile,               -- :: FilePath -> IO ByteString
 193         writeFile,              -- :: FilePath -> ByteString -> IO ()
 194         appendFile,             -- :: FilePath -> ByteString -> IO ()
 195 --      mmapFile,               -- :: FilePath -> IO ByteString
 196
 197         -- ** I\/O with Handles
 198         hGetLine,               -- :: Handle -> IO ByteString
 199         hGetContents,           -- :: Handle -> IO ByteString
 200         hGet,                   -- :: Handle -> Int -> IO ByteString
 201         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 202         hPut,                   -- :: Handle -> ByteString -> IO ()
 203         hPutStr,                -- :: Handle -> ByteString -> IO ()
 204         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
 205
 206 #if defined(__GLASGOW_HASKELL__)
 207         -- * Fusion utilities
 208         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 209         lengthU, maximumU, minimumU
 210 #endif
 211
 212   ) where
 213
 214 import qualified Prelude as P
 215 import Prelude hiding           (reverse,head,tail,last,init,null
 216                                 ,length,map,lines,foldl,foldr,unlines
 217                                 ,concat,any,take,drop,splitAt,takeWhile
 218                                 ,dropWhile,span,break,elem,filter,maximum
 219                                 ,minimum,all,concatMap,foldl1,foldr1
 220                                 ,scanl,scanl1,scanr,scanr1
 221                                 ,readFile,writeFile,appendFile,replicate
 222                                 ,getContents,getLine,putStr,putStrLn,interact
 223                                 ,zip,zipWith,unzip,notElem)
 224
 225 import Data.ByteString.Base
 226 import Data.ByteString.Fusion
 227
 228 import qualified Data.List as List
 229
 230 import Data.Word                (Word8)
 231 import Data.Maybe               (listToMaybe)
 232 import Data.Array               (listArray)
 233 import qualified Data.Array as Array ((!))
 234
 235 -- Control.Exception.bracket not available in yhc or nhc
 236 import Control.Exception        (bracket, assert)
 237 import qualified Control.Exception as Exception
 238 import Control.Monad            (when)
 239
 240 import Foreign.C.String         (CString, CStringLen)
 241 import Foreign.C.Types          (CSize)
 242 import Foreign.ForeignPtr
 243 import Foreign.Marshal.Array
 244 import Foreign.Ptr
 245 import Foreign.Storable         (Storable(..))
 246
 247 -- hGetBuf and hPutBuf not available in yhc or nhc
 248 import System.IO                (stdin,stdout,hClose,hFileSize
 249                                 ,hGetBuf,hPutBuf,openBinaryFile
 250                                 ,Handle,IOMode(..))
 251
 252 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
 253
 254 #if !defined(__GLASGOW_HASKELL__)
 255 import System.IO.Unsafe
 256 import qualified System.Environment
 257 import qualified System.IO      (hGetLine)
 258 #endif
 259
 260 #if defined(__GLASGOW_HASKELL__)
 261
 262 import System.IO                (hGetBufNonBlocking)
 263 import System.IO.Error          (isEOFError)
 264
 265 import GHC.Handle
 266 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
 267 import GHC.Base                 (build)
 268 import GHC.Word hiding (Word8)
 269 import GHC.Ptr                  (Ptr(..))
 270 import GHC.ST                   (ST(..))
 271 import GHC.IOBase
 272
 273 #endif
 274
 275 -- -----------------------------------------------------------------------------
 276 --
 277 -- Useful macros, until we have bang patterns
 278 --
 279
 280 #define STRICT1(f) f a | a `seq` False = undefined
 281 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 282 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 283 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 284 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 285
 286 -- -----------------------------------------------------------------------------
 287
 288 instance Eq  ByteString
 289     where (==)    = eq
 290
 291 instance Ord ByteString
 292     where compare = compareBytes
 293
 294 instance Show ByteString where
 295     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 296
 297 instance Read ByteString where
 298     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 299
 300 instance Monoid ByteString where
 301     mempty  = empty
 302     mappend = append
 303     mconcat = concat
 304
 305 {-
 306 instance Arbitrary PackedString where
 307     arbitrary = P.pack `fmap` arbitrary
 308     coarbitrary s = coarbitrary (P.unpack s)
 309 -}
 310
 311 -- | /O(n)/ Equality on the 'ByteString' type.
 312 eq :: ByteString -> ByteString -> Bool
 313 eq a@(PS p s l) b@(PS p' s' l')
 314     | l /= l'            = False    -- short cut on length
 315     | p == p' && s == s' = True     -- short cut for the same string
 316     | otherwise          = compareBytes a b == EQ
 317 {-# INLINE eq #-}
 318
 319 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 320 compareBytes :: ByteString -> ByteString -> Ordering
 321 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 322     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 323     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 324     | otherwise                         = inlinePerformIO $
 325         withForeignPtr x1 $ \p1 ->
 326         withForeignPtr x2 $ \p2 -> do
 327             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 328             return $! case i `compare` 0 of
 329                         EQ  -> l1 `compare` l2
 330                         x   -> x
 331 {-# INLINE compareBytes #-}
 332
 333 {-
 334 --
 335 -- About 4x slower over 32M
 336 --
 337 compareBytes :: ByteString -> ByteString -> Ordering
 338 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 339     withForeignPtr fp1 $ \p1 ->
 340         withForeignPtr fp2 $ \p2 ->
 341             cmp (p1 `plusPtr` off1)
 342                 (p2 `plusPtr` off2) 0 len1 len2
 343
 344 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 345 STRICT5(cmp)
 346 cmp p1 p2 n len1 len2
 347       | n == len1 = if n == len2 then return EQ else return LT
 348       | n == len2 = return GT
 349       | otherwise = do
 350           (a :: Word8) <- peekByteOff p1 n
 351           (b :: Word8) <- peekByteOff p2 n
 352           case a `compare` b of
 353                 EQ -> cmp p1 p2 (n+1) len1 len2
 354                 LT -> return LT
 355                 GT -> return GT
 356 {-# INLINE compareBytes #-}
 357 -}
 358
 359 -- -----------------------------------------------------------------------------
 360 -- Introducing and eliminating 'ByteString's
 361
 362 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 363 singleton :: Word8 -> ByteString
 364 singleton c = unsafeCreate 1 $ \p -> poke p c
 365 {-# INLINE [1] singleton #-}
 366
 367 --
 368 -- XXX The unsafePerformIO is critical!
 369 --
 370 -- Otherwise:
 371 --
 372 --  singleton 255 `compare` singleton 127
 373 --
 374 -- is compiled to:
 375 --
 376 --  case mallocByteString 2 of
 377 --      ForeignPtr f internals ->
 378 --           case writeWord8OffAddr# f 0 255 of _ ->
 379 --           case writeWord8OffAddr# f 0 127 of _ ->
 380 --           case eqAddr# f f of
 381 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 382 --                                        (GHC.Prim.plusAddr# f 0)
 383 --
 384 --
 385
 386 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 387 --
 388 -- For applications with large numbers of string literals, pack can be a
 389 -- bottleneck. In such cases, consider using packAddress (GHC only).
 390 pack :: [Word8] -> ByteString
 391
 392 #if !defined(__GLASGOW_HASKELL__)
 393
 394 pack str = unsafeCreate (P.length str) $ \p -> go p str
 395     where
 396         go _ []     = return ()
 397         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 398
 399 #else /* hack away */
 400
 401 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 402     where
 403         go _ _ []        = return ()
 404         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 405
 406         writeByte p i c = ST $ \s# ->
 407             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 408
 409 #endif
 410
 411 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 412 unpack :: ByteString -> [Word8]
 413
 414 #if !defined(__GLASGOW_HASKELL__)
 415
 416 unpack (PS _  _ 0) = []
 417 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 418         go (p `plusPtr` s) (l - 1) []
 419     where
 420         STRICT3(go)
 421         go p 0 acc = peek p          >>= \e -> return (e : acc)
 422         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 423 {-# INLINE unpack #-}
 424
 425 #else
 426
 427 unpack ps = build (unpackFoldr ps)
 428 {-# INLINE unpack #-}
 429
 430 --
 431 -- critical this isn't strict in the acc
 432 -- as it will break in the presence of list fusion. this is a known
 433 -- issue with seq and build/foldr rewrite rules, which rely on lazy
 434 -- demanding to avoid bottoms in the list.
 435 --
 436 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 437 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 438     let loop q n    _   | q `seq` n `seq` False = undefined -- n.b.
 439         loop _ (-1) acc = return acc
 440         loop q n    acc = do
 441            a <- peekByteOff q n
 442            loop q (n-1) (a `f` acc)
 443     loop (p `plusPtr` off) (len-1) ch
 444 {-# INLINE [0] unpackFoldr #-}
 445
 446 unpackList :: ByteString -> [Word8]
 447 unpackList (PS fp off len) = withPtr fp $ \p -> do
 448     let STRICT3(loop)
 449         loop _ (-1) acc = return acc
 450         loop q n acc = do
 451            a <- peekByteOff q n
 452            loop q (n-1) (a : acc)
 453     loop (p `plusPtr` off) (len-1) []
 454
 455 {-# RULES
 456 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 457  #-}
 458
 459 #endif
 460
 461 ------------------------------------------------------------------------
 462
 463 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 464 -- conversion function
 465 packWith :: (a -> Word8) -> [a] -> ByteString
 466 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
 467     where
 468         STRICT2(go)
 469         go _ []     = return ()
 470         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 471 {-# INLINE packWith #-}
 472 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 473
 474 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 475 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 476 unpackWith _ (PS _  _ 0) = []
 477 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 478         go (p `plusPtr` s) (l - 1) []
 479     where
 480         STRICT3(go)
 481         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 482         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 483 {-# INLINE unpackWith #-}
 484 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 485
 486 -- ---------------------------------------------------------------------
 487 -- Basic interface
 488
 489 -- | /O(1)/ Test whether a ByteString is empty.
 490 null :: ByteString -> Bool
 491 null (PS _ _ l) = assert (l >= 0) $ l <= 0
 492 {-# INLINE null #-}
 493
 494 -- ---------------------------------------------------------------------
 495 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 496 length :: ByteString -> Int
 497 length (PS _ _ l) = assert (l >= 0) $ l
 498
 499 --
 500 -- length/loop fusion. When taking the length of any fuseable loop,
 501 -- rewrite it as a foldl', and thus avoid allocating the result buffer
 502 -- worth around 10% in speed testing.
 503 --
 504
 505 #if defined(__GLASGOW_HASKELL__)
 506 {-# INLINE [1] length #-}
 507 #endif
 508
 509 lengthU :: ByteString -> Int
 510 lengthU = foldl' (const . (+1)) (0::Int)
 511 {-# INLINE lengthU #-}
 512
 513 {-# RULES
 514
 515 -- v2 fusion
 516 "length/loop" forall loop s .
 517   length  (loopArr (loopWrapper loop s)) =
 518   lengthU (loopArr (loopWrapper loop s))
 519
 520   #-}
 521
 522 ------------------------------------------------------------------------
 523
 524 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 525 -- complexity, as it requires a memcpy.
 526 cons :: Word8 -> ByteString -> ByteString
 527 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 528         poke p c
 529         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 530 {-# INLINE cons #-}
 531
 532 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 533 snoc :: ByteString -> Word8 -> ByteString
 534 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 535         memcpy p (f `plusPtr` s) (fromIntegral l)
 536         poke (p `plusPtr` l) c
 537 {-# INLINE snoc #-}
 538
 539 -- todo fuse
 540
 541 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 542 -- An exception will be thrown in the case of an empty ByteString.
 543 head :: ByteString -> Word8
 544 head (PS x s l)
 545     | l <= 0    = errorEmptyList "head"
 546     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 547 {-# INLINE head #-}
 548
 549 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 550 -- An exception will be thrown in the case of an empty ByteString.
 551 tail :: ByteString -> ByteString
 552 tail (PS p s l)
 553     | l <= 0    = errorEmptyList "tail"
 554     | otherwise = PS p (s+1) (l-1)
 555 {-# INLINE tail #-}
 556
 557 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 558 -- An exception will be thrown in the case of an empty ByteString.
 559 last :: ByteString -> Word8
 560 last ps@(PS x s l)
 561     | null ps   = errorEmptyList "last"
 562     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 563 {-# INLINE last #-}
 564
 565 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 566 -- An exception will be thrown in the case of an empty ByteString.
 567 init :: ByteString -> ByteString
 568 init ps@(PS p s l)
 569     | null ps   = errorEmptyList "init"
 570     | otherwise = PS p s (l-1)
 571 {-# INLINE init #-}
 572
 573 -- | /O(n)/ Append two ByteStrings
 574 append :: ByteString -> ByteString -> ByteString
 575 append xs ys | null xs   = ys
 576              | null ys   = xs
 577              | otherwise = concat [xs,ys]
 578 {-# INLINE append #-}
 579
 580 -- ---------------------------------------------------------------------
 581 -- Transformations
 582
 583 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 584 -- element of @xs@. This function is subject to array fusion.
 585 map :: (Word8 -> Word8) -> ByteString -> ByteString
 586 #if defined(LOOPU_FUSION)
 587 map f = loopArr . loopU (mapEFL f) NoAcc
 588 #elif defined(LOOPUP_FUSION)
 589 map f = loopArr . loopUp (mapEFL f) NoAcc
 590 #elif defined(LOOPNOACC_FUSION)
 591 map f = loopArr . loopNoAcc (mapEFL f)
 592 #else
 593 map f = loopArr . loopMap f
 594 #endif
 595 {-# INLINE map #-}
 596
 597 {-
 598 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 599 -- slightly faster for one-shot cases.
 600 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 601 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
 602     create len $ map_ 0 (a `plusPtr` s)
 603   where
 604     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 605     STRICT3(map_)
 606     map_ n p1 p2
 607        | n >= len = return ()
 608        | otherwise = do
 609             x <- peekByteOff p1 n
 610             pokeByteOff p2 n (f x)
 611             map_ (n+1) p1 p2
 612 {-# INLINE map' #-}
 613 -}
 614
 615 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 616 reverse :: ByteString -> ByteString
 617 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 618         c_reverse p (f `plusPtr` s) (fromIntegral l)
 619
 620 -- todo, fuseable version
 621
 622 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 623 -- 'ByteString' and \`intersperses\' that byte between the elements of
 624 -- the 'ByteString'.  It is analogous to the intersperse function on
 625 -- Lists.
 626 intersperse :: Word8 -> ByteString -> ByteString
 627 intersperse c ps@(PS x s l)
 628     | length ps < 2  = ps
 629     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
 630         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 631
 632 {-
 633 intersperse c = pack . List.intersperse c . unpack
 634 -}
 635
 636 -- | The 'transpose' function transposes the rows and columns of its
 637 -- 'ByteString' argument.
 638 transpose :: [ByteString] -> [ByteString]
 639 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 640
 641 -- ---------------------------------------------------------------------
 642 -- Reducing 'ByteString's
 643
 644 -- | 'foldl', applied to a binary operator, a starting value (typically
 645 -- the left-identity of the operator), and a ByteString, reduces the
 646 -- ByteString using the binary operator, from left to right.
 647 -- This function is subject to array fusion.
 648 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 649 #if !defined(LOOPU_FUSION)
 650 foldl f z = loopAcc . loopUp (foldEFL f) z
 651 #else
 652 foldl f z = loopAcc . loopU (foldEFL f) z
 653 #endif
 654 {-# INLINE foldl #-}
 655
 656 {-
 657 --
 658 -- About twice as fast with 6.4.1, but not fuseable
 659 -- A simple fold . map is enough to make it worth while.
 660 --
 661 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 662         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 663     where
 664         STRICT3(lgo)
 665         lgo z p q | p == q    = return z
 666                   | otherwise = do c <- peek p
 667                                    lgo (f z c) (p `plusPtr` 1) q
 668 -}
 669
 670 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 671 -- Though actually foldl is also strict in the accumulator.
 672 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 673 foldl' = foldl
 674 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
 675 {-# INLINE foldl' #-}
 676
 677 -- | 'foldr', applied to a binary operator, a starting value
 678 -- (typically the right-identity of the operator), and a ByteString,
 679 -- reduces the ByteString using the binary operator, from right to left.
 680 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 681 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
 682 {-# INLINE foldr #-}
 683
 684 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
 685 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
 686 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 687         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
 688     where
 689         STRICT3(go)
 690         go z p q | p == q    = return z
 691                  | otherwise = do c  <- peek p
 692                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
 693 {-# INLINE [1] foldr' #-}
 694
 695 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 696 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 697 -- This function is subject to array fusion.
 698 -- An exception will be thrown in the case of an empty ByteString.
 699 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 700 foldl1 f ps
 701     | null ps   = errorEmptyList "foldl1"
 702     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 703 {-# INLINE foldl1 #-}
 704
 705 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 706 -- An exception will be thrown in the case of an empty ByteString.
 707 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 708 foldl1' f ps
 709     | null ps   = errorEmptyList "foldl1'"
 710     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 711 {-# INLINE foldl1' #-}
 712
 713 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 714 -- and thus must be applied to non-empty 'ByteString's
 715 -- An exception will be thrown in the case of an empty ByteString.
 716 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 717 foldr1 f ps
 718     | null ps        = errorEmptyList "foldr1"
 719     | otherwise      = foldr f (last ps) (init ps)
 720 {-# INLINE foldr1 #-}
 721
 722 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
 723 -- accumulator.
 724 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 725 foldr1' f ps
 726     | null ps        = errorEmptyList "foldr1"
 727     | otherwise      = foldr' f (last ps) (init ps)
 728 {-# INLINE [1] foldr1' #-}
 729
 730 -- ---------------------------------------------------------------------
 731 -- Special folds
 732
 733 -- | /O(n)/ Concatenate a list of ByteStrings.
 734 concat :: [ByteString] -> ByteString
 735 concat []     = empty
 736 concat [ps]   = ps
 737 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
 738   where len = P.sum . P.map length $ xs
 739         STRICT2(go)
 740         go []            _   = return ()
 741         go (PS p s l:ps) ptr = do
 742                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 743                 go ps (ptr `plusPtr` l)
 744
 745 -- | Map a function over a 'ByteString' and concatenate the results
 746 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 747 concatMap f = concat . foldr ((:) . f) []
 748
 749 -- foldr (append . f) empty
 750
 751 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 752 -- any element of the 'ByteString' satisfies the predicate.
 753 any :: (Word8 -> Bool) -> ByteString -> Bool
 754 any _ (PS _ _ 0) = False
 755 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 756         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 757     where
 758         STRICT2(go)
 759         go p q | p == q    = return False
 760                | otherwise = do c <- peek p
 761                                 if f c then return True
 762                                        else go (p `plusPtr` 1) q
 763
 764 -- todo fuse
 765
 766 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 767 -- if all elements of the 'ByteString' satisfy the predicate.
 768 all :: (Word8 -> Bool) -> ByteString -> Bool
 769 all _ (PS _ _ 0) = True
 770 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 771         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 772     where
 773         STRICT2(go)
 774         go p q | p == q     = return True  -- end of list
 775                | otherwise  = do c <- peek p
 776                                  if f c
 777                                     then go (p `plusPtr` 1) q
 778                                     else return False
 779
 780 ------------------------------------------------------------------------
 781
 782 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 783 -- This function will fuse.
 784 -- An exception will be thrown in the case of an empty ByteString.
 785 maximum :: ByteString -> Word8
 786 maximum xs@(PS x s l)
 787     | null xs   = errorEmptyList "maximum"
 788     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 789                       c_maximum (p `plusPtr` s) (fromIntegral l)
 790
 791 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 792 -- This function will fuse.
 793 -- An exception will be thrown in the case of an empty ByteString.
 794 minimum :: ByteString -> Word8
 795 minimum xs@(PS x s l)
 796     | null xs   = errorEmptyList "minimum"
 797     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 798                       c_minimum (p `plusPtr` s) (fromIntegral l)
 799
 800 --
 801 -- minimum/maximum/loop fusion. As for length (and other folds), when we
 802 -- see we're applied after a fuseable op, switch from using the C
 803 -- version, to the fuseable version. The result should then avoid
 804 -- allocating a buffer.
 805 --
 806
 807 #if defined(__GLASGOW_HASKELL__)
 808 {-# INLINE [1] minimum #-}
 809 {-# INLINE [1] maximum #-}
 810 #endif
 811
 812 maximumU :: ByteString -> Word8
 813 maximumU = foldl1' max
 814 {-# INLINE maximumU #-}
 815
 816 minimumU :: ByteString -> Word8
 817 minimumU = foldl1' min
 818 {-# INLINE minimumU #-}
 819
 820 {-# RULES
 821
 822 "minimum/loop" forall loop s .
 823   minimum  (loopArr (loopWrapper loop s)) =
 824   minimumU (loopArr (loopWrapper loop s))
 825
 826 "maximum/loop" forall loop s .
 827   maximum  (loopArr (loopWrapper loop s)) =
 828   maximumU (loopArr (loopWrapper loop s))
 829
 830   #-}
 831
 832 ------------------------------------------------------------------------
 833
 834 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 835 -- 'foldl'; it applies a function to each element of a ByteString,
 836 -- passing an accumulating parameter from left to right, and returning a
 837 -- final value of this accumulator together with the new list.
 838 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 839 #if !defined(LOOPU_FUSION)
 840 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
 841 #else
 842 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
 843 #endif
 844 {-# INLINE mapAccumL #-}
 845
 846 -- | The 'mapAccumR' function behaves like a combination of 'map' and
 847 -- 'foldr'; it applies a function to each element of a ByteString,
 848 -- passing an accumulating parameter from right to left, and returning a
 849 -- final value of this accumulator together with the new ByteString.
 850 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 851 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
 852 {-# INLINE mapAccumR #-}
 853
 854 -- | /O(n)/ map Word8 functions, provided with the index at each position
 855 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 856 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
 857 {-# INLINE mapIndexed #-}
 858
 859 -- ---------------------------------------------------------------------
 860 -- Building ByteStrings
 861
 862 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 863 -- reduced values from the left. This function will fuse.
 864 --
 865 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 866 --
 867 -- Note that
 868 --
 869 -- > last (scanl f z xs) == foldl f z xs.
 870 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 871 #if !defined(LOOPU_FUSION)
 872 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
 873 #else
 874 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
 875 #endif
 876
 877     -- n.b. haskell's List scan returns a list one bigger than the
 878     -- input, so we need to snoc here to get some extra space, however,
 879     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
 880 {-# INLINE scanl #-}
 881
 882 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 883 -- This function will fuse.
 884 --
 885 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 886 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 887 scanl1 f ps
 888     | null ps   = empty
 889     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 890 {-# INLINE scanl1 #-}
 891
 892 -- | scanr is the right-to-left dual of scanl.
 893 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 894 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
 895 {-# INLINE scanr #-}
 896
 897 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
 898 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 899 scanr1 f ps
 900     | null ps   = empty
 901     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
 902 {-# INLINE scanr1 #-}
 903
 904 -- ---------------------------------------------------------------------
 905 -- Unfolds and replicates
 906
 907 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 908 -- the value of every element. The following holds:
 909 --
 910 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 911 --
 912 -- This implemenation uses @memset(3)@
 913 replicate :: Int -> Word8 -> ByteString
 914 replicate w c
 915     | w <= 0    = empty
 916     | otherwise = unsafeCreate w $ \ptr ->
 917                       memset ptr c (fromIntegral w) >> return ()
 918
 919 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
 920 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
 921 -- ByteString from a seed value.  The function takes the element and
 922 -- returns 'Nothing' if it is done producing the ByteString or returns
 923 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
 924 -- and @b@ is the seed value for further production.
 925 --
 926 -- Examples:
 927 --
 928 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
 929 -- > == pack [0, 1, 2, 3, 4, 5]
 930 --
 931 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 932 unfoldr f = concat . unfoldChunk 32 64
 933   where unfoldChunk n n' x =
 934           case unfoldrN n f x of
 935             (s, Nothing) -> s : []
 936             (s, Just x') -> s : unfoldChunk n' (n+n') x'
 937
 938 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
 939 -- value.  However, the length of the result is limited by the first
 940 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
 941 -- when the maximum length of the result is known.
 942 --
 943 -- The following equation relates 'unfoldrN' and 'unfoldr':
 944 --
 945 -- > unfoldrN n f s == take n (unfoldr f s)
 946 --
 947 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 948 unfoldrN i f x0
 949     | i < 0     = (empty, Just x0)
 950     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
 951   where STRICT3(go)
 952         go p x n =
 953           case f x of
 954             Nothing      -> return (0, n, Nothing)
 955             Just (w,x')
 956              | n == i    -> return (0, n, Just x)
 957              | otherwise -> do poke p w
 958                                go (p `plusPtr` 1) x' (n+1)
 959
 960 -- ---------------------------------------------------------------------
 961 -- Substrings
 962
 963 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 964 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 965 take :: Int -> ByteString -> ByteString
 966 take n ps@(PS x s l)
 967     | n <= 0    = empty
 968     | n >= l    = ps
 969     | otherwise = PS x s n
 970 {-# INLINE take #-}
 971
 972 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 973 -- elements, or @[]@ if @n > 'length' xs@.
 974 drop  :: Int -> ByteString -> ByteString
 975 drop n ps@(PS x s l)
 976     | n <= 0    = ps
 977     | n >= l    = empty
 978     | otherwise = PS x (s+n) (l-n)
 979 {-# INLINE drop #-}
 980
 981 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 982 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 983 splitAt n ps@(PS x s l)
 984     | n <= 0    = (empty, ps)
 985     | n >= l    = (ps, empty)
 986     | otherwise = (PS x s n, PS x (s+n) (l-n))
 987 {-# INLINE splitAt #-}
 988
 989 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 990 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 991 -- satisfy @p@.
 992 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 993 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 994 {-# INLINE takeWhile #-}
 995
 996 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 997 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 998 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
 999 {-# INLINE dropWhile #-}
1000
1001 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1002 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1003 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1004 {-# INLINE [1] break #-}
1005
1006 {-# RULES
1007 "FPS specialise break (x==)" forall x.
1008     break ((==) x) = breakByte x
1009   #-}
1010
1011 #if __GLASGOW_HASKELL__ >= 605
1012 {-# RULES
1013 "FPS specialise break (==x)" forall x.
1014     break (==x) = breakByte x
1015   #-}
1016 #endif
1017
1018 -- | 'breakByte' breaks its ByteString argument at the first occurence
1019 -- of the specified byte. It is more efficient than 'break' as it is
1020 -- implemented with @memchr(3)@. I.e.
1021 --
1022 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1023 --
1024 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1025 breakByte c p = case elemIndex c p of
1026     Nothing -> (p,empty)
1027     Just n  -> (unsafeTake n p, unsafeDrop n p)
1028 {-# INLINE breakByte #-}
1029
1030 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1031 --
1032 -- breakEnd p == spanEnd (not.p)
1033 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1034 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
1035
1036 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1037 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1038 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1039 span p ps = break (not . p) ps
1040 {-# INLINE [1] span #-}
1041
1042 -- | 'spanByte' breaks its ByteString argument at the first
1043 -- occurence of a byte other than its argument. It is more efficient
1044 -- than 'span (==)'
1045 --
1046 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
1047 --
1048 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1049 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1050     go (p `plusPtr` s) 0
1051   where
1052     STRICT2(go)
1053     go p i | i >= l    = return (ps, empty)
1054            | otherwise = do c' <- peekByteOff p i
1055                             if c /= c'
1056                                 then return (unsafeTake i ps, unsafeDrop i ps)
1057                                 else go p (i+1)
1058 {-# INLINE spanByte #-}
1059
1060 {-# RULES
1061 "FPS specialise span (x==)" forall x.
1062     span ((==) x) = spanByte x
1063   #-}
1064
1065 #if __GLASGOW_HASKELL__ >= 605
1066 {-# RULES
1067 "FPS specialise span (==x)" forall x.
1068     span (==x) = spanByte x
1069   #-}
1070 #endif
1071
1072 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1073 -- We have
1074 --
1075 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1076 --
1077 -- and
1078 --
1079 -- > spanEnd (not . isSpace) ps
1080 -- >    ==
1081 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1082 --
1083 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1084 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1085
1086 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1087 -- separators, where the predicate returns True for a separator element.
1088 -- The resulting components do not contain the separators.  Two adjacent
1089 -- separators result in an empty component in the output.  eg.
1090 --
1091 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1092 -- > splitWith (=='a') []        == []
1093 --
1094 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1095
1096 #if defined(__GLASGOW_HASKELL__)
1097 splitWith _pred (PS _  _   0) = []
1098 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1099   where pred# c# = pred_ (W8# c#)
1100
1101         STRICT4(splitWith0)
1102         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1103             splitLoop pred' p 0 off' len' fp'
1104
1105         splitLoop :: (Word# -> Bool)
1106                   -> Ptr Word8
1107                   -> Int -> Int -> Int
1108                   -> ForeignPtr Word8
1109                   -> IO [ByteString]
1110
1111         splitLoop pred' p idx' off' len' fp'
1112             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1113             | idx' >= len'  = return [PS fp' off' idx']
1114             | otherwise = do
1115                 w <- peekElemOff p (off'+idx')
1116                 if pred' (case w of W8# w# -> w#)
1117                    then return (PS fp' off' idx' :
1118                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1119                    else splitLoop pred' p (idx'+1) off' len' fp'
1120 {-# INLINE splitWith #-}
1121
1122 #else
1123 splitWith _ (PS _ _ 0) = []
1124 splitWith p ps = loop p ps
1125     where
1126         STRICT2(loop)
1127         loop q qs = if null rest then [chunk]
1128                                  else chunk : loop q (unsafeTail rest)
1129             where (chunk,rest) = break q qs
1130 #endif
1131
1132 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1133 -- argument, consuming the delimiter. I.e.
1134 --
1135 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1136 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1137 -- > split 'x'  "x"          == ["",""]
1138 --
1139 -- and
1140 --
1141 -- > join [c] . split c == id
1142 -- > split == splitWith . (==)
1143 --
1144 -- As for all splitting functions in this library, this function does
1145 -- not copy the substrings, it just constructs new 'ByteStrings' that
1146 -- are slices of the original.
1147 --
1148 split :: Word8 -> ByteString -> [ByteString]
1149 split _ (PS _ _ 0) = []
1150 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1151     let ptr = p `plusPtr` s
1152
1153         STRICT1(loop)
1154         loop n =
1155             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1156                                            w (fromIntegral (l-n))
1157             in if q == nullPtr
1158                 then [PS x (s+n) (l-n)]
1159                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1160
1161     return (loop 0)
1162 {-# INLINE split #-}
1163
1164 {-
1165 -- slower. but stays inside Haskell.
1166 split _ (PS _  _   0) = []
1167 split (W8# w#) (PS fp off len) = splitWith' off len fp
1168     where
1169         splitWith' off' len' fp' = withPtr fp $ \p ->
1170             splitLoop p 0 off' len' fp'
1171
1172         splitLoop :: Ptr Word8
1173                   -> Int -> Int -> Int
1174                   -> ForeignPtr Word8
1175                   -> IO [ByteString]
1176
1177         STRICT5(splitLoop)
1178         splitLoop p idx' off' len' fp'
1179             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1180             | idx' >= len'  = return [PS fp' off' idx']
1181             | otherwise = do
1182                 (W8# x#) <- peekElemOff p (off'+idx')
1183                 if word2Int# w# ==# word2Int# x#
1184                    then return (PS fp' off' idx' :
1185                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1186                    else splitLoop p (idx'+1) off' len' fp'
1187 -}
1188
1189 {-
1190 -- | Like 'splitWith', except that sequences of adjacent separators are
1191 -- treated as a single separator. eg.
1192 --
1193 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1194 --
1195 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1196 tokens f = P.filter (not.null) . splitWith f
1197 {-# INLINE tokens #-}
1198 -}
1199
1200 -- | The 'group' function takes a ByteString and returns a list of
1201 -- ByteStrings such that the concatenation of the result is equal to the
1202 -- argument.  Moreover, each sublist in the result contains only equal
1203 -- elements.  For example,
1204 --
1205 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1206 --
1207 -- It is a special case of 'groupBy', which allows the programmer to
1208 -- supply their own equality test. It is about 40% faster than
1209 -- /groupBy (==)/
1210 group :: ByteString -> [ByteString]
1211 group xs
1212     | null xs   = []
1213     | otherwise = ys : group zs
1214     where
1215         (ys, zs) = spanByte (unsafeHead xs) xs
1216
1217 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1218 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1219 groupBy k xs
1220     | null xs   = []
1221     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1222     where
1223         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1224
1225 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1226 -- 'ByteString's and concatenates the list after interspersing the first
1227 -- argument between each element of the list.
1228 join :: ByteString -> [ByteString] -> ByteString
1229 join s = concat . (List.intersperse s)
1230 {-# INLINE [1] join #-}
1231
1232 {-# RULES
1233 "FPS specialise join c -> joinByte" forall c s1 s2 .
1234     join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1235   #-}
1236
1237 --
1238 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1239 -- with a char. Around 4 times faster than the generalised join.
1240 --
1241 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1242 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1243     withForeignPtr ffp $ \fp ->
1244     withForeignPtr fgp $ \gp -> do
1245         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1246         poke (ptr `plusPtr` l) c
1247         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1248     where
1249       len = length f + length g + 1
1250 {-# INLINE joinWithByte #-}
1251
1252 -- ---------------------------------------------------------------------
1253 -- Indexing ByteStrings
1254
1255 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1256 index :: ByteString -> Int -> Word8
1257 index ps n
1258     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1259     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1260                                          ++ ", length = " ++ show (length ps))
1261     | otherwise      = ps `unsafeIndex` n
1262 {-# INLINE index #-}
1263
1264 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1265 -- element in the given 'ByteString' which is equal to the query
1266 -- element, or 'Nothing' if there is no such element.
1267 -- This implementation uses memchr(3).
1268 elemIndex :: Word8 -> ByteString -> Maybe Int
1269 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1270     let p' = p `plusPtr` s
1271     q <- memchr p' c (fromIntegral l)
1272     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1273 {-# INLINE elemIndex #-}
1274
1275 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1276 -- element in the given 'ByteString' which is equal to the query
1277 -- element, or 'Nothing' if there is no such element. The following
1278 -- holds:
1279 --
1280 -- > elemIndexEnd c xs ==
1281 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1282 --
1283 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1284 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1285     go (p `plusPtr` s) (l-1)
1286   where
1287     STRICT2(go)
1288     go p i | i < 0     = return Nothing
1289            | otherwise = do ch' <- peekByteOff p i
1290                             if ch == ch'
1291                                 then return $ Just i
1292                                 else go p (i-1)
1293 {-# INLINE elemIndexEnd #-}
1294
1295 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1296 -- the indices of all elements equal to the query element, in ascending order.
1297 -- This implementation uses memchr(3).
1298 elemIndices :: Word8 -> ByteString -> [Int]
1299 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1300     let ptr = p `plusPtr` s
1301
1302         STRICT1(loop)
1303         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1304                                                 w (fromIntegral (l - n))
1305                  in if q == nullPtr
1306                         then []
1307                         else let i = q `minusPtr` ptr
1308                              in i : loop (i+1)
1309     return $! loop 0
1310 {-# INLINE elemIndices #-}
1311
1312 {-
1313 -- much slower
1314 elemIndices :: Word8 -> ByteString -> [Int]
1315 elemIndices c ps = loop 0 ps
1316    where STRICT2(loop)
1317          loop _ ps' | null ps'            = []
1318          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1319                     | otherwise           = loop (n+1) (unsafeTail ps')
1320 -}
1321
1322 -- | count returns the number of times its argument appears in the ByteString
1323 --
1324 -- > count = length . elemIndices
1325 --
1326 -- But more efficiently than using length on the intermediate list.
1327 count :: Word8 -> ByteString -> Int
1328 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1329     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1330 {-# INLINE count #-}
1331
1332 {-
1333 --
1334 -- around 30% slower
1335 --
1336 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1337      go (p `plusPtr` s) (fromIntegral m) 0
1338     where
1339         go :: Ptr Word8 -> CSize -> Int -> IO Int
1340         STRICT3(go)
1341         go p l i = do
1342             q <- memchr p w l
1343             if q == nullPtr
1344                 then return i
1345                 else do let k = fromIntegral $ q `minusPtr` p
1346                         go (q `plusPtr` 1) (l-k-1) (i+1)
1347 -}
1348
1349 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1350 -- returns the index of the first element in the ByteString
1351 -- satisfying the predicate.
1352 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1353 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1354   where
1355     STRICT2(go)
1356     go ptr n | n >= l    = return Nothing
1357              | otherwise = do w <- peek ptr
1358                               if k w
1359                                 then return (Just n)
1360                                 else go (ptr `plusPtr` 1) (n+1)
1361 {-# INLINE findIndex #-}
1362
1363 -- | The 'findIndices' function extends 'findIndex', by returning the
1364 -- indices of all elements satisfying the predicate, in ascending order.
1365 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1366 findIndices p ps = loop 0 ps
1367    where
1368      STRICT2(loop)
1369      loop n qs | null qs           = []
1370                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1371                | otherwise         =     loop (n+1) (unsafeTail qs)
1372
1373 -- ---------------------------------------------------------------------
1374 -- Searching ByteStrings
1375
1376 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1377 elem :: Word8 -> ByteString -> Bool
1378 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1379 {-# INLINE elem #-}
1380
1381 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1382 notElem :: Word8 -> ByteString -> Bool
1383 notElem c ps = not (elem c ps)
1384 {-# INLINE notElem #-}
1385
1386 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1387 -- returns a ByteString containing those characters that satisfy the
1388 -- predicate. This function is subject to array fusion.
1389 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1390 #if defined(LOOPU_FUSION)
1391 filter p  = loopArr . loopU (filterEFL p) NoAcc
1392 #elif defined(LOOPUP_FUSION)
1393 filter p  = loopArr . loopUp (filterEFL p) NoAcc
1394 #elif defined(LOOPNOACC_FUSION)
1395 filter p  = loopArr . loopNoAcc (filterEFL p)
1396 #else
1397 filter f = loopArr . loopFilter f
1398 #endif
1399 {-# INLINE filter #-}
1400
1401 {-
1402 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1403 -- around 2x faster for some one-shot applications.
1404 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1405 filter' k ps@(PS x s l)
1406     | null ps   = ps
1407     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1408         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1409         return $! t `minusPtr` p -- actual length
1410     where
1411         STRICT3(go)
1412         go f t end | f == end  = return t
1413                    | otherwise = do
1414                         w <- peek f
1415                         if k w
1416                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1417                             else             go (f `plusPtr` 1) t               end
1418 {-# INLINE filter' #-}
1419 -}
1420
1421 --
1422 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1423 -- case of filtering a single byte. It is more efficient to use
1424 -- /filterByte/ in this case.
1425 --
1426 -- > filterByte == filter . (==)
1427 --
1428 -- filterByte is around 10x faster, and uses much less space, than its
1429 -- filter equivalent
1430 filterByte :: Word8 -> ByteString -> ByteString
1431 filterByte w ps = replicate (count w ps) w
1432 {-# INLINE filterByte #-}
1433
1434 {-# RULES
1435   "FPS specialise filter (== x)" forall x.
1436       filter ((==) x) = filterByte x
1437   #-}
1438
1439 #if __GLASGOW_HASKELL__ >= 605
1440 {-# RULES
1441 "FPS specialise filter (== x)" forall x.
1442     filter (== x) = filterByte x
1443   #-}
1444 #endif
1445
1446 --
1447 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1448 -- case of filtering a single byte out of a list. It is more efficient
1449 -- to use /filterNotByte/ in this case.
1450 --
1451 -- > filterNotByte == filter . (/=)
1452 --
1453 -- filterNotByte is around 2x faster than its filter equivalent.
1454 filterNotByte :: Word8 -> ByteString -> ByteString
1455 filterNotByte w = filter (/= w)
1456 {-# INLINE filterNotByte #-}
1457
1458 {-# RULES
1459 "FPS specialise filter (x /=)" forall x.
1460     filter ((/=) x) = filterNotByte x
1461   #-}
1462
1463 #if __GLASGOW_HASKELL__ >= 605
1464 {-# RULES
1465 "FPS specialise filter (/= x)" forall x.
1466     filter (/= x) = filterNotByte x
1467   #-}
1468 #endif
1469
1470 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1471 -- and returns the first element in matching the predicate, or 'Nothing'
1472 -- if there is no such element.
1473 --
1474 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1475 --
1476 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1477 find f p = case findIndex f p of
1478                     Just n -> Just (p `unsafeIndex` n)
1479                     _      -> Nothing
1480 {-# INLINE find #-}
1481
1482 {-
1483 --
1484 -- fuseable, but we don't want to walk the whole array.
1485 --
1486 find k = foldl findEFL Nothing
1487     where findEFL a@(Just _) _ = a
1488           findEFL _          c | k c       = Just c
1489                                | otherwise = Nothing
1490 -}
1491
1492 -- ---------------------------------------------------------------------
1493 -- Searching for substrings
1494
1495 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1496 -- iff the first is a prefix of the second.
1497 isPrefixOf :: ByteString -> ByteString -> Bool
1498 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1499     | l1 == 0   = True
1500     | l2 < l1   = False
1501     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1502         withForeignPtr x2 $ \p2 -> do
1503             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1504             return $! i == 0
1505
1506 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1507 -- iff the first is a suffix of the second.
1508 --
1509 -- The following holds:
1510 --
1511 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1512 --
1513 -- However, the real implemenation uses memcmp to compare the end of the
1514 -- string only, with no reverse required..
1515 isSuffixOf :: ByteString -> ByteString -> Bool
1516 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1517     | l1 == 0   = True
1518     | l2 < l1   = False
1519     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1520         withForeignPtr x2 $ \p2 -> do
1521             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1522             return $! i == 0
1523
1524 -- | Check whether one string is a substring of another. @isSubstringOf
1525 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1526 isSubstringOf :: ByteString -- ^ String to search for.
1527               -> ByteString -- ^ String to search in.
1528               -> Bool
1529 isSubstringOf p s = not $ P.null $ findSubstrings p s
1530
1531 -- | Get the first index of a substring in another string,
1532 --   or 'Nothing' if the string is not found.
1533 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1534 findSubstring :: ByteString -- ^ String to search for.
1535               -> ByteString -- ^ String to seach in.
1536               -> Maybe Int
1537 findSubstring = (listToMaybe .) . findSubstrings
1538
1539 -- | Find the indexes of all (possibly overlapping) occurances of a
1540 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1541 -- string matching algorithm.
1542 findSubstrings :: ByteString -- ^ String to search for.
1543                -> ByteString -- ^ String to seach in.
1544                -> [Int]
1545
1546 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1547   where
1548       patc x = pat `unsafeIndex` x
1549       strc x = str `unsafeIndex` x
1550
1551       -- maybe we should make kmpNext a UArray before using it in search?
1552       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1553       kmpNextL p _ | null p = []
1554       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1555                          ps = unsafeTail p
1556                          x = if not (null ps) && unsafeHead ps == patc j'
1557                                 then kmpNext Array.! j' else j'
1558                         in x:kmpNextL ps j'
1559       search i j = match ++ rest -- i: position in string, j: position in pattern
1560         where match = if j == m then [(i - j)] else []
1561               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1562       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1563                | otherwise = j
1564
1565 -- ---------------------------------------------------------------------
1566 -- Zipping
1567
1568 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1569 -- corresponding pairs of bytes. If one input ByteString is short,
1570 -- excess elements of the longer ByteString are discarded. This is
1571 -- equivalent to a pair of 'unpack' operations.
1572 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1573 zip ps qs
1574     | null ps || null qs = []
1575     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1576
1577 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1578 -- the first argument, instead of a tupling function.  For example,
1579 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1580 -- corresponding sums.
1581 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1582 zipWith f ps qs
1583     | null ps || null qs = []
1584     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1585
1586 --
1587 -- | A specialised version of zipWith for the common case of a
1588 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1589 -- are used to automatically covert zipWith into zipWith' when a pack is
1590 -- performed on the result of zipWith, but we also export it for
1591 -- convenience.
1592 --
1593 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1594 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1595     withForeignPtr fp $ \a ->
1596     withForeignPtr fq $ \b ->
1597     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1598   where
1599     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1600     STRICT4(zipWith_)
1601     zipWith_ n p1 p2 r
1602        | n >= len = return ()
1603        | otherwise = do
1604             x <- peekByteOff p1 n
1605             y <- peekByteOff p2 n
1606             pokeByteOff r n (f x y)
1607             zipWith_ (n+1) p1 p2 r
1608
1609     len = min l m
1610 {-# INLINE zipWith' #-}
1611
1612 {-# RULES
1613
1614 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1615     zipWith f p q = unpack (zipWith' f p q)
1616   #-}
1617
1618 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1619 -- ByteStrings. Note that this performs two 'pack' operations.
1620 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1621 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1622 {-# INLINE unzip #-}
1623
1624 -- ---------------------------------------------------------------------
1625 -- Special lists
1626
1627 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1628 inits :: ByteString -> [ByteString]
1629 inits (PS x s l) = [PS x s n | n <- [0..l]]
1630
1631 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1632 tails :: ByteString -> [ByteString]
1633 tails p | null p    = [empty]
1634         | otherwise = p : tails (unsafeTail p)
1635
1636 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1637
1638 -- ---------------------------------------------------------------------
1639 -- ** Ordered 'ByteString's
1640
1641 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1642 sort :: ByteString -> ByteString
1643 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1644
1645     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1646     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1647
1648     let STRICT2(go)
1649         go 256 _   = return ()
1650         go i   ptr = do n <- peekElemOff arr i
1651                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1652                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1653     go 0 p
1654
1655 {-
1656 sort :: ByteString -> ByteString
1657 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1658         memcpy p (f `plusPtr` s) l
1659         c_qsort p l -- inplace
1660 -}
1661
1662 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1663 --
1664 -- Try some linear sorts: radix, counting
1665 -- Or mergesort.
1666 --
1667 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1668 -- sortBy f ps = undefined
1669
1670 -- ---------------------------------------------------------------------
1671 -- Low level constructors
1672
1673 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1674 -- finalizer associated to it. The ByteString length is calculated using
1675 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1676 packCString :: CString -> ByteString
1677 packCString cstr = unsafePerformIO $ do
1678     fp <- newForeignPtr_ (castPtr cstr)
1679     l <- c_strlen cstr
1680     return $! PS fp 0 (fromIntegral l)
1681
1682 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1683 -- have /no/ finalizer associated with it. This operation has /O(1)/
1684 -- complexity as we already know the final size, so no /strlen(3)/ is
1685 -- required.
1686 packCStringLen :: CStringLen -> ByteString
1687 packCStringLen (ptr,len) = unsafePerformIO $ do
1688     fp <- newForeignPtr_ (castPtr ptr)
1689     return $! PS fp 0 (fromIntegral len)
1690
1691 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1692 -- have a @free(3)@ finalizer associated to it.
1693 packMallocCString :: CString -> ByteString
1694 packMallocCString cstr = unsafePerformIO $ do
1695     fp <- newForeignFreePtr (castPtr cstr)
1696     len <- c_strlen cstr
1697     return $! PS fp 0 (fromIntegral len)
1698
1699 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1700 -- null-terminated @CString@.  The @CString@ will be freed
1701 -- automatically. This is a memcpy(3).
1702 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1703 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1704     where alloc = withForeignPtr ps $ \p -> do
1705             buf <- c_malloc (fromIntegral l+1)
1706             memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1707             poke (buf `plusPtr` l) (0::Word8) -- n.b.
1708             return (castPtr buf)
1709
1710 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1711 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1712 useAsCStringLen = unsafeUseAsCStringLen
1713
1714 --
1715 -- why were we doing this?
1716 --
1717 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1718 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1719 --     where
1720 --       alloc = withForeignPtr ps $ \p -> do
1721 --                 buf <- c_malloc (fromIntegral l+1)
1722 --                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1723 --                 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1724 --                 return $! (castPtr buf, l)
1725 --
1726
1727 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1728 --   This is mainly useful to allow the rest of the data pointed
1729 --   to by the 'ByteString' to be garbage collected, for example
1730 --   if a large string has been read in, and only a small part of it
1731 --   is needed in the rest of the program.
1732 copy :: ByteString -> ByteString
1733 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1734     memcpy p (f `plusPtr` s) (fromIntegral l)
1735
1736 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1737 -- CString is going to be deallocated from C land.
1738 copyCString :: CString -> IO ByteString
1739 copyCString cstr = do
1740     len <- c_strlen cstr
1741     copyCStringLen (cstr, fromIntegral len)
1742
1743 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1744 copyCStringLen :: CStringLen -> IO ByteString
1745 copyCStringLen (cstr, len) = create len $ \p ->
1746     memcpy p (castPtr cstr) (fromIntegral len)
1747
1748 -- ---------------------------------------------------------------------
1749 -- line IO
1750
1751 -- | Read a line from stdin.
1752 getLine :: IO ByteString
1753 getLine = hGetLine stdin
1754
1755 {-
1756 -- | Lazily construct a list of lines of ByteStrings. This will be much
1757 -- better on memory consumption than using 'hGetContents >>= lines'
1758 -- If you're considering this, a better choice might be to use
1759 -- Data.ByteString.Lazy
1760 hGetLines :: Handle -> IO [ByteString]
1761 hGetLines h = go
1762     where
1763         go = unsafeInterleaveIO $ do
1764                 e <- hIsEOF h
1765                 if e
1766                   then return []
1767                   else do
1768                 x  <- hGetLine h
1769                 xs <- go
1770                 return (x:xs)
1771 -}
1772
1773 -- | Read a line from a handle
1774
1775 hGetLine :: Handle -> IO ByteString
1776 #if !defined(__GLASGOW_HASKELL__)
1777 hGetLine h = do
1778   string <- System.IO.hGetLine h
1779   return $ packWith c2w string
1780 #else
1781 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1782     case haBufferMode handle_ of
1783        NoBuffering -> error "no buffering"
1784        _other      -> hGetLineBuffered handle_
1785
1786  where
1787     hGetLineBuffered handle_ = do
1788         let ref = haBuffer handle_
1789         buf <- readIORef ref
1790         hGetLineBufferedLoop handle_ ref buf 0 []
1791
1792     hGetLineBufferedLoop handle_ ref
1793             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1794         len `seq` do
1795         off <- findEOL r w raw
1796         let new_len = len + off - r
1797         xs <- mkPS raw r off
1798
1799       -- if eol == True, then off is the offset of the '\n'
1800       -- otherwise off == w and the buffer is now empty.
1801         if off /= w
1802             then do if (w == off + 1)
1803                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1804                             else writeIORef ref buf{ bufRPtr = off + 1 }
1805                     mkBigPS new_len (xs:xss)
1806             else do
1807                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1808                                     buf{ bufWPtr=0, bufRPtr=0 }
1809                  case maybe_buf of
1810                     -- Nothing indicates we caught an EOF, and we may have a
1811                     -- partial line to return.
1812                     Nothing -> do
1813                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1814                          if new_len > 0
1815                             then mkBigPS new_len (xs:xss)
1816                             else ioe_EOF
1817                     Just new_buf ->
1818                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1819
1820     -- find the end-of-line character, if there is one
1821     findEOL r w raw
1822         | r == w = return w
1823         | otherwise =  do
1824             (c,r') <- readCharFromBuffer raw r
1825             if c == '\n'
1826                 then return r -- NB. not r': don't include the '\n'
1827                 else findEOL r' w raw
1828
1829     maybeFillReadBuffer fd is_line is_stream buf = catch
1830         (do buf' <- fillReadBuffer fd is_line is_stream buf
1831             return (Just buf'))
1832         (\e -> if isEOFError e then return Nothing else ioError e)
1833
1834 -- TODO, rewrite to use normal memcpy
1835 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1836 mkPS buf start end =
1837     let len = end - start
1838     in create len $ \p -> do
1839         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1840         return ()
1841
1842 mkBigPS :: Int -> [ByteString] -> IO ByteString
1843 mkBigPS _ [ps] = return ps
1844 mkBigPS _ pss = return $! concat (P.reverse pss)
1845
1846 #endif
1847
1848 -- ---------------------------------------------------------------------
1849 -- Block IO
1850
1851 -- | Outputs a 'ByteString' to the specified 'Handle'.
1852 hPut :: Handle -> ByteString -> IO ()
1853 hPut _ (PS _  _ 0) = return ()
1854 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1855
1856 -- | A synonym for @hPut@, for compatibility
1857 hPutStr :: Handle -> ByteString -> IO ()
1858 hPutStr = hPut
1859
1860 -- | Write a ByteString to a handle, appending a newline byte
1861 hPutStrLn :: Handle -> ByteString -> IO ()
1862 hPutStrLn h ps
1863     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1864     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1865
1866 -- | Write a ByteString to stdout
1867 putStr :: ByteString -> IO ()
1868 putStr = hPut stdout
1869
1870 -- | Write a ByteString to stdout, appending a newline byte
1871 putStrLn :: ByteString -> IO ()
1872 putStrLn = hPutStrLn stdout
1873
1874 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1875 -- is far more efficient than reading the characters into a 'String'
1876 -- and then using 'pack'.
1877 hGet :: Handle -> Int -> IO ByteString
1878 hGet _ 0 = return empty
1879 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1880
1881 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1882 -- waiting for data to become available, instead it returns only whatever data
1883 -- is available.
1884 hGetNonBlocking :: Handle -> Int -> IO ByteString
1885 #if defined(__GLASGOW_HASKELL__)
1886 hGetNonBlocking _ 0 = return empty
1887 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1888 #else
1889 hGetNonBlocking = hGet
1890 #endif
1891
1892 -- | Read entire handle contents into a 'ByteString'.
1893 -- This function reads chunks at a time, doubling the chunksize on each
1894 -- read. The final buffer is then realloced to the appropriate size. For
1895 -- files > half of available memory, this may lead to memory exhaustion.
1896 -- Consider using 'readFile' in this case.
1897 --
1898 -- As with 'hGet', the string representation in the file is assumed to
1899 -- be ISO-8859-1.
1900 --
1901 hGetContents :: Handle -> IO ByteString
1902 hGetContents h = do
1903     let start_size = 1024
1904     p <- mallocArray start_size
1905     i <- hGetBuf h p start_size
1906     if i < start_size
1907         then do p' <- reallocArray p i
1908                 fp <- newForeignFreePtr p'
1909                 return $! PS fp 0 i
1910         else f p start_size
1911     where
1912         f p s = do
1913             let s' = 2 * s
1914             p' <- reallocArray p s'
1915             i  <- hGetBuf h (p' `plusPtr` s) s
1916             if i < s
1917                 then do let i' = s + i
1918                         p'' <- reallocArray p' i'
1919                         fp  <- newForeignFreePtr p''
1920                         return $! PS fp 0 i'
1921                 else f p' s'
1922
1923 -- | getContents. Equivalent to hGetContents stdin
1924 getContents :: IO ByteString
1925 getContents = hGetContents stdin
1926
1927 -- | The interact function takes a function of type @ByteString -> ByteString@
1928 -- as its argument. The entire input from the standard input device is passed
1929 -- to this function as its argument, and the resulting string is output on the
1930 -- standard output device. It's great for writing one line programs!
1931 interact :: (ByteString -> ByteString) -> IO ()
1932 interact transformer = putStr . transformer =<< getContents
1933
1934 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1935 -- efficient than reading the characters into a 'String' and then using
1936 -- 'pack'.  It also may be more efficient than opening the file and
1937 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1938 -- for 'text mode' use the Char8 version of this function.
1939 readFile :: FilePath -> IO ByteString
1940 readFile f = bracket (openBinaryFile f ReadMode) hClose
1941     (\h -> hFileSize h >>= hGet h . fromIntegral)
1942
1943 -- | Write a 'ByteString' to a file.
1944 writeFile :: FilePath -> ByteString -> IO ()
1945 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1946     (\h -> hPut h txt)
1947
1948 -- | Append a 'ByteString' to a file.
1949 appendFile :: FilePath -> ByteString -> IO ()
1950 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1951     (\h -> hPut h txt)
1952
1953 {-
1954 --
1955 -- Disable until we can move it into a portable .hsc file
1956 --
1957
1958 -- | Like readFile, this reads an entire file directly into a
1959 -- 'ByteString', but it is even more efficient.  It involves directly
1960 -- mapping the file to memory.  This has the advantage that the contents
1961 -- of the file never need to be copied.  Also, under memory pressure the
1962 -- page may simply be discarded, while in the case of readFile it would
1963 -- need to be written to swap.  If you read many small files, mmapFile
1964 -- will be less memory-efficient than readFile, since each mmapFile
1965 -- takes up a separate page of memory.  Also, you can run into bus
1966 -- errors if the file is modified.  As with 'readFile', the string
1967 -- representation in the file is assumed to be ISO-8859-1.
1968 --
1969 -- On systems without mmap, this is the same as a readFile.
1970 --
1971 mmapFile :: FilePath -> IO ByteString
1972 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1973
1974 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1975 mmap f = do
1976     h <- openBinaryFile f ReadMode
1977     l <- fromIntegral `fmap` hFileSize h
1978     -- Don't bother mmaping small files because each mmapped file takes up
1979     -- at least one full VM block.
1980     if l < mmap_limit
1981        then do thefp <- mallocByteString l
1982                withForeignPtr thefp $ \p-> hGetBuf h p l
1983                hClose h
1984                return (thefp, l)
1985        else do
1986                -- unix only :(
1987                fd <- fromIntegral `fmap` handleToFd h
1988                p  <- my_mmap l fd
1989                fp <- if p == nullPtr
1990                      then do thefp <- mallocByteString l
1991                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1992                              return thefp
1993                      else do
1994                           -- The munmap leads to crashes on OpenBSD.
1995                           -- maybe there's a use after unmap in there somewhere?
1996                           -- Bulat suggests adding the hClose to the
1997                           -- finalizer, excellent idea.
1998 #if !defined(__OpenBSD__)
1999                              let unmap = c_munmap p l >> return ()
2000 #else
2001                              let unmap = return ()
2002 #endif
2003                              fp <- newForeignPtr p unmap
2004                              return fp
2005                c_close fd
2006                hClose h
2007                return (fp, l)
2008     where mmap_limit = 16*1024
2009 -}
2010
2011 -- ---------------------------------------------------------------------
2012 -- Internal utilities
2013
2014 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
2015 -- of the string if no element is found, rather than Nothing.
2016 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2017 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
2018   where
2019     STRICT2(go)
2020     go ptr n | n >= l    = return l
2021              | otherwise = do w <- peek ptr
2022                               if k w
2023                                 then return n
2024                                 else go (ptr `plusPtr` 1) (n+1)
2025 {-# INLINE findIndexOrEnd #-}
2026
2027 -- | Perform an operation with a temporary ByteString
2028 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2029 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2030 {-# INLINE withPtr #-}
2031
2032 -- Common up near identical calls to `error' to reduce the number
2033 -- constant strings created when compiled:
2034 errorEmptyList :: String -> a
2035 errorEmptyList fun = moduleError fun "empty ByteString"
2036 {-# NOINLINE errorEmptyList #-}
2037
2038 moduleError :: String -> String -> a
2039 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2040 {-# NOINLINE moduleError #-}
2041
2042 -- Find from the end of the string using predicate
2043 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2044 STRICT2(findFromEndUntil)
2045 findFromEndUntil f ps@(PS x s l) =
2046     if null ps then 0
2047     else if f (last ps) then l
2048          else findFromEndUntil f (PS x s (l-1))
2049
2050 {-# INLINE newForeignFreePtr #-}
2051 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2052 newForeignFreePtr p = newForeignPtr c_free_finalizer p