Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with "Prelude" functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         singleton,              -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: ByteString -> Word8 -> ByteString
  55         append,                 -- :: ByteString -> ByteString -> ByteString
  56         head,                   -- :: ByteString -> Word8
  57         last,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         init,                   -- :: ByteString -> ByteString
  60         null,                   -- :: ByteString -> Bool
  61         length,                 -- :: ByteString -> Int
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         map',                   -- :: (Word8 -> Word8) -> ByteString -> ByteString
  66         reverse,                -- :: ByteString -> ByteString
  67         intersperse,            -- :: Word8 -> ByteString -> ByteString
  68         transpose,              -- :: [ByteString] -> [ByteString]
  69
  70         -- * Reducing 'ByteString's (folds)
  71         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  72         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  73         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  75         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  76         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77
  78         -- ** Special folds
  79         concat,                 -- :: [ByteString] -> ByteString
  80         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  81         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  82         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  83         maximum,                -- :: ByteString -> Word8
  84         minimum,                -- :: ByteString -> Word8
  85
  86         -- * Building ByteStrings
  87         -- ** Scans
  88         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  89         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  90         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  91         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  92
  93         -- ** Accumulating maps
  94         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  95         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  96         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  97
  98         -- ** Unfolding ByteStrings
  99         replicate,              -- :: Int -> Word8 -> ByteString
 100         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 101         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
 102
 103         -- * Substrings
 104
 105         -- ** Breaking strings
 106         take,                   -- :: Int -> ByteString -> ByteString
 107         drop,                   -- :: Int -> ByteString -> ByteString
 108         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 109         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 110         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 111         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 113         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 114         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117         inits,                  -- :: ByteString -> [ByteString]
 118         tails,                  -- :: ByteString -> [ByteString]
 119
 120         -- ** Breaking and dropping on specific bytes
 121         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 122         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 123
 124         -- ** Breaking into many substrings
 125         split,                  -- :: Word8 -> ByteString -> [ByteString]
 126         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 127         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 128
 129         -- ** Joining strings
 130         join,                   -- :: ByteString -> [ByteString] -> ByteString
 131         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 132
 133         -- * Predicates
 134         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 135         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 136
 137         -- ** Search for arbitrary substrings
 138         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 139         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 140         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 141
 142         -- * Searching ByteStrings
 143
 144         -- ** Searching by equality
 145         -- | These functions use memchr(3) to efficiently search the ByteString
 146         elem,                   -- :: Word8 -> ByteString -> Bool
 147         notElem,                -- :: Word8 -> ByteString -> Bool
 148         filterByte,             -- :: Word8 -> ByteString -> ByteString
 149         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 150
 151         -- ** Searching with a predicate
 152         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 153         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 154         filter',                -- :: (Word8 -> Bool) -> ByteString -> ByteString
 155 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 156
 157         -- * Indexing ByteStrings
 158         index,                  -- :: ByteString -> Int -> Word8
 159         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 160         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 161         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
 162         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 163         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 164         count,                  -- :: Word8 -> ByteString -> Int
 165         findIndexOrEnd,         -- :: (Word8 -> Bool) -> ByteString -> Int
 166
 167         -- * Zipping and unzipping ByteStrings
 168         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 169         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 170         zipWith',
 171         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 172
 173         -- * Ordered ByteStrings
 174         sort,                   -- :: ByteString -> ByteString
 175
 176         -- * Low level CString conversions
 177
 178         -- ** Packing CStrings and pointers
 179         packCString,            -- :: CString -> ByteString
 180         packCStringLen,         -- :: CString -> ByteString
 181         packMallocCString,      -- :: CString -> ByteString
 182
 183         -- ** Using ByteStrings as CStrings
 184         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 185         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
 186
 187         -- ** Copying ByteStrings
 188         -- | These functions perform memcpy(3) operations
 189         copy,                   -- :: ByteString -> ByteString
 190         copyCString,            -- :: CString -> IO ByteString
 191         copyCStringLen,         -- :: CStringLen -> IO ByteString
 192
 193         -- * I\/O with 'ByteString's
 194
 195         -- ** Standard input and output
 196
 197 #if defined(__GLASGOW_HASKELL__)
 198         getLine,                -- :: IO ByteString
 199 #endif
 200         getContents,            -- :: IO ByteString
 201         putStr,                 -- :: ByteString -> IO ()
 202         putStrLn,               -- :: ByteString -> IO ()
 203
 204         -- ** Files
 205         readFile,               -- :: FilePath -> IO ByteString
 206         writeFile,              -- :: FilePath -> ByteString -> IO ()
 207         appendFile,             -- :: FilePath -> ByteString -> IO ()
 208 --      mmapFile,               -- :: FilePath -> IO ByteString
 209
 210         -- ** I\/O with Handles
 211 #if defined(__GLASGOW_HASKELL__)
 212         getArgs,                -- :: IO [ByteString]
 213         hGetLine,               -- :: Handle -> IO ByteString
 214         hGetLines,              -- :: Handle -> IO [ByteString]
 215         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 216 #endif
 217         hGetContents,           -- :: Handle -> IO ByteString
 218         hGet,                   -- :: Handle -> Int -> IO ByteString
 219         hPut,                   -- :: Handle -> ByteString -> IO ()
 220         hPutStr,                -- :: Handle -> ByteString -> IO ()
 221         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
 222
 223         -- * Fusion utilities
 224 #if defined(__GLASGOW_HASKELL__)
 225         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 226 #endif
 227         lengthU, maximumU, minimumU
 228   ) where
 229
 230 import qualified Prelude as P
 231 import Prelude hiding           (reverse,head,tail,last,init,null
 232                                 ,length,map,lines,foldl,foldr,unlines
 233                                 ,concat,any,take,drop,splitAt,takeWhile
 234                                 ,dropWhile,span,break,elem,filter,maximum
 235                                 ,minimum,all,concatMap,foldl1,foldr1
 236                                 ,scanl,scanl1,scanr,scanr1
 237                                 ,readFile,writeFile,appendFile,replicate
 238                                 ,getContents,getLine,putStr,putStrLn
 239                                 ,zip,zipWith,unzip,notElem)
 240
 241 import Data.ByteString.Base
 242 import Data.ByteString.Fusion
 243
 244 import qualified Data.List as List
 245
 246 import Data.Word                (Word8)
 247 import Data.Maybe               (listToMaybe)
 248 import Data.Array               (listArray)
 249 import qualified Data.Array as Array ((!))
 250
 251 -- Control.Exception.bracket not available in yhc or nhc
 252 import Control.Exception        (bracket, assert)
 253 import Control.Monad            (when)
 254
 255 import Foreign.C.String         (CString, CStringLen)
 256 import Foreign.C.Types          (CSize)
 257 import Foreign.ForeignPtr
 258 import Foreign.Marshal.Array
 259 import Foreign.Ptr
 260 import Foreign.Storable         (Storable(..))
 261
 262 -- hGetBuf and hPutBuf not available in yhc or nhc
 263 import System.IO                (stdin,stdout,hClose,hFileSize
 264                                 ,hGetBuf,hPutBuf,openBinaryFile
 265                                 ,Handle,IOMode(..))
 266
 267 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
 268
 269 #if !defined(__GLASGOW_HASKELL__)
 270 import System.IO.Unsafe
 271 #endif
 272
 273 #if defined(__GLASGOW_HASKELL__)
 274
 275 import System.IO                (hGetBufNonBlocking)
 276 import System.IO.Error          (isEOFError)
 277
 278 import Foreign.Marshal          (alloca)
 279 import qualified Foreign.Concurrent as FC (newForeignPtr)
 280
 281 import GHC.Handle
 282 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
 283 -- import GHC.Base                 (build)
 284 import GHC.Word hiding (Word8)
 285 import GHC.Ptr                  (Ptr(..))
 286 import GHC.ST                   (ST(..))
 287 import GHC.IOBase
 288
 289 #endif
 290
 291 -- -----------------------------------------------------------------------------
 292 --
 293 -- Useful macros, until we have bang patterns
 294 --
 295
 296 #define STRICT1(f) f a | a `seq` False = undefined
 297 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 298 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 299 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 300 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 301
 302 -- -----------------------------------------------------------------------------
 303
 304 instance Eq  ByteString
 305     where (==)    = eq
 306
 307 instance Ord ByteString
 308     where compare = compareBytes
 309
 310 instance Show ByteString where
 311     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 312
 313 instance Read ByteString where
 314     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 315
 316 instance Monoid ByteString where
 317     mempty  = empty
 318     mappend = append
 319     mconcat = concat
 320
 321 {-
 322 instance Arbitrary PackedString where
 323     arbitrary = P.pack `fmap` arbitrary
 324     coarbitrary s = coarbitrary (P.unpack s)
 325 -}
 326
 327 -- | /O(n)/ Equality on the 'ByteString' type.
 328 eq :: ByteString -> ByteString -> Bool
 329 eq a@(PS p s l) b@(PS p' s' l')
 330     | l /= l'            = False    -- short cut on length
 331     | p == p' && s == s' = True     -- short cut for the same string
 332     | otherwise          = compareBytes a b == EQ
 333 {-# INLINE eq #-}
 334
 335 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 336 compareBytes :: ByteString -> ByteString -> Ordering
 337 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 338     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 339     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 340     | otherwise                         = inlinePerformIO $
 341         withForeignPtr x1 $ \p1 ->
 342         withForeignPtr x2 $ \p2 -> do
 343             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 344             return $! case i `compare` 0 of
 345                         EQ  -> l1 `compare` l2
 346                         x   -> x
 347 {-# INLINE compareBytes #-}
 348
 349 {-
 350 --
 351 -- About 4x slower over 32M
 352 --
 353 compareBytes :: ByteString -> ByteString -> Ordering
 354 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 355     withForeignPtr fp1 $ \p1 ->
 356         withForeignPtr fp2 $ \p2 ->
 357             cmp (p1 `plusPtr` off1)
 358                 (p2 `plusPtr` off2) 0 len1 len2
 359
 360 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 361 STRICT5(cmp)
 362 cmp p1 p2 n len1 len2
 363       | n == len1 = if n == len2 then return EQ else return LT
 364       | n == len2 = return GT
 365       | otherwise = do
 366           (a :: Word8) <- peekByteOff p1 n
 367           (b :: Word8) <- peekByteOff p2 n
 368           case a `compare` b of
 369                 EQ -> cmp p1 p2 (n+1) len1 len2
 370                 LT -> return LT
 371                 GT -> return GT
 372 {-# INLINE compareBytes #-}
 373 -}
 374
 375 -- -----------------------------------------------------------------------------
 376 -- Introducing and eliminating 'ByteString's
 377
 378 -- | /O(1)/ The empty 'ByteString'
 379 empty :: ByteString
 380 empty = unsafeCreate 0 $ const $ return ()
 381 {-# NOINLINE empty #-}
 382
 383 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 384 singleton :: Word8 -> ByteString
 385 singleton c = unsafeCreate 1 $ \p -> poke p c
 386 {-# INLINE singleton #-}
 387
 388 --
 389 -- XXX The unsafePerformIO is critical!
 390 --
 391 -- Otherwise:
 392 --
 393 --  singleton 255 `compare` singleton 127
 394 --
 395 -- is compiled to:
 396 --
 397 --  case mallocByteString 2 of
 398 --      ForeignPtr f internals ->
 399 --           case writeWord8OffAddr# f 0 255 of _ ->
 400 --           case writeWord8OffAddr# f 0 127 of _ ->
 401 --           case eqAddr# f f of
 402 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 403 --                                        (GHC.Prim.plusAddr# f 0)
 404 --
 405 --
 406
 407 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 408 --
 409 -- For applications with large numbers of string literals, pack can be a
 410 -- bottleneck. In such cases, consider using packAddress (GHC only).
 411 pack :: [Word8] -> ByteString
 412
 413 #if !defined(__GLASGOW_HASKELL__)
 414
 415 pack str = unsafeCreate (P.length str) $ \p -> go p str
 416     where
 417         go _ []     = return ()
 418         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 419
 420 #else /* hack away */
 421
 422 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 423     where
 424         go _ _ []        = return ()
 425         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 426
 427         writeByte p i c = ST $ \s# ->
 428             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 429
 430 #endif
 431
 432 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 433 unpack :: ByteString -> [Word8]
 434
 435 -- #if !defined(__GLASGOW_HASKELL__)
 436
 437 unpack (PS _  _ 0) = []
 438 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 439         go (p `plusPtr` s) (l - 1) []
 440     where
 441         STRICT3(go)
 442         go p 0 acc = peek p          >>= \e -> return (e : acc)
 443         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 444 {-# INLINE unpack #-}
 445
 446 -- #else
 447
 448 --
 449 -- Interacting with head/build fusion rule in ghc 6.5. Disable for now
 450 --
 451
 452 -- unpack ps = build (unpackFoldr ps)
 453 -- {-# INLINE unpack #-}
 454
 455 unpackList :: ByteString -> [Word8]
 456 unpackList (PS fp off len) = withPtr fp $ \p -> do
 457     let STRICT3(loop)
 458         loop _ (-1) acc = return acc
 459         loop q n acc = do
 460            a <- peekByteOff q n
 461            loop q (n-1) (a : acc)
 462     loop (p `plusPtr` off) (len-1) []
 463
 464 {-# RULES
 465 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 466  #-}
 467
 468 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 469 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 470     let STRICT3(loop)
 471         loop _ (-1) acc = return acc
 472         loop q n    acc = do
 473            a <- peekByteOff q n
 474            loop q (n-1) (a `f` acc)
 475     loop (p `plusPtr` off) (len-1) ch
 476 {-# INLINE [0] unpackFoldr #-}
 477
 478 -- #endif
 479
 480 ------------------------------------------------------------------------
 481
 482 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 483 -- conversion function
 484 packWith :: (a -> Word8) -> [a] -> ByteString
 485 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
 486     where
 487         STRICT2(go)
 488         go _ []     = return ()
 489         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 490 {-# INLINE packWith #-}
 491 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 492
 493 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 494 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 495 unpackWith _ (PS _  _ 0) = []
 496 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 497         go (p `plusPtr` s) (l - 1) []
 498     where
 499         STRICT3(go)
 500         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 501         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 502 {-# INLINE unpackWith #-}
 503 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 504
 505 -- ---------------------------------------------------------------------
 506 -- Basic interface
 507
 508 -- | /O(1)/ Test whether a ByteString is empty.
 509 null :: ByteString -> Bool
 510 null (PS _ _ l) = assert (l >= 0) $ l <= 0
 511 {-# INLINE null #-}
 512
 513 -- ---------------------------------------------------------------------
 514 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 515 length :: ByteString -> Int
 516 length (PS _ _ l) = assert (l >= 0) $ l
 517
 518 --
 519 -- length/loop fusion. When taking the length of any fuseable loop,
 520 -- rewrite it as a foldl', and thus avoid allocating the result buffer
 521 -- worth around 10% in speed testing.
 522 --
 523
 524 #if defined(__GLASGOW_HASKELL__)
 525 {-# INLINE [1] length #-}
 526 #endif
 527
 528 lengthU :: ByteString -> Int
 529 lengthU = foldl' (const . (+1)) (0::Int)
 530 {-# INLINE lengthU #-}
 531
 532 {-# RULES
 533
 534 -- v2 fusion
 535 "length/loop" forall loop s .
 536   length  (loopArr (loopWrapper loop s)) =
 537   lengthU (loopArr (loopWrapper loop s))
 538
 539   #-}
 540
 541 ------------------------------------------------------------------------
 542
 543 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 544 -- complexity, as it requires a memcpy.
 545 cons :: Word8 -> ByteString -> ByteString
 546 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 547         poke p c
 548         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 549 {-# INLINE cons #-}
 550
 551 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 552 snoc :: ByteString -> Word8 -> ByteString
 553 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 554         memcpy p (f `plusPtr` s) (fromIntegral l)
 555         poke (p `plusPtr` l) c
 556 {-# INLINE snoc #-}
 557
 558 -- todo fuse
 559
 560 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 561 -- An exception will be thrown in the case of an empty ByteString.
 562 head :: ByteString -> Word8
 563 head (PS x s l)
 564     | l <= 0    = errorEmptyList "head"
 565     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 566 {-# INLINE head #-}
 567
 568 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 569 -- An exception will be thrown in the case of an empty ByteString.
 570 tail :: ByteString -> ByteString
 571 tail (PS p s l)
 572     | l <= 0    = errorEmptyList "tail"
 573     | otherwise = PS p (s+1) (l-1)
 574 {-# INLINE tail #-}
 575
 576 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 577 -- An exception will be thrown in the case of an empty ByteString.
 578 last :: ByteString -> Word8
 579 last ps@(PS x s l)
 580     | null ps   = errorEmptyList "last"
 581     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 582 {-# INLINE last #-}
 583
 584 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 585 -- An exception will be thrown in the case of an empty ByteString.
 586 init :: ByteString -> ByteString
 587 init ps@(PS p s l)
 588     | null ps   = errorEmptyList "init"
 589     | otherwise = PS p s (l-1)
 590 {-# INLINE init #-}
 591
 592 -- | /O(n)/ Append two ByteStrings
 593 append :: ByteString -> ByteString -> ByteString
 594 append xs ys | null xs   = ys
 595              | null ys   = xs
 596              | otherwise = concat [xs,ys]
 597 {-# INLINE append #-}
 598
 599 -- ---------------------------------------------------------------------
 600 -- Transformations
 601
 602 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 603 -- element of @xs@. This function is subject to array fusion.
 604 map :: (Word8 -> Word8) -> ByteString -> ByteString
 605 #if defined(LOOPU_FUSION)
 606 map f = loopArr . loopU (mapEFL f) NoAcc
 607 #elif defined(LOOPUP_FUSION)
 608 map f = loopArr . loopUp (mapEFL f) NoAcc
 609 #elif defined(LOOPNOACC_FUSION)
 610 map f = loopArr . loopNoAcc (mapEFL f)
 611 #else
 612 map f = loopArr . loopMap f
 613 #endif
 614 {-# INLINE map #-}
 615
 616 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 617 -- slightly faster for one-shot cases.
 618 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 619 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
 620     create len $ map_ 0 (a `plusPtr` s)
 621   where
 622     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 623     STRICT3(map_)
 624     map_ n p1 p2
 625        | n >= len = return ()
 626        | otherwise = do
 627             x <- peekByteOff p1 n
 628             pokeByteOff p2 n (f x)
 629             map_ (n+1) p1 p2
 630 {-# INLINE map' #-}
 631
 632 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 633 reverse :: ByteString -> ByteString
 634 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 635         c_reverse p (f `plusPtr` s) (fromIntegral l)
 636
 637 -- todo, fuseable version
 638
 639 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 640 -- 'ByteString' and \`intersperses\' that byte between the elements of
 641 -- the 'ByteString'.  It is analogous to the intersperse function on
 642 -- Lists.
 643 intersperse :: Word8 -> ByteString -> ByteString
 644 intersperse c ps@(PS x s l)
 645     | length ps < 2  = ps
 646     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
 647         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 648
 649 {-
 650 intersperse c = pack . List.intersperse c . unpack
 651 -}
 652
 653 -- | The 'transpose' function transposes the rows and columns of its
 654 -- 'ByteString' argument.
 655 transpose :: [ByteString] -> [ByteString]
 656 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 657
 658 -- ---------------------------------------------------------------------
 659 -- Reducing 'ByteString's
 660
 661 -- | 'foldl', applied to a binary operator, a starting value (typically
 662 -- the left-identity of the operator), and a ByteString, reduces the
 663 -- ByteString using the binary operator, from left to right.
 664 -- This function is subject to array fusion.
 665 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 666 #if !defined(LOOPU_FUSION)
 667 foldl f z = loopAcc . loopUp (foldEFL f) z
 668 #else
 669 foldl f z = loopAcc . loopU (foldEFL f) z
 670 #endif
 671 {-# INLINE foldl #-}
 672
 673 {-
 674 --
 675 -- About twice as fast with 6.4.1, but not fuseable
 676 -- A simple fold . map is enough to make it worth while.
 677 --
 678 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 679         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 680     where
 681         STRICT3(lgo)
 682         lgo z p q | p == q    = return z
 683                   | otherwise = do c <- peek p
 684                                    lgo (f z c) (p `plusPtr` 1) q
 685 -}
 686
 687 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 688 -- Though actually foldl is also strict in the accumulator.
 689 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 690 foldl' = foldl
 691 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
 692 {-# INLINE foldl' #-}
 693
 694 -- | 'foldr', applied to a binary operator, a starting value
 695 -- (typically the right-identity of the operator), and a ByteString,
 696 -- reduces the ByteString using the binary operator, from right to left.
 697 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 698 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
 699 {-# INLINE foldr #-}
 700
 701 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 702 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 703 -- This function is subject to array fusion.
 704 -- An exception will be thrown in the case of an empty ByteString.
 705 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 706 foldl1 f ps
 707     | null ps   = errorEmptyList "foldl1"
 708     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 709 {-# INLINE foldl1 #-}
 710
 711 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 712 -- An exception will be thrown in the case of an empty ByteString.
 713 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 714 foldl1' f ps
 715     | null ps   = errorEmptyList "foldl1'"
 716     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 717 {-# INLINE foldl1' #-}
 718
 719 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 720 -- and thus must be applied to non-empty 'ByteString's
 721 -- An exception will be thrown in the case of an empty ByteString.
 722 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 723 foldr1 f ps
 724     | null ps        = errorEmptyList "foldr1"
 725     | otherwise      = foldr f (last ps) (init ps)
 726 {-# INLINE foldr1 #-}
 727
 728 -- ---------------------------------------------------------------------
 729 -- Special folds
 730
 731 -- | /O(n)/ Concatenate a list of ByteStrings.
 732 concat :: [ByteString] -> ByteString
 733 concat []     = empty
 734 concat [ps]   = ps
 735 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
 736   where len = P.sum . P.map length $ xs
 737         STRICT2(go)
 738         go []            _   = return ()
 739         go (PS p s l:ps) ptr = do
 740                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 741                 go ps (ptr `plusPtr` l)
 742
 743 -- | Map a function over a 'ByteString' and concatenate the results
 744 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 745 concatMap f = concat . foldr ((:) . f) []
 746
 747 -- foldr (append . f) empty
 748
 749 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 750 -- any element of the 'ByteString' satisfies the predicate.
 751 any :: (Word8 -> Bool) -> ByteString -> Bool
 752 any _ (PS _ _ 0) = False
 753 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 754         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 755     where
 756         STRICT2(go)
 757         go p q | p == q    = return False
 758                | otherwise = do c <- peek p
 759                                 if f c then return True
 760                                        else go (p `plusPtr` 1) q
 761
 762 -- todo fuse
 763
 764 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 765 -- if all elements of the 'ByteString' satisfy the predicate.
 766 all :: (Word8 -> Bool) -> ByteString -> Bool
 767 all _ (PS _ _ 0) = True
 768 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 769         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 770     where
 771         STRICT2(go)
 772         go p q | p == q     = return True  -- end of list
 773                | otherwise  = do c <- peek p
 774                                  if f c
 775                                     then go (p `plusPtr` 1) q
 776                                     else return False
 777
 778 ------------------------------------------------------------------------
 779
 780 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 781 -- This function will fuse.
 782 -- An exception will be thrown in the case of an empty ByteString.
 783 maximum :: ByteString -> Word8
 784 maximum xs@(PS x s l)
 785     | null xs   = errorEmptyList "maximum"
 786     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 787                       c_maximum (p `plusPtr` s) (fromIntegral l)
 788
 789 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 790 -- This function will fuse.
 791 -- An exception will be thrown in the case of an empty ByteString.
 792 minimum :: ByteString -> Word8
 793 minimum xs@(PS x s l)
 794     | null xs   = errorEmptyList "minimum"
 795     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 796                       c_minimum (p `plusPtr` s) (fromIntegral l)
 797
 798 --
 799 -- minimum/maximum/loop fusion. As for length (and other folds), when we
 800 -- see we're applied after a fuseable op, switch from using the C
 801 -- version, to the fuseable version. The result should then avoid
 802 -- allocating a buffer.
 803 --
 804
 805 #if defined(__GLASGOW_HASKELL__)
 806 {-# INLINE [1] minimum #-}
 807 {-# INLINE [1] maximum #-}
 808 #endif
 809
 810 maximumU :: ByteString -> Word8
 811 maximumU = foldl1' max
 812 {-# INLINE maximumU #-}
 813
 814 minimumU :: ByteString -> Word8
 815 minimumU = foldl1' min
 816 {-# INLINE minimumU #-}
 817
 818 {-# RULES
 819
 820 "minimum/loop" forall loop s .
 821   minimum  (loopArr (loopWrapper loop s)) =
 822   minimumU (loopArr (loopWrapper loop s))
 823
 824 "maximum/loop" forall loop s .
 825   maximum  (loopArr (loopWrapper loop s)) =
 826   maximumU (loopArr (loopWrapper loop s))
 827
 828   #-}
 829
 830 ------------------------------------------------------------------------
 831
 832 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 833 -- 'foldl'; it applies a function to each element of a ByteString,
 834 -- passing an accumulating parameter from left to right, and returning a
 835 -- final value of this accumulator together with the new list.
 836 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 837 #if !defined(LOOPU_FUSION)
 838 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
 839 #else
 840 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
 841 #endif
 842 {-# INLINE mapAccumL #-}
 843
 844 -- | The 'mapAccumR' function behaves like a combination of 'map' and
 845 -- 'foldr'; it applies a function to each element of a ByteString,
 846 -- passing an accumulating parameter from right to left, and returning a
 847 -- final value of this accumulator together with the new ByteString.
 848 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 849 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
 850 {-# INLINE mapAccumR #-}
 851
 852 -- | /O(n)/ map Word8 functions, provided with the index at each position
 853 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 854 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
 855 {-# INLINE mapIndexed #-}
 856
 857 -- ---------------------------------------------------------------------
 858 -- Building ByteStrings
 859
 860 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 861 -- reduced values from the left. This function will fuse.
 862 --
 863 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 864 --
 865 -- Note that
 866 --
 867 -- > last (scanl f z xs) == foldl f z xs.
 868 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 869 #if !defined(LOOPU_FUSION)
 870 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
 871 #else
 872 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
 873 #endif
 874
 875     -- n.b. haskell's List scan returns a list one bigger than the
 876     -- input, so we need to snoc here to get some extra space, however,
 877     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
 878 {-# INLINE scanl #-}
 879
 880 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 881 -- This function will fuse.
 882 --
 883 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 884 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 885 scanl1 f ps
 886     | null ps   = empty
 887     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 888 {-# INLINE scanl1 #-}
 889
 890 -- | scanr is the right-to-left dual of scanl.
 891 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 892 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
 893 {-# INLINE scanr #-}
 894
 895 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
 896 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 897 scanr1 f ps
 898     | null ps   = empty
 899     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
 900 {-# INLINE scanr1 #-}
 901
 902 -- ---------------------------------------------------------------------
 903 -- Unfolds and replicates
 904
 905 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 906 -- the value of every element. The following holds:
 907 --
 908 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 909 --
 910 -- This implemenation uses @memset(3)@
 911 replicate :: Int -> Word8 -> ByteString
 912 replicate w c
 913     | w <= 0    = empty
 914     | otherwise = unsafeCreate w $ \ptr ->
 915                       memset ptr c (fromIntegral w) >> return ()
 916
 917 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
 918 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
 919 -- ByteString from a seed value.  The function takes the element and
 920 -- returns 'Nothing' if it is done producing the ByteString or returns
 921 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
 922 -- and @b@ is the seed value for further production.
 923 --
 924 -- Examples:
 925 --
 926 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
 927 -- > == pack [0, 1, 2, 3, 4, 5]
 928 --
 929 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 930 unfoldr f = concat . unfoldChunk 32 64
 931   where unfoldChunk n n' x =
 932           case unfoldrN n f x of
 933             (s, Nothing) -> s : []
 934             (s, Just x') -> s : unfoldChunk n' (n+n') x'
 935
 936 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
 937 -- value.  However, the length of the result is limited by the first
 938 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
 939 -- when the maximum length of the result is known.
 940 --
 941 -- The following equation relates 'unfoldrN' and 'unfoldr':
 942 --
 943 -- > unfoldrN n f s == take n (unfoldr f s)
 944 --
 945 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 946 unfoldrN i f x0
 947     | i < 0     = (empty, Just x0)
 948     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
 949   where STRICT3(go)
 950         go p x n =
 951           case f x of
 952             Nothing      -> return (0, n, Nothing)
 953             Just (w,x')
 954              | n == i    -> return (0, n, Just x)
 955              | otherwise -> do poke p w
 956                                go (p `plusPtr` 1) x' (n+1)
 957
 958 -- ---------------------------------------------------------------------
 959 -- Substrings
 960
 961 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 962 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 963 take :: Int -> ByteString -> ByteString
 964 take n ps@(PS x s l)
 965     | n <= 0    = empty
 966     | n >= l    = ps
 967     | otherwise = PS x s n
 968 {-# INLINE take #-}
 969
 970 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 971 -- elements, or @[]@ if @n > 'length' xs@.
 972 drop  :: Int -> ByteString -> ByteString
 973 drop n ps@(PS x s l)
 974     | n <= 0    = ps
 975     | n >= l    = empty
 976     | otherwise = PS x (s+n) (l-n)
 977 {-# INLINE drop #-}
 978
 979 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 980 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 981 splitAt n ps@(PS x s l)
 982     | n <= 0    = (empty, ps)
 983     | n >= l    = (ps, empty)
 984     | otherwise = (PS x s n, PS x (s+n) (l-n))
 985 {-# INLINE splitAt #-}
 986
 987 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 988 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 989 -- satisfy @p@.
 990 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 991 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 992 {-# INLINE takeWhile #-}
 993
 994 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 995 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 996 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
 997 {-# INLINE dropWhile #-}
 998
 999 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1000 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1001 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1002 {-# INLINE break #-}
1003
1004 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1005 --
1006 -- breakEnd p == spanEnd (not.p)
1007 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1008 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
1009
1010 -- | 'breakByte' breaks its ByteString argument at the first occurence
1011 -- of the specified byte. It is more efficient than 'break' as it is
1012 -- implemented with @memchr(3)@. I.e.
1013 --
1014 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1015 --
1016 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1017 breakByte c p = case elemIndex c p of
1018     Nothing -> (p,empty)
1019     Just n  -> (unsafeTake n p, unsafeDrop n p)
1020 {-# INLINE breakByte #-}
1021
1022 -- | 'spanByte' breaks its ByteString argument at the first
1023 -- occurence of a byte other than its argument. It is more efficient
1024 -- than 'span (==)'
1025 --
1026 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
1027 --
1028 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1029 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1030     go (p `plusPtr` s) 0
1031   where
1032     STRICT2(go)
1033     go p i | i >= l    = return (ps, empty)
1034            | otherwise = do c' <- peekByteOff p i
1035                             if c /= c'
1036                                 then return (unsafeTake i ps, unsafeDrop i ps)
1037                                 else go p (i+1)
1038 {-# INLINE spanByte #-}
1039
1040 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1041 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1042 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1043 span p ps = break (not . p) ps
1044 {-# INLINE span #-}
1045
1046 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1047 -- We have
1048 --
1049 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1050 --
1051 -- and
1052 --
1053 -- > spanEnd (not . isSpace) ps
1054 -- >    ==
1055 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1056 --
1057 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1058 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1059
1060 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1061 -- separators, where the predicate returns True for a separator element.
1062 -- The resulting components do not contain the separators.  Two adjacent
1063 -- separators result in an empty component in the output.  eg.
1064 --
1065 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1066 -- > splitWith (=='a') []        == []
1067 --
1068 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1069
1070 #if defined(__GLASGOW_HASKELL__)
1071 splitWith _pred (PS _  _   0) = []
1072 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1073   where pred# c# = pred_ (W8# c#)
1074
1075         STRICT4(splitWith0)
1076         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1077             splitLoop pred' p 0 off' len' fp'
1078
1079         splitLoop :: (Word# -> Bool)
1080                   -> Ptr Word8
1081                   -> Int -> Int -> Int
1082                   -> ForeignPtr Word8
1083                   -> IO [ByteString]
1084
1085         splitLoop pred' p idx' off' len' fp'
1086             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1087             | idx' >= len'  = return [PS fp' off' idx']
1088             | otherwise = do
1089                 w <- peekElemOff p (off'+idx')
1090                 if pred' (case w of W8# w# -> w#)
1091                    then return (PS fp' off' idx' :
1092                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1093                    else splitLoop pred' p (idx'+1) off' len' fp'
1094 {-# INLINE splitWith #-}
1095
1096 #else
1097 splitWith _ (PS _ _ 0) = []
1098 splitWith p ps = loop p ps
1099     where
1100         STRICT2(loop)
1101         loop q qs = if null rest then [chunk]
1102                                  else chunk : loop q (unsafeTail rest)
1103             where (chunk,rest) = break q qs
1104 #endif
1105
1106 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1107 -- argument, consuming the delimiter. I.e.
1108 --
1109 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1110 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1111 -- > split 'x'  "x"          == ["",""]
1112 --
1113 -- and
1114 --
1115 -- > join [c] . split c == id
1116 -- > split == splitWith . (==)
1117 --
1118 -- As for all splitting functions in this library, this function does
1119 -- not copy the substrings, it just constructs new 'ByteStrings' that
1120 -- are slices of the original.
1121 --
1122 split :: Word8 -> ByteString -> [ByteString]
1123 split _ (PS _ _ 0) = []
1124 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1125     let ptr = p `plusPtr` s
1126
1127         STRICT1(loop)
1128         loop n =
1129             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1130                                            w (fromIntegral (l-n))
1131             in if q == nullPtr
1132                 then [PS x (s+n) (l-n)]
1133                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1134
1135     return (loop 0)
1136 {-# INLINE split #-}
1137
1138 {-
1139 -- slower. but stays inside Haskell.
1140 split _ (PS _  _   0) = []
1141 split (W8# w#) (PS fp off len) = splitWith' off len fp
1142     where
1143         splitWith' off' len' fp' = withPtr fp $ \p ->
1144             splitLoop p 0 off' len' fp'
1145
1146         splitLoop :: Ptr Word8
1147                   -> Int -> Int -> Int
1148                   -> ForeignPtr Word8
1149                   -> IO [ByteString]
1150
1151         STRICT5(splitLoop)
1152         splitLoop p idx' off' len' fp'
1153             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1154             | idx' >= len'  = return [PS fp' off' idx']
1155             | otherwise = do
1156                 (W8# x#) <- peekElemOff p (off'+idx')
1157                 if word2Int# w# ==# word2Int# x#
1158                    then return (PS fp' off' idx' :
1159                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1160                    else splitLoop p (idx'+1) off' len' fp'
1161 -}
1162
1163 -- | Like 'splitWith', except that sequences of adjacent separators are
1164 -- treated as a single separator. eg.
1165 --
1166 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1167 --
1168 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1169 tokens f = P.filter (not.null) . splitWith f
1170 {-# INLINE tokens #-}
1171
1172 -- | The 'group' function takes a ByteString and returns a list of
1173 -- ByteStrings such that the concatenation of the result is equal to the
1174 -- argument.  Moreover, each sublist in the result contains only equal
1175 -- elements.  For example,
1176 --
1177 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1178 --
1179 -- It is a special case of 'groupBy', which allows the programmer to
1180 -- supply their own equality test. It is about 40% faster than
1181 -- /groupBy (==)/
1182 group :: ByteString -> [ByteString]
1183 group xs
1184     | null xs   = []
1185     | otherwise = ys : group zs
1186     where
1187         (ys, zs) = spanByte (unsafeHead xs) xs
1188
1189 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1190 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1191 groupBy k xs
1192     | null xs   = []
1193     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1194     where
1195         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1196
1197 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1198 -- 'ByteString's and concatenates the list after interspersing the first
1199 -- argument between each element of the list.
1200 join :: ByteString -> [ByteString] -> ByteString
1201 join s = concat . (List.intersperse s)
1202 {-# INLINE join #-}
1203
1204 --
1205 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1206 -- with a char. Around 4 times faster than the generalised join.
1207 --
1208 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1209 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1210     withForeignPtr ffp $ \fp ->
1211     withForeignPtr fgp $ \gp -> do
1212         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1213         poke (ptr `plusPtr` l) c
1214         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1215     where
1216       len = length f + length g + 1
1217 {-# INLINE joinWithByte #-}
1218
1219 -- ---------------------------------------------------------------------
1220 -- Indexing ByteStrings
1221
1222 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1223 index :: ByteString -> Int -> Word8
1224 index ps n
1225     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1226     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1227                                          ++ ", length = " ++ show (length ps))
1228     | otherwise      = ps `unsafeIndex` n
1229 {-# INLINE index #-}
1230
1231 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1232 -- element in the given 'ByteString' which is equal to the query
1233 -- element, or 'Nothing' if there is no such element.
1234 -- This implementation uses memchr(3).
1235 elemIndex :: Word8 -> ByteString -> Maybe Int
1236 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1237     let p' = p `plusPtr` s
1238     q <- memchr p' c (fromIntegral l)
1239     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1240 {-# INLINE elemIndex #-}
1241
1242 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1243 -- element in the given 'ByteString' which is equal to the query
1244 -- element, or 'Nothing' if there is no such element. The following
1245 -- holds:
1246 --
1247 -- > elemIndexEnd c xs ==
1248 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1249 --
1250 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1251 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1252     go (p `plusPtr` s) (l-1)
1253   where
1254     STRICT2(go)
1255     go p i | i < 0     = return Nothing
1256            | otherwise = do ch' <- peekByteOff p i
1257                             if ch == ch'
1258                                 then return $ Just i
1259                                 else go p (i-1)
1260 {-# INLINE elemIndexEnd #-}
1261
1262 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1263 -- the indices of all elements equal to the query element, in ascending order.
1264 -- This implementation uses memchr(3).
1265 elemIndices :: Word8 -> ByteString -> [Int]
1266 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1267     let ptr = p `plusPtr` s
1268
1269         STRICT1(loop)
1270         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1271                                                 w (fromIntegral (l - n))
1272                  in if q == nullPtr
1273                         then []
1274                         else let i = q `minusPtr` ptr
1275                              in i : loop (i+1)
1276     return $! loop 0
1277 {-# INLINE elemIndices #-}
1278
1279 {-
1280 -- much slower
1281 elemIndices :: Word8 -> ByteString -> [Int]
1282 elemIndices c ps = loop 0 ps
1283    where STRICT2(loop)
1284          loop _ ps' | null ps'            = []
1285          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1286                     | otherwise           = loop (n+1) (unsafeTail ps')
1287 -}
1288
1289 -- | count returns the number of times its argument appears in the ByteString
1290 --
1291 -- > count = length . elemIndices
1292 --
1293 -- But more efficiently than using length on the intermediate list.
1294 count :: Word8 -> ByteString -> Int
1295 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1296     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1297 {-# INLINE count #-}
1298
1299 {-
1300 --
1301 -- around 30% slower
1302 --
1303 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1304      go (p `plusPtr` s) (fromIntegral m) 0
1305     where
1306         go :: Ptr Word8 -> CSize -> Int -> IO Int
1307         STRICT3(go)
1308         go p l i = do
1309             q <- memchr p w l
1310             if q == nullPtr
1311                 then return i
1312                 else do let k = fromIntegral $ q `minusPtr` p
1313                         go (q `plusPtr` 1) (l-k-1) (i+1)
1314 -}
1315
1316 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1317 -- returns the index of the first element in the ByteString
1318 -- satisfying the predicate.
1319 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1320 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1321   where
1322     STRICT2(go)
1323     go ptr n | n >= l    = return Nothing
1324              | otherwise = do w <- peek ptr
1325                               if k w
1326                                 then return (Just n)
1327                                 else go (ptr `plusPtr` 1) (n+1)
1328 {-# INLINE findIndex #-}
1329
1330 -- | The 'findIndices' function extends 'findIndex', by returning the
1331 -- indices of all elements satisfying the predicate, in ascending order.
1332 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1333 findIndices p ps = loop 0 ps
1334    where
1335      STRICT2(loop)
1336      loop n qs | null qs           = []
1337                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1338                | otherwise         =     loop (n+1) (unsafeTail qs)
1339
1340 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1341 -- of the string if no element is found, rather than Nothing.
1342 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1343 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1344   where
1345     STRICT2(go)
1346     go ptr n | n >= l    = return l
1347              | otherwise = do w <- peek ptr
1348                               if k w
1349                                 then return n
1350                                 else go (ptr `plusPtr` 1) (n+1)
1351 {-# INLINE findIndexOrEnd #-}
1352
1353 -- ---------------------------------------------------------------------
1354 -- Searching ByteStrings
1355
1356 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1357 elem :: Word8 -> ByteString -> Bool
1358 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1359 {-# INLINE elem #-}
1360
1361 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1362 notElem :: Word8 -> ByteString -> Bool
1363 notElem c ps = not (elem c ps)
1364 {-# INLINE notElem #-}
1365
1366 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1367 -- returns a ByteString containing those characters that satisfy the
1368 -- predicate. This function is subject to array fusion.
1369 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1370 #if defined(LOOPU_FUSION)
1371 filter p  = loopArr . loopU (filterEFL p) NoAcc
1372 #elif defined(LOOPUP_FUSION)
1373 filter p  = loopArr . loopUp (filterEFL p) NoAcc
1374 #elif defined(LOOPNOACC_FUSION)
1375 filter p  = loopArr . loopNoAcc (filterEFL p)
1376 #else
1377 filter f = loopArr . loopFilter f
1378 #endif
1379 {-# INLINE filter #-}
1380
1381 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1382 -- around 2x faster for some one-shot applications.
1383 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1384 filter' k ps@(PS x s l)
1385     | null ps   = ps
1386     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1387         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1388         return $! t `minusPtr` p -- actual length
1389     where
1390         STRICT3(go)
1391         go f t end | f == end  = return t
1392                    | otherwise = do
1393                         w <- peek f
1394                         if k w
1395                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1396                             else             go (f `plusPtr` 1) t               end
1397 {-# INLINE filter' #-}
1398
1399 --
1400 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1401 -- case of filtering a single byte. It is more efficient to use
1402 -- /filterByte/ in this case.
1403 --
1404 -- > filterByte == filter . (==)
1405 --
1406 -- filterByte is around 10x faster, and uses much less space, than its
1407 -- filter equivalent
1408 filterByte :: Word8 -> ByteString -> ByteString
1409 filterByte w ps = replicate (count w ps) w
1410 {-# INLINE filterByte #-}
1411
1412 --
1413 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1414 -- case of filtering a single byte out of a list. It is more efficient
1415 -- to use /filterNotByte/ in this case.
1416 --
1417 -- > filterNotByte == filter . (/=)
1418 --
1419 -- filterNotByte is around 2x faster than its filter equivalent.
1420 filterNotByte :: Word8 -> ByteString -> ByteString
1421 filterNotByte w = filter' (/= w)
1422 {-# INLINE filterNotByte #-}
1423
1424 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1425 -- and returns the first element in matching the predicate, or 'Nothing'
1426 -- if there is no such element.
1427 --
1428 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1429 --
1430 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1431 find f p = case findIndex f p of
1432                     Just n -> Just (p `unsafeIndex` n)
1433                     _      -> Nothing
1434 {-# INLINE find #-}
1435
1436 {-
1437 --
1438 -- fuseable, but we don't want to walk the whole array.
1439 --
1440 find k = foldl findEFL Nothing
1441     where findEFL a@(Just _) _ = a
1442           findEFL _          c | k c       = Just c
1443                                | otherwise = Nothing
1444 -}
1445
1446 -- ---------------------------------------------------------------------
1447 -- Searching for substrings
1448
1449 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1450 -- iff the first is a prefix of the second.
1451 isPrefixOf :: ByteString -> ByteString -> Bool
1452 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1453     | l1 == 0   = True
1454     | l2 < l1   = False
1455     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1456         withForeignPtr x2 $ \p2 -> do
1457             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1458             return $! i == 0
1459
1460 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1461 -- iff the first is a suffix of the second.
1462 --
1463 -- The following holds:
1464 --
1465 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1466 --
1467 -- However, the real implemenation uses memcmp to compare the end of the
1468 -- string only, with no reverse required..
1469 isSuffixOf :: ByteString -> ByteString -> Bool
1470 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1471     | l1 == 0   = True
1472     | l2 < l1   = False
1473     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1474         withForeignPtr x2 $ \p2 -> do
1475             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1476             return $! i == 0
1477
1478 -- | Check whether one string is a substring of another. @isSubstringOf
1479 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1480 isSubstringOf :: ByteString -- ^ String to search for.
1481               -> ByteString -- ^ String to search in.
1482               -> Bool
1483 isSubstringOf p s = not $ P.null $ findSubstrings p s
1484
1485 -- | Get the first index of a substring in another string,
1486 --   or 'Nothing' if the string is not found.
1487 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1488 findSubstring :: ByteString -- ^ String to search for.
1489               -> ByteString -- ^ String to seach in.
1490               -> Maybe Int
1491 findSubstring = (listToMaybe .) . findSubstrings
1492
1493 -- | Find the indexes of all (possibly overlapping) occurances of a
1494 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1495 -- string matching algorithm.
1496 findSubstrings :: ByteString -- ^ String to search for.
1497                -> ByteString -- ^ String to seach in.
1498                -> [Int]
1499
1500 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1501   where
1502       patc x = pat `unsafeIndex` x
1503       strc x = str `unsafeIndex` x
1504
1505       -- maybe we should make kmpNext a UArray before using it in search?
1506       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1507       kmpNextL p _ | null p = []
1508       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1509                          ps = unsafeTail p
1510                          x = if not (null ps) && unsafeHead ps == patc j'
1511                                 then kmpNext Array.! j' else j'
1512                         in x:kmpNextL ps j'
1513       search i j = match ++ rest -- i: position in string, j: position in pattern
1514         where match = if j == m then [(i - j)] else []
1515               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1516       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1517                | otherwise = j
1518
1519 -- ---------------------------------------------------------------------
1520 -- Zipping
1521
1522 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1523 -- corresponding pairs of bytes. If one input ByteString is short,
1524 -- excess elements of the longer ByteString are discarded. This is
1525 -- equivalent to a pair of 'unpack' operations.
1526 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1527 zip ps qs
1528     | null ps || null qs = []
1529     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1530
1531 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1532 -- the first argument, instead of a tupling function.  For example,
1533 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1534 -- corresponding sums.
1535 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1536 zipWith f ps qs
1537     | null ps || null qs = []
1538     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1539
1540 --
1541 -- | A specialised version of zipWith for the common case of a
1542 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1543 -- are used to automatically covert zipWith into zipWith' when a pack is
1544 -- performed on the result of zipWith, but we also export it for
1545 -- convenience.
1546 --
1547 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1548 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1549     withForeignPtr fp $ \a ->
1550     withForeignPtr fq $ \b ->
1551     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1552   where
1553     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1554     STRICT4(zipWith_)
1555     zipWith_ n p1 p2 r
1556        | n >= len = return ()
1557        | otherwise = do
1558             x <- peekByteOff p1 n
1559             y <- peekByteOff p2 n
1560             pokeByteOff r n (f x y)
1561             zipWith_ (n+1) p1 p2 r
1562
1563     len = min l m
1564 {-# INLINE zipWith' #-}
1565
1566 {-# RULES
1567
1568 "Specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1569     pack (zipWith f p q) = zipWith' f p q
1570   #-}
1571
1572 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1573 -- ByteStrings. Note that this performs two 'pack' operations.
1574 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1575 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1576 {-# INLINE unzip #-}
1577
1578 -- ---------------------------------------------------------------------
1579 -- Special lists
1580
1581 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1582 inits :: ByteString -> [ByteString]
1583 inits (PS x s l) = [PS x s n | n <- [0..l]]
1584
1585 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1586 tails :: ByteString -> [ByteString]
1587 tails p | null p    = [empty]
1588         | otherwise = p : tails (unsafeTail p)
1589
1590 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1591
1592 -- ---------------------------------------------------------------------
1593 -- ** Ordered 'ByteString's
1594
1595 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1596 sort :: ByteString -> ByteString
1597 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1598
1599     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1600     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1601
1602     let STRICT2(go)
1603         go 256 _   = return ()
1604         go i   ptr = do n <- peekElemOff arr i
1605                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1606                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1607     go 0 p
1608
1609 {-
1610 sort :: ByteString -> ByteString
1611 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1612         memcpy p (f `plusPtr` s) l
1613         c_qsort p l -- inplace
1614 -}
1615
1616 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1617 --
1618 -- Try some linear sorts: radix, counting
1619 -- Or mergesort.
1620 --
1621 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1622 -- sortBy f ps = undefined
1623
1624 -- ---------------------------------------------------------------------
1625 -- Low level constructors
1626
1627 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1628 -- finalizer associated to it. The ByteString length is calculated using
1629 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1630 packCString :: CString -> ByteString
1631 packCString cstr = unsafePerformIO $ do
1632     fp <- newForeignPtr_ (castPtr cstr)
1633     l <- c_strlen cstr
1634     return $! PS fp 0 (fromIntegral l)
1635
1636 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1637 -- have /no/ finalizer associated with it. This operation has /O(1)/
1638 -- complexity as we already know the final size, so no /strlen(3)/ is
1639 -- required.
1640 packCStringLen :: CStringLen -> ByteString
1641 packCStringLen (ptr,len) = unsafePerformIO $ do
1642     fp <- newForeignPtr_ (castPtr ptr)
1643     return $! PS fp 0 (fromIntegral len)
1644
1645 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1646 -- have a @free(3)@ finalizer associated to it.
1647 packMallocCString :: CString -> ByteString
1648 packMallocCString cstr = unsafePerformIO $ do
1649     fp <- newForeignFreePtr (castPtr cstr)
1650     len <- c_strlen cstr
1651     return $! PS fp 0 (fromIntegral len)
1652
1653 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1654 -- null-terminated @CString@.  The @CString@ will be freed
1655 -- automatically. This is a memcpy(3).
1656 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1657 useAsCString ps f = useAsCStringLen ps (\(s,_) -> f s)
1658
1659 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1660 -- @CStringLen@. The @CStringLen@ will be freed automatically. This is a
1661 -- memcpy(3).
1662 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1663 useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1664     where
1665       alloc = withForeignPtr ps $ \p -> do
1666                 buf <- c_malloc (fromIntegral l+1)
1667                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1668                 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1669                 return $! (castPtr buf, l)
1670
1671 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1672 --   This is mainly useful to allow the rest of the data pointed
1673 --   to by the 'ByteString' to be garbage collected, for example
1674 --   if a large string has been read in, and only a small part of it
1675 --   is needed in the rest of the program.
1676 copy :: ByteString -> ByteString
1677 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1678     memcpy p (f `plusPtr` s) (fromIntegral l)
1679
1680 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1681 -- CString is going to be deallocated from C land.
1682 copyCString :: CString -> IO ByteString
1683 copyCString cstr = do
1684     len <- c_strlen cstr
1685     copyCStringLen (cstr, fromIntegral len)
1686
1687 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1688 copyCStringLen :: CStringLen -> IO ByteString
1689 copyCStringLen (cstr, len) = create len $ \p ->
1690     memcpy p (castPtr cstr) (fromIntegral len)
1691
1692 -- ---------------------------------------------------------------------
1693 -- line IO
1694
1695 #if defined(__GLASGOW_HASKELL__)
1696
1697 -- | getLine, read a line from stdin.
1698 getLine :: IO ByteString
1699 getLine = hGetLine stdin
1700
1701 -- | Lazily construct a list of lines of ByteStrings. This will be much
1702 -- better on memory consumption than using lines =<< getContents.
1703 hGetLines :: Handle -> IO [ByteString]
1704 hGetLines h = go
1705     where
1706         go = unsafeInterleaveIO $ do
1707                 ms <- catch (hGetLine h >>= return . Just)
1708                             (\_ -> return Nothing)
1709                 case ms of
1710                     Nothing -> return []
1711                     Just s  -> do ss <- go
1712                                   return (s:ss)
1713
1714 -- | hGetLine. read a ByteString from a handle
1715 hGetLine :: Handle -> IO ByteString
1716 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1717     case haBufferMode handle_ of
1718        NoBuffering -> error "no buffering"
1719        _other      -> hGetLineBuffered handle_
1720
1721  where
1722     hGetLineBuffered handle_ = do
1723         let ref = haBuffer handle_
1724         buf <- readIORef ref
1725         hGetLineBufferedLoop handle_ ref buf 0 []
1726
1727     hGetLineBufferedLoop handle_ ref
1728             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1729         len `seq` do
1730         off <- findEOL r w raw
1731         let new_len = len + off - r
1732         xs <- mkPS raw r off
1733
1734       -- if eol == True, then off is the offset of the '\n'
1735       -- otherwise off == w and the buffer is now empty.
1736         if off /= w
1737             then do if (w == off + 1)
1738                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1739                             else writeIORef ref buf{ bufRPtr = off + 1 }
1740                     mkBigPS new_len (xs:xss)
1741             else do
1742                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1743                                     buf{ bufWPtr=0, bufRPtr=0 }
1744                  case maybe_buf of
1745                     -- Nothing indicates we caught an EOF, and we may have a
1746                     -- partial line to return.
1747                     Nothing -> do
1748                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1749                          if new_len > 0
1750                             then mkBigPS new_len (xs:xss)
1751                             else ioe_EOF
1752                     Just new_buf ->
1753                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1754
1755     -- find the end-of-line character, if there is one
1756     findEOL r w raw
1757         | r == w = return w
1758         | otherwise =  do
1759             (c,r') <- readCharFromBuffer raw r
1760             if c == '\n'
1761                 then return r -- NB. not r': don't include the '\n'
1762                 else findEOL r' w raw
1763
1764     maybeFillReadBuffer fd is_line is_stream buf = catch
1765         (do buf' <- fillReadBuffer fd is_line is_stream buf
1766             return (Just buf'))
1767         (\e -> if isEOFError e then return Nothing else ioError e)
1768
1769 -- TODO, rewrite to use normal memcpy
1770 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1771 mkPS buf start end =
1772     let len = end - start
1773     in create len $ \p -> do
1774         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1775         return ()
1776
1777 mkBigPS :: Int -> [ByteString] -> IO ByteString
1778 mkBigPS _ [ps] = return ps
1779 mkBigPS _ pss = return $! concat (P.reverse pss)
1780
1781 #endif
1782
1783 -- ---------------------------------------------------------------------
1784 -- Block IO
1785
1786 -- | Outputs a 'ByteString' to the specified 'Handle'.
1787 hPut :: Handle -> ByteString -> IO ()
1788 hPut _ (PS _  _ 0) = return ()
1789 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1790
1791 -- | A synonym for @hPut@, for compatibility
1792 hPutStr :: Handle -> ByteString -> IO ()
1793 hPutStr = hPut
1794
1795 -- | Write a ByteString to a handle, appending a newline byte
1796 hPutStrLn :: Handle -> ByteString -> IO ()
1797 hPutStrLn h ps
1798     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1799     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1800
1801 -- | Write a ByteString to stdout
1802 putStr :: ByteString -> IO ()
1803 putStr = hPut stdout
1804
1805 -- | Write a ByteString to stdout, appending a newline byte
1806 putStrLn :: ByteString -> IO ()
1807 putStrLn = hPutStrLn stdout
1808
1809 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1810 -- is far more efficient than reading the characters into a 'String'
1811 -- and then using 'pack'.
1812 hGet :: Handle -> Int -> IO ByteString
1813 hGet _ 0 = return empty
1814 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1815
1816 #if defined(__GLASGOW_HASKELL__)
1817 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1818 -- waiting for data to become available, instead it returns only whatever data
1819 -- is available.
1820 hGetNonBlocking :: Handle -> Int -> IO ByteString
1821 hGetNonBlocking _ 0 = return empty
1822 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1823 #endif
1824
1825 -- | Read entire handle contents into a 'ByteString'.
1826 -- This function reads chunks at a time, doubling the chunksize on each
1827 -- read. The final buffer is then realloced to the appropriate size. For
1828 -- files > half of available memory, this may lead to memory exhaustion.
1829 -- Consider using 'readFile' in this case.
1830 --
1831 -- As with 'hGet', the string representation in the file is assumed to
1832 -- be ISO-8859-1.
1833 --
1834 hGetContents :: Handle -> IO ByteString
1835 hGetContents h = do
1836     let start_size = 1024
1837     p <- mallocArray start_size
1838     i <- hGetBuf h p start_size
1839     if i < start_size
1840         then do p' <- reallocArray p i
1841                 fp <- newForeignFreePtr p'
1842                 return $! PS fp 0 i
1843         else f p start_size
1844     where
1845         f p s = do
1846             let s' = 2 * s
1847             p' <- reallocArray p s'
1848             i  <- hGetBuf h (p' `plusPtr` s) s
1849             if i < s
1850                 then do let i' = s + i
1851                         p'' <- reallocArray p' i'
1852                         fp  <- newForeignFreePtr p''
1853                         return $! PS fp 0 i'
1854                 else f p' s'
1855
1856 -- | getContents. Equivalent to hGetContents stdin
1857 getContents :: IO ByteString
1858 getContents = hGetContents stdin
1859
1860 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1861 -- efficient than reading the characters into a 'String' and then using
1862 -- 'pack'.  It also may be more efficient than opening the file and
1863 -- reading it using hGet.
1864 readFile :: FilePath -> IO ByteString
1865 readFile f = bracket (openBinaryFile f ReadMode) hClose
1866     (\h -> hFileSize h >>= hGet h . fromIntegral)
1867
1868 -- | Write a 'ByteString' to a file.
1869 writeFile :: FilePath -> ByteString -> IO ()
1870 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1871     (\h -> hPut h ps)
1872
1873 -- | Append a 'ByteString' to a file.
1874 appendFile :: FilePath -> ByteString -> IO ()
1875 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1876     (\hdl -> hPut hdl txt)
1877
1878 {-
1879 --
1880 -- Disable until we can move it into a portable .hsc file
1881 --
1882
1883 -- | Like readFile, this reads an entire file directly into a
1884 -- 'ByteString', but it is even more efficient.  It involves directly
1885 -- mapping the file to memory.  This has the advantage that the contents
1886 -- of the file never need to be copied.  Also, under memory pressure the
1887 -- page may simply be discarded, while in the case of readFile it would
1888 -- need to be written to swap.  If you read many small files, mmapFile
1889 -- will be less memory-efficient than readFile, since each mmapFile
1890 -- takes up a separate page of memory.  Also, you can run into bus
1891 -- errors if the file is modified.  As with 'readFile', the string
1892 -- representation in the file is assumed to be ISO-8859-1.
1893 --
1894 -- On systems without mmap, this is the same as a readFile.
1895 --
1896 mmapFile :: FilePath -> IO ByteString
1897 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1898
1899 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1900 mmap f = do
1901     h <- openBinaryFile f ReadMode
1902     l <- fromIntegral `fmap` hFileSize h
1903     -- Don't bother mmaping small files because each mmapped file takes up
1904     -- at least one full VM block.
1905     if l < mmap_limit
1906        then do thefp <- mallocByteString l
1907                withForeignPtr thefp $ \p-> hGetBuf h p l
1908                hClose h
1909                return (thefp, l)
1910        else do
1911                -- unix only :(
1912                fd <- fromIntegral `fmap` handleToFd h
1913                p  <- my_mmap l fd
1914                fp <- if p == nullPtr
1915                      then do thefp <- mallocByteString l
1916                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1917                              return thefp
1918                      else do
1919                           -- The munmap leads to crashes on OpenBSD.
1920                           -- maybe there's a use after unmap in there somewhere?
1921                           -- Bulat suggests adding the hClose to the
1922                           -- finalizer, excellent idea.
1923 #if !defined(__OpenBSD__)
1924                              let unmap = c_munmap p l >> return ()
1925 #else
1926                              let unmap = return ()
1927 #endif
1928                              fp <- FC.newForeignPtr p unmap
1929                              return fp
1930                c_close fd
1931                hClose h
1932                return (fp, l)
1933     where mmap_limit = 16*1024
1934 -}
1935
1936 #if defined(__GLASGOW_HASKELL__)
1937 --
1938 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1939 --
1940 getArgs :: IO [ByteString]
1941 getArgs =
1942   alloca $ \ p_argc ->
1943   alloca $ \ p_argv -> do
1944     getProgArgv p_argc p_argv
1945     p    <- fromIntegral `fmap` peek p_argc
1946     argv <- peek p_argv
1947     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1948 #endif
1949
1950 -- ---------------------------------------------------------------------
1951 -- Internal utilities
1952
1953 -- | Perform an operation with a temporary ByteString
1954 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1955 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1956 {-# INLINE withPtr #-}
1957
1958 -- Common up near identical calls to `error' to reduce the number
1959 -- constant strings created when compiled:
1960 errorEmptyList :: String -> a
1961 errorEmptyList fun = moduleError fun "empty ByteString"
1962 {-# NOINLINE errorEmptyList #-}
1963
1964 moduleError :: String -> String -> a
1965 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
1966 {-# NOINLINE moduleError #-}
1967
1968 -- Find from the end of the string using predicate
1969 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1970 STRICT2(findFromEndUntil)
1971 findFromEndUntil f ps@(PS x s l) =
1972     if null ps then 0
1973     else if f (last ps) then l
1974          else findFromEndUntil f (PS x s (l-1))
1975
1976 {-# INLINE newForeignFreePtr #-}
1977 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1978 #if defined(__GLASGOW_HASKELL__)
1979 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1980 #else
1981 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1982 #endif