Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with "Prelude" functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         singleton,              -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: ByteString -> Word8 -> ByteString
  55         append,                 -- :: ByteString -> ByteString -> ByteString
  56         head,                   -- :: ByteString -> Word8
  57         last,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         init,                   -- :: ByteString -> ByteString
  60         null,                   -- :: ByteString -> Bool
  61         length,                 -- :: ByteString -> Int
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         map',                   -- :: (Word8 -> Word8) -> ByteString -> ByteString
  66         reverse,                -- :: ByteString -> ByteString
  67         intersperse,            -- :: Word8 -> ByteString -> ByteString
  68         transpose,              -- :: [ByteString] -> [ByteString]
  69
  70         -- * Reducing 'ByteString's (folds)
  71         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  72         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  73         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  75         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  76         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77
  78         -- ** Special folds
  79         concat,                 -- :: [ByteString] -> ByteString
  80         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  81         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  82         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  83         maximum,                -- :: ByteString -> Word8
  84         minimum,                -- :: ByteString -> Word8
  85
  86         -- * Building ByteStrings
  87         -- ** Scans
  88         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  89         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  90         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  91         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  92
  93         -- ** Accumulating maps
  94         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  95         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  96         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  97
  98         -- ** Unfolding ByteStrings
  99         replicate,              -- :: Int -> Word8 -> ByteString
 100         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
 101         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
 102
 103         -- * Substrings
 104
 105         -- ** Breaking strings
 106         take,                   -- :: Int -> ByteString -> ByteString
 107         drop,                   -- :: Int -> ByteString -> ByteString
 108         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 109         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 110         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 111         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 113         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 114         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117         inits,                  -- :: ByteString -> [ByteString]
 118         tails,                  -- :: ByteString -> [ByteString]
 119
 120         -- ** Breaking and dropping on specific bytes
 121         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 122         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 123
 124         -- ** Breaking into many substrings
 125         split,                  -- :: Word8 -> ByteString -> [ByteString]
 126         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 127         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 128
 129         -- ** Joining strings
 130         join,                   -- :: ByteString -> [ByteString] -> ByteString
 131         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 132
 133         -- * Predicates
 134         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 135         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 136
 137         -- ** Search for arbitrary substrings
 138         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 139         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 140         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 141
 142         -- * Searching ByteStrings
 143
 144         -- ** Searching by equality
 145         -- | These functions use memchr(3) to efficiently search the ByteString
 146         elem,                   -- :: Word8 -> ByteString -> Bool
 147         notElem,                -- :: Word8 -> ByteString -> Bool
 148         filterByte,             -- :: Word8 -> ByteString -> ByteString
 149         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 150
 151         -- ** Searching with a predicate
 152         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 153         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 154         filter',                -- :: (Word8 -> Bool) -> ByteString -> ByteString
 155 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 156
 157         -- * Indexing ByteStrings
 158         index,                  -- :: ByteString -> Int -> Word8
 159         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 160         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 161         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
 162         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 163         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 164         count,                  -- :: Word8 -> ByteString -> Int
 165         findIndexOrEnd,         -- :: (Word8 -> Bool) -> ByteString -> Int
 166
 167         -- * Zipping and unzipping ByteStrings
 168         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 169         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 170         zipWith',
 171         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 172
 173         -- * Ordered ByteStrings
 174         sort,                   -- :: ByteString -> ByteString
 175
 176         -- * Low level CString conversions
 177
 178         -- ** Packing CStrings and pointers
 179         packCString,            -- :: CString -> ByteString
 180         packCStringLen,         -- :: CString -> ByteString
 181         packMallocCString,      -- :: CString -> ByteString
 182
 183         -- ** Using ByteStrings as CStrings
 184         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 185         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
 186
 187         -- ** Copying ByteStrings
 188         -- | These functions perform memcpy(3) operations
 189         copy,                   -- :: ByteString -> ByteString
 190         copyCString,            -- :: CString -> IO ByteString
 191         copyCStringLen,         -- :: CStringLen -> IO ByteString
 192
 193         -- * I\/O with 'ByteString's
 194
 195         -- ** Standard input and output
 196
 197 #if defined(__GLASGOW_HASKELL__)
 198         getLine,                -- :: IO ByteString
 199 #endif
 200         getContents,            -- :: IO ByteString
 201         putStr,                 -- :: ByteString -> IO ()
 202         putStrLn,               -- :: ByteString -> IO ()
 203
 204         -- ** Files
 205         readFile,               -- :: FilePath -> IO ByteString
 206         writeFile,              -- :: FilePath -> ByteString -> IO ()
 207         appendFile,             -- :: FilePath -> ByteString -> IO ()
 208 --      mmapFile,               -- :: FilePath -> IO ByteString
 209
 210         -- ** I\/O with Handles
 211 #if defined(__GLASGOW_HASKELL__)
 212         getArgs,                -- :: IO [ByteString]
 213         hGetLine,               -- :: Handle -> IO ByteString
 214         hGetLines,              -- :: Handle -> IO [ByteString]
 215         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 216 #endif
 217         hGetContents,           -- :: Handle -> IO ByteString
 218         hGet,                   -- :: Handle -> Int -> IO ByteString
 219         hPut,                   -- :: Handle -> ByteString -> IO ()
 220         hPutStr,                -- :: Handle -> ByteString -> IO ()
 221         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
 222
 223         -- * Fusion utilities
 224 #if defined(__GLASGOW_HASKELL__)
 225         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 226 #endif
 227         lengthU, maximumU, minimumU
 228   ) where
 229
 230 import qualified Prelude as P
 231 import Prelude hiding           (reverse,head,tail,last,init,null
 232                                 ,length,map,lines,foldl,foldr,unlines
 233                                 ,concat,any,take,drop,splitAt,takeWhile
 234                                 ,dropWhile,span,break,elem,filter,maximum
 235                                 ,minimum,all,concatMap,foldl1,foldr1
 236                                 ,scanl,scanl1,scanr,scanr1
 237                                 ,readFile,writeFile,appendFile,replicate
 238                                 ,getContents,getLine,putStr,putStrLn
 239                                 ,zip,zipWith,unzip,notElem)
 240
 241 import Data.ByteString.Base
 242 import Data.ByteString.Fusion
 243
 244 import qualified Data.List as List
 245
 246 import Data.Word                (Word8)
 247 import Data.Maybe               (listToMaybe)
 248 import Data.Array               (listArray)
 249 import qualified Data.Array as Array ((!))
 250
 251 -- Control.Exception.bracket not available in yhc or nhc
 252 import Control.Exception        (bracket, assert)
 253 import Control.Monad            (when)
 254
 255 import Foreign.C.String         (CString, CStringLen)
 256 import Foreign.C.Types          (CSize)
 257 import Foreign.ForeignPtr
 258 import Foreign.Marshal.Array
 259 import Foreign.Ptr
 260 import Foreign.Storable         (Storable(..))
 261
 262 -- hGetBuf and hPutBuf not available in yhc or nhc
 263 import System.IO                (stdin,stdout,hClose,hFileSize
 264                                 ,hGetBuf,hPutBuf,openBinaryFile
 265                                 ,Handle,IOMode(..))
 266
 267 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
 268
 269 #if !defined(__GLASGOW_HASKELL__)
 270 import System.IO.Unsafe
 271 #endif
 272
 273 #if defined(__GLASGOW_HASKELL__)
 274
 275 import System.IO                (hGetBufNonBlocking)
 276 import System.IO.Error          (isEOFError)
 277
 278 import Foreign.Marshal          (alloca)
 279 import qualified Foreign.Concurrent as FC (newForeignPtr)
 280
 281 import GHC.Handle
 282 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
 283 import GHC.Base                 (build)
 284 import GHC.Word hiding (Word8)
 285 import GHC.Ptr                  (Ptr(..))
 286 import GHC.ST                   (ST(..))
 287 import GHC.IOBase
 288
 289 #endif
 290
 291 -- -----------------------------------------------------------------------------
 292 --
 293 -- Useful macros, until we have bang patterns
 294 --
 295
 296 #define STRICT1(f) f a | a `seq` False = undefined
 297 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 298 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 299 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 300 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 301
 302 -- -----------------------------------------------------------------------------
 303
 304 instance Eq  ByteString
 305     where (==)    = eq
 306
 307 instance Ord ByteString
 308     where compare = compareBytes
 309
 310 instance Show ByteString where
 311     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 312
 313 instance Read ByteString where
 314     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 315
 316 instance Monoid ByteString where
 317     mempty  = empty
 318     mappend = append
 319     mconcat = concat
 320
 321 {-
 322 instance Arbitrary PackedString where
 323     arbitrary = P.pack `fmap` arbitrary
 324     coarbitrary s = coarbitrary (P.unpack s)
 325 -}
 326
 327 -- | /O(n)/ Equality on the 'ByteString' type.
 328 eq :: ByteString -> ByteString -> Bool
 329 eq a@(PS p s l) b@(PS p' s' l')
 330     | l /= l'            = False    -- short cut on length
 331     | p == p' && s == s' = True     -- short cut for the same string
 332     | otherwise          = compareBytes a b == EQ
 333 {-# INLINE eq #-}
 334
 335 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 336 compareBytes :: ByteString -> ByteString -> Ordering
 337 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 338     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 339     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 340     | otherwise                         = inlinePerformIO $
 341         withForeignPtr x1 $ \p1 ->
 342         withForeignPtr x2 $ \p2 -> do
 343             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 344             return $! case i `compare` 0 of
 345                         EQ  -> l1 `compare` l2
 346                         x   -> x
 347 {-# INLINE compareBytes #-}
 348
 349 {-
 350 --
 351 -- About 4x slower over 32M
 352 --
 353 compareBytes :: ByteString -> ByteString -> Ordering
 354 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 355     withForeignPtr fp1 $ \p1 ->
 356         withForeignPtr fp2 $ \p2 ->
 357             cmp (p1 `plusPtr` off1)
 358                 (p2 `plusPtr` off2) 0 len1 len2
 359
 360 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 361 STRICT5(cmp)
 362 cmp p1 p2 n len1 len2
 363       | n == len1 = if n == len2 then return EQ else return LT
 364       | n == len2 = return GT
 365       | otherwise = do
 366           (a :: Word8) <- peekByteOff p1 n
 367           (b :: Word8) <- peekByteOff p2 n
 368           case a `compare` b of
 369                 EQ -> cmp p1 p2 (n+1) len1 len2
 370                 LT -> return LT
 371                 GT -> return GT
 372 {-# INLINE compareBytes #-}
 373 -}
 374
 375 -- -----------------------------------------------------------------------------
 376 -- Introducing and eliminating 'ByteString's
 377
 378 -- | /O(1)/ The empty 'ByteString'
 379 empty :: ByteString
 380 empty = unsafeCreate 0 $ const $ return ()
 381 {-# NOINLINE empty #-}
 382
 383 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 384 singleton :: Word8 -> ByteString
 385 singleton c = unsafeCreate 1 $ \p -> poke p c
 386 {-# INLINE singleton #-}
 387
 388 --
 389 -- XXX The unsafePerformIO is critical!
 390 --
 391 -- Otherwise:
 392 --
 393 --  singleton 255 `compare` singleton 127
 394 --
 395 -- is compiled to:
 396 --
 397 --  case mallocByteString 2 of
 398 --      ForeignPtr f internals ->
 399 --           case writeWord8OffAddr# f 0 255 of _ ->
 400 --           case writeWord8OffAddr# f 0 127 of _ ->
 401 --           case eqAddr# f f of
 402 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 403 --                                        (GHC.Prim.plusAddr# f 0)
 404 --
 405 --
 406
 407 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 408 --
 409 -- For applications with large numbers of string literals, pack can be a
 410 -- bottleneck. In such cases, consider using packAddress (GHC only).
 411 pack :: [Word8] -> ByteString
 412
 413 #if !defined(__GLASGOW_HASKELL__)
 414
 415 pack str = unsafeCreate (P.length str) $ \p -> go p str
 416     where
 417         go _ []     = return ()
 418         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 419
 420 #else /* hack away */
 421
 422 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 423     where
 424         go _ _ []        = return ()
 425         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 426
 427         writeByte p i c = ST $ \s# ->
 428             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 429
 430 #endif
 431
 432 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 433 unpack :: ByteString -> [Word8]
 434
 435 #if !defined(__GLASGOW_HASKELL__)
 436
 437 unpack (PS _  _ 0) = []
 438 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 439         go (p `plusPtr` s) (l - 1) []
 440     where
 441         STRICT3(go)
 442         go p 0 acc = peek p          >>= \e -> return (e : acc)
 443         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 444 {-# INLINE unpack #-}
 445
 446 #else
 447
 448 --
 449 -- Interacting with head/build fusion rule in ghc 6.5. Disable for now
 450 --
 451
 452 unpack ps = build (unpackFoldr ps)
 453 {-# INLINE unpack #-}
 454
 455 --
 456 -- critical this isn't strict in the acc
 457 -- as it will break in the presence of list fusion. this is a known
 458 -- issue with seq and rewrite rules
 459 --
 460 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 461 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 462     let loop q n    _   | q `seq` n `seq` False = undefined -- n.b.
 463         loop _ (-1) acc = return acc
 464         loop q n    acc = do
 465            a <- peekByteOff q n
 466            loop q (n-1) (a `f` acc)
 467     loop (p `plusPtr` off) (len-1) ch
 468 {-# INLINE [0] unpackFoldr #-}
 469
 470 unpackList :: ByteString -> [Word8]
 471 unpackList (PS fp off len) = withPtr fp $ \p -> do
 472     let STRICT3(loop)
 473         loop _ (-1) acc = return acc
 474         loop q n acc = do
 475            a <- peekByteOff q n
 476            loop q (n-1) (a : acc)
 477     loop (p `plusPtr` off) (len-1) []
 478
 479 {-# RULES
 480 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 481  #-}
 482
 483 #endif
 484
 485 ------------------------------------------------------------------------
 486
 487 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 488 -- conversion function
 489 packWith :: (a -> Word8) -> [a] -> ByteString
 490 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
 491     where
 492         STRICT2(go)
 493         go _ []     = return ()
 494         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 495 {-# INLINE packWith #-}
 496 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 497
 498 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 499 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 500 unpackWith _ (PS _  _ 0) = []
 501 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 502         go (p `plusPtr` s) (l - 1) []
 503     where
 504         STRICT3(go)
 505         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 506         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 507 {-# INLINE unpackWith #-}
 508 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 509
 510 -- ---------------------------------------------------------------------
 511 -- Basic interface
 512
 513 -- | /O(1)/ Test whether a ByteString is empty.
 514 null :: ByteString -> Bool
 515 null (PS _ _ l) = assert (l >= 0) $ l <= 0
 516 {-# INLINE null #-}
 517
 518 -- ---------------------------------------------------------------------
 519 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 520 length :: ByteString -> Int
 521 length (PS _ _ l) = assert (l >= 0) $ l
 522
 523 --
 524 -- length/loop fusion. When taking the length of any fuseable loop,
 525 -- rewrite it as a foldl', and thus avoid allocating the result buffer
 526 -- worth around 10% in speed testing.
 527 --
 528
 529 #if defined(__GLASGOW_HASKELL__)
 530 {-# INLINE [1] length #-}
 531 #endif
 532
 533 lengthU :: ByteString -> Int
 534 lengthU = foldl' (const . (+1)) (0::Int)
 535 {-# INLINE lengthU #-}
 536
 537 {-# RULES
 538
 539 -- v2 fusion
 540 "length/loop" forall loop s .
 541   length  (loopArr (loopWrapper loop s)) =
 542   lengthU (loopArr (loopWrapper loop s))
 543
 544   #-}
 545
 546 ------------------------------------------------------------------------
 547
 548 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 549 -- complexity, as it requires a memcpy.
 550 cons :: Word8 -> ByteString -> ByteString
 551 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 552         poke p c
 553         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 554 {-# INLINE cons #-}
 555
 556 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 557 snoc :: ByteString -> Word8 -> ByteString
 558 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
 559         memcpy p (f `plusPtr` s) (fromIntegral l)
 560         poke (p `plusPtr` l) c
 561 {-# INLINE snoc #-}
 562
 563 -- todo fuse
 564
 565 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 566 -- An exception will be thrown in the case of an empty ByteString.
 567 head :: ByteString -> Word8
 568 head (PS x s l)
 569     | l <= 0    = errorEmptyList "head"
 570     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 571 {-# INLINE head #-}
 572
 573 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 574 -- An exception will be thrown in the case of an empty ByteString.
 575 tail :: ByteString -> ByteString
 576 tail (PS p s l)
 577     | l <= 0    = errorEmptyList "tail"
 578     | otherwise = PS p (s+1) (l-1)
 579 {-# INLINE tail #-}
 580
 581 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 582 -- An exception will be thrown in the case of an empty ByteString.
 583 last :: ByteString -> Word8
 584 last ps@(PS x s l)
 585     | null ps   = errorEmptyList "last"
 586     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 587 {-# INLINE last #-}
 588
 589 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 590 -- An exception will be thrown in the case of an empty ByteString.
 591 init :: ByteString -> ByteString
 592 init ps@(PS p s l)
 593     | null ps   = errorEmptyList "init"
 594     | otherwise = PS p s (l-1)
 595 {-# INLINE init #-}
 596
 597 -- | /O(n)/ Append two ByteStrings
 598 append :: ByteString -> ByteString -> ByteString
 599 append xs ys | null xs   = ys
 600              | null ys   = xs
 601              | otherwise = concat [xs,ys]
 602 {-# INLINE append #-}
 603
 604 -- ---------------------------------------------------------------------
 605 -- Transformations
 606
 607 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 608 -- element of @xs@. This function is subject to array fusion.
 609 map :: (Word8 -> Word8) -> ByteString -> ByteString
 610 #if defined(LOOPU_FUSION)
 611 map f = loopArr . loopU (mapEFL f) NoAcc
 612 #elif defined(LOOPUP_FUSION)
 613 map f = loopArr . loopUp (mapEFL f) NoAcc
 614 #elif defined(LOOPNOACC_FUSION)
 615 map f = loopArr . loopNoAcc (mapEFL f)
 616 #else
 617 map f = loopArr . loopMap f
 618 #endif
 619 {-# INLINE map #-}
 620
 621 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 622 -- slightly faster for one-shot cases.
 623 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 624 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
 625     create len $ map_ 0 (a `plusPtr` s)
 626   where
 627     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 628     STRICT3(map_)
 629     map_ n p1 p2
 630        | n >= len = return ()
 631        | otherwise = do
 632             x <- peekByteOff p1 n
 633             pokeByteOff p2 n (f x)
 634             map_ (n+1) p1 p2
 635 {-# INLINE map' #-}
 636
 637 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 638 reverse :: ByteString -> ByteString
 639 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 640         c_reverse p (f `plusPtr` s) (fromIntegral l)
 641
 642 -- todo, fuseable version
 643
 644 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 645 -- 'ByteString' and \`intersperses\' that byte between the elements of
 646 -- the 'ByteString'.  It is analogous to the intersperse function on
 647 -- Lists.
 648 intersperse :: Word8 -> ByteString -> ByteString
 649 intersperse c ps@(PS x s l)
 650     | length ps < 2  = ps
 651     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
 652         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 653
 654 {-
 655 intersperse c = pack . List.intersperse c . unpack
 656 -}
 657
 658 -- | The 'transpose' function transposes the rows and columns of its
 659 -- 'ByteString' argument.
 660 transpose :: [ByteString] -> [ByteString]
 661 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 662
 663 -- ---------------------------------------------------------------------
 664 -- Reducing 'ByteString's
 665
 666 -- | 'foldl', applied to a binary operator, a starting value (typically
 667 -- the left-identity of the operator), and a ByteString, reduces the
 668 -- ByteString using the binary operator, from left to right.
 669 -- This function is subject to array fusion.
 670 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 671 #if !defined(LOOPU_FUSION)
 672 foldl f z = loopAcc . loopUp (foldEFL f) z
 673 #else
 674 foldl f z = loopAcc . loopU (foldEFL f) z
 675 #endif
 676 {-# INLINE foldl #-}
 677
 678 {-
 679 --
 680 -- About twice as fast with 6.4.1, but not fuseable
 681 -- A simple fold . map is enough to make it worth while.
 682 --
 683 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 684         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 685     where
 686         STRICT3(lgo)
 687         lgo z p q | p == q    = return z
 688                   | otherwise = do c <- peek p
 689                                    lgo (f z c) (p `plusPtr` 1) q
 690 -}
 691
 692 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 693 -- Though actually foldl is also strict in the accumulator.
 694 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 695 foldl' = foldl
 696 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
 697 {-# INLINE foldl' #-}
 698
 699 -- | 'foldr', applied to a binary operator, a starting value
 700 -- (typically the right-identity of the operator), and a ByteString,
 701 -- reduces the ByteString using the binary operator, from right to left.
 702 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 703 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
 704 {-# INLINE foldr #-}
 705
 706 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 707 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 708 -- This function is subject to array fusion.
 709 -- An exception will be thrown in the case of an empty ByteString.
 710 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 711 foldl1 f ps
 712     | null ps   = errorEmptyList "foldl1"
 713     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 714 {-# INLINE foldl1 #-}
 715
 716 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 717 -- An exception will be thrown in the case of an empty ByteString.
 718 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 719 foldl1' f ps
 720     | null ps   = errorEmptyList "foldl1'"
 721     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 722 {-# INLINE foldl1' #-}
 723
 724 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 725 -- and thus must be applied to non-empty 'ByteString's
 726 -- An exception will be thrown in the case of an empty ByteString.
 727 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 728 foldr1 f ps
 729     | null ps        = errorEmptyList "foldr1"
 730     | otherwise      = foldr f (last ps) (init ps)
 731 {-# INLINE foldr1 #-}
 732
 733 -- ---------------------------------------------------------------------
 734 -- Special folds
 735
 736 -- | /O(n)/ Concatenate a list of ByteStrings.
 737 concat :: [ByteString] -> ByteString
 738 concat []     = empty
 739 concat [ps]   = ps
 740 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
 741   where len = P.sum . P.map length $ xs
 742         STRICT2(go)
 743         go []            _   = return ()
 744         go (PS p s l:ps) ptr = do
 745                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 746                 go ps (ptr `plusPtr` l)
 747
 748 -- | Map a function over a 'ByteString' and concatenate the results
 749 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 750 concatMap f = concat . foldr ((:) . f) []
 751
 752 -- foldr (append . f) empty
 753
 754 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 755 -- any element of the 'ByteString' satisfies the predicate.
 756 any :: (Word8 -> Bool) -> ByteString -> Bool
 757 any _ (PS _ _ 0) = False
 758 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 759         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 760     where
 761         STRICT2(go)
 762         go p q | p == q    = return False
 763                | otherwise = do c <- peek p
 764                                 if f c then return True
 765                                        else go (p `plusPtr` 1) q
 766
 767 -- todo fuse
 768
 769 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 770 -- if all elements of the 'ByteString' satisfy the predicate.
 771 all :: (Word8 -> Bool) -> ByteString -> Bool
 772 all _ (PS _ _ 0) = True
 773 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 774         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 775     where
 776         STRICT2(go)
 777         go p q | p == q     = return True  -- end of list
 778                | otherwise  = do c <- peek p
 779                                  if f c
 780                                     then go (p `plusPtr` 1) q
 781                                     else return False
 782
 783 ------------------------------------------------------------------------
 784
 785 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 786 -- This function will fuse.
 787 -- An exception will be thrown in the case of an empty ByteString.
 788 maximum :: ByteString -> Word8
 789 maximum xs@(PS x s l)
 790     | null xs   = errorEmptyList "maximum"
 791     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 792                       c_maximum (p `plusPtr` s) (fromIntegral l)
 793
 794 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 795 -- This function will fuse.
 796 -- An exception will be thrown in the case of an empty ByteString.
 797 minimum :: ByteString -> Word8
 798 minimum xs@(PS x s l)
 799     | null xs   = errorEmptyList "minimum"
 800     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 801                       c_minimum (p `plusPtr` s) (fromIntegral l)
 802
 803 --
 804 -- minimum/maximum/loop fusion. As for length (and other folds), when we
 805 -- see we're applied after a fuseable op, switch from using the C
 806 -- version, to the fuseable version. The result should then avoid
 807 -- allocating a buffer.
 808 --
 809
 810 #if defined(__GLASGOW_HASKELL__)
 811 {-# INLINE [1] minimum #-}
 812 {-# INLINE [1] maximum #-}
 813 #endif
 814
 815 maximumU :: ByteString -> Word8
 816 maximumU = foldl1' max
 817 {-# INLINE maximumU #-}
 818
 819 minimumU :: ByteString -> Word8
 820 minimumU = foldl1' min
 821 {-# INLINE minimumU #-}
 822
 823 {-# RULES
 824
 825 "minimum/loop" forall loop s .
 826   minimum  (loopArr (loopWrapper loop s)) =
 827   minimumU (loopArr (loopWrapper loop s))
 828
 829 "maximum/loop" forall loop s .
 830   maximum  (loopArr (loopWrapper loop s)) =
 831   maximumU (loopArr (loopWrapper loop s))
 832
 833   #-}
 834
 835 ------------------------------------------------------------------------
 836
 837 -- | The 'mapAccumL' function behaves like a combination of 'map' and
 838 -- 'foldl'; it applies a function to each element of a ByteString,
 839 -- passing an accumulating parameter from left to right, and returning a
 840 -- final value of this accumulator together with the new list.
 841 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 842 #if !defined(LOOPU_FUSION)
 843 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
 844 #else
 845 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
 846 #endif
 847 {-# INLINE mapAccumL #-}
 848
 849 -- | The 'mapAccumR' function behaves like a combination of 'map' and
 850 -- 'foldr'; it applies a function to each element of a ByteString,
 851 -- passing an accumulating parameter from right to left, and returning a
 852 -- final value of this accumulator together with the new ByteString.
 853 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 854 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
 855 {-# INLINE mapAccumR #-}
 856
 857 -- | /O(n)/ map Word8 functions, provided with the index at each position
 858 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 859 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
 860 {-# INLINE mapIndexed #-}
 861
 862 -- ---------------------------------------------------------------------
 863 -- Building ByteStrings
 864
 865 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 866 -- reduced values from the left. This function will fuse.
 867 --
 868 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 869 --
 870 -- Note that
 871 --
 872 -- > last (scanl f z xs) == foldl f z xs.
 873 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 874 #if !defined(LOOPU_FUSION)
 875 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
 876 #else
 877 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
 878 #endif
 879
 880     -- n.b. haskell's List scan returns a list one bigger than the
 881     -- input, so we need to snoc here to get some extra space, however,
 882     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
 883 {-# INLINE scanl #-}
 884
 885 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 886 -- This function will fuse.
 887 --
 888 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 889 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 890 scanl1 f ps
 891     | null ps   = empty
 892     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 893 {-# INLINE scanl1 #-}
 894
 895 -- | scanr is the right-to-left dual of scanl.
 896 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 897 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
 898 {-# INLINE scanr #-}
 899
 900 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
 901 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 902 scanr1 f ps
 903     | null ps   = empty
 904     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
 905 {-# INLINE scanr1 #-}
 906
 907 -- ---------------------------------------------------------------------
 908 -- Unfolds and replicates
 909
 910 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 911 -- the value of every element. The following holds:
 912 --
 913 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 914 --
 915 -- This implemenation uses @memset(3)@
 916 replicate :: Int -> Word8 -> ByteString
 917 replicate w c
 918     | w <= 0    = empty
 919     | otherwise = unsafeCreate w $ \ptr ->
 920                       memset ptr c (fromIntegral w) >> return ()
 921
 922 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
 923 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
 924 -- ByteString from a seed value.  The function takes the element and
 925 -- returns 'Nothing' if it is done producing the ByteString or returns
 926 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
 927 -- and @b@ is the seed value for further production.
 928 --
 929 -- Examples:
 930 --
 931 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
 932 -- > == pack [0, 1, 2, 3, 4, 5]
 933 --
 934 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 935 unfoldr f = concat . unfoldChunk 32 64
 936   where unfoldChunk n n' x =
 937           case unfoldrN n f x of
 938             (s, Nothing) -> s : []
 939             (s, Just x') -> s : unfoldChunk n' (n+n') x'
 940
 941 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
 942 -- value.  However, the length of the result is limited by the first
 943 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
 944 -- when the maximum length of the result is known.
 945 --
 946 -- The following equation relates 'unfoldrN' and 'unfoldr':
 947 --
 948 -- > unfoldrN n f s == take n (unfoldr f s)
 949 --
 950 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 951 unfoldrN i f x0
 952     | i < 0     = (empty, Just x0)
 953     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
 954   where STRICT3(go)
 955         go p x n =
 956           case f x of
 957             Nothing      -> return (0, n, Nothing)
 958             Just (w,x')
 959              | n == i    -> return (0, n, Just x)
 960              | otherwise -> do poke p w
 961                                go (p `plusPtr` 1) x' (n+1)
 962
 963 -- ---------------------------------------------------------------------
 964 -- Substrings
 965
 966 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 967 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 968 take :: Int -> ByteString -> ByteString
 969 take n ps@(PS x s l)
 970     | n <= 0    = empty
 971     | n >= l    = ps
 972     | otherwise = PS x s n
 973 {-# INLINE take #-}
 974
 975 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 976 -- elements, or @[]@ if @n > 'length' xs@.
 977 drop  :: Int -> ByteString -> ByteString
 978 drop n ps@(PS x s l)
 979     | n <= 0    = ps
 980     | n >= l    = empty
 981     | otherwise = PS x (s+n) (l-n)
 982 {-# INLINE drop #-}
 983
 984 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 985 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 986 splitAt n ps@(PS x s l)
 987     | n <= 0    = (empty, ps)
 988     | n >= l    = (ps, empty)
 989     | otherwise = (PS x s n, PS x (s+n) (l-n))
 990 {-# INLINE splitAt #-}
 991
 992 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 993 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 994 -- satisfy @p@.
 995 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 996 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 997 {-# INLINE takeWhile #-}
 998
 999 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
1000 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
1001 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
1002 {-# INLINE dropWhile #-}
1003
1004 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1005 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1006 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1007 {-# INLINE break #-}
1008
1009 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1010 --
1011 -- breakEnd p == spanEnd (not.p)
1012 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1013 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
1014
1015 -- | 'breakByte' breaks its ByteString argument at the first occurence
1016 -- of the specified byte. It is more efficient than 'break' as it is
1017 -- implemented with @memchr(3)@. I.e.
1018 --
1019 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1020 --
1021 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1022 breakByte c p = case elemIndex c p of
1023     Nothing -> (p,empty)
1024     Just n  -> (unsafeTake n p, unsafeDrop n p)
1025 {-# INLINE breakByte #-}
1026
1027 -- | 'spanByte' breaks its ByteString argument at the first
1028 -- occurence of a byte other than its argument. It is more efficient
1029 -- than 'span (==)'
1030 --
1031 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
1032 --
1033 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1034 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1035     go (p `plusPtr` s) 0
1036   where
1037     STRICT2(go)
1038     go p i | i >= l    = return (ps, empty)
1039            | otherwise = do c' <- peekByteOff p i
1040                             if c /= c'
1041                                 then return (unsafeTake i ps, unsafeDrop i ps)
1042                                 else go p (i+1)
1043 {-# INLINE spanByte #-}
1044
1045 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1046 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1047 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1048 span p ps = break (not . p) ps
1049 {-# INLINE span #-}
1050
1051 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1052 -- We have
1053 --
1054 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1055 --
1056 -- and
1057 --
1058 -- > spanEnd (not . isSpace) ps
1059 -- >    ==
1060 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1061 --
1062 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1063 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1064
1065 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1066 -- separators, where the predicate returns True for a separator element.
1067 -- The resulting components do not contain the separators.  Two adjacent
1068 -- separators result in an empty component in the output.  eg.
1069 --
1070 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1071 -- > splitWith (=='a') []        == []
1072 --
1073 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1074
1075 #if defined(__GLASGOW_HASKELL__)
1076 splitWith _pred (PS _  _   0) = []
1077 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1078   where pred# c# = pred_ (W8# c#)
1079
1080         STRICT4(splitWith0)
1081         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1082             splitLoop pred' p 0 off' len' fp'
1083
1084         splitLoop :: (Word# -> Bool)
1085                   -> Ptr Word8
1086                   -> Int -> Int -> Int
1087                   -> ForeignPtr Word8
1088                   -> IO [ByteString]
1089
1090         splitLoop pred' p idx' off' len' fp'
1091             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1092             | idx' >= len'  = return [PS fp' off' idx']
1093             | otherwise = do
1094                 w <- peekElemOff p (off'+idx')
1095                 if pred' (case w of W8# w# -> w#)
1096                    then return (PS fp' off' idx' :
1097                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1098                    else splitLoop pred' p (idx'+1) off' len' fp'
1099 {-# INLINE splitWith #-}
1100
1101 #else
1102 splitWith _ (PS _ _ 0) = []
1103 splitWith p ps = loop p ps
1104     where
1105         STRICT2(loop)
1106         loop q qs = if null rest then [chunk]
1107                                  else chunk : loop q (unsafeTail rest)
1108             where (chunk,rest) = break q qs
1109 #endif
1110
1111 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1112 -- argument, consuming the delimiter. I.e.
1113 --
1114 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1115 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1116 -- > split 'x'  "x"          == ["",""]
1117 --
1118 -- and
1119 --
1120 -- > join [c] . split c == id
1121 -- > split == splitWith . (==)
1122 --
1123 -- As for all splitting functions in this library, this function does
1124 -- not copy the substrings, it just constructs new 'ByteStrings' that
1125 -- are slices of the original.
1126 --
1127 split :: Word8 -> ByteString -> [ByteString]
1128 split _ (PS _ _ 0) = []
1129 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1130     let ptr = p `plusPtr` s
1131
1132         STRICT1(loop)
1133         loop n =
1134             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1135                                            w (fromIntegral (l-n))
1136             in if q == nullPtr
1137                 then [PS x (s+n) (l-n)]
1138                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1139
1140     return (loop 0)
1141 {-# INLINE split #-}
1142
1143 {-
1144 -- slower. but stays inside Haskell.
1145 split _ (PS _  _   0) = []
1146 split (W8# w#) (PS fp off len) = splitWith' off len fp
1147     where
1148         splitWith' off' len' fp' = withPtr fp $ \p ->
1149             splitLoop p 0 off' len' fp'
1150
1151         splitLoop :: Ptr Word8
1152                   -> Int -> Int -> Int
1153                   -> ForeignPtr Word8
1154                   -> IO [ByteString]
1155
1156         STRICT5(splitLoop)
1157         splitLoop p idx' off' len' fp'
1158             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1159             | idx' >= len'  = return [PS fp' off' idx']
1160             | otherwise = do
1161                 (W8# x#) <- peekElemOff p (off'+idx')
1162                 if word2Int# w# ==# word2Int# x#
1163                    then return (PS fp' off' idx' :
1164                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1165                    else splitLoop p (idx'+1) off' len' fp'
1166 -}
1167
1168 -- | Like 'splitWith', except that sequences of adjacent separators are
1169 -- treated as a single separator. eg.
1170 --
1171 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1172 --
1173 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1174 tokens f = P.filter (not.null) . splitWith f
1175 {-# INLINE tokens #-}
1176
1177 -- | The 'group' function takes a ByteString and returns a list of
1178 -- ByteStrings such that the concatenation of the result is equal to the
1179 -- argument.  Moreover, each sublist in the result contains only equal
1180 -- elements.  For example,
1181 --
1182 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1183 --
1184 -- It is a special case of 'groupBy', which allows the programmer to
1185 -- supply their own equality test. It is about 40% faster than
1186 -- /groupBy (==)/
1187 group :: ByteString -> [ByteString]
1188 group xs
1189     | null xs   = []
1190     | otherwise = ys : group zs
1191     where
1192         (ys, zs) = spanByte (unsafeHead xs) xs
1193
1194 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1195 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1196 groupBy k xs
1197     | null xs   = []
1198     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1199     where
1200         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1201
1202 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1203 -- 'ByteString's and concatenates the list after interspersing the first
1204 -- argument between each element of the list.
1205 join :: ByteString -> [ByteString] -> ByteString
1206 join s = concat . (List.intersperse s)
1207 {-# INLINE join #-}
1208
1209 --
1210 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1211 -- with a char. Around 4 times faster than the generalised join.
1212 --
1213 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1214 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1215     withForeignPtr ffp $ \fp ->
1216     withForeignPtr fgp $ \gp -> do
1217         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1218         poke (ptr `plusPtr` l) c
1219         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1220     where
1221       len = length f + length g + 1
1222 {-# INLINE joinWithByte #-}
1223
1224 -- ---------------------------------------------------------------------
1225 -- Indexing ByteStrings
1226
1227 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1228 index :: ByteString -> Int -> Word8
1229 index ps n
1230     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1231     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1232                                          ++ ", length = " ++ show (length ps))
1233     | otherwise      = ps `unsafeIndex` n
1234 {-# INLINE index #-}
1235
1236 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1237 -- element in the given 'ByteString' which is equal to the query
1238 -- element, or 'Nothing' if there is no such element.
1239 -- This implementation uses memchr(3).
1240 elemIndex :: Word8 -> ByteString -> Maybe Int
1241 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1242     let p' = p `plusPtr` s
1243     q <- memchr p' c (fromIntegral l)
1244     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1245 {-# INLINE elemIndex #-}
1246
1247 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1248 -- element in the given 'ByteString' which is equal to the query
1249 -- element, or 'Nothing' if there is no such element. The following
1250 -- holds:
1251 --
1252 -- > elemIndexEnd c xs ==
1253 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1254 --
1255 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1256 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1257     go (p `plusPtr` s) (l-1)
1258   where
1259     STRICT2(go)
1260     go p i | i < 0     = return Nothing
1261            | otherwise = do ch' <- peekByteOff p i
1262                             if ch == ch'
1263                                 then return $ Just i
1264                                 else go p (i-1)
1265 {-# INLINE elemIndexEnd #-}
1266
1267 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1268 -- the indices of all elements equal to the query element, in ascending order.
1269 -- This implementation uses memchr(3).
1270 elemIndices :: Word8 -> ByteString -> [Int]
1271 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1272     let ptr = p `plusPtr` s
1273
1274         STRICT1(loop)
1275         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1276                                                 w (fromIntegral (l - n))
1277                  in if q == nullPtr
1278                         then []
1279                         else let i = q `minusPtr` ptr
1280                              in i : loop (i+1)
1281     return $! loop 0
1282 {-# INLINE elemIndices #-}
1283
1284 {-
1285 -- much slower
1286 elemIndices :: Word8 -> ByteString -> [Int]
1287 elemIndices c ps = loop 0 ps
1288    where STRICT2(loop)
1289          loop _ ps' | null ps'            = []
1290          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1291                     | otherwise           = loop (n+1) (unsafeTail ps')
1292 -}
1293
1294 -- | count returns the number of times its argument appears in the ByteString
1295 --
1296 -- > count = length . elemIndices
1297 --
1298 -- But more efficiently than using length on the intermediate list.
1299 count :: Word8 -> ByteString -> Int
1300 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1301     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1302 {-# INLINE count #-}
1303
1304 {-
1305 --
1306 -- around 30% slower
1307 --
1308 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1309      go (p `plusPtr` s) (fromIntegral m) 0
1310     where
1311         go :: Ptr Word8 -> CSize -> Int -> IO Int
1312         STRICT3(go)
1313         go p l i = do
1314             q <- memchr p w l
1315             if q == nullPtr
1316                 then return i
1317                 else do let k = fromIntegral $ q `minusPtr` p
1318                         go (q `plusPtr` 1) (l-k-1) (i+1)
1319 -}
1320
1321 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1322 -- returns the index of the first element in the ByteString
1323 -- satisfying the predicate.
1324 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1325 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1326   where
1327     STRICT2(go)
1328     go ptr n | n >= l    = return Nothing
1329              | otherwise = do w <- peek ptr
1330                               if k w
1331                                 then return (Just n)
1332                                 else go (ptr `plusPtr` 1) (n+1)
1333 {-# INLINE findIndex #-}
1334
1335 -- | The 'findIndices' function extends 'findIndex', by returning the
1336 -- indices of all elements satisfying the predicate, in ascending order.
1337 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1338 findIndices p ps = loop 0 ps
1339    where
1340      STRICT2(loop)
1341      loop n qs | null qs           = []
1342                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1343                | otherwise         =     loop (n+1) (unsafeTail qs)
1344
1345 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1346 -- of the string if no element is found, rather than Nothing.
1347 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1348 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1349   where
1350     STRICT2(go)
1351     go ptr n | n >= l    = return l
1352              | otherwise = do w <- peek ptr
1353                               if k w
1354                                 then return n
1355                                 else go (ptr `plusPtr` 1) (n+1)
1356 {-# INLINE findIndexOrEnd #-}
1357
1358 -- ---------------------------------------------------------------------
1359 -- Searching ByteStrings
1360
1361 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1362 elem :: Word8 -> ByteString -> Bool
1363 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1364 {-# INLINE elem #-}
1365
1366 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1367 notElem :: Word8 -> ByteString -> Bool
1368 notElem c ps = not (elem c ps)
1369 {-# INLINE notElem #-}
1370
1371 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1372 -- returns a ByteString containing those characters that satisfy the
1373 -- predicate. This function is subject to array fusion.
1374 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1375 #if defined(LOOPU_FUSION)
1376 filter p  = loopArr . loopU (filterEFL p) NoAcc
1377 #elif defined(LOOPUP_FUSION)
1378 filter p  = loopArr . loopUp (filterEFL p) NoAcc
1379 #elif defined(LOOPNOACC_FUSION)
1380 filter p  = loopArr . loopNoAcc (filterEFL p)
1381 #else
1382 filter f = loopArr . loopFilter f
1383 #endif
1384 {-# INLINE filter #-}
1385
1386 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1387 -- around 2x faster for some one-shot applications.
1388 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1389 filter' k ps@(PS x s l)
1390     | null ps   = ps
1391     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1392         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1393         return $! t `minusPtr` p -- actual length
1394     where
1395         STRICT3(go)
1396         go f t end | f == end  = return t
1397                    | otherwise = do
1398                         w <- peek f
1399                         if k w
1400                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1401                             else             go (f `plusPtr` 1) t               end
1402 {-# INLINE filter' #-}
1403
1404 --
1405 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1406 -- case of filtering a single byte. It is more efficient to use
1407 -- /filterByte/ in this case.
1408 --
1409 -- > filterByte == filter . (==)
1410 --
1411 -- filterByte is around 10x faster, and uses much less space, than its
1412 -- filter equivalent
1413 filterByte :: Word8 -> ByteString -> ByteString
1414 filterByte w ps = replicate (count w ps) w
1415 {-# INLINE filterByte #-}
1416
1417 --
1418 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1419 -- case of filtering a single byte out of a list. It is more efficient
1420 -- to use /filterNotByte/ in this case.
1421 --
1422 -- > filterNotByte == filter . (/=)
1423 --
1424 -- filterNotByte is around 2x faster than its filter equivalent.
1425 filterNotByte :: Word8 -> ByteString -> ByteString
1426 filterNotByte w = filter' (/= w)
1427 {-# INLINE filterNotByte #-}
1428
1429 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1430 -- and returns the first element in matching the predicate, or 'Nothing'
1431 -- if there is no such element.
1432 --
1433 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1434 --
1435 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1436 find f p = case findIndex f p of
1437                     Just n -> Just (p `unsafeIndex` n)
1438                     _      -> Nothing
1439 {-# INLINE find #-}
1440
1441 {-
1442 --
1443 -- fuseable, but we don't want to walk the whole array.
1444 --
1445 find k = foldl findEFL Nothing
1446     where findEFL a@(Just _) _ = a
1447           findEFL _          c | k c       = Just c
1448                                | otherwise = Nothing
1449 -}
1450
1451 -- ---------------------------------------------------------------------
1452 -- Searching for substrings
1453
1454 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1455 -- iff the first is a prefix of the second.
1456 isPrefixOf :: ByteString -> ByteString -> Bool
1457 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1458     | l1 == 0   = True
1459     | l2 < l1   = False
1460     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1461         withForeignPtr x2 $ \p2 -> do
1462             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1463             return $! i == 0
1464
1465 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1466 -- iff the first is a suffix of the second.
1467 --
1468 -- The following holds:
1469 --
1470 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1471 --
1472 -- However, the real implemenation uses memcmp to compare the end of the
1473 -- string only, with no reverse required..
1474 isSuffixOf :: ByteString -> ByteString -> Bool
1475 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1476     | l1 == 0   = True
1477     | l2 < l1   = False
1478     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1479         withForeignPtr x2 $ \p2 -> do
1480             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1481             return $! i == 0
1482
1483 -- | Check whether one string is a substring of another. @isSubstringOf
1484 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1485 isSubstringOf :: ByteString -- ^ String to search for.
1486               -> ByteString -- ^ String to search in.
1487               -> Bool
1488 isSubstringOf p s = not $ P.null $ findSubstrings p s
1489
1490 -- | Get the first index of a substring in another string,
1491 --   or 'Nothing' if the string is not found.
1492 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1493 findSubstring :: ByteString -- ^ String to search for.
1494               -> ByteString -- ^ String to seach in.
1495               -> Maybe Int
1496 findSubstring = (listToMaybe .) . findSubstrings
1497
1498 -- | Find the indexes of all (possibly overlapping) occurances of a
1499 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1500 -- string matching algorithm.
1501 findSubstrings :: ByteString -- ^ String to search for.
1502                -> ByteString -- ^ String to seach in.
1503                -> [Int]
1504
1505 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1506   where
1507       patc x = pat `unsafeIndex` x
1508       strc x = str `unsafeIndex` x
1509
1510       -- maybe we should make kmpNext a UArray before using it in search?
1511       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1512       kmpNextL p _ | null p = []
1513       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1514                          ps = unsafeTail p
1515                          x = if not (null ps) && unsafeHead ps == patc j'
1516                                 then kmpNext Array.! j' else j'
1517                         in x:kmpNextL ps j'
1518       search i j = match ++ rest -- i: position in string, j: position in pattern
1519         where match = if j == m then [(i - j)] else []
1520               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1521       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1522                | otherwise = j
1523
1524 -- ---------------------------------------------------------------------
1525 -- Zipping
1526
1527 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1528 -- corresponding pairs of bytes. If one input ByteString is short,
1529 -- excess elements of the longer ByteString are discarded. This is
1530 -- equivalent to a pair of 'unpack' operations.
1531 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1532 zip ps qs
1533     | null ps || null qs = []
1534     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1535
1536 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1537 -- the first argument, instead of a tupling function.  For example,
1538 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1539 -- corresponding sums.
1540 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1541 zipWith f ps qs
1542     | null ps || null qs = []
1543     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1544
1545 --
1546 -- | A specialised version of zipWith for the common case of a
1547 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1548 -- are used to automatically covert zipWith into zipWith' when a pack is
1549 -- performed on the result of zipWith, but we also export it for
1550 -- convenience.
1551 --
1552 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1553 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1554     withForeignPtr fp $ \a ->
1555     withForeignPtr fq $ \b ->
1556     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1557   where
1558     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1559     STRICT4(zipWith_)
1560     zipWith_ n p1 p2 r
1561        | n >= len = return ()
1562        | otherwise = do
1563             x <- peekByteOff p1 n
1564             y <- peekByteOff p2 n
1565             pokeByteOff r n (f x y)
1566             zipWith_ (n+1) p1 p2 r
1567
1568     len = min l m
1569 {-# INLINE zipWith' #-}
1570
1571 {-# RULES
1572
1573 "Specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1574     pack (zipWith f p q) = zipWith' f p q
1575   #-}
1576
1577 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1578 -- ByteStrings. Note that this performs two 'pack' operations.
1579 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1580 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1581 {-# INLINE unzip #-}
1582
1583 -- ---------------------------------------------------------------------
1584 -- Special lists
1585
1586 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1587 inits :: ByteString -> [ByteString]
1588 inits (PS x s l) = [PS x s n | n <- [0..l]]
1589
1590 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1591 tails :: ByteString -> [ByteString]
1592 tails p | null p    = [empty]
1593         | otherwise = p : tails (unsafeTail p)
1594
1595 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1596
1597 -- ---------------------------------------------------------------------
1598 -- ** Ordered 'ByteString's
1599
1600 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1601 sort :: ByteString -> ByteString
1602 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1603
1604     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1605     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1606
1607     let STRICT2(go)
1608         go 256 _   = return ()
1609         go i   ptr = do n <- peekElemOff arr i
1610                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1611                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1612     go 0 p
1613
1614 {-
1615 sort :: ByteString -> ByteString
1616 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1617         memcpy p (f `plusPtr` s) l
1618         c_qsort p l -- inplace
1619 -}
1620
1621 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1622 --
1623 -- Try some linear sorts: radix, counting
1624 -- Or mergesort.
1625 --
1626 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1627 -- sortBy f ps = undefined
1628
1629 -- ---------------------------------------------------------------------
1630 -- Low level constructors
1631
1632 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1633 -- finalizer associated to it. The ByteString length is calculated using
1634 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1635 packCString :: CString -> ByteString
1636 packCString cstr = unsafePerformIO $ do
1637     fp <- newForeignPtr_ (castPtr cstr)
1638     l <- c_strlen cstr
1639     return $! PS fp 0 (fromIntegral l)
1640
1641 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1642 -- have /no/ finalizer associated with it. This operation has /O(1)/
1643 -- complexity as we already know the final size, so no /strlen(3)/ is
1644 -- required.
1645 packCStringLen :: CStringLen -> ByteString
1646 packCStringLen (ptr,len) = unsafePerformIO $ do
1647     fp <- newForeignPtr_ (castPtr ptr)
1648     return $! PS fp 0 (fromIntegral len)
1649
1650 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1651 -- have a @free(3)@ finalizer associated to it.
1652 packMallocCString :: CString -> ByteString
1653 packMallocCString cstr = unsafePerformIO $ do
1654     fp <- newForeignFreePtr (castPtr cstr)
1655     len <- c_strlen cstr
1656     return $! PS fp 0 (fromIntegral len)
1657
1658 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1659 -- null-terminated @CString@.  The @CString@ will be freed
1660 -- automatically. This is a memcpy(3).
1661 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1662 useAsCString ps f = useAsCStringLen ps (\(s,_) -> f s)
1663
1664 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1665 -- @CStringLen@. The @CStringLen@ will be freed automatically. This is a
1666 -- memcpy(3).
1667 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1668 useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1669     where
1670       alloc = withForeignPtr ps $ \p -> do
1671                 buf <- c_malloc (fromIntegral l+1)
1672                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1673                 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1674                 return $! (castPtr buf, l)
1675
1676 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1677 --   This is mainly useful to allow the rest of the data pointed
1678 --   to by the 'ByteString' to be garbage collected, for example
1679 --   if a large string has been read in, and only a small part of it
1680 --   is needed in the rest of the program.
1681 copy :: ByteString -> ByteString
1682 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1683     memcpy p (f `plusPtr` s) (fromIntegral l)
1684
1685 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1686 -- CString is going to be deallocated from C land.
1687 copyCString :: CString -> IO ByteString
1688 copyCString cstr = do
1689     len <- c_strlen cstr
1690     copyCStringLen (cstr, fromIntegral len)
1691
1692 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1693 copyCStringLen :: CStringLen -> IO ByteString
1694 copyCStringLen (cstr, len) = create len $ \p ->
1695     memcpy p (castPtr cstr) (fromIntegral len)
1696
1697 -- ---------------------------------------------------------------------
1698 -- line IO
1699
1700 #if defined(__GLASGOW_HASKELL__)
1701
1702 -- | getLine, read a line from stdin.
1703 getLine :: IO ByteString
1704 getLine = hGetLine stdin
1705
1706 -- | Lazily construct a list of lines of ByteStrings. This will be much
1707 -- better on memory consumption than using lines =<< getContents.
1708 hGetLines :: Handle -> IO [ByteString]
1709 hGetLines h = go
1710     where
1711         go = unsafeInterleaveIO $ do
1712                 ms <- catch (hGetLine h >>= return . Just)
1713                             (\_ -> return Nothing)
1714                 case ms of
1715                     Nothing -> return []
1716                     Just s  -> do ss <- go
1717                                   return (s:ss)
1718
1719 -- | hGetLine. read a ByteString from a handle
1720 hGetLine :: Handle -> IO ByteString
1721 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1722     case haBufferMode handle_ of
1723        NoBuffering -> error "no buffering"
1724        _other      -> hGetLineBuffered handle_
1725
1726  where
1727     hGetLineBuffered handle_ = do
1728         let ref = haBuffer handle_
1729         buf <- readIORef ref
1730         hGetLineBufferedLoop handle_ ref buf 0 []
1731
1732     hGetLineBufferedLoop handle_ ref
1733             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1734         len `seq` do
1735         off <- findEOL r w raw
1736         let new_len = len + off - r
1737         xs <- mkPS raw r off
1738
1739       -- if eol == True, then off is the offset of the '\n'
1740       -- otherwise off == w and the buffer is now empty.
1741         if off /= w
1742             then do if (w == off + 1)
1743                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1744                             else writeIORef ref buf{ bufRPtr = off + 1 }
1745                     mkBigPS new_len (xs:xss)
1746             else do
1747                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1748                                     buf{ bufWPtr=0, bufRPtr=0 }
1749                  case maybe_buf of
1750                     -- Nothing indicates we caught an EOF, and we may have a
1751                     -- partial line to return.
1752                     Nothing -> do
1753                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1754                          if new_len > 0
1755                             then mkBigPS new_len (xs:xss)
1756                             else ioe_EOF
1757                     Just new_buf ->
1758                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1759
1760     -- find the end-of-line character, if there is one
1761     findEOL r w raw
1762         | r == w = return w
1763         | otherwise =  do
1764             (c,r') <- readCharFromBuffer raw r
1765             if c == '\n'
1766                 then return r -- NB. not r': don't include the '\n'
1767                 else findEOL r' w raw
1768
1769     maybeFillReadBuffer fd is_line is_stream buf = catch
1770         (do buf' <- fillReadBuffer fd is_line is_stream buf
1771             return (Just buf'))
1772         (\e -> if isEOFError e then return Nothing else ioError e)
1773
1774 -- TODO, rewrite to use normal memcpy
1775 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1776 mkPS buf start end =
1777     let len = end - start
1778     in create len $ \p -> do
1779         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1780         return ()
1781
1782 mkBigPS :: Int -> [ByteString] -> IO ByteString
1783 mkBigPS _ [ps] = return ps
1784 mkBigPS _ pss = return $! concat (P.reverse pss)
1785
1786 #endif
1787
1788 -- ---------------------------------------------------------------------
1789 -- Block IO
1790
1791 -- | Outputs a 'ByteString' to the specified 'Handle'.
1792 hPut :: Handle -> ByteString -> IO ()
1793 hPut _ (PS _  _ 0) = return ()
1794 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1795
1796 -- | A synonym for @hPut@, for compatibility
1797 hPutStr :: Handle -> ByteString -> IO ()
1798 hPutStr = hPut
1799
1800 -- | Write a ByteString to a handle, appending a newline byte
1801 hPutStrLn :: Handle -> ByteString -> IO ()
1802 hPutStrLn h ps
1803     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1804     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1805
1806 -- | Write a ByteString to stdout
1807 putStr :: ByteString -> IO ()
1808 putStr = hPut stdout
1809
1810 -- | Write a ByteString to stdout, appending a newline byte
1811 putStrLn :: ByteString -> IO ()
1812 putStrLn = hPutStrLn stdout
1813
1814 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1815 -- is far more efficient than reading the characters into a 'String'
1816 -- and then using 'pack'.
1817 hGet :: Handle -> Int -> IO ByteString
1818 hGet _ 0 = return empty
1819 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1820
1821 #if defined(__GLASGOW_HASKELL__)
1822 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1823 -- waiting for data to become available, instead it returns only whatever data
1824 -- is available.
1825 hGetNonBlocking :: Handle -> Int -> IO ByteString
1826 hGetNonBlocking _ 0 = return empty
1827 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1828 #endif
1829
1830 -- | Read entire handle contents into a 'ByteString'.
1831 -- This function reads chunks at a time, doubling the chunksize on each
1832 -- read. The final buffer is then realloced to the appropriate size. For
1833 -- files > half of available memory, this may lead to memory exhaustion.
1834 -- Consider using 'readFile' in this case.
1835 --
1836 -- As with 'hGet', the string representation in the file is assumed to
1837 -- be ISO-8859-1.
1838 --
1839 hGetContents :: Handle -> IO ByteString
1840 hGetContents h = do
1841     let start_size = 1024
1842     p <- mallocArray start_size
1843     i <- hGetBuf h p start_size
1844     if i < start_size
1845         then do p' <- reallocArray p i
1846                 fp <- newForeignFreePtr p'
1847                 return $! PS fp 0 i
1848         else f p start_size
1849     where
1850         f p s = do
1851             let s' = 2 * s
1852             p' <- reallocArray p s'
1853             i  <- hGetBuf h (p' `plusPtr` s) s
1854             if i < s
1855                 then do let i' = s + i
1856                         p'' <- reallocArray p' i'
1857                         fp  <- newForeignFreePtr p''
1858                         return $! PS fp 0 i'
1859                 else f p' s'
1860
1861 -- | getContents. Equivalent to hGetContents stdin
1862 getContents :: IO ByteString
1863 getContents = hGetContents stdin
1864
1865 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1866 -- efficient than reading the characters into a 'String' and then using
1867 -- 'pack'.  It also may be more efficient than opening the file and
1868 -- reading it using hGet.
1869 readFile :: FilePath -> IO ByteString
1870 readFile f = bracket (openBinaryFile f ReadMode) hClose
1871     (\h -> hFileSize h >>= hGet h . fromIntegral)
1872
1873 -- | Write a 'ByteString' to a file.
1874 writeFile :: FilePath -> ByteString -> IO ()
1875 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1876     (\h -> hPut h ps)
1877
1878 -- | Append a 'ByteString' to a file.
1879 appendFile :: FilePath -> ByteString -> IO ()
1880 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1881     (\hdl -> hPut hdl txt)
1882
1883 {-
1884 --
1885 -- Disable until we can move it into a portable .hsc file
1886 --
1887
1888 -- | Like readFile, this reads an entire file directly into a
1889 -- 'ByteString', but it is even more efficient.  It involves directly
1890 -- mapping the file to memory.  This has the advantage that the contents
1891 -- of the file never need to be copied.  Also, under memory pressure the
1892 -- page may simply be discarded, while in the case of readFile it would
1893 -- need to be written to swap.  If you read many small files, mmapFile
1894 -- will be less memory-efficient than readFile, since each mmapFile
1895 -- takes up a separate page of memory.  Also, you can run into bus
1896 -- errors if the file is modified.  As with 'readFile', the string
1897 -- representation in the file is assumed to be ISO-8859-1.
1898 --
1899 -- On systems without mmap, this is the same as a readFile.
1900 --
1901 mmapFile :: FilePath -> IO ByteString
1902 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1903
1904 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1905 mmap f = do
1906     h <- openBinaryFile f ReadMode
1907     l <- fromIntegral `fmap` hFileSize h
1908     -- Don't bother mmaping small files because each mmapped file takes up
1909     -- at least one full VM block.
1910     if l < mmap_limit
1911        then do thefp <- mallocByteString l
1912                withForeignPtr thefp $ \p-> hGetBuf h p l
1913                hClose h
1914                return (thefp, l)
1915        else do
1916                -- unix only :(
1917                fd <- fromIntegral `fmap` handleToFd h
1918                p  <- my_mmap l fd
1919                fp <- if p == nullPtr
1920                      then do thefp <- mallocByteString l
1921                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1922                              return thefp
1923                      else do
1924                           -- The munmap leads to crashes on OpenBSD.
1925                           -- maybe there's a use after unmap in there somewhere?
1926                           -- Bulat suggests adding the hClose to the
1927                           -- finalizer, excellent idea.
1928 #if !defined(__OpenBSD__)
1929                              let unmap = c_munmap p l >> return ()
1930 #else
1931                              let unmap = return ()
1932 #endif
1933                              fp <- FC.newForeignPtr p unmap
1934                              return fp
1935                c_close fd
1936                hClose h
1937                return (fp, l)
1938     where mmap_limit = 16*1024
1939 -}
1940
1941 #if defined(__GLASGOW_HASKELL__)
1942 --
1943 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1944 --
1945 getArgs :: IO [ByteString]
1946 getArgs =
1947   alloca $ \ p_argc ->
1948   alloca $ \ p_argv -> do
1949     getProgArgv p_argc p_argv
1950     p    <- fromIntegral `fmap` peek p_argc
1951     argv <- peek p_argv
1952     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1953 #endif
1954
1955 -- ---------------------------------------------------------------------
1956 -- Internal utilities
1957
1958 -- | Perform an operation with a temporary ByteString
1959 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1960 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1961 {-# INLINE withPtr #-}
1962
1963 -- Common up near identical calls to `error' to reduce the number
1964 -- constant strings created when compiled:
1965 errorEmptyList :: String -> a
1966 errorEmptyList fun = moduleError fun "empty ByteString"
1967 {-# NOINLINE errorEmptyList #-}
1968
1969 moduleError :: String -> String -> a
1970 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
1971 {-# NOINLINE moduleError #-}
1972
1973 -- Find from the end of the string using predicate
1974 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1975 STRICT2(findFromEndUntil)
1976 findFromEndUntil f ps@(PS x s l) =
1977     if null ps then 0
1978     else if f (last ps) then l
1979          else findFromEndUntil f (PS x s (l-1))
1980
1981 {-# INLINE newForeignFreePtr #-}
1982 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1983 #if defined(__GLASGOW_HASKELL__)
1984 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1985 #else
1986 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1987 #endif