Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 -- License     : BSD-style
  10 --
  11 -- Maintainer  : dons@cse.unsw.edu.au
  12 -- Stability   : experimental
  13 -- Portability : portable, requires ffi and cpp
  14 -- Tested with : GHC 6.4.1 and Hugs March 2005
  15 --
  16
  17 --
  18 -- | A time and space-efficient implementation of byte vectors using
  19 -- packed Word8 arrays, suitable for high performance use, both in terms
  20 -- of large data quantities, or high speed requirements. Byte vectors
  21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
  22 -- be passed between C and Haskell with little effort.
  23 --
  24 -- This module is intended to be imported @qualified@, to avoid name
  25 -- clashes with Prelude functions.  eg.
  26 --
  27 -- > import qualified Data.ByteString as B
  28 --
  29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  30 -- UArray by Simon Marlow. Rewritten to support slices and use
  31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  32 --
  33
  34 module Data.ByteString (
  35
  36         -- * The @ByteString@ type
  37         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  38
  39         -- * Introducing and eliminating 'ByteString's
  40         empty,                  -- :: ByteString
  41         packByte,               -- :: Word8   -> ByteString
  42         pack,                   -- :: [Word8] -> ByteString
  43         unpack,                 -- :: ByteString -> [Word8]
  44         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  45         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  46
  47         -- * Basic interface
  48         cons,                   -- :: Word8 -> ByteString -> ByteString
  49         snoc,                   -- :: Word8 -> ByteString -> ByteString
  50         null,                   -- :: ByteString -> Bool
  51         length,                 -- :: ByteString -> Int
  52         head,                   -- :: ByteString -> Word8
  53         tail,                   -- :: ByteString -> ByteString
  54         last,                   -- :: ByteString -> Word8
  55         init,                   -- :: ByteString -> ByteString
  56         append,                 -- :: ByteString -> ByteString -> ByteString
  57
  58         -- * Special ByteStrings
  59         inits,                  -- :: ByteString -> [ByteString]
  60         tails,                  -- :: ByteString -> [ByteString]
  61         elems,                  -- :: ByteString -> [ByteString]
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         reverse,                -- :: ByteString -> ByteString
  66         intersperse,            -- :: Word8 -> ByteString -> ByteString
  67         transpose,              -- :: [ByteString] -> [ByteString]
  68
  69         -- * Reducing 'ByteString's
  70         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  71         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  72         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  73         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74
  75         -- ** Special folds
  76         concat,                 -- :: [ByteString] -> ByteString
  77         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  78         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  79         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  80         maximum,                -- :: ByteString -> Word8
  81         minimum,                -- :: ByteString -> Word8
  82         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  83
  84         -- * Generating and unfolding ByteStrings
  85         replicate,              -- :: Int -> Word8 -> ByteString
  86         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  87
  88         -- * Substrings
  89
  90         -- ** Breaking strings
  91         take,                   -- :: Int -> ByteString -> ByteString
  92         drop,                   -- :: Int -> ByteString -> ByteString
  93         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  94         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  95         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  96         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  97         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  98         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  99
 100         -- ** Breaking and dropping on specific bytes
 101         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 102         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 103         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 104
 105         -- ** Breaking into many substrings
 106         split,                  -- :: Word8 -> ByteString -> [ByteString]
 107         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 108         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 109
 110         -- ** Joining strings
 111         join,                   -- :: ByteString -> [ByteString] -> ByteString
 112         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 113
 114         -- * Indexing ByteStrings
 115         index,                  -- :: ByteString -> Int -> Word8
 116         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 117         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 118         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 119         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 120         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 121         count,                  -- :: Word8 -> ByteString -> Int
 122
 123         -- * Ordered ByteStrings
 124         sort,                   -- :: ByteString -> ByteString
 125
 126         -- * Searching ByteStrings
 127
 128         -- ** Searching by equality
 129         -- | These functions use memchr(3) to efficiently search the ByteString
 130
 131         elem,                   -- :: Word8 -> ByteString -> Bool
 132         notElem,                -- :: Word8 -> ByteString -> Bool
 133         filterByte,             -- :: Word8 -> ByteString -> ByteString
 134         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 135
 136         -- ** Searching with a predicate
 137         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 138         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 139
 140         -- ** Prefixes and suffixes
 141         -- | These functions use memcmp(3) to efficiently compare substrings
 142         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 143         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 144
 145         -- ** Search for arbitrary substrings
 146         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 147         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 148         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 149
 150         -- * Zipping and unzipping ByteStrings
 151         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 152         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 153         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 154
 155         -- * Unchecked access
 156         unsafeHead,             -- :: ByteString -> Word8
 157         unsafeTail,             -- :: ByteString -> ByteString
 158         unsafeIndex,            -- :: ByteString -> Int -> Word8
 159
 160         -- * Low level introduction and elimination
 161         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 162         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 163         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 164         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 165         skipIndex,              -- :: ByteString -> Int
 166
 167         -- ** Packing CStrings and pointers
 168         packCString,            -- :: CString -> ByteString
 169         packCStringLen,         -- :: CString -> ByteString
 170         packMallocCString,      -- :: CString -> ByteString
 171
 172 #if defined(__GLASGOW_HASKELL__)
 173         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 174         packAddress,            -- :: Addr# -> ByteString
 175         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 176         unsafeFinalize,         -- :: ByteString -> IO ()
 177 #endif
 178
 179         -- ** Using ByteStrings as CStrings
 180         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 181         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 182         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 183
 184         -- ** Copying ByteStrings
 185         -- | These functions perform memcpy(3) operations
 186         copy,                   -- :: ByteString -> ByteString
 187         copyCString,            -- :: CString -> ByteString
 188         copyCStringLen,         -- :: CStringLen -> ByteString
 189
 190         -- * I\/O with @ByteString@s
 191
 192         -- ** Standard input and output
 193
 194 #if defined(__GLASGOW_HASKELL__)
 195         getLine,                -- :: IO ByteString
 196 #endif
 197         getContents,            -- :: IO ByteString
 198         putStr,                 -- :: ByteString -> IO ()
 199         putStrLn,               -- :: ByteString -> IO ()
 200
 201         -- ** Files
 202         readFile,               -- :: FilePath -> IO ByteString
 203         writeFile,              -- :: FilePath -> ByteString -> IO ()
 204 --      mmapFile,               -- :: FilePath -> IO ByteString
 205
 206         -- ** I\/O with Handles
 207 #if defined(__GLASGOW_HASKELL__)
 208         getArgs,                -- :: IO [ByteString]
 209         hGetLine,               -- :: Handle -> IO ByteString
 210         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 211 #endif
 212         hGetContents,           -- :: Handle -> IO ByteString
 213         hGet,                   -- :: Handle -> Int -> IO ByteString
 214         hPut,                   -- :: Handle -> ByteString -> IO ()
 215
 216 #if defined(__GLASGOW_HASKELL__)
 217         -- * Miscellaneous
 218         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 219 #endif
 220
 221   ) where
 222
 223 import qualified Prelude as P
 224 import Prelude hiding           (reverse,head,tail,last,init,null
 225                                 ,length,map,lines,foldl,foldr,unlines
 226                                 ,concat,any,take,drop,splitAt,takeWhile
 227                                 ,dropWhile,span,break,elem,filter,maximum
 228                                 ,minimum,all,concatMap,foldl1,foldr1
 229                                 ,readFile,writeFile,replicate
 230                                 ,getContents,getLine,putStr,putStrLn
 231                                 ,zip,zipWith,unzip,notElem)
 232
 233 import qualified Data.List as List
 234
 235 import Data.Char
 236 import Data.Word                (Word8)
 237 import Data.Maybe               (listToMaybe)
 238 import Data.Array               (listArray)
 239 import qualified Data.Array as Array ((!))
 240
 241 -- Control.Exception.bracket not available in yhc or nhc
 242 import Control.Exception        (bracket)
 243
 244 import Foreign.C.String         (CString, CStringLen)
 245 import Foreign.C.Types          (CSize, CInt)
 246 import Foreign.ForeignPtr
 247 import Foreign.Marshal.Array
 248 import Foreign.Ptr
 249 import Foreign.Storable         (Storable(..))
 250
 251 -- hGetBuf and hPutBuf not available in yhc or nhc
 252 import System.IO                (stdin,stdout,hClose,hFileSize
 253                                 ,hGetBuf,hPutBuf,openBinaryFile
 254                                 ,Handle,IOMode(..))
 255
 256 #if !defined(__GLASGOW_HASKELL__)
 257 import System.IO.Unsafe
 258 #endif
 259
 260 #if defined(__GLASGOW_HASKELL__)
 261
 262 import Data.Generics            (Data(..), Typeable(..))
 263
 264 import System.IO                (hGetBufNonBlocking)
 265 import System.IO.Error          (isEOFError)
 266
 267 import Foreign.Marshal          (alloca)
 268 import qualified Foreign.Concurrent as FC (newForeignPtr)
 269
 270 import GHC.Handle
 271 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 272 import GHC.Base                 (build, unsafeChr)
 273 import GHC.Word hiding (Word8)
 274 import GHC.Ptr                  (Ptr(..))
 275 import GHC.ST                   (ST(..))
 276 import GHC.IOBase
 277
 278 #endif
 279
 280 -- CFILES stuff is Hugs only
 281 {-# CFILES cbits/fpstring.c #-}
 282
 283 -- -----------------------------------------------------------------------------
 284 --
 285 -- Useful macros, until we have bang patterns
 286 --
 287
 288 #define STRICT1(f) f a | a `seq` False = undefined
 289 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 290 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 291 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 292 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 293
 294 -- -----------------------------------------------------------------------------
 295
 296 -- | A space-efficient representation of a Word8 vector, supporting many
 297 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 298 --
 299 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 300 --
 301 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 302                      {-# UNPACK #-} !Int
 303                      {-# UNPACK #-} !Int
 304
 305 #if defined(__GLASGOW_HASKELL__)
 306     deriving (Data, Typeable)
 307 #endif
 308
 309 instance Eq  ByteString
 310     where (==)    = eq
 311
 312 instance Ord ByteString
 313     where compare = compareBytes
 314
 315 instance Show ByteString where
 316     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 317
 318 instance Read ByteString where
 319     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 320
 321 {-
 322 instance Arbitrary PackedString where
 323     arbitrary = P.pack `fmap` arbitrary
 324     coarbitrary s = coarbitrary (P.unpack s)
 325 -}
 326
 327 -- | /O(n)/ Equality on the 'ByteString' type.
 328 eq :: ByteString -> ByteString -> Bool
 329 eq a b = (compareBytes a b) == EQ
 330 {-# INLINE eq #-}
 331
 332 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 333 compareBytes :: ByteString -> ByteString -> Ordering
 334 compareBytes (PS _ _ 0) (PS _ _ 0)       = EQ    -- short cut for empty strings
 335 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
 336     withForeignPtr x1 $ \p1 ->
 337     withForeignPtr x2 $ \p2 -> do
 338         i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
 339         return $ case i `compare` 0 of
 340                     EQ  -> l1 `compare` l2
 341                     x   -> x
 342 {-# INLINE compareBytes #-}
 343
 344 {-
 345 --
 346 -- About 4x slower over 32M
 347 --
 348 compareBytes :: ByteString -> ByteString -> Ordering
 349 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 350     withForeignPtr fp1 $ \p1 ->
 351         withForeignPtr fp2 $ \p2 ->
 352             cmp (p1 `plusPtr` off1)
 353                 (p2 `plusPtr` off2) 0 len1 len2
 354
 355 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 356 STRICT5(cmp)
 357 cmp p1 p2 n len1 len2
 358       | n == len1 = if n == len2 then return EQ else return LT
 359       | n == len2 = return GT
 360       | otherwise = do
 361           (a :: Word8) <- peekByteOff p1 n
 362           (b :: Word8) <- peekByteOff p2 n
 363           case a `compare` b of
 364                 EQ -> cmp p1 p2 (n+1) len1 len2
 365                 LT -> return LT
 366                 GT -> return GT
 367 {-# INLINE compareBytes #-}
 368 -}
 369
 370 -- -----------------------------------------------------------------------------
 371 -- Introducing and eliminating 'ByteString's
 372
 373 -- | /O(1)/ The empty 'ByteString'
 374 empty :: ByteString
 375 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 376 {-# NOINLINE empty #-}
 377
 378 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 379 packByte :: Word8 -> ByteString
 380 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
 381     withForeignPtr fp $ \p -> poke p c
 382     return $ PS fp 0 1
 383 {-# NOINLINE packByte #-}
 384
 385 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 386 --
 387 -- For applications with large numbers of string literals, pack can be a
 388 -- bottleneck. In such cases, consider using packAddress (GHC only).
 389 pack :: [Word8] -> ByteString
 390
 391 #if !defined(__GLASGOW_HASKELL__)
 392
 393 pack str = create (P.length str) $ \p -> go p str
 394     where
 395         go _ []     = return ()
 396         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 397
 398 #else /* hack away */
 399
 400 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 401     where
 402         go _ _ []        = return ()
 403         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 404
 405         writeByte p i c = ST $ \s# ->
 406             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 407
 408 #endif
 409
 410 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 411 unpack :: ByteString -> [Word8]
 412
 413 #if !defined(__GLASGOW_HASKELL__)
 414
 415 unpack (PS _  _ 0) = []
 416 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 417         go (p `plusPtr` s) (l - 1) []
 418     where
 419         STRICT3(go)
 420         go p 0 acc = peek p          >>= \e -> return (e : acc)
 421         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 422 {-# INLINE unpack #-}
 423
 424 #else
 425
 426 unpack ps = build (unpackFoldr ps)
 427 {-# INLINE unpack #-}
 428
 429 unpackList :: ByteString -> [Word8]
 430 unpackList (PS fp off len) = withPtr fp $ \p -> do
 431     let STRICT3(loop)
 432         loop _ (-1) acc = return acc
 433         loop q n acc = do
 434            a <- peekByteOff q n
 435            loop q (n-1) (a : acc)
 436     loop (p `plusPtr` off) (len-1) []
 437
 438 {-# RULES
 439 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 440  #-}
 441
 442 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 443 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 444     let STRICT3(loop)
 445         loop _ (-1) acc = return acc
 446         loop q n    acc = do
 447            a <- peekByteOff q n
 448            loop q (n-1) (a `f` acc)
 449     loop (p `plusPtr` off) (len-1) ch
 450 {-# INLINE [0] unpackFoldr #-}
 451
 452 #endif
 453
 454 ------------------------------------------------------------------------
 455
 456 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 457 -- conversion function
 458 packWith :: (a -> Word8) -> [a] -> ByteString
 459 packWith k str = create (P.length str) $ \p -> go p str
 460     where
 461         STRICT2(go)
 462         go _ []     = return ()
 463         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 464 {-# INLINE packWith #-}
 465 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 466
 467 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 468 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 469 unpackWith _ (PS _  _ 0) = []
 470 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 471         go (p `plusPtr` s) (l - 1) []
 472     where
 473         STRICT3(go)
 474         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 475         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 476 {-# INLINE unpackWith #-}
 477 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 478
 479 -- ---------------------------------------------------------------------
 480 -- Basic interface
 481
 482 -- | /O(1)/ Test whether a ByteString is empty.
 483 null :: ByteString -> Bool
 484 null (PS _ _ l) = l == 0
 485 {-# INLINE null #-}
 486
 487 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 488 length :: ByteString -> Int
 489 length (PS _ _ l) = l
 490 {-# INLINE length #-}
 491
 492 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 493 -- complexity, as it requires a memcpy.
 494 cons :: Word8 -> ByteString -> ByteString
 495 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 496         memcpy (p `plusPtr` 1) (f `plusPtr` s) l
 497         poke p c
 498 {-# INLINE cons #-}
 499
 500 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 501 snoc :: ByteString -> Word8 -> ByteString
 502 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 503         memcpy p (f `plusPtr` s) l
 504         poke (p `plusPtr` l) c
 505 {-# INLINE snoc #-}
 506
 507 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 508 head :: ByteString -> Word8
 509 head ps@(PS x s _)
 510     | null ps   = errorEmptyList "head"
 511     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 512 {-# INLINE head #-}
 513
 514 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 515 tail :: ByteString -> ByteString
 516 tail (PS p s l)
 517     | l <= 0    = errorEmptyList "tail"
 518     | otherwise = PS p (s+1) (l-1)
 519 {-# INLINE tail #-}
 520
 521 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 522 last :: ByteString -> Word8
 523 last ps@(PS x s l)
 524     | null ps   = errorEmptyList "last"
 525     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 526 {-# INLINE last #-}
 527
 528 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 529 init :: ByteString -> ByteString
 530 init (PS p s l)
 531     | l <= 0    = errorEmptyList "init"
 532     | otherwise = PS p s (l-1)
 533 {-# INLINE init #-}
 534
 535 -- | /O(n)/ Append two ByteStrings
 536 append :: ByteString -> ByteString -> ByteString
 537 append xs ys | null xs   = ys
 538              | null ys   = xs
 539              | otherwise = concat [xs,ys]
 540 {-# INLINE append #-}
 541
 542 {-
 543 --
 544 -- About 30% faster, but allocating in a big chunk isn't good for memory use
 545 --
 546 append :: ByteString -> ByteString -> ByteString
 547 append xs@(PS ffp s l) ys@(PS fgp t m)
 548     | null xs   = ys
 549     | null ys   = xs
 550     | otherwise = create len $ \ptr ->
 551         withForeignPtr ffp $ \fp ->
 552         withForeignPtr fgp $ \gp -> do
 553             memcpy ptr               (fp `plusPtr` s) l
 554             memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
 555         where len = length xs + length ys
 556 -}
 557
 558 -- ---------------------------------------------------------------------
 559 -- Transformations
 560
 561 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 562 -- element of @xs@
 563 --
 564 map :: (Word8 -> Word8) -> ByteString -> ByteString
 565 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
 566     new_fp <- mallocByteString len
 567     withForeignPtr new_fp $ \new_p -> do
 568         map_ f (len-1) (p `plusPtr` start) new_p
 569         return (PS new_fp 0 len)
 570 {-# INLINE map #-}
 571
 572 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 573 STRICT4(map_)
 574 map_ f n p1 p2
 575    | n < 0 = return ()
 576    | otherwise = do
 577         x <- peekByteOff p1 n
 578         pokeByteOff p2 n (f x)
 579         map_ f (n-1) p1 p2
 580 {-# INLINE map_ #-}
 581
 582 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 583 reverse :: ByteString -> ByteString
 584 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 585         c_reverse p (f `plusPtr` s) l
 586
 587 {-
 588 reverse = pack . P.reverse . unpack
 589 -}
 590
 591 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 592 -- 'ByteString' and \`intersperses\' that byte between the elements of
 593 -- the 'ByteString'.  It is analogous to the intersperse function on
 594 -- Lists.
 595 intersperse :: Word8 -> ByteString -> ByteString
 596 intersperse c ps@(PS x s l)
 597     | length ps < 2  = ps
 598     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 599         c_intersperse p (f `plusPtr` s) l c
 600
 601 {-
 602 intersperse c = pack . List.intersperse c . unpack
 603 -}
 604
 605 -- | The 'transpose' function transposes the rows and columns of its
 606 -- 'ByteString' argument.
 607 transpose :: [ByteString] -> [ByteString]
 608 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 609
 610 -- ---------------------------------------------------------------------
 611 -- Reducing 'ByteString's
 612
 613 -- | 'foldl', applied to a binary operator, a starting value (typically
 614 -- the left-identity of the operator), and a ByteString, reduces the
 615 -- ByteString using the binary operator, from left to right.
 616 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 617 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 618         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 619     where
 620         STRICT3(lgo)
 621         lgo z p q | p == q    = return z
 622                   | otherwise = do c <- peek p
 623                                    lgo (f z c) (p `plusPtr` 1) q
 624
 625 -- | 'foldr', applied to a binary operator, a starting value
 626 -- (typically the right-identity of the operator), and a ByteString,
 627 -- reduces the ByteString using the binary operator, from right to left.
 628 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 629 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 630         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 631     where
 632         STRICT2(go)
 633         go p q | p == q    = return z
 634                | otherwise = do c  <- peek p
 635                                 ws <- go (p `plusPtr` 1) q
 636                                 return $ c `k` ws
 637
 638 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 639 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 640 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 641 foldl1 f ps
 642     | null ps   = errorEmptyList "foldl1"
 643     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 644
 645 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 646 -- and thus must be applied to non-empty 'ByteString's
 647 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 648 foldr1 f ps
 649     | null ps        = errorEmptyList "foldr1"
 650     | otherwise      = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
 651
 652 -- ---------------------------------------------------------------------
 653 -- Special folds
 654
 655 -- | /O(n)/ Concatenate a list of ByteStrings.
 656 concat :: [ByteString] -> ByteString
 657 concat []     = empty
 658 concat [ps]   = ps
 659 concat xs     = inlinePerformIO $ do
 660     let start_size = 1024
 661     p <- mallocArray start_size
 662     f p 0 1024 xs
 663
 664     where f ptr len _ [] = do
 665                 ptr' <- reallocArray ptr (len+1)
 666                 poke (ptr' `plusPtr` len) (0::Word8)    -- XXX so CStrings work
 667                 fp   <- newForeignFreePtr ptr'
 668                 return $ PS fp 0 len
 669
 670           f ptr len to_go pss@(PS p s l:pss')
 671            | l <= to_go = do withForeignPtr p $ \pf ->
 672                                  memcpy (ptr `plusPtr` len)
 673                                           (pf `plusPtr` s) l
 674                              f ptr (len + l) (to_go - l) pss'
 675
 676            | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
 677                             ptr' <- reallocArray ptr new_total
 678                             f ptr' len (new_total - len) pss
 679
 680 -- | Map a function over a 'ByteString' and concatenate the results
 681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 682 concatMap f = foldr (append . f) empty
 683
 684 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 685 -- any element of the 'ByteString' satisfies the predicate.
 686 any :: (Word8 -> Bool) -> ByteString -> Bool
 687 any _ (PS _ _ 0) = False
 688 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 689         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 690     where
 691         STRICT2(go)
 692         go p q | p == q    = return False
 693                | otherwise = do c <- peek p
 694                                 if f c then return True
 695                                        else go (p `plusPtr` 1) q
 696
 697 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 698 -- if all elements of the 'ByteString' satisfy the predicate.
 699 all :: (Word8 -> Bool) -> ByteString -> Bool
 700 all _ (PS _ _ 0) = True
 701 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 702         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 703     where
 704         STRICT2(go)
 705         go p q | p == q     = return True  -- end of list
 706                | otherwise  = do c <- peek p
 707                                  if f c
 708                                     then go (p `plusPtr` 1) q
 709                                     else return False
 710
 711 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 712 maximum :: ByteString -> Word8
 713 maximum xs@(PS x s l)
 714     | null xs   = errorEmptyList "maximum"
 715     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 716                     return $ c_maximum (p `plusPtr` s) l
 717 {-# INLINE maximum #-}
 718
 719 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 720 minimum :: ByteString -> Word8
 721 minimum xs@(PS x s l)
 722     | null xs   = errorEmptyList "minimum"
 723     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 724                     return $ c_minimum (p `plusPtr` s) l
 725 {-# INLINE minimum #-}
 726
 727 {-
 728 maximum xs@(PS x s l)
 729     | null xs   = errorEmptyList "maximum"
 730     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 731                         w <- peek p
 732                         maximum_ (p `plusPtr` s) 0 l w
 733
 734 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 735 STRICT4(maximum_)
 736 maximum_ ptr n m c
 737     | n >= m    = return c
 738     | otherwise = do w <- peekByteOff ptr n
 739                      maximum_ ptr (n+1) m (if w > c then w else c)
 740
 741 minimum xs@(PS x s l)
 742     | null xs   = errorEmptyList "minimum"
 743     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 744                         w <- peek p
 745                         minimum_ (p `plusPtr` s) 0 l w
 746
 747 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 748 STRICT4(minimum_)
 749 minimum_ ptr n m c
 750     | n >= m    = return c
 751     | otherwise = do w <- peekByteOff ptr n
 752                      minimum_ ptr (n+1) m (if w < c then w else c)
 753 -}
 754
 755 -- | /O(n)/ map Word8 functions, provided with the index at each position
 756 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 757 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 758     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 759   where
 760     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 761     STRICT4(go)
 762     go n f t p | f == p    = return ()
 763                | otherwise = do w <- peek f
 764                                 ((poke t) . k n) w
 765                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 766
 767 -- ---------------------------------------------------------------------
 768 -- Unfolds and replicates
 769
 770 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 771 -- the value of every element. The following holds:
 772 --
 773 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 774 --
 775 -- This implemenation uses @memset(3)@
 776 replicate :: Int -> Word8 -> ByteString
 777 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 778
 779 {-
 780 -- About 5x slower
 781 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 782     where
 783         STRICT2(go)
 784         go _   0 = return w
 785         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 786 -}
 787
 788 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 789 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 790 -- the element and returns 'Nothing' if it is done producing the
 791 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 792 -- prepending to the ByteString and @b@ is used as the next element in a
 793 -- recursive call.
 794 --
 795 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 796 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 797 -- final size of the ByteString as an argument. 'cons' can then be
 798 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 799 -- linear complexity. The depth of the recursion is limited to this
 800 -- size, but may be less. For lazy, infinite unfoldr, use
 801 -- 'Data.List.unfoldr' (from 'Data.List').
 802 --
 803 -- Examples:
 804 --
 805 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 806 --
 807 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 808 --
 809 -- > unfoldrN n == take n $ List.unfoldr
 810 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 811 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 812     where
 813         STRICT3(go)
 814         go q c n | n == i    = return n      -- stop if we reach `i'
 815                  | otherwise = case f c of
 816                                    Nothing        -> return n
 817                                    Just (a,new_c) -> do
 818                                         poke q a
 819                                         go (q `plusPtr` 1) new_c (n+1)
 820
 821 -- ---------------------------------------------------------------------
 822 -- Substrings
 823
 824 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 825 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 826 take :: Int -> ByteString -> ByteString
 827 take n ps@(PS x s l)
 828     | n < 0     = empty
 829     | n >= l    = ps
 830     | otherwise = PS x s n
 831 {-# INLINE take #-}
 832
 833 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 834 -- elements, or @[]@ if @n > 'length' xs@.
 835 drop  :: Int -> ByteString -> ByteString
 836 drop n ps@(PS x s l)
 837     | n <= 0    = ps
 838     | n >  l    = empty
 839     | otherwise = PS x (s+n) (l-n)
 840 {-# INLINE drop #-}
 841
 842 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 843 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 844 splitAt  n ps  = (take n ps, drop n ps)
 845 {-# INLINE splitAt #-}
 846
 847 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 848 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 849 -- satisfy @p@.
 850 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 851 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 852 {-# INLINE takeWhile #-}
 853
 854 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 855 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 856 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 857 {-# INLINE dropWhile #-}
 858
 859 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 860 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 861 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 862 {-# INLINE break #-}
 863
 864 -- | 'breakByte' breaks its ByteString argument at the first occurence
 865 -- of the specified byte. It is more efficient than 'break' as it is
 866 -- implemented with @memchr(3)@. I.e.
 867 --
 868 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 869 --
 870 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 871 breakByte c p = case elemIndex c p of
 872     Nothing -> (p,empty)
 873     Just n  -> (take n p, drop n p)
 874 {-# INLINE breakByte #-}
 875
 876 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 877 -- occurence of @w@. It behaves like 'break', except the delimiter is
 878 -- not returned, and @Nothing@ is returned if the delimiter is not in
 879 -- the ByteString. I.e.
 880 --
 881 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 882 --
 883 -- > breakFirst c xs ==
 884 -- > let (x,y) = break (== c) xs
 885 -- > in if null y then Nothing else Just (x, drop 1 y))
 886 --
 887 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 888 breakFirst c p = case elemIndex c p of
 889    Nothing -> Nothing
 890    Just n -> Just (take n p, drop (n+1) p)
 891 {-# INLINE breakFirst #-}
 892
 893 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 894 -- ByteString.
 895 --
 896 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 897 --
 898 -- and the following are equivalent:
 899 --
 900 -- > breakLast 'c' "abcdef"
 901 -- > let (x,y) = break (=='c') (reverse "abcdef")
 902 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 903 --
 904 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 905 breakLast c p = case elemIndexLast c p of
 906     Nothing -> Nothing
 907     Just n -> Just (take n p, drop (n+1) p)
 908 {-# INLINE breakLast #-}
 909
 910 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 911 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 912 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 913 span  p ps = break (not . p) ps
 914 {-# INLINE span #-}
 915
 916 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 917 -- We have
 918 --
 919 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 920 --
 921 -- and
 922 --
 923 -- > spanEnd (not . isSpace) ps
 924 -- >    ==
 925 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 926 --
 927 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 928 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 929
 930 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 931 -- separators, where the predicate returns True for a separator element.
 932 -- The resulting components do not contain the separators.  Two adjacent
 933 -- separators result in an empty component in the output.  eg.
 934 --
 935 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 936 -- > splitWith (=='a') []        == []
 937 --
 938 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 939
 940 #if defined(__GLASGOW_HASKELL__)
 941 splitWith _pred (PS _  _   0) = []
 942 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
 943   where pred# c# = pred_ (W8# c#)
 944
 945         splitWith' pred' off' len' fp' = withPtr fp $ \p ->
 946             splitLoop pred' p 0 off' len' fp'
 947
 948         splitLoop :: (Word# -> Bool)
 949                   -> Ptr Word8
 950                   -> Int -> Int -> Int
 951                   -> ForeignPtr Word8
 952                   -> IO [ByteString]
 953
 954         splitLoop pred' p idx' off' len' fp'
 955             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
 956             | idx' >= len'  = return [PS fp' off' idx']
 957             | otherwise = do
 958                 w <- peekElemOff p (off'+idx')
 959                 if pred' (case w of W8# w# -> w#)
 960                    then return (PS fp' off' idx' :
 961                               splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
 962                    else splitLoop pred' p (idx'+1) off' len' fp'
 963 {-# INLINE splitWith #-}
 964
 965 #else
 966 splitWith _ (PS _ _ 0) = []
 967 splitWith p ps = splitWith' p ps
 968     where
 969         STRICT2(splitWith')
 970         splitWith' q qs = if null rest then [chunk]
 971                                        else chunk : splitWith' q (unsafeTail rest)
 972             where (chunk,rest) = break q qs
 973 #endif
 974
 975 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 976 -- argument, consuming the delimiter. I.e.
 977 --
 978 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 979 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
 980 -- > split 'x'  "x"          == ["",""]
 981 --
 982 -- and
 983 --
 984 -- > join [c] . split c == id
 985 -- > split == splitWith . (==)
 986 --
 987 -- As for all splitting functions in this library, this function does
 988 -- not copy the substrings, it just constructs new 'ByteStrings' that
 989 -- are slices of the original.
 990 --
 991 split :: Word8 -> ByteString -> [ByteString]
 992 split _ (PS _ _ 0) = []
 993 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 994     let ptr = p `plusPtr` s
 995
 996         STRICT1(loop)
 997         loop n = do
 998             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
 999             if q == nullPtr
1000                 then return [PS x (s+n) (l-n)]
1001                 else do let i = q `minusPtr` ptr
1002                         ls <- loop (i+1)
1003                         return $! PS x (s+n) (i-n) : ls
1004     loop 0
1005 {-# INLINE split #-}
1006
1007 {-
1008 -- slower. but stays inside Haskell.
1009 split _ (PS _  _   0) = []
1010 split (W8# w#) (PS fp off len) = splitWith' off len fp
1011     where
1012         splitWith' off' len' fp' = withPtr fp $ \p ->
1013             splitLoop p 0 off' len' fp'
1014
1015         splitLoop :: Ptr Word8
1016                   -> Int -> Int -> Int
1017                   -> ForeignPtr Word8
1018                   -> IO [ByteString]
1019
1020         STRICT5(splitLoop)
1021         splitLoop p idx' off' len' fp'
1022             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1023             | idx' >= len'  = return [PS fp' off' idx']
1024             | otherwise = do
1025                 (W8# x#) <- peekElemOff p (off'+idx')
1026                 if word2Int# w# ==# word2Int# x#
1027                    then return (PS fp' off' idx' :
1028                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1029                    else splitLoop p (idx'+1) off' len' fp'
1030 -}
1031
1032 -- | Like 'splitWith', except that sequences of adjacent separators are
1033 -- treated as a single separator. eg.
1034 --
1035 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1036 --
1037 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1038 tokens f = P.filter (not.null) . splitWith f
1039
1040 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1041 -- 'ByteString's and concatenates the list after interspersing the first
1042 -- argument between each element of the list.
1043 join :: ByteString -> [ByteString] -> ByteString
1044 join filler pss = concat (splice pss)
1045     where
1046         splice []  = []
1047         splice [x] = [x]
1048         splice (x:y:xs) = x:filler:splice (y:xs)
1049
1050 --
1051 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1052 -- with a char. Around 4 times faster than the generalised join.
1053 --
1054 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1055 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1056     withForeignPtr ffp $ \fp ->
1057     withForeignPtr fgp $ \gp -> do
1058         memcpy ptr (fp `plusPtr` s) l
1059         poke (ptr `plusPtr` l) c
1060         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1061     where
1062       len = length f + length g + 1
1063 {-# INLINE joinWithByte #-}
1064
1065 -- ---------------------------------------------------------------------
1066 -- Indexing ByteStrings
1067
1068 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1069 index :: ByteString -> Int -> Word8
1070 index ps n
1071     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1072     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1073                                 ++ ", length = " ++ show (length ps)
1074     | otherwise      = ps `unsafeIndex` n
1075 {-# INLINE index #-}
1076
1077 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1078 -- element in the given 'ByteString' which is equal to the query
1079 -- element, or 'Nothing' if there is no such element.
1080 -- This implementation uses memchr(3).
1081 elemIndex :: Word8 -> ByteString -> Maybe Int
1082 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1083     let p' = p `plusPtr` s
1084         q  = memchr p' c (fromIntegral l)
1085     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1086 {-# INLINE elemIndex #-}
1087
1088 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1089 -- element in the given 'ByteString' which is equal to the query
1090 -- element, or 'Nothing' if there is no such element. The following
1091 -- holds:
1092 --
1093 -- > elemIndexLast c xs ==
1094 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1095 --
1096 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1097 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1098     go (p `plusPtr` s) (l-1)
1099   where
1100     STRICT2(go)
1101     go p i | i < 0     = return Nothing
1102            | otherwise = do ch' <- peekByteOff p i
1103                             if ch == ch'
1104                                 then return $ Just i
1105                                 else go p (i-1)
1106 {-# INLINE elemIndexLast #-}
1107
1108 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1109 -- the indices of all elements equal to the query element, in ascending order.
1110 -- This implementation uses memchr(3).
1111 elemIndices :: Word8 -> ByteString -> [Int]
1112 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1113     let ptr = p `plusPtr` s
1114
1115         STRICT1(loop)
1116         loop n = do
1117                 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1118                 if q == nullPtr
1119                     then return []
1120                     else do let i = q `minusPtr` ptr
1121                             ls <- loop (i+1)
1122                             return $! i:ls
1123     loop 0
1124
1125 {-
1126 -- much slower
1127 elemIndices :: Word8 -> ByteString -> [Int]
1128 elemIndices c ps = loop 0 ps
1129    where STRICT2(loop)
1130          loop _ ps' | null ps'            = []
1131          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1132                     | otherwise           = loop (n+1) (unsafeTail ps')
1133 -}
1134
1135 -- | count returns the number of times its argument appears in the ByteString
1136 --
1137 -- > count = length . elemIndices
1138 --
1139 -- But more efficiently than using length on the intermediate list.
1140 count :: Word8 -> ByteString -> Int
1141 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1142     return $ c_count (p `plusPtr` s) (fromIntegral m) w
1143 {-# INLINE count #-}
1144
1145 {-
1146 --
1147 -- around 30% slower
1148 --
1149 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1150      go (p `plusPtr` s) (fromIntegral m) 0
1151     where
1152         go :: Ptr Word8 -> CSize -> Int -> IO Int
1153         STRICT3(go)
1154         go p l i = do
1155             let q = memchr p w l
1156             if q == nullPtr
1157                 then return i
1158                 else do let k = fromIntegral $ q `minusPtr` p
1159                         go (q `plusPtr` 1) (l-k-1) (i+1)
1160 -}
1161
1162 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1163 -- returns the index of the first element in the ByteString
1164 -- satisfying the predicate.
1165 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1166 findIndex = (listToMaybe .) . findIndices
1167
1168 -- | The 'findIndices' function extends 'findIndex', by returning the
1169 -- indices of all elements satisfying the predicate, in ascending order.
1170 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1171 findIndices p ps = loop 0 ps
1172    where
1173      STRICT2(loop)
1174      loop _ qs | null qs           = []
1175      loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1176                | otherwise         =     loop (n+1) (unsafeTail qs)
1177
1178 -- ---------------------------------------------------------------------
1179 -- Searching ByteStrings
1180
1181 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1182 elem :: Word8 -> ByteString -> Bool
1183 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1184 {-# INLINE elem #-}
1185
1186 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1187 notElem :: Word8 -> ByteString -> Bool
1188 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1189 {-# INLINE notElem #-}
1190
1191 --
1192 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1193 -- case of filtering a single byte. It is more efficient to use
1194 -- /filterByte/ in this case.
1195 --
1196 -- > filterByte == filter . (==)
1197 --
1198 -- filterByte is around 10x faster, and uses much less space, than its
1199 -- filter equivalent
1200 filterByte :: Word8 -> ByteString -> ByteString
1201 filterByte w ps = replicate (count w ps) w
1202
1203 {-
1204 -- slower than the replicate version
1205
1206 filterByte ch ps@(PS x s l)
1207     | null ps   = ps
1208     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1209         t <- go (f `plusPtr` s) p l
1210         return (t `minusPtr` p) -- actual length
1211     where
1212         STRICT3(go)
1213         go _ t 0 = return t
1214         go f t e = do w <- peek f
1215                       if w == ch
1216                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1217                         else             go (f `plusPtr` 1) t               (e-1)
1218 -}
1219
1220 --
1221 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1222 -- case of filtering a single byte out of a list. It is more efficient
1223 -- to use /filterNotByte/ in this case.
1224 --
1225 -- > filterNotByte == filter . (/=)
1226 --
1227 -- filterNotByte is around 3x faster, and uses much less space, than its
1228 -- filter equivalent
1229 filterNotByte :: Word8 -> ByteString -> ByteString
1230 filterNotByte ch ps@(PS x s l)
1231     | null ps   = ps
1232     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1233         t <- go (f `plusPtr` s) p l
1234         return (t `minusPtr` p) -- actual length
1235     where
1236         STRICT3(go)
1237         go _ t 0 = return t
1238         go f t e = do w <- peek f
1239                       if w /= ch
1240                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1241                         else             go (f `plusPtr` 1) t               (e-1)
1242
1243 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1244 -- returns a ByteString containing those characters that satisfy the
1245 -- predicate.
1246 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1247 filter k ps@(PS x s l)
1248     | null ps   = ps
1249     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1250         t <- go (f `plusPtr` s) p l
1251         return (t `minusPtr` p) -- actual length
1252     where
1253         STRICT3(go)
1254         go _ t 0 = return t
1255         go f t e = do w <- peek f
1256                       if k w
1257                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1258                         else             go (f `plusPtr` 1) t               (e - 1)
1259
1260 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1261
1262 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1263 -- and returns the first element in matching the predicate, or 'Nothing'
1264 -- if there is no such element.
1265 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1266 find p ps = case filter p ps of
1267     q | null q -> Nothing
1268       | otherwise -> Just (unsafeHead q)
1269
1270 -- ---------------------------------------------------------------------
1271 -- Searching for substrings
1272
1273 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1274 -- iff the first is a prefix of the second.
1275 isPrefixOf :: ByteString -> ByteString -> Bool
1276 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1277     | l1 == 0   = True
1278     | l2 < l1   = False
1279     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1280         withForeignPtr x2 $ \p2 -> do
1281             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1282             return (i == 0)
1283
1284 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1285 -- iff the first is a suffix of the second.
1286 --
1287 -- The following holds:
1288 --
1289 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1290 --
1291 -- However, the real implemenation uses memcmp to compare the end of the
1292 -- string only, with no reverse required..
1293 isSuffixOf :: ByteString -> ByteString -> Bool
1294 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1295     | l1 == 0   = True
1296     | l2 < l1   = False
1297     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1298         withForeignPtr x2 $ \p2 -> do
1299             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1300             return (i == 0)
1301
1302 -- | Check whether one string is a substring of another. @isSubstringOf
1303 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1304 isSubstringOf :: ByteString -- ^ String to search for.
1305               -> ByteString -- ^ String to search in.
1306               -> Bool
1307 isSubstringOf p s = not $ P.null $ findSubstrings p s
1308
1309 -- | Get the first index of a substring in another string,
1310 --   or 'Nothing' if the string is not found.
1311 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1312 findSubstring :: ByteString -- ^ String to search for.
1313               -> ByteString -- ^ String to seach in.
1314               -> Maybe Int
1315 findSubstring = (listToMaybe .) . findSubstrings
1316
1317 -- | Find the indexes of all (possibly overlapping) occurances of a
1318 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1319 -- string matching algorithm.
1320 findSubstrings :: ByteString -- ^ String to search for.
1321                -> ByteString -- ^ String to seach in.
1322                -> [Int]
1323
1324 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1325   where
1326       patc x = pat `unsafeIndex` x
1327       strc x = str `unsafeIndex` x
1328
1329       -- maybe we should make kmpNext a UArray before using it in search?
1330       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1331       kmpNextL p _ | null p = []
1332       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1333                          ps = unsafeTail p
1334                          x = if not (null ps) && unsafeHead ps == patc j'
1335                                 then kmpNext Array.! j' else j'
1336                         in x:kmpNextL ps j'
1337       search i j = match ++ rest -- i: position in string, j: position in pattern
1338         where match = if j == m then [(i - j)] else []
1339               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1340       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1341                | otherwise = j
1342
1343 -- ---------------------------------------------------------------------
1344 -- Zipping
1345
1346 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1347 -- corresponding pairs of bytes. If one input ByteString is short,
1348 -- excess elements of the longer ByteString are discarded. This is
1349 -- equivalent to a pair of 'unpack' operations.
1350 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1351 zip ps qs
1352     | null ps || null qs = []
1353     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1354
1355 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1356 -- the first argument, instead of a tupling function.  For example,
1357 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1358 -- corresponding sums.
1359 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1360 zipWith f ps qs
1361     | null ps || null qs = []
1362     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1363
1364 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1365 -- ByteStrings. Note that this performs two 'pack' operations.
1366 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1367 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1368 {-# INLINE unzip #-}
1369
1370 -- ---------------------------------------------------------------------
1371 -- Special lists
1372
1373 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1374 inits :: ByteString -> [ByteString]
1375 inits (PS x s l) = [PS x s n | n <- [0..l]]
1376
1377 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1378 tails :: ByteString -> [ByteString]
1379 tails p | null p    = [empty]
1380         | otherwise = p : tails (unsafeTail p)
1381
1382 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1383
1384 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1385 elems :: ByteString -> [ByteString]
1386 elems (PS _ _ 0) = []
1387 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1388 {-# INLINE elems #-}
1389
1390 -- ---------------------------------------------------------------------
1391 -- ** Ordered 'ByteString's
1392
1393 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1394 sort :: ByteString -> ByteString
1395 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1396         memcpy p (f `plusPtr` s) l
1397         c_qsort p l -- inplace
1398
1399 {-
1400 sort = pack . List.sort . unpack
1401 -}
1402
1403 -- ---------------------------------------------------------------------
1404 --
1405 -- Extensions to the basic interface
1406 --
1407
1408 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1409 -- check for the empty case, so there is an obligation on the programmer
1410 -- to provide a proof that the ByteString is non-empty.
1411 unsafeHead :: ByteString -> Word8
1412 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1413 {-# INLINE unsafeHead #-}
1414
1415 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1416 -- check for the empty case. As with 'unsafeHead', the programmer must
1417 -- provide a separate proof that the ByteString is non-empty.
1418 unsafeTail :: ByteString -> ByteString
1419 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1420 {-# INLINE unsafeTail #-}
1421
1422 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1423 -- This omits the bounds check, which means there is an accompanying
1424 -- obligation on the programmer to ensure the bounds are checked in some
1425 -- other way.
1426 unsafeIndex :: ByteString -> Int -> Word8
1427 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1428 {-# INLINE unsafeIndex #-}
1429
1430 -- ---------------------------------------------------------------------
1431 -- Low level constructors
1432
1433 #if defined(__GLASGOW_HASKELL__)
1434 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1435 -- Addr\# (an arbitrary machine address assumed to point outside the
1436 -- garbage-collected heap) into a @ByteString@. A much faster way to
1437 -- create an Addr\# is with an unboxed string literal, than to pack a
1438 -- boxed string. A unboxed string literal is compiled to a static @char
1439 -- []@ by GHC. Establishing the length of the string requires a call to
1440 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1441 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1442 -- if you know the length of the string statically.
1443 --
1444 -- An example:
1445 --
1446 -- > literalFS = packAddress "literal"#
1447 --
1448 packAddress :: Addr# -> ByteString
1449 packAddress addr# = inlinePerformIO $ do
1450     p <- newForeignPtr_ cstr
1451     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1452   where
1453     cstr = Ptr addr#
1454 {-# INLINE packAddress #-}
1455
1456 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1457 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1458 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1459 -- 'Addr\#' to the string, and the length of the string. Make sure the
1460 -- length is correct, otherwise use the safer 'packAddress' (where the
1461 -- length will be calculated once at runtime).
1462 unsafePackAddress :: Int -> Addr# -> ByteString
1463 unsafePackAddress len addr# = inlinePerformIO $ do
1464     p <- newForeignPtr_ cstr
1465     return $ PS p 0 len
1466     where cstr = Ptr addr#
1467
1468 #endif
1469
1470 -- | /O(1)/ Build a ByteString from a ForeignPtr
1471 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1472 fromForeignPtr fp l = PS fp 0 l
1473
1474 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1475 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1476 toForeignPtr (PS ps s l) = (ps, s, l)
1477
1478 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1479 -- current 'ByteString' from any larger string it was created from, as
1480 -- an 'Int'.
1481 skipIndex :: ByteString -> Int
1482 skipIndex (PS _ s _) = s
1483 {-# INLINE skipIndex #-}
1484
1485 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1486 -- finalizer associated to it. The ByteString length is calculated using
1487 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1488 packCString :: CString -> ByteString
1489 packCString cstr = inlinePerformIO $ do
1490     fp <- newForeignPtr_ (castPtr cstr)
1491     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1492
1493 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1494 -- have /no/ finalizer associated with it. This operation has /O(1)/
1495 -- complexity as we already know the final size, so no /strlen(3)/ is
1496 -- required.
1497 packCStringLen :: CStringLen -> ByteString
1498 packCStringLen (ptr,len) = inlinePerformIO $ do
1499     fp <- newForeignPtr_ (castPtr ptr)
1500     return $ PS fp 0 (fromIntegral len)
1501
1502 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1503 -- have a @free(3)@ finalizer associated to it.
1504 packMallocCString :: CString -> ByteString
1505 packMallocCString cstr = inlinePerformIO $ do
1506     fp <- newForeignFreePtr (castPtr cstr)
1507     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1508
1509 #if defined(__GLASGOW_HASKELL__)
1510 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1511 -- length, and an IO action representing a finalizer. This function is
1512 -- not available on Hugs.
1513 --
1514 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1515 packCStringFinalizer p l f = do
1516     fp <- FC.newForeignPtr p f
1517     return $ PS fp 0 l
1518
1519 -- | Explicitly run the finaliser associated with a 'ByteString'.
1520 -- Further references to this value may generate invalid memory
1521 -- references. This operation is unsafe, as there may be other
1522 -- 'ByteStrings' referring to the same underlying pages. If you use
1523 -- this, you need to have a proof of some kind that all 'ByteString's
1524 -- ever generated from the underlying byte array are no longer live.
1525 unsafeFinalize :: ByteString -> IO ()
1526 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1527
1528 #endif
1529
1530 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1531 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1532 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1533 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1534     where
1535       alloc = withForeignPtr ps $ \p -> do
1536                 buf <- c_malloc (fromIntegral l+1)
1537                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1538                 poke (buf `plusPtr` l) (0::Word8)
1539                 return $ castPtr buf
1540
1541 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1542 -- Warning: modifying the @CString@ will affect the @ByteString@.
1543 -- Why is this function unsafe? It relies on the null byte at the end of
1544 -- the ByteString to be there. This is /not/ the case if your ByteString
1545 -- has been spliced from a larger string (i.e. with take or drop).
1546 -- Unless you can guarantee the null byte, you should use the safe
1547 -- version, which will copy the string first.
1548 --
1549 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1550 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1551
1552 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1553 --   This is mainly useful to allow the rest of the data pointed
1554 --   to by the 'ByteString' to be garbage collected, for example
1555 --   if a large string has been read in, and only a small part of it
1556 --   is needed in the rest of the program.
1557 copy :: ByteString -> ByteString
1558 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1559
1560 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1561 -- CString is going to be deallocated from C land.
1562 copyCString :: CString -> ByteString
1563 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1564
1565 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1566 copyCStringLen :: CStringLen -> ByteString
1567 copyCStringLen (cstr, len) = inlinePerformIO $ do
1568     fp <- mallocForeignPtrArray (len+1)
1569     withForeignPtr fp $ \p -> do
1570         memcpy p (castPtr cstr) len
1571         poke (p `plusPtr` len) (0 :: Word8)
1572     return $! PS fp 0 len
1573
1574 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1575 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1576 -- This is analogous to unsafeUseAsCString, and comes with the same
1577 -- safety requirements.
1578 --
1579 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1580 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1581
1582 -- | Given the maximum size needed and a function to make the contents
1583 -- of a ByteString, generate makes the 'ByteString'. The generating
1584 -- function is required to return the actual final size (<= the maximum
1585 -- size), and the resulting byte array is realloced to this size.  The
1586 -- string is padded at the end with a null byte.
1587 --
1588 -- generate is the main mechanism for creating custom, efficient
1589 -- ByteString functions, using Haskell or C functions to fill the space.
1590 --
1591 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1592 generate i f = do
1593     p <- mallocArray i
1594     i' <- f p
1595     p' <- reallocArray p (i'+1)
1596     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1597     fp <- newForeignFreePtr p'
1598     return $ PS fp 0 i'
1599
1600 -- ---------------------------------------------------------------------
1601 -- line IO
1602
1603 #if defined(__GLASGOW_HASKELL__)
1604
1605 -- | getLine, read a line from stdin.
1606 getLine :: IO ByteString
1607 getLine = hGetLine stdin
1608
1609 -- | hGetLine. read a ByteString from a handle
1610 hGetLine :: Handle -> IO ByteString
1611 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1612     case haBufferMode handle_ of
1613        NoBuffering -> error "no buffering"
1614        _other      -> hGetLineBuffered handle_
1615
1616  where
1617     hGetLineBuffered handle_ = do
1618         let ref = haBuffer handle_
1619         buf <- readIORef ref
1620         hGetLineBufferedLoop handle_ ref buf 0 []
1621
1622     hGetLineBufferedLoop handle_ ref
1623             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1624         len `seq` do
1625         off <- findEOL r w raw
1626         let new_len = len + off - r
1627         xs <- mkPS raw r off
1628
1629       -- if eol == True, then off is the offset of the '\n'
1630       -- otherwise off == w and the buffer is now empty.
1631         if off /= w
1632             then do if (w == off + 1)
1633                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1634                             else writeIORef ref buf{ bufRPtr = off + 1 }
1635                     mkBigPS new_len (xs:xss)
1636             else do
1637                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1638                                     buf{ bufWPtr=0, bufRPtr=0 }
1639                  case maybe_buf of
1640                     -- Nothing indicates we caught an EOF, and we may have a
1641                     -- partial line to return.
1642                     Nothing -> do
1643                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1644                          if new_len > 0
1645                             then mkBigPS new_len (xs:xss)
1646                             else ioe_EOF
1647                     Just new_buf ->
1648                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1649
1650     -- find the end-of-line character, if there is one
1651     findEOL r w raw
1652         | r == w = return w
1653         | otherwise =  do
1654             (c,r') <- readCharFromBuffer raw r
1655             if c == '\n'
1656                 then return r -- NB. not r': don't include the '\n'
1657                 else findEOL r' w raw
1658
1659     maybeFillReadBuffer fd is_line is_stream buf = catch
1660         (do buf' <- fillReadBuffer fd is_line is_stream buf
1661             return (Just buf'))
1662         (\e -> if isEOFError e then return Nothing else ioError e)
1663
1664 -- TODO, rewrite to use normal memcpy
1665 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1666 mkPS buf start end = do
1667     let len = end - start
1668     fp <- mallocByteString len
1669     withForeignPtr fp $ \p -> do
1670         memcpy_ptr_baoff p buf start (fromIntegral len)
1671         return (PS fp 0 len)
1672
1673 mkBigPS :: Int -> [ByteString] -> IO ByteString
1674 mkBigPS _ [ps] = return ps
1675 mkBigPS _ pss = return $! concat (P.reverse pss)
1676
1677 #endif
1678
1679 -- ---------------------------------------------------------------------
1680 -- Block IO
1681
1682 -- | Outputs a 'ByteString' to the specified 'Handle'.
1683 hPut :: Handle -> ByteString -> IO ()
1684 hPut _ (PS _ _ 0)  = return ()
1685 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1686 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1687
1688 -- | Write a ByteString to stdout
1689 putStr :: ByteString -> IO ()
1690 putStr = hPut stdout
1691
1692 -- | Write a ByteString to stdout, appending a newline byte
1693 putStrLn :: ByteString -> IO ()
1694 putStrLn ps = hPut stdout ps >> hPut stdout nl
1695     where nl = packByte 0x0a
1696
1697 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1698 -- is far more efficient than reading the characters into a 'String'
1699 -- and then using 'pack'.
1700 hGet :: Handle -> Int -> IO ByteString
1701 hGet _ 0 = return empty
1702 hGet h i = do fp <- mallocByteString i
1703               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1704               return $ PS fp 0 l
1705
1706 #if defined(__GLASGOW_HASKELL__)
1707 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1708 -- waiting for data to become available, instead it returns only whatever data
1709 -- is available.
1710 hGetNonBlocking :: Handle -> Int -> IO ByteString
1711 hGetNonBlocking _ 0 = return empty
1712 hGetNonBlocking h i = do
1713     fp <- mallocByteString i
1714     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1715     return $ PS fp 0 l
1716 #endif
1717
1718 -- | Read entire handle contents into a 'ByteString'.
1719 --
1720 -- As with 'hGet', the string representation in the file is assumed to
1721 -- be ISO-8859-1.
1722 --
1723 hGetContents :: Handle -> IO ByteString
1724 hGetContents h = do
1725     let start_size = 1024
1726     p <- mallocArray start_size
1727     i <- hGetBuf h p start_size
1728     if i < start_size
1729         then do p' <- reallocArray p i
1730                 fp <- newForeignFreePtr p'
1731                 return $ PS fp 0 i
1732         else f p start_size
1733     where
1734         f p s = do
1735             let s' = 2 * s
1736             p' <- reallocArray p s'
1737             i  <- hGetBuf h (p' `plusPtr` s) s
1738             if i < s
1739                 then do let i' = s + i
1740                         p'' <- reallocArray p' i'
1741                         fp  <- newForeignFreePtr p''
1742                         return $ PS fp 0 i'
1743                 else f p' s'
1744
1745 -- | getContents. Equivalent to hGetContents stdin
1746 getContents :: IO ByteString
1747 getContents = hGetContents stdin
1748
1749 -- | Read an entire file directly into a 'ByteString'.  This is far more
1750 -- efficient than reading the characters into a 'String' and then using
1751 -- 'pack'.  It also may be more efficient than opening the file and
1752 -- reading it using hGet.
1753 readFile :: FilePath -> IO ByteString
1754 readFile f = do
1755     h <- openBinaryFile f ReadMode
1756     l <- hFileSize h
1757     s <- hGet h $ fromIntegral l
1758     hClose h
1759     return s
1760
1761 -- | Write a 'ByteString' to a file.
1762 writeFile :: FilePath -> ByteString -> IO ()
1763 writeFile f ps = do
1764     h <- openBinaryFile f WriteMode
1765     hPut h ps
1766     hClose h
1767
1768 {-
1769 --
1770 -- Disable until we can move it into a portable .hsc file
1771 --
1772
1773 -- | Like readFile, this reads an entire file directly into a
1774 -- 'ByteString', but it is even more efficient.  It involves directly
1775 -- mapping the file to memory.  This has the advantage that the contents
1776 -- of the file never need to be copied.  Also, under memory pressure the
1777 -- page may simply be discarded, while in the case of readFile it would
1778 -- need to be written to swap.  If you read many small files, mmapFile
1779 -- will be less memory-efficient than readFile, since each mmapFile
1780 -- takes up a separate page of memory.  Also, you can run into bus
1781 -- errors if the file is modified.  As with 'readFile', the string
1782 -- representation in the file is assumed to be ISO-8859-1.
1783 --
1784 -- On systems without mmap, this is the same as a readFile.
1785 --
1786 mmapFile :: FilePath -> IO ByteString
1787 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1788
1789 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1790 mmap f = do
1791     h <- openBinaryFile f ReadMode
1792     l <- fromIntegral `fmap` hFileSize h
1793     -- Don't bother mmaping small files because each mmapped file takes up
1794     -- at least one full VM block.
1795     if l < mmap_limit
1796        then do thefp <- mallocByteString l
1797                withForeignPtr thefp $ \p-> hGetBuf h p l
1798                hClose h
1799                return (thefp, l)
1800        else do
1801                -- unix only :(
1802                fd <- fromIntegral `fmap` handleToFd h
1803                p  <- my_mmap l fd
1804                fp <- if p == nullPtr
1805                      then do thefp <- mallocByteString l
1806                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1807                              return thefp
1808                      else do
1809                           -- The munmap leads to crashes on OpenBSD.
1810                           -- maybe there's a use after unmap in there somewhere?
1811 #if !defined(__OpenBSD__)
1812                              let unmap = c_munmap p l >> return ()
1813 #else
1814                              let unmap = return ()
1815 #endif
1816                              fp <- FC.newForeignPtr p unmap
1817                              return fp
1818                c_close fd
1819                hClose h
1820                return (fp, l)
1821     where mmap_limit = 16*1024
1822 -}
1823
1824 #if defined(__GLASGOW_HASKELL__)
1825 --
1826 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1827 --
1828 getArgs :: IO [ByteString]
1829 getArgs =
1830   alloca $ \ p_argc ->
1831   alloca $ \ p_argv -> do
1832     getProgArgv p_argc p_argv
1833     p    <- fromIntegral `fmap` peek p_argc
1834     argv <- peek p_argv
1835     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1836 #endif
1837
1838 -- ---------------------------------------------------------------------
1839 -- Internal utilities
1840
1841 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1842 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1843 -- convenience for ByteString construction.
1844 w2c :: Word8 -> Char
1845 #if !defined(__GLASGOW_HASKELL__)
1846 w2c = chr . fromIntegral
1847 #else
1848 w2c = unsafeChr . fromIntegral
1849 #endif
1850 {-# INLINE w2c #-}
1851
1852 c2w :: Char -> Word8
1853 c2w = fromIntegral . ord
1854 {-# INLINE c2w #-}
1855
1856 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1857 -- is padded with a null byte.
1858 mallocByteString :: Int -> IO (ForeignPtr Word8)
1859 mallocByteString l = do
1860     fp <- mallocForeignPtrArray (l+1)
1861     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1862     return fp
1863
1864 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1865 -- argument gives the final size of the ByteString. Unlike 'generate'
1866 -- the ByteString is no reallocated if the final size is less than the
1867 -- estimated size.
1868 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1869 create l write_ptr = inlinePerformIO $ do
1870     fp <- mallocByteString (l+1)
1871     withForeignPtr fp $ \p -> write_ptr p
1872     return $ PS fp 0 l
1873 {-# INLINE create #-}
1874
1875 -- | Perform an operation with a temporary ByteString
1876 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1877 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1878 {-# INLINE withPtr #-}
1879
1880 -- Common up near identical calls to `error' to reduce the number
1881 -- constant strings created when compiled:
1882 errorEmptyList :: String -> a
1883 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1884 {-# INLINE errorEmptyList #-}
1885
1886 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1887 -- of the string if no element is found, rather than Nothing.
1888 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1889 STRICT2(findIndexOrEnd)
1890 findIndexOrEnd f ps
1891     | null ps           = 0
1892     | f (unsafeHead ps) = 0
1893     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
1894 {-# INLINE findIndexOrEnd #-}
1895
1896 -- Find from the end of the string using predicate
1897 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1898 STRICT2(findFromEndUntil)
1899 findFromEndUntil f ps@(PS x s l) =
1900     if null ps then 0
1901     else if f (last ps) then l
1902          else findFromEndUntil f (PS x s (l-1))
1903
1904 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1905 -- it exposes lots of things to further inlining
1906 --
1907 {-# INLINE inlinePerformIO #-}
1908 inlinePerformIO :: IO a -> a
1909 #if defined(__GLASGOW_HASKELL__)
1910 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1911 #else
1912 inlinePerformIO = unsafePerformIO
1913 #endif
1914
1915 {-# INLINE newForeignFreePtr #-}
1916 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1917 #if defined(__GLASGOW_HASKELL__)
1918 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1919 #else
1920 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1921 #endif
1922
1923 -- ---------------------------------------------------------------------
1924 --
1925 -- Standard C functions
1926 --
1927
1928 foreign import ccall unsafe "string.h strlen" c_strlen
1929     :: CString -> CInt
1930
1931 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1932     :: CInt -> IO (Ptr Word8)
1933
1934 foreign import ccall unsafe "static stdlib.h free" c_free
1935     :: Ptr Word8 -> IO ()
1936
1937 #if !defined(__GLASGOW_HASKELL__)
1938 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1939     :: FunPtr (Ptr Word8 -> IO ())
1940 #endif
1941
1942 foreign import ccall unsafe "string.h memset" memset
1943     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1944
1945 foreign import ccall unsafe "string.h memchr" memchr
1946     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1947
1948 foreign import ccall unsafe "string.h memcmp" memcmp
1949     :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1950
1951 foreign import ccall unsafe "string.h memcpy" memcpy
1952     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1953
1954 -- ---------------------------------------------------------------------
1955 --
1956 -- Uses our C code
1957 --
1958
1959 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1960     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1961
1962 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1963     :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1964
1965 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1966     :: Ptr Word8 -> Int -> Word8
1967
1968 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1969     :: Ptr Word8 -> Int -> Word8
1970
1971 foreign import ccall unsafe "static fpstring.h count" c_count
1972     :: Ptr Word8 -> Int -> Word8 -> Int
1973
1974 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1975     :: Ptr Word8 -> Int -> IO ()
1976
1977 -- ---------------------------------------------------------------------
1978 -- MMap
1979
1980 {-
1981 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
1982     :: Int -> Int -> IO (Ptr Word8)
1983
1984 foreign import ccall unsafe "static unistd.h close" c_close
1985     :: Int -> IO Int
1986
1987 #  if !defined(__OpenBSD__)
1988 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
1989     :: Ptr Word8 -> Int -> IO Int
1990 #  endif
1991 -}
1992
1993 -- ---------------------------------------------------------------------
1994 -- Internal GHC Haskell magic
1995
1996 #if defined(__GLASGOW_HASKELL__)
1997 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1998     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
1999
2000 foreign import ccall unsafe "__hscore_memcpy_src_off"
2001    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2002 #endif