Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 -- License     : BSD-style
  10 --
  11 -- Maintainer  : dons@cse.unsw.edu.au
  12 -- Stability   : experimental
  13 -- Portability : portable, requires ffi and cpp
  14 -- Tested with : GHC 6.4.1 and Hugs March 2005
  15 --
  16
  17 --
  18 -- | A time and space-efficient implementation of byte vectors using
  19 -- packed Word8 arrays, suitable for high performance use, both in terms
  20 -- of large data quantities, or high speed requirements. Byte vectors
  21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
  22 -- be passed between C and Haskell with little effort.
  23 --
  24 -- This module is intended to be imported @qualified@, to avoid name
  25 -- clashes with Prelude functions.  eg.
  26 --
  27 -- > import qualified Data.ByteString as B
  28 --
  29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  30 -- UArray by Simon Marlow. Rewritten to support slices and use
  31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  32 --
  33
  34 module Data.ByteString (
  35
  36         -- * The @ByteString@ type
  37         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  38
  39         -- * Introducing and eliminating 'ByteString's
  40         empty,                  -- :: ByteString
  41         packByte,               -- :: Word8   -> ByteString
  42         pack,                   -- :: [Word8] -> ByteString
  43         unpack,                 -- :: ByteString -> [Word8]
  44         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  45         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  46
  47         -- * Basic interface
  48         cons,                   -- :: Word8 -> ByteString -> ByteString
  49         snoc,                   -- :: Word8 -> ByteString -> ByteString
  50         null,                   -- :: ByteString -> Bool
  51         length,                 -- :: ByteString -> Int
  52         head,                   -- :: ByteString -> Word8
  53         tail,                   -- :: ByteString -> ByteString
  54         last,                   -- :: ByteString -> Word8
  55         init,                   -- :: ByteString -> ByteString
  56         append,                 -- :: ByteString -> ByteString -> ByteString
  57
  58         -- * Special ByteStrings
  59         inits,                  -- :: ByteString -> [ByteString]
  60         tails,                  -- :: ByteString -> [ByteString]
  61         elems,                  -- :: ByteString -> [ByteString]
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         reverse,                -- :: ByteString -> ByteString
  66         intersperse,            -- :: Word8 -> ByteString -> ByteString
  67         transpose,              -- :: [ByteString] -> [ByteString]
  68
  69         -- * Reducing 'ByteString's
  70         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  71         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  72         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  73         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74
  75         -- ** Special folds
  76         concat,                 -- :: [ByteString] -> ByteString
  77         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  78         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  79         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  80         maximum,                -- :: ByteString -> Word8
  81         minimum,                -- :: ByteString -> Word8
  82         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  83         hash,                   -- :: ByteString -> Int32
  84
  85         -- * Generating and unfolding ByteStrings
  86         replicate,              -- :: Int -> Word8 -> ByteString
  87         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  88
  89         -- * Substrings
  90
  91         -- ** Breaking strings
  92         take,                   -- :: Int -> ByteString -> ByteString
  93         drop,                   -- :: Int -> ByteString -> ByteString
  94         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  95         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  96         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  97         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  98         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  99         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 100
 101         -- ** Breaking and dropping on specific bytes
 102         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 103         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 104         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 105
 106         -- ** Breaking into many substrings
 107         split,                  -- :: Word8 -> ByteString -> [ByteString]
 108         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 109         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 110
 111         -- ** Joining strings
 112         join,                   -- :: ByteString -> [ByteString] -> ByteString
 113         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 114
 115         -- * Indexing ByteStrings
 116         index,                  -- :: ByteString -> Int -> Word8
 117         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 118         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 119         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 120         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 121         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 122         count,                  -- :: Word8 -> ByteString -> Int
 123
 124         -- * Ordered ByteStrings
 125         sort,                   -- :: ByteString -> ByteString
 126
 127         -- * Searching ByteStrings
 128
 129         -- ** Searching by equality
 130         -- | These functions use memchr(3) to efficiently search the ByteString
 131
 132         elem,                   -- :: Word8 -> ByteString -> Bool
 133         notElem,                -- :: Word8 -> ByteString -> Bool
 134         filterByte,             -- :: Word8 -> ByteString -> ByteString
 135         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 136
 137         -- ** Searching with a predicate
 138         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 139         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 140
 141         -- ** Prefixes and suffixes
 142         -- | These functions use memcmp(3) to efficiently compare substrings
 143         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 144         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 145
 146         -- ** Search for arbitrary substrings
 147         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 148         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 149         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 150
 151         -- * Zipping and unzipping ByteStrings
 152         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 153         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 154         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 155
 156         -- * Unchecked access
 157         unsafeHead,             -- :: ByteString -> Word8
 158         unsafeTail,             -- :: ByteString -> ByteString
 159         unsafeIndex,            -- :: ByteString -> Int -> Word8
 160
 161         -- * Low level introduction and elimination
 162         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 163         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 164         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 165         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 166         skipIndex,              -- :: ByteString -> Int
 167
 168         -- ** Packing CStrings and pointers
 169         packCString,            -- :: CString -> ByteString
 170         packCStringLen,         -- :: CString -> ByteString
 171         packMallocCString,      -- :: CString -> ByteString
 172
 173 #if defined(__GLASGOW_HASKELL__)
 174         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 175         packAddress,            -- :: Addr# -> ByteString
 176         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 177         unsafeFinalize,         -- :: ByteString -> IO ()
 178 #endif
 179
 180         -- ** Using ByteStrings as CStrings
 181         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 182         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 183         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 184
 185         -- ** Copying ByteStrings
 186         -- | These functions perform memcpy(3) operations
 187         copy,                   -- :: ByteString -> ByteString
 188         copyCString,            -- :: CString -> ByteString
 189         copyCStringLen,         -- :: CStringLen -> ByteString
 190
 191         -- * I\/O with @ByteString@s
 192
 193         -- ** Standard input and output
 194
 195 #if defined(__GLASGOW_HASKELL__)
 196         getLine,                -- :: IO ByteString
 197 #endif
 198         getContents,            -- :: IO ByteString
 199         putStr,                 -- :: ByteString -> IO ()
 200         putStrLn,               -- :: ByteString -> IO ()
 201
 202         -- ** Files
 203         readFile,               -- :: FilePath -> IO ByteString
 204         writeFile,              -- :: FilePath -> ByteString -> IO ()
 205
 206         -- ** I\/O with Handles
 207 #if defined(__GLASGOW_HASKELL__)
 208         getArgs,                -- :: IO [ByteString]
 209         hGetLine,               -- :: Handle -> IO ByteString
 210         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 211 #endif
 212         hGetContents,           -- :: Handle -> IO ByteString
 213         hGet,                   -- :: Handle -> Int -> IO ByteString
 214         hPut,                   -- :: Handle -> ByteString -> IO ()
 215
 216 #if defined(__GLASGOW_HASKELL__)
 217         -- * Miscellaneous
 218         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 219 #endif
 220
 221   ) where
 222
 223 import qualified Prelude as P
 224 import Prelude hiding           (reverse,head,tail,last,init,null
 225                                 ,length,map,lines,foldl,foldr,unlines
 226                                 ,concat,any,take,drop,splitAt,takeWhile
 227                                 ,dropWhile,span,break,elem,filter,maximum
 228                                 ,minimum,all,concatMap,foldl1,foldr1
 229                                 ,readFile,writeFile,replicate
 230                                 ,getContents,getLine,putStr,putStrLn
 231                                 ,zip,zipWith,unzip,notElem)
 232
 233 import qualified Data.List as List
 234
 235 import Data.Char
 236 import Data.Word                (Word8)
 237 import Data.Int                 (Int32)
 238 import Data.Bits                (rotateL)
 239 import Data.Maybe               (listToMaybe)
 240 import Data.Array               (listArray)
 241 import qualified Data.Array as Array ((!))
 242
 243 import Control.Exception        (bracket)
 244
 245 import Foreign.C.Types          (CSize, CInt)
 246 import Foreign.C.String         (CString, CStringLen)
 247 import Foreign.Storable
 248 import Foreign.ForeignPtr
 249 import Foreign.Ptr
 250 import Foreign.Marshal.Array
 251
 252 import System.IO                (stdin,stdout,hClose,hFileSize
 253                                 ,hGetBuf,hPutBuf,openBinaryFile
 254                                 ,Handle,IOMode(..))
 255
 256 #if defined(__GLASGOW_HASKELL__)
 257
 258 import System.IO                (hGetBufNonBlocking)
 259
 260 import qualified Foreign.Concurrent as FC (newForeignPtr)
 261
 262 import Data.Generics            (Data(..), Typeable(..))
 263
 264 import System.IO.Error          (isEOFError)
 265 import Foreign.Marshal          (alloca)
 266
 267 import GHC.Handle
 268 import GHC.Prim
 269 import GHC.Base                 (build, unsafeChr)
 270 import GHC.Word hiding (Word8)
 271 import GHC.Ptr                  (Ptr(..))
 272 import GHC.ST                   (ST(..))
 273 import GHC.IOBase
 274
 275 #else
 276
 277 import System.IO.Unsafe
 278
 279 #endif
 280
 281 -- -----------------------------------------------------------------------------
 282 --
 283 -- Useful macros, until we have bang patterns
 284 --
 285
 286 #define STRICT1(f) f a | a `seq` False = undefined
 287 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 288 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 289 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 290 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 291
 292 -- -----------------------------------------------------------------------------
 293
 294 -- | A space-efficient representation of a Word8 vector, supporting many
 295 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 296 --
 297 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 298 --
 299 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 300                      {-# UNPACK #-} !Int
 301                      {-# UNPACK #-} !Int
 302
 303 #if defined(__GLASGOW_HASKELL__)
 304     deriving (Data, Typeable)
 305 #endif
 306
 307 instance Eq  ByteString
 308     where (==)    = eq
 309
 310 instance Ord ByteString
 311     where compare = compareBytes
 312
 313 instance Show ByteString where
 314     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 315
 316 instance Read ByteString where
 317     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 318
 319 {-
 320 instance Arbitrary PackedString where
 321     arbitrary = P.pack `fmap` arbitrary
 322     coarbitrary s = coarbitrary (P.unpack s)
 323 -}
 324
 325 -- | /O(n)/ Equality on the 'ByteString' type.
 326 eq :: ByteString -> ByteString -> Bool
 327 eq a b = (compareBytes a b) == EQ
 328 {-# INLINE eq #-}
 329
 330 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 331 compareBytes :: ByteString -> ByteString -> Ordering
 332 compareBytes (PS _ _ 0) (PS _ _ 0)       = EQ    -- short cut for empty strings
 333 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
 334     withForeignPtr x1 $ \p1 ->
 335     withForeignPtr x2 $ \p2 -> do
 336         i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
 337         return $ case i `compare` 0 of
 338                     EQ  -> l1 `compare` l2
 339                     x   -> x
 340 {-# INLINE compareBytes #-}
 341
 342 {-
 343 --
 344 -- About 4x slower over 32M
 345 --
 346 compareBytes :: ByteString -> ByteString -> Ordering
 347 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 348     withForeignPtr fp1 $ \p1 ->
 349         withForeignPtr fp2 $ \p2 ->
 350             cmp (p1 `plusPtr` off1)
 351                 (p2 `plusPtr` off2) 0 len1 len2
 352
 353 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 354 STRICT5(cmp)
 355 cmp p1 p2 n len1 len2
 356       | n == len1 = if n == len2 then return EQ else return LT
 357       | n == len2 = return GT
 358       | otherwise = do
 359           (a :: Word8) <- peekByteOff p1 n
 360           (b :: Word8) <- peekByteOff p2 n
 361           case a `compare` b of
 362                 EQ -> cmp p1 p2 (n+1) len1 len2
 363                 LT -> return LT
 364                 GT -> return GT
 365 {-# INLINE compareBytes #-}
 366 -}
 367
 368 -- -----------------------------------------------------------------------------
 369 -- Introducing and eliminating 'ByteString's
 370
 371 -- | /O(1)/ The empty 'ByteString'
 372 empty :: ByteString
 373 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 374 {-# NOINLINE empty #-}
 375
 376 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 377 packByte :: Word8 -> ByteString
 378 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
 379     withForeignPtr fp $ \p -> poke p c
 380     return $ PS fp 0 1
 381 {-# NOINLINE packByte #-}
 382
 383 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 384 --
 385 -- For applications with large numbers of string literals, pack can be a
 386 -- bottleneck. In such cases, consider using packAddress (GHC only).
 387 pack :: [Word8] -> ByteString
 388
 389 #if !defined(__GLASGOW_HASKELL__)
 390
 391 pack str = create (P.length str) $ \p -> go p str
 392     where
 393         go _ []     = return ()
 394         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 395
 396 #else /* hack away */
 397
 398 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 399     where
 400         go _ _ []        = return ()
 401         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 402
 403         writeByte p i c = ST $ \s# ->
 404             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 405
 406 #endif
 407
 408 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 409 unpack :: ByteString -> [Word8]
 410
 411 #if !defined(__GLASGOW_HASKELL__)
 412
 413 unpack (PS _  _ 0) = []
 414 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 415         go (p `plusPtr` s) (l - 1) []
 416     where
 417         STRICT3(go)
 418         go p 0 acc = peek p          >>= \e -> return (e : acc)
 419         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 420 {-# INLINE unpack #-}
 421
 422 #else
 423
 424 unpack ps = build (unpackFoldr ps)
 425 {-# INLINE unpack #-}
 426
 427 unpackList :: ByteString -> [Word8]
 428 unpackList (PS fp off len) = withPtr fp $ \p -> do
 429     let STRICT3(loop)
 430         loop _ (-1) acc = return acc
 431         loop q n acc = do
 432            a <- peekByteOff q n
 433            loop q (n-1) (a : acc)
 434     loop (p `plusPtr` off) (len-1) []
 435
 436 {-# RULES
 437 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 438  #-}
 439
 440 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 441 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 442     let STRICT3(loop)
 443         loop _ (-1) acc = return acc
 444         loop q n    acc = do
 445            a <- peekByteOff q n
 446            loop q (n-1) (a `f` acc)
 447     loop (p `plusPtr` off) (len-1) ch
 448 {-# INLINE [0] unpackFoldr #-}
 449
 450 #endif
 451
 452 ------------------------------------------------------------------------
 453
 454 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 455 -- conversion function
 456 packWith :: (a -> Word8) -> [a] -> ByteString
 457 packWith k str = create (P.length str) $ \p -> go p str
 458     where
 459         STRICT2(go)
 460         go _ []     = return ()
 461         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 462 {-# INLINE packWith #-}
 463 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 464
 465 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 466 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 467 unpackWith _ (PS _  _ 0) = []
 468 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 469         go (p `plusPtr` s) (l - 1) []
 470     where
 471         STRICT3(go)
 472         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 473         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 474 {-# INLINE unpackWith #-}
 475 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 476
 477 -- ---------------------------------------------------------------------
 478 -- Basic interface
 479
 480 -- | /O(1)/ Test whether a ByteString is empty.
 481 null :: ByteString -> Bool
 482 null (PS _ _ l) = l == 0
 483 {-# INLINE null #-}
 484
 485 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 486 length :: ByteString -> Int
 487 length (PS _ _ l) = l
 488 {-# INLINE length #-}
 489
 490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 491 -- complexity, as it requires a memcpy.
 492 cons :: Word8 -> ByteString -> ByteString
 493 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 494         memcpy (p `plusPtr` 1) (f `plusPtr` s) l
 495         poke p c
 496 {-# INLINE cons #-}
 497
 498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 499 snoc :: ByteString -> Word8 -> ByteString
 500 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 501         memcpy p (f `plusPtr` s) l
 502         poke (p `plusPtr` l) c
 503 {-# INLINE snoc #-}
 504
 505 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 506 head :: ByteString -> Word8
 507 head ps@(PS x s _)
 508     | null ps   = errorEmptyList "head"
 509     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 510 {-# INLINE head #-}
 511
 512 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 513 tail :: ByteString -> ByteString
 514 tail (PS p s l)
 515     | l <= 0    = errorEmptyList "tail"
 516     | otherwise = PS p (s+1) (l-1)
 517 {-# INLINE tail #-}
 518
 519 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 520 last :: ByteString -> Word8
 521 last ps@(PS x s l)
 522     | null ps   = errorEmptyList "last"
 523     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 524 {-# INLINE last #-}
 525
 526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 527 init :: ByteString -> ByteString
 528 init (PS p s l)
 529     | l <= 0    = errorEmptyList "init"
 530     | otherwise = PS p s (l-1)
 531 {-# INLINE init #-}
 532
 533 -- | /O(n)/ Append two ByteStrings
 534 append :: ByteString -> ByteString -> ByteString
 535 append xs ys | null xs   = ys
 536              | null ys   = xs
 537              | otherwise = concat [xs,ys]
 538 {-# INLINE append #-}
 539
 540 {-
 541 --
 542 -- About 30% faster, but allocating in a big chunk isn't good for memory use
 543 --
 544 append :: ByteString -> ByteString -> ByteString
 545 append xs@(PS ffp s l) ys@(PS fgp t m)
 546     | null xs   = ys
 547     | null ys   = xs
 548     | otherwise = create len $ \ptr ->
 549         withForeignPtr ffp $ \fp ->
 550         withForeignPtr fgp $ \gp -> do
 551             memcpy ptr               (fp `plusPtr` s) l
 552             memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
 553         where len = length xs + length ys
 554 -}
 555
 556 -- ---------------------------------------------------------------------
 557 -- Transformations
 558
 559 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 560 -- element of @xs@
 561 --
 562 map :: (Word8 -> Word8) -> ByteString -> ByteString
 563 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
 564     new_fp <- mallocByteString len
 565     withForeignPtr new_fp $ \new_p -> do
 566         map_ f (len-1) (p `plusPtr` start) new_p
 567         return (PS new_fp 0 len)
 568 {-# INLINE map #-}
 569
 570 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 571 STRICT4(map_)
 572 map_ f n p1 p2
 573    | n < 0 = return ()
 574    | otherwise = do
 575         x <- peekByteOff p1 n
 576         pokeByteOff p2 n (f x)
 577         map_ f (n-1) p1 p2
 578 {-# INLINE map_ #-}
 579
 580 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 581 reverse :: ByteString -> ByteString
 582 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 583         c_reverse p (f `plusPtr` s) l
 584
 585 -- reverse = pack . P.reverse . unpack
 586
 587 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 588 -- 'ByteString' and \`intersperses\' that byte between the elements of
 589 -- the 'ByteString'.  It is analogous to the intersperse function on
 590 -- Lists.
 591 intersperse :: Word8 -> ByteString -> ByteString
 592 intersperse c ps@(PS x s l)
 593     | length ps < 2  = ps
 594     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 595         c_intersperse p (f `plusPtr` s) l c
 596
 597 -- intersperse c = pack . List.intersperse c . unpack
 598
 599 -- | The 'transpose' function transposes the rows and columns of its
 600 -- 'ByteString' argument.
 601 transpose :: [ByteString] -> [ByteString]
 602 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 603
 604 -- ---------------------------------------------------------------------
 605 -- Reducing 'ByteString's
 606
 607 -- | 'foldl', applied to a binary operator, a starting value (typically
 608 -- the left-identity of the operator), and a ByteString, reduces the
 609 -- ByteString using the binary operator, from left to right.
 610 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 611 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 612         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 613     where
 614         STRICT3(lgo)
 615         lgo z p q | p == q    = return z
 616                   | otherwise = do c <- peek p
 617                                    lgo (f z c) (p `plusPtr` 1) q
 618
 619 -- | 'foldr', applied to a binary operator, a starting value
 620 -- (typically the right-identity of the operator), and a ByteString,
 621 -- reduces the ByteString using the binary operator, from right to left.
 622 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 623 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 624         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 625     where
 626         STRICT2(go)
 627         go p q | p == q    = return z
 628                | otherwise = do c  <- peek p
 629                                 ws <- go (p `plusPtr` 1) q
 630                                 return $ c `k` ws
 631
 632 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 633 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 634 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 635 foldl1 f ps
 636     | null ps   = errorEmptyList "foldl1"
 637     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 638
 639 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 640 -- and thus must be applied to non-empty 'ByteString's
 641 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 642 foldr1 f ps
 643     | null ps        = errorEmptyList "foldr1"
 644     | otherwise      = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
 645
 646 -- ---------------------------------------------------------------------
 647 -- Special folds
 648
 649 -- | /O(n)/ Concatenate a list of ByteStrings.
 650 concat :: [ByteString] -> ByteString
 651 concat []     = empty
 652 concat [ps]   = ps
 653 concat xs     = inlinePerformIO $ do
 654     let start_size = 1024
 655     p <- mallocArray start_size
 656     f p 0 1024 xs
 657
 658     where f ptr len _ [] = do
 659                 ptr' <- reallocArray ptr (len+1)
 660                 poke (ptr' `plusPtr` len) (0::Word8)    -- XXX so CStrings work
 661                 fp   <- newForeignFreePtr ptr'
 662                 return $ PS fp 0 len
 663
 664           f ptr len to_go pss@(PS p s l:pss')
 665            | l <= to_go = do withForeignPtr p $ \pf ->
 666                                  memcpy (ptr `plusPtr` len)
 667                                           (pf `plusPtr` s) l
 668                              f ptr (len + l) (to_go - l) pss'
 669
 670            | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
 671                             ptr' <- reallocArray ptr new_total
 672                             f ptr' len (new_total - len) pss
 673
 674 -- | Map a function over a 'ByteString' and concatenate the results
 675 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 676 concatMap f = foldr (append . f) empty
 677
 678 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 679 -- any element of the 'ByteString' satisfies the predicate.
 680 any :: (Word8 -> Bool) -> ByteString -> Bool
 681 any _ (PS _ _ 0) = False
 682 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 683         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 684     where
 685         STRICT2(go)
 686         go p q | p == q    = return False
 687                | otherwise = do c <- peek p
 688                                 if f c then return True
 689                                        else go (p `plusPtr` 1) q
 690
 691 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 692 -- if all elements of the 'ByteString' satisfy the predicate.
 693 all :: (Word8 -> Bool) -> ByteString -> Bool
 694 all _ (PS _ _ 0) = True
 695 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 696         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 697     where
 698         STRICT2(go)
 699         go p q | p == q     = return True  -- end of list
 700                | otherwise  = do c <- peek p
 701                                  if f c
 702                                     then go (p `plusPtr` 1) q
 703                                     else return False
 704
 705 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 706 maximum :: ByteString -> Word8
 707 maximum xs@(PS x s l)
 708     | null xs   = errorEmptyList "maximum"
 709     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 710                     return $ c_maximum (p `plusPtr` s) l
 711
 712 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 713 minimum :: ByteString -> Word8
 714 minimum xs@(PS x s l)
 715     | null xs   = errorEmptyList "minimum"
 716     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 717                     return $ c_minimum (p `plusPtr` s) l
 718
 719 {-
 720 maximum xs@(PS x s l)
 721     | null xs   = errorEmptyList "maximum"
 722     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 723                         w <- peek p
 724                         maximum_ (p `plusPtr` s) 0 l w
 725 {-# INLINE maximum #-}
 726
 727 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 728 STRICT4(maximum_)
 729 maximum_ ptr n m c
 730     | n >= m    = return c
 731     | otherwise = do w <- peekByteOff ptr n
 732                      maximum_ ptr (n+1) m (if w > c then w else c)
 733
 734 minimum xs@(PS x s l)
 735     | null xs   = errorEmptyList "minimum"
 736     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 737                         w <- peek p
 738                         minimum_ (p `plusPtr` s) 0 l w
 739 {-# INLINE minimum #-}
 740
 741 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 742 STRICT4(minimum_)
 743 minimum_ ptr n m c
 744     | n >= m    = return c
 745     | otherwise = do w <- peekByteOff ptr n
 746                      minimum_ ptr (n+1) m (if w < c then w else c)
 747 -}
 748 -- | /O(n)/ map Word8 functions, provided with the index at each position
 749 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 750 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 751     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 752   where
 753     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 754     STRICT4(go)
 755     go n f t p | f == p    = return ()
 756                | otherwise = do w <- peek f
 757                                 ((poke t) . k n) w
 758                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 759
 760 -- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key.
 761 hash :: ByteString -> Int32
 762 hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 763     go (0 :: Int32) (p `plusPtr` s) l
 764   where
 765     go :: Int32 -> Ptr Word8 -> Int -> IO Int32
 766     STRICT3(go)
 767     go h _ 0 = return h
 768     go h p n = do w <- peek p
 769                   go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1)
 770
 771 -- ---------------------------------------------------------------------
 772 -- Unfolds and replicates
 773
 774 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 775 -- the value of every element. The following holds:
 776 --
 777 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 778 --
 779 -- This implemenation uses @memset(3)@
 780 replicate :: Int -> Word8 -> ByteString
 781 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 782
 783 {-
 784 -- About 5x slower
 785 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 786     where
 787         STRICT2(go)
 788         go _   0 = return w
 789         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 790 -}
 791
 792 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 793 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 794 -- the element and returns 'Nothing' if it is done producing the
 795 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 796 -- prepending to the ByteString and @b@ is used as the next element in a
 797 -- recursive call.
 798 --
 799 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 800 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 801 -- final size of the ByteString as an argument. 'cons' can then be
 802 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 803 -- linear complexity. The depth of the recursion is limited to this
 804 -- size, but may be less. For lazy, infinite unfoldr, use
 805 -- 'Data.List.unfoldr' (from 'Data.List').
 806 --
 807 -- Examples:
 808 --
 809 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 810 --
 811 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 812 --
 813 -- > unfoldrN n == take n $ List.unfoldr
 814 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 815 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 816     where
 817         STRICT3(go)
 818         go q c n | n == i    = return n      -- stop if we reach `i'
 819                  | otherwise = case f c of
 820                                    Nothing        -> return n
 821                                    Just (a,new_c) -> do
 822                                         poke q a
 823                                         go (q `plusPtr` 1) new_c (n+1)
 824
 825 -- ---------------------------------------------------------------------
 826 -- Substrings
 827
 828 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 829 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 830 take :: Int -> ByteString -> ByteString
 831 take n ps@(PS x s l)
 832     | n < 0     = empty
 833     | n >= l    = ps
 834     | otherwise = PS x s n
 835 {-# INLINE take #-}
 836
 837 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 838 -- elements, or @[]@ if @n > 'length' xs@.
 839 drop  :: Int -> ByteString -> ByteString
 840 drop n ps@(PS x s l)
 841     | n <= 0    = ps
 842     | n >  l    = empty
 843     | otherwise = PS x (s+n) (l-n)
 844 {-# INLINE drop #-}
 845
 846 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 847 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 848 splitAt  n ps  = (take n ps, drop n ps)
 849 {-# INLINE splitAt #-}
 850
 851 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 852 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 853 -- satisfy @p@.
 854 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 855 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 856 {-# INLINE takeWhile #-}
 857
 858 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 859 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 860 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 861 {-# INLINE dropWhile #-}
 862
 863 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 864 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 865 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 866 {-# INLINE break #-}
 867
 868 -- | 'breakByte' breaks its ByteString argument at the first occurence
 869 -- of the specified byte. It is more efficient than 'break' as it is
 870 -- implemented with @memchr(3)@. I.e.
 871 --
 872 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 873 --
 874 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 875 breakByte c p = case elemIndex c p of
 876     Nothing -> (p,empty)
 877     Just n  -> (take n p, drop n p)
 878 {-# INLINE breakByte #-}
 879
 880 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 881 -- occurence of @w@. It behaves like 'break', except the delimiter is
 882 -- not returned, and @Nothing@ is returned if the delimiter is not in
 883 -- the ByteString. I.e.
 884 --
 885 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 886 --
 887 -- > breakFirst c xs ==
 888 -- > let (x,y) = break (== c) xs
 889 -- > in if null y then Nothing else Just (x, drop 1 y))
 890 --
 891 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 892 breakFirst c p = case elemIndex c p of
 893    Nothing -> Nothing
 894    Just n -> Just (take n p, drop (n+1) p)
 895 {-# INLINE breakFirst #-}
 896
 897 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 898 -- ByteString.
 899 --
 900 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 901 --
 902 -- and the following are equivalent:
 903 --
 904 -- > breakLast 'c' "abcdef"
 905 -- > let (x,y) = break (=='c') (reverse "abcdef")
 906 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 907 --
 908 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 909 breakLast c p = case elemIndexLast c p of
 910     Nothing -> Nothing
 911     Just n -> Just (take n p, drop (n+1) p)
 912 {-# INLINE breakLast #-}
 913
 914 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 915 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 916 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 917 span  p ps = break (not . p) ps
 918 {-# INLINE span #-}
 919
 920 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 921 -- We have
 922 --
 923 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 924 --
 925 -- and
 926 --
 927 -- > spanEnd (not . isSpace) ps
 928 -- >    ==
 929 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 930 --
 931 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 932 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 933
 934 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 935 -- separators, where the predicate returns True for a separator element.
 936 -- The resulting components do not contain the separators.  Two adjacent
 937 -- separators result in an empty component in the output.  eg.
 938 --
 939 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 940 -- > splitWith (=='a') []        == []
 941 --
 942 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 943
 944 #if defined(__GLASGOW_HASKELL__)
 945 splitWith _pred (PS _  _   0) = []
 946 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
 947   where pred# c# = pred_ (W8# c#)
 948
 949         splitWith' pred' off' len' fp' = withPtr fp $ \p ->
 950             splitLoop pred' p 0 off' len' fp'
 951
 952         splitLoop :: (Word# -> Bool)
 953                   -> Ptr Word8
 954                   -> Int -> Int -> Int
 955                   -> ForeignPtr Word8
 956                   -> IO [ByteString]
 957
 958         splitLoop pred' p idx' off' len' fp'
 959             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
 960             | idx' >= len'  = return [PS fp' off' idx']
 961             | otherwise = do
 962                 w <- peekElemOff p (off'+idx')
 963                 if pred' (case w of W8# w# -> w#)
 964                    then return (PS fp' off' idx' :
 965                               splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
 966                    else splitLoop pred' p (idx'+1) off' len' fp'
 967 {-# INLINE splitWith #-}
 968
 969 #else
 970 splitWith _ (PS _ _ 0) = []
 971 splitWith p ps = splitWith' p ps
 972     where
 973         STRICT2(splitWith')
 974         splitWith' q qs = if null rest then [chunk]
 975                                        else chunk : splitWith' q (unsafeTail rest)
 976             where (chunk,rest) = break q qs
 977 #endif
 978
 979 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 980 -- argument, consuming the delimiter. I.e.
 981 --
 982 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 983 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
 984 -- > split 'x'  "x"          == ["",""]
 985 --
 986 -- and
 987 --
 988 -- > join [c] . split c == id
 989 -- > split == splitWith . (==)
 990 --
 991 -- As for all splitting functions in this library, this function does
 992 -- not copy the substrings, it just constructs new 'ByteStrings' that
 993 -- are slices of the original.
 994 --
 995 split :: Word8 -> ByteString -> [ByteString]
 996 split _ (PS _ _ 0) = []
 997 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 998     let ptr = p `plusPtr` s
 999
1000         STRICT1(loop)
1001         loop n = do
1002             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1003             if q == nullPtr
1004                 then return [PS x (s+n) (l-n)]
1005                 else do let i = q `minusPtr` ptr
1006                         ls <- loop (i+1)
1007                         return $! PS x (s+n) (i-n) : ls
1008     loop 0
1009 {-# INLINE split #-}
1010
1011 {-
1012 -- slower. but stays inside Haskell.
1013 split _ (PS _  _   0) = []
1014 split (W8# w#) (PS fp off len) = splitWith' off len fp
1015     where
1016         splitWith' off' len' fp' = withPtr fp $ \p ->
1017             splitLoop p 0 off' len' fp'
1018
1019         splitLoop :: Ptr Word8
1020                   -> Int -> Int -> Int
1021                   -> ForeignPtr Word8
1022                   -> IO [ByteString]
1023
1024         STRICT5(splitLoop)
1025         splitLoop p idx' off' len' fp'
1026             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1027             | idx' >= len'  = return [PS fp' off' idx']
1028             | otherwise = do
1029                 (W8# x#) <- peekElemOff p (off'+idx')
1030                 if word2Int# w# ==# word2Int# x#
1031                    then return (PS fp' off' idx' :
1032                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1033                    else splitLoop p (idx'+1) off' len' fp'
1034 -}
1035
1036 -- | Like 'splitWith', except that sequences of adjacent separators are
1037 -- treated as a single separator. eg.
1038 --
1039 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1040 --
1041 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1042 tokens f = P.filter (not.null) . splitWith f
1043
1044 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1045 -- 'ByteString's and concatenates the list after interspersing the first
1046 -- argument between each element of the list.
1047 join :: ByteString -> [ByteString] -> ByteString
1048 join filler pss = concat (splice pss)
1049     where
1050         splice []  = []
1051         splice [x] = [x]
1052         splice (x:y:xs) = x:filler:splice (y:xs)
1053
1054 --
1055 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1056 -- with a char. Around 4 times faster than the generalised join.
1057 --
1058 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1059 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1060     withForeignPtr ffp $ \fp ->
1061     withForeignPtr fgp $ \gp -> do
1062         memcpy ptr (fp `plusPtr` s) l
1063         poke (ptr `plusPtr` l) c
1064         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1065     where
1066       len = length f + length g + 1
1067 {-# INLINE joinWithByte #-}
1068
1069 -- ---------------------------------------------------------------------
1070 -- Indexing ByteStrings
1071
1072 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1073 index :: ByteString -> Int -> Word8
1074 index ps n
1075     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1076     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1077                                 ++ ", length = " ++ show (length ps)
1078     | otherwise      = ps `unsafeIndex` n
1079 {-# INLINE index #-}
1080
1081 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1082 -- element in the given 'ByteString' which is equal to the query
1083 -- element, or 'Nothing' if there is no such element.
1084 -- This implementation uses memchr(3).
1085 elemIndex :: Word8 -> ByteString -> Maybe Int
1086 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1087     let p' = p `plusPtr` s
1088         q  = memchr p' c (fromIntegral l)
1089     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1090 {-# INLINE elemIndex #-}
1091
1092 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1093 -- element in the given 'ByteString' which is equal to the query
1094 -- element, or 'Nothing' if there is no such element. The following
1095 -- holds:
1096 --
1097 -- > elemIndexLast c xs ==
1098 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1099 --
1100 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1101 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1102     go (p `plusPtr` s) (l-1)
1103   where
1104     STRICT2(go)
1105     go p i | i < 0     = return Nothing
1106            | otherwise = do ch' <- peekByteOff p i
1107                             if ch == ch'
1108                                 then return $ Just i
1109                                 else go p (i-1)
1110 {-# INLINE elemIndexLast #-}
1111
1112 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1113 -- the indices of all elements equal to the query element, in ascending order.
1114 -- This implementation uses memchr(3).
1115 elemIndices :: Word8 -> ByteString -> [Int]
1116 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1117     let ptr = p `plusPtr` s
1118
1119         STRICT1(loop)
1120         loop n = do
1121                 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1122                 if q == nullPtr
1123                     then return []
1124                     else do let i = q `minusPtr` ptr
1125                             ls <- loop (i+1)
1126                             return $! i:ls
1127     loop 0
1128
1129 {-
1130 -- much slower
1131 elemIndices :: Word8 -> ByteString -> [Int]
1132 elemIndices c ps = loop 0 ps
1133    where STRICT2(loop)
1134          loop _ ps' | null ps'            = []
1135          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1136                     | otherwise           = loop (n+1) (unsafeTail ps')
1137 -}
1138
1139 -- | count returns the number of times its argument appears in the ByteString
1140 --
1141 -- > count = length . elemIndices
1142 --
1143 -- But more efficiently than using length on the intermediate list.
1144 count :: Word8 -> ByteString -> Int
1145 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1146      go (p `plusPtr` s) (fromIntegral m) 0
1147     where
1148         go :: Ptr Word8 -> CSize -> Int -> IO Int
1149         STRICT3(go)
1150         go p l i = do
1151             let q = memchr p w l
1152             if q == nullPtr
1153                 then return i
1154                 else do let k = fromIntegral $ q `minusPtr` p
1155                         go (q `plusPtr` 1) (l-k-1) (i+1)
1156 {-# INLINE count #-}
1157
1158 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1159 -- returns the index of the first element in the ByteString
1160 -- satisfying the predicate.
1161 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1162 findIndex = (listToMaybe .) . findIndices
1163
1164 -- | The 'findIndices' function extends 'findIndex', by returning the
1165 -- indices of all elements satisfying the predicate, in ascending order.
1166 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1167 findIndices p ps = loop 0 ps
1168    where
1169      STRICT2(loop)
1170      loop _ qs | null qs           = []
1171      loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1172                | otherwise         =     loop (n+1) (unsafeTail qs)
1173
1174 -- ---------------------------------------------------------------------
1175 -- Searching ByteStrings
1176
1177 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1178 elem :: Word8 -> ByteString -> Bool
1179 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1180 {-# INLINE elem #-}
1181
1182 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1183 notElem :: Word8 -> ByteString -> Bool
1184 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1185 {-# INLINE notElem #-}
1186
1187 --
1188 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1189 -- case of filtering a single byte. It is more efficient to use
1190 -- /filterByte/ in this case.
1191 --
1192 -- > filterByte == filter . (==)
1193 --
1194 -- filterByte is around 10x faster, and uses much less space, than its
1195 -- filter equivalent
1196 filterByte :: Word8 -> ByteString -> ByteString
1197 filterByte w ps = replicate (count w ps) w
1198
1199 {-
1200 -- slower than the replicate version
1201
1202 filterByte ch ps@(PS x s l)
1203     | null ps   = ps
1204     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1205         t <- go (f `plusPtr` s) p l
1206         return (t `minusPtr` p) -- actual length
1207     where
1208         STRICT3(go)
1209         go _ t 0 = return t
1210         go f t e = do w <- peek f
1211                       if w == ch
1212                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1213                         else             go (f `plusPtr` 1) t               (e-1)
1214 -}
1215
1216 --
1217 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1218 -- case of filtering a single byte out of a list. It is more efficient
1219 -- to use /filterNotByte/ in this case.
1220 --
1221 -- > filterNotByte == filter . (/=)
1222 --
1223 -- filterNotByte is around 3x faster, and uses much less space, than its
1224 -- filter equivalent
1225 filterNotByte :: Word8 -> ByteString -> ByteString
1226 filterNotByte ch ps@(PS x s l)
1227     | null ps   = ps
1228     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1229         t <- go (f `plusPtr` s) p l
1230         return (t `minusPtr` p) -- actual length
1231     where
1232         STRICT3(go)
1233         go _ t 0 = return t
1234         go f t e = do w <- peek f
1235                       if w /= ch
1236                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1237                         else             go (f `plusPtr` 1) t               (e-1)
1238
1239 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1240 -- returns a ByteString containing those characters that satisfy the
1241 -- predicate.
1242 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1243 filter k ps@(PS x s l)
1244     | null ps   = ps
1245     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1246         t <- go (f `plusPtr` s) p l
1247         return (t `minusPtr` p) -- actual length
1248     where
1249         STRICT3(go)
1250         go _ t 0 = return t
1251         go f t e = do w <- peek f
1252                       if k w
1253                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1254                         else             go (f `plusPtr` 1) t               (e - 1)
1255
1256 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1257
1258 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1259 -- and returns the first element in matching the predicate, or 'Nothing'
1260 -- if there is no such element.
1261 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1262 find p ps = case filter p ps of
1263     q | null q -> Nothing
1264       | otherwise -> Just (unsafeHead q)
1265
1266 -- ---------------------------------------------------------------------
1267 -- Searching for substrings
1268
1269 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1270 -- iff the first is a prefix of the second.
1271 isPrefixOf :: ByteString -> ByteString -> Bool
1272 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1273     | l1 == 0   = True
1274     | l2 < l1   = False
1275     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1276         withForeignPtr x2 $ \p2 -> do
1277             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1278             return (i == 0)
1279
1280 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1281 -- iff the first is a suffix of the second.
1282 --
1283 -- The following holds:
1284 --
1285 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1286 --
1287 -- However, the real implemenation uses memcmp to compare the end of the
1288 -- string only, with no reverse required..
1289 isSuffixOf :: ByteString -> ByteString -> Bool
1290 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1291     | l1 == 0   = True
1292     | l2 < l1   = False
1293     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1294         withForeignPtr x2 $ \p2 -> do
1295             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1296             return (i == 0)
1297
1298 -- | Check whether one string is a substring of another. @isSubstringOf
1299 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1300 isSubstringOf :: ByteString -- ^ String to search for.
1301               -> ByteString -- ^ String to search in.
1302               -> Bool
1303 isSubstringOf p s = not $ P.null $ findSubstrings p s
1304
1305 -- | Get the first index of a substring in another string,
1306 --   or 'Nothing' if the string is not found.
1307 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1308 findSubstring :: ByteString -- ^ String to search for.
1309               -> ByteString -- ^ String to seach in.
1310               -> Maybe Int
1311 findSubstring = (listToMaybe .) . findSubstrings
1312
1313 -- | Find the indexes of all (possibly overlapping) occurances of a
1314 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1315 -- string matching algorithm.
1316 findSubstrings :: ByteString -- ^ String to search for.
1317                -> ByteString -- ^ String to seach in.
1318                -> [Int]
1319
1320 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1321   where
1322       patc x = pat `unsafeIndex` x
1323       strc x = str `unsafeIndex` x
1324
1325       -- maybe we should make kmpNext a UArray before using it in search?
1326       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1327       kmpNextL p _ | null p = []
1328       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1329                          ps = unsafeTail p
1330                          x = if not (null ps) && unsafeHead ps == patc j'
1331                                 then kmpNext Array.! j' else j'
1332                         in x:kmpNextL ps j'
1333       search i j = match ++ rest -- i: position in string, j: position in pattern
1334         where match = if j == m then [(i - j)] else []
1335               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1336       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1337                | otherwise = j
1338
1339 -- ---------------------------------------------------------------------
1340 -- Zipping
1341
1342 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1343 -- corresponding pairs of bytes. If one input ByteString is short,
1344 -- excess elements of the longer ByteString are discarded. This is
1345 -- equivalent to a pair of 'unpack' operations.
1346 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1347 zip ps qs
1348     | null ps || null qs = []
1349     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1350
1351 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1352 -- the first argument, instead of a tupling function.  For example,
1353 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1354 -- corresponding sums.
1355 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1356 zipWith f ps qs
1357     | null ps || null qs = []
1358     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1359
1360 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1361 -- ByteStrings. Note that this performs two 'pack' operations.
1362 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1363 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1364 {-# INLINE unzip #-}
1365
1366 -- ---------------------------------------------------------------------
1367 -- Special lists
1368
1369 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1370 inits :: ByteString -> [ByteString]
1371 inits (PS x s l) = [PS x s n | n <- [0..l]]
1372
1373 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1374 tails :: ByteString -> [ByteString]
1375 tails p | null p    = [empty]
1376         | otherwise = p : tails (unsafeTail p)
1377
1378 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1379
1380 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1381 elems :: ByteString -> [ByteString]
1382 elems (PS _ _ 0) = []
1383 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1384 {-# INLINE elems #-}
1385
1386 -- ---------------------------------------------------------------------
1387 -- ** Ordered 'ByteString's
1388
1389 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1390 sort :: ByteString -> ByteString
1391 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1392         memcpy p (f `plusPtr` s) l
1393         c_qsort p l -- inplace
1394
1395 -- sort = pack . List.sort . unpack
1396
1397 -- ---------------------------------------------------------------------
1398 --
1399 -- Extensions to the basic interface
1400 --
1401
1402 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1403 -- check for the empty case, so there is an obligation on the programmer
1404 -- to provide a proof that the ByteString is non-empty.
1405 unsafeHead :: ByteString -> Word8
1406 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1407 {-# INLINE unsafeHead #-}
1408
1409 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1410 -- check for the empty case. As with 'unsafeHead', the programmer must
1411 -- provide a separate proof that the ByteString is non-empty.
1412 unsafeTail :: ByteString -> ByteString
1413 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1414 {-# INLINE unsafeTail #-}
1415
1416 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1417 -- This omits the bounds check, which means there is an accompanying
1418 -- obligation on the programmer to ensure the bounds are checked in some
1419 -- other way.
1420 unsafeIndex :: ByteString -> Int -> Word8
1421 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1422 {-# INLINE unsafeIndex #-}
1423
1424 -- ---------------------------------------------------------------------
1425 -- Low level constructors
1426
1427 #if defined(__GLASGOW_HASKELL__)
1428 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1429 -- Addr\# (an arbitrary machine address assumed to point outside the
1430 -- garbage-collected heap) into a @ByteString@. A much faster way to
1431 -- create an Addr\# is with an unboxed string literal, than to pack a
1432 -- boxed string. A unboxed string literal is compiled to a static @char
1433 -- []@ by GHC. Establishing the length of the string requires a call to
1434 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1435 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1436 -- if you know the length of the string statically.
1437 --
1438 -- An example:
1439 --
1440 -- > literalFS = packAddress "literal"#
1441 --
1442 packAddress :: Addr# -> ByteString
1443 packAddress addr# = inlinePerformIO $ do
1444     p <- newForeignPtr_ cstr
1445     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1446   where
1447     cstr = Ptr addr#
1448 {-# INLINE packAddress #-}
1449
1450 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1451 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1452 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1453 -- 'Addr\#' to the string, and the length of the string. Make sure the
1454 -- length is correct, otherwise use the safer 'packAddress' (where the
1455 -- length will be calculated once at runtime).
1456 unsafePackAddress :: Int -> Addr# -> ByteString
1457 unsafePackAddress len addr# = inlinePerformIO $ do
1458     p <- newForeignPtr_ cstr
1459     return $ PS p 0 len
1460     where cstr = Ptr addr#
1461
1462 #endif
1463
1464 -- | /O(1)/ Build a ByteString from a ForeignPtr
1465 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1466 fromForeignPtr fp l = PS fp 0 l
1467
1468 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1469 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1470 toForeignPtr (PS ps s l) = (ps, s, l)
1471
1472 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1473 -- current 'ByteString' from any larger string it was created from, as
1474 -- an 'Int'.
1475 skipIndex :: ByteString -> Int
1476 skipIndex (PS _ s _) = s
1477 {-# INLINE skipIndex #-}
1478
1479 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1480 -- finalizer associated to it. The ByteString length is calculated using
1481 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1482 packCString :: CString -> ByteString
1483 packCString cstr = inlinePerformIO $ do
1484     fp <- newForeignPtr_ (castPtr cstr)
1485     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1486
1487 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1488 -- have /no/ finalizer associated with it. This operation has /O(1)/
1489 -- complexity as we already know the final size, so no /strlen(3)/ is
1490 -- required.
1491 packCStringLen :: CStringLen -> ByteString
1492 packCStringLen (ptr,len) = inlinePerformIO $ do
1493     fp <- newForeignPtr_ (castPtr ptr)
1494     return $ PS fp 0 (fromIntegral len)
1495
1496 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1497 -- have a @free(3)@ finalizer associated to it.
1498 packMallocCString :: CString -> ByteString
1499 packMallocCString cstr = inlinePerformIO $ do
1500     fp <- newForeignFreePtr (castPtr cstr)
1501     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1502
1503 #if defined(__GLASGOW_HASKELL__)
1504 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1505 -- length, and an IO action representing a finalizer. This function is
1506 -- not available on Hugs.
1507 --
1508 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1509 packCStringFinalizer p l f = do
1510     fp <- FC.newForeignPtr p f
1511     return $ PS fp 0 l
1512
1513 -- | Explicitly run the finaliser associated with a 'ByteString'.
1514 -- Further references to this value may generate invalid memory
1515 -- references. This operation is unsafe, as there may be other
1516 -- 'ByteStrings' referring to the same underlying pages. If you use
1517 -- this, you need to have a proof of some kind that all 'ByteString's
1518 -- ever generated from the underlying byte array are no longer live.
1519 unsafeFinalize :: ByteString -> IO ()
1520 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1521
1522 #endif
1523
1524 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1525 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1526 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1527 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1528     where
1529       alloc = withForeignPtr ps $ \p -> do
1530                 buf <- c_malloc (fromIntegral l+1)
1531                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1532                 poke (buf `plusPtr` l) (0::Word8)
1533                 return $ castPtr buf
1534
1535 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1536 -- Warning: modifying the @CString@ will affect the @ByteString@.
1537 -- Why is this function unsafe? It relies on the null byte at the end of
1538 -- the ByteString to be there. This is /not/ the case if your ByteString
1539 -- has been spliced from a larger string (i.e. with take or drop).
1540 -- Unless you can guarantee the null byte, you should use the safe
1541 -- version, which will copy the string first.
1542 --
1543 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1544 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1545
1546 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1547 --   This is mainly useful to allow the rest of the data pointed
1548 --   to by the 'ByteString' to be garbage collected, for example
1549 --   if a large string has been read in, and only a small part of it
1550 --   is needed in the rest of the program.
1551 copy :: ByteString -> ByteString
1552 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1553
1554 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1555 -- CString is going to be deallocated from C land.
1556 copyCString :: CString -> ByteString
1557 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1558
1559 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1560 copyCStringLen :: CStringLen -> ByteString
1561 copyCStringLen (cstr, len) = inlinePerformIO $ do
1562     fp <- mallocForeignPtrArray (len+1)
1563     withForeignPtr fp $ \p -> do
1564         memcpy p (castPtr cstr) len
1565         poke (p `plusPtr` len) (0 :: Word8)
1566     return $! PS fp 0 len
1567
1568 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1569 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1570 -- This is analogous to unsafeUseAsCString, and comes with the same
1571 -- safety requirements.
1572 --
1573 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1574 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1575
1576 -- | Given the maximum size needed and a function to make the contents
1577 -- of a ByteString, generate makes the 'ByteString'. The generating
1578 -- function is required to return the actual final size (<= the maximum
1579 -- size), and the resulting byte array is realloced to this size.  The
1580 -- string is padded at the end with a null byte.
1581 --
1582 -- generate is the main mechanism for creating custom, efficient
1583 -- ByteString functions, using Haskell or C functions to fill the space.
1584 --
1585 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1586 generate i f = do
1587     p <- mallocArray i
1588     i' <- f p
1589     p' <- reallocArray p (i'+1)
1590     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1591     fp <- newForeignFreePtr p'
1592     return $ PS fp 0 i'
1593
1594 -- ---------------------------------------------------------------------
1595 -- line IO
1596
1597 #if defined(__GLASGOW_HASKELL__)
1598
1599 -- | getLine, read a line from stdin.
1600 getLine :: IO ByteString
1601 getLine = hGetLine stdin
1602
1603 -- | hGetLine. read a ByteString from a handle
1604 hGetLine :: Handle -> IO ByteString
1605 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1606     case haBufferMode handle_ of
1607        NoBuffering -> error "no buffering"
1608        _other      -> hGetLineBuffered handle_
1609
1610  where
1611     hGetLineBuffered handle_ = do
1612         let ref = haBuffer handle_
1613         buf <- readIORef ref
1614         hGetLineBufferedLoop handle_ ref buf 0 []
1615
1616     hGetLineBufferedLoop handle_ ref
1617             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1618         len `seq` do
1619         off <- findEOL r w raw
1620         let new_len = len + off - r
1621         xs <- mkPS raw r off
1622
1623       -- if eol == True, then off is the offset of the '\n'
1624       -- otherwise off == w and the buffer is now empty.
1625         if off /= w
1626             then do if (w == off + 1)
1627                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1628                             else writeIORef ref buf{ bufRPtr = off + 1 }
1629                     mkBigPS new_len (xs:xss)
1630             else do
1631                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1632                                     buf{ bufWPtr=0, bufRPtr=0 }
1633                  case maybe_buf of
1634                     -- Nothing indicates we caught an EOF, and we may have a
1635                     -- partial line to return.
1636                     Nothing -> do
1637                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1638                          if new_len > 0
1639                             then mkBigPS new_len (xs:xss)
1640                             else ioe_EOF
1641                     Just new_buf ->
1642                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1643
1644     -- find the end-of-line character, if there is one
1645     findEOL r w raw
1646         | r == w = return w
1647         | otherwise =  do
1648             (c,r') <- readCharFromBuffer raw r
1649             if c == '\n'
1650                 then return r -- NB. not r': don't include the '\n'
1651                 else findEOL r' w raw
1652
1653     maybeFillReadBuffer fd is_line is_stream buf = catch
1654         (do buf' <- fillReadBuffer fd is_line is_stream buf
1655             return (Just buf'))
1656         (\e -> if isEOFError e then return Nothing else ioError e)
1657
1658 -- TODO, rewrite to use normal memcpy
1659 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1660 mkPS buf start end = do
1661     let len = end - start
1662     fp <- mallocByteString (len `quot` 8)
1663     withForeignPtr fp $ \p -> do
1664         memcpy_ptr_baoff p buf start (fromIntegral len)
1665         return (PS fp 0 len)
1666
1667 mkBigPS :: Int -> [ByteString] -> IO ByteString
1668 mkBigPS _ [ps] = return ps
1669 mkBigPS _ pss = return $! concat (P.reverse pss)
1670
1671 #endif
1672
1673 -- ---------------------------------------------------------------------
1674 -- Block IO
1675
1676 -- | Outputs a 'ByteString' to the specified 'Handle'.
1677 hPut :: Handle -> ByteString -> IO ()
1678 hPut _ (PS _ _ 0)  = return ()
1679 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1680 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1681
1682 -- | Write a ByteString to stdout
1683 putStr :: ByteString -> IO ()
1684 putStr = hPut stdout
1685
1686 -- | Write a ByteString to stdout, appending a newline byte
1687 putStrLn :: ByteString -> IO ()
1688 putStrLn ps = hPut stdout ps >> hPut stdout nl
1689     where nl = packByte 0x0a
1690
1691 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1692 -- is far more efficient than reading the characters into a 'String'
1693 -- and then using 'pack'.
1694 hGet :: Handle -> Int -> IO ByteString
1695 hGet _ 0 = return empty
1696 hGet h i = do fp <- mallocByteString i
1697               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1698               return $ PS fp 0 l
1699
1700 #if defined(__GLASGOW_HASKELL__)
1701 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1702 -- waiting for data to become available, instead it returns only whatever data
1703 -- is available.
1704 hGetNonBlocking :: Handle -> Int -> IO ByteString
1705 hGetNonBlocking _ 0 = return empty
1706 hGetNonBlocking h i = do
1707     fp <- mallocByteString i
1708     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1709     return $ PS fp 0 l
1710 #endif
1711
1712 -- | Read entire handle contents into a 'ByteString'.
1713 --
1714 -- As with 'hGet', the string representation in the file is assumed to
1715 -- be ISO-8859-1.
1716 --
1717 hGetContents :: Handle -> IO ByteString
1718 hGetContents h = do
1719     let start_size = 1024
1720     p <- mallocArray start_size
1721     i <- hGetBuf h p start_size
1722     if i < start_size
1723         then do p' <- reallocArray p i
1724                 fp <- newForeignFreePtr p'
1725                 return $ PS fp 0 i
1726         else f p start_size
1727     where
1728         f p s = do
1729         let s' = 2 * s
1730         p' <- reallocArray p s'
1731         i  <- hGetBuf h (p' `plusPtr` s) s
1732         if i < s
1733             then do let i' = s + i
1734                     p'' <- reallocArray p' i'
1735                     fp  <- newForeignFreePtr p''
1736                     return $ PS fp 0 i'
1737             else f p' s'
1738
1739 -- | getContents. Equivalent to hGetContents stdin
1740 getContents :: IO ByteString
1741 getContents = hGetContents stdin
1742
1743 -- | Read an entire file directly into a 'ByteString'.  This is far more
1744 -- efficient than reading the characters into a 'String' and then using
1745 -- 'pack'.  It also may be more efficient than opening the file and
1746 -- reading it using hGet.
1747 readFile :: FilePath -> IO ByteString
1748 readFile f = do
1749     h <- openBinaryFile f ReadMode
1750     l <- hFileSize h
1751     s <- hGet h $ fromIntegral l
1752     hClose h
1753     return s
1754
1755 -- | Write a 'ByteString' to a file.
1756 writeFile :: FilePath -> ByteString -> IO ()
1757 writeFile f ps = do
1758     h <- openBinaryFile f WriteMode
1759     hPut h ps
1760     hClose h
1761
1762 #if defined(__GLASGOW_HASKELL__)
1763 --
1764 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1765 --
1766 getArgs :: IO [ByteString]
1767 getArgs =
1768   alloca $ \ p_argc ->
1769   alloca $ \ p_argv -> do
1770     getProgArgv p_argc p_argv
1771     p    <- fromIntegral `fmap` peek p_argc
1772     argv <- peek p_argv
1773     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1774 #endif
1775
1776 -- ---------------------------------------------------------------------
1777 -- Internal utilities
1778
1779 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1780 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1781 -- convenience for ByteString construction.
1782 w2c :: Word8 -> Char
1783 #if !defined(__GLASGOW_HASKELL__)
1784 w2c = chr . fromIntegral
1785 #else
1786 w2c = unsafeChr . fromIntegral
1787 #endif
1788 {-# INLINE w2c #-}
1789
1790 c2w :: Char -> Word8
1791 c2w = fromIntegral . ord
1792 {-# INLINE c2w #-}
1793
1794 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1795 -- is padded with a null byte.
1796 mallocByteString :: Int -> IO (ForeignPtr Word8)
1797 mallocByteString l = do
1798     fp <- mallocForeignPtrArray (l+1)
1799     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1800     return fp
1801
1802 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1803 -- argument gives the final size of the ByteString. Unlike 'generate'
1804 -- the ByteString is no reallocated if the final size is less than the
1805 -- estimated size.
1806 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1807 create l write_ptr = inlinePerformIO $ do
1808     fp <- mallocByteString (l+1)
1809     withForeignPtr fp $ \p -> write_ptr p
1810     return $ PS fp 0 l
1811 {-# INLINE create #-}
1812
1813 -- | Perform an operation with a temporary ByteString
1814 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1815 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1816 {-# INLINE withPtr #-}
1817
1818 -- Common up near identical calls to `error' to reduce the number
1819 -- constant strings created when compiled:
1820 errorEmptyList :: String -> a
1821 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1822 {-# INLINE errorEmptyList #-}
1823
1824 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1825 -- of the string if no element is found, rather than Nothing.
1826 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1827 STRICT2(findIndexOrEnd)
1828 findIndexOrEnd f ps
1829     | null ps           = 0
1830     | f (unsafeHead ps) = 0
1831     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
1832 {-# INLINE findIndexOrEnd #-}
1833
1834 -- Find from the end of the string using predicate
1835 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1836 STRICT2(findFromEndUntil)
1837 findFromEndUntil f ps@(PS x s l) =
1838     if null ps then 0
1839     else if f (last ps) then l
1840          else findFromEndUntil f (PS x s (l-1))
1841
1842 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1843 -- it exposes lots of things to further inlining
1844 --
1845 {-# INLINE inlinePerformIO #-}
1846 inlinePerformIO :: IO a -> a
1847 #if defined(__GLASGOW_HASKELL__)
1848 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1849 #else
1850 inlinePerformIO = unsafePerformIO
1851 #endif
1852
1853 {-# INLINE newForeignFreePtr #-}
1854 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1855 #if defined(__GLASGOW_HASKELL__)
1856 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1857 #else
1858 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1859 #endif
1860
1861 -- ---------------------------------------------------------------------
1862 --
1863 -- Standard C functions
1864 --
1865
1866 foreign import ccall unsafe "string.h strlen" c_strlen
1867     :: CString -> CInt
1868
1869 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1870     :: CInt -> IO (Ptr Word8)
1871
1872 foreign import ccall unsafe "static stdlib.h free" c_free
1873     :: Ptr Word8 -> IO ()
1874
1875 #if !defined(__GLASGOW_HASKELL__)
1876 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1877     :: FunPtr (Ptr Word8 -> IO ())
1878 #endif
1879
1880 foreign import ccall unsafe "string.h memset" memset
1881     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1882
1883 foreign import ccall unsafe "string.h memchr" memchr
1884     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1885
1886 foreign import ccall unsafe "string.h memcmp" memcmp
1887     :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1888
1889 foreign import ccall unsafe "string.h memcpy" memcpy
1890     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1891
1892 -- ---------------------------------------------------------------------
1893 --
1894 -- Uses our C code
1895 --
1896
1897 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1898     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1899
1900 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1901     :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1902
1903 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1904     :: Ptr Word8 -> Int -> Word8
1905
1906 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1907     :: Ptr Word8 -> Int -> Word8
1908
1909 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1910     :: Ptr Word8 -> Int -> IO ()
1911
1912 -- ---------------------------------------------------------------------
1913 -- Internal GHC Haskell magic
1914
1915 #if defined(__GLASGOW_HASKELL__)
1916 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1917     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
1918
1919 foreign import ccall unsafe "__hscore_memcpy_src_off"
1920    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
1921 #endif