Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 -- License     : BSD-style
  10 --
  11 -- Maintainer  : dons@cse.unsw.edu.au
  12 -- Stability   : experimental
  13 -- Portability : portable, requires ffi and cpp
  14 -- Tested with : GHC 6.4.1 and Hugs March 2005
  15 --
  16
  17 --
  18 -- | A time and space-efficient implementation of byte vectors using
  19 -- packed Word8 arrays, suitable for high performance use, both in terms
  20 -- of large data quantities, or high speed requirements. Byte vectors
  21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
  22 -- be passed between C and Haskell with little effort.
  23 --
  24 -- This module is intended to be imported @qualified@, to avoid name
  25 -- clashes with Prelude functions.  eg.
  26 --
  27 -- > import qualified Data.ByteString as B
  28 --
  29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  30 -- UArray by Simon Marlow. Rewritten to support slices and use
  31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  32 --
  33
  34 module Data.ByteString (
  35
  36         -- * The @ByteString@ type
  37         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  38
  39         -- * Introducing and eliminating 'ByteString's
  40         empty,                  -- :: ByteString
  41         packByte,               -- :: Word8   -> ByteString
  42         pack,                   -- :: [Word8] -> ByteString
  43         unpack,                 -- :: ByteString -> [Word8]
  44         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  45         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  46
  47         -- * Basic interface
  48         cons,                   -- :: Word8 -> ByteString -> ByteString
  49         snoc,                   -- :: Word8 -> ByteString -> ByteString
  50         null,                   -- :: ByteString -> Bool
  51         length,                 -- :: ByteString -> Int
  52         head,                   -- :: ByteString -> Word8
  53         tail,                   -- :: ByteString -> ByteString
  54         last,                   -- :: ByteString -> Word8
  55         init,                   -- :: ByteString -> ByteString
  56         append,                 -- :: ByteString -> ByteString -> ByteString
  57
  58         -- * Special ByteStrings
  59         inits,                  -- :: ByteString -> [ByteString]
  60         tails,                  -- :: ByteString -> [ByteString]
  61         elems,                  -- :: ByteString -> [ByteString]
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         reverse,                -- :: ByteString -> ByteString
  66         intersperse,            -- :: Word8 -> ByteString -> ByteString
  67         transpose,              -- :: [ByteString] -> [ByteString]
  68
  69         -- * Reducing 'ByteString's
  70         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  71         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  72         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  73         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74
  75         -- ** Special folds
  76         concat,                 -- :: [ByteString] -> ByteString
  77         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  78         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  79         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  80         maximum,                -- :: ByteString -> Word8
  81         minimum,                -- :: ByteString -> Word8
  82         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  83
  84         -- * Generating and unfolding ByteStrings
  85         replicate,              -- :: Int -> Word8 -> ByteString
  86         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  87
  88         -- * Substrings
  89
  90         -- ** Breaking strings
  91         take,                   -- :: Int -> ByteString -> ByteString
  92         drop,                   -- :: Int -> ByteString -> ByteString
  93         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  94         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  95         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  96         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  97         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  98         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  99
 100         -- ** Breaking and dropping on specific bytes
 101         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 102         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 103         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 104
 105         -- ** Breaking into many substrings
 106         split,                  -- :: Word8 -> ByteString -> [ByteString]
 107         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 108         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 109
 110         -- ** Joining strings
 111         join,                   -- :: ByteString -> [ByteString] -> ByteString
 112         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 113
 114         -- * Indexing ByteStrings
 115         index,                  -- :: ByteString -> Int -> Word8
 116         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 117         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 118         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 119         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 120         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 121         count,                  -- :: Word8 -> ByteString -> Int
 122
 123         -- * Ordered ByteStrings
 124         sort,                   -- :: ByteString -> ByteString
 125
 126         -- * Searching ByteStrings
 127
 128         -- ** Searching by equality
 129         -- | These functions use memchr(3) to efficiently search the ByteString
 130
 131         elem,                   -- :: Word8 -> ByteString -> Bool
 132         notElem,                -- :: Word8 -> ByteString -> Bool
 133         filterByte,             -- :: Word8 -> ByteString -> ByteString
 134         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 135
 136         -- ** Searching with a predicate
 137         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 138         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 139
 140         -- ** Prefixes and suffixes
 141         -- | These functions use memcmp(3) to efficiently compare substrings
 142         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 143         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 144
 145         -- ** Search for arbitrary substrings
 146         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 147         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 148         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 149
 150         -- * Zipping and unzipping ByteStrings
 151         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 152         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 153         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 154
 155         -- * Unchecked access
 156         unsafeHead,             -- :: ByteString -> Word8
 157         unsafeTail,             -- :: ByteString -> ByteString
 158         unsafeIndex,            -- :: ByteString -> Int -> Word8
 159
 160         -- * Low level introduction and elimination
 161         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 162         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 163         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 164         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 165         skipIndex,              -- :: ByteString -> Int
 166
 167         -- ** Packing CStrings and pointers
 168         packCString,            -- :: CString -> ByteString
 169         packCStringLen,         -- :: CString -> ByteString
 170         packMallocCString,      -- :: CString -> ByteString
 171
 172 #if defined(__GLASGOW_HASKELL__)
 173         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 174         packAddress,            -- :: Addr# -> ByteString
 175         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 176         unsafeFinalize,         -- :: ByteString -> IO ()
 177 #endif
 178
 179         -- ** Using ByteStrings as CStrings
 180         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 181         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 182         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 183
 184         -- ** Copying ByteStrings
 185         -- | These functions perform memcpy(3) operations
 186         copy,                   -- :: ByteString -> ByteString
 187         copyCString,            -- :: CString -> ByteString
 188         copyCStringLen,         -- :: CStringLen -> ByteString
 189
 190         -- * I\/O with @ByteString@s
 191
 192         -- ** Standard input and output
 193
 194 #if defined(__GLASGOW_HASKELL__)
 195         getLine,                -- :: IO ByteString
 196 #endif
 197         getContents,            -- :: IO ByteString
 198         putStr,                 -- :: ByteString -> IO ()
 199         putStrLn,               -- :: ByteString -> IO ()
 200
 201         -- ** Files
 202         readFile,               -- :: FilePath -> IO ByteString
 203         writeFile,              -- :: FilePath -> ByteString -> IO ()
 204 --      mmapFile,               -- :: FilePath -> IO ByteString
 205
 206         -- ** I\/O with Handles
 207 #if defined(__GLASGOW_HASKELL__)
 208         getArgs,                -- :: IO [ByteString]
 209         hGetLine,               -- :: Handle -> IO ByteString
 210         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 211 #endif
 212         hGetContents,           -- :: Handle -> IO ByteString
 213         hGet,                   -- :: Handle -> Int -> IO ByteString
 214         hPut,                   -- :: Handle -> ByteString -> IO ()
 215
 216 #if defined(__GLASGOW_HASKELL__)
 217         -- * Miscellaneous
 218         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 219 #endif
 220
 221   ) where
 222
 223 import qualified Prelude as P
 224 import Prelude hiding           (reverse,head,tail,last,init,null
 225                                 ,length,map,lines,foldl,foldr,unlines
 226                                 ,concat,any,take,drop,splitAt,takeWhile
 227                                 ,dropWhile,span,break,elem,filter,maximum
 228                                 ,minimum,all,concatMap,foldl1,foldr1
 229                                 ,readFile,writeFile,replicate
 230                                 ,getContents,getLine,putStr,putStrLn
 231                                 ,zip,zipWith,unzip,notElem)
 232
 233 import qualified Data.List as List
 234
 235 import Data.Char
 236 import Data.Word                (Word8)
 237 import Data.Maybe               (listToMaybe)
 238 import Data.Array               (listArray)
 239 import qualified Data.Array as Array ((!))
 240
 241 import Control.Exception        (bracket)
 242
 243 import Foreign.C.String         (CString, CStringLen)
 244 import Foreign.C.Types          (CSize, CInt)
 245 import Foreign.ForeignPtr
 246 import Foreign.Marshal.Array
 247 import Foreign.Ptr
 248 import Foreign.Storable         (Storable(..))
 249
 250 import System.IO                (stdin,stdout,hClose,hFileSize
 251                                 ,hGetBuf,hPutBuf,openBinaryFile
 252                                 ,Handle,IOMode(..))
 253
 254 #if !defined(__GLASGOW_HASKELL__)
 255 import System.IO.Unsafe
 256 #endif
 257
 258 #if defined(__GLASGOW_HASKELL__)
 259
 260 import Data.Generics            (Data(..), Typeable(..))
 261
 262 import System.IO                (hGetBufNonBlocking)
 263 import System.IO.Error          (isEOFError)
 264
 265 import Foreign.Marshal          (alloca)
 266 import qualified Foreign.Concurrent as FC (newForeignPtr)
 267
 268 import GHC.Handle
 269 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 270 import GHC.Base                 (build, unsafeChr)
 271 import GHC.Word hiding (Word8)
 272 import GHC.Ptr                  (Ptr(..))
 273 import GHC.ST                   (ST(..))
 274 import GHC.IOBase
 275
 276 #endif
 277
 278 -- CFILES stuff is Hugs only
 279 {-# CFILES cbits/fpstring.c #-}
 280
 281 -- -----------------------------------------------------------------------------
 282 --
 283 -- Useful macros, until we have bang patterns
 284 --
 285
 286 #define STRICT1(f) f a | a `seq` False = undefined
 287 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 288 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 289 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 290 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 291
 292 -- -----------------------------------------------------------------------------
 293
 294 -- | A space-efficient representation of a Word8 vector, supporting many
 295 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 296 --
 297 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 298 --
 299 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 300                      {-# UNPACK #-} !Int
 301                      {-# UNPACK #-} !Int
 302
 303 #if defined(__GLASGOW_HASKELL__)
 304     deriving (Data, Typeable)
 305 #endif
 306
 307 instance Eq  ByteString
 308     where (==)    = eq
 309
 310 instance Ord ByteString
 311     where compare = compareBytes
 312
 313 instance Show ByteString where
 314     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 315
 316 instance Read ByteString where
 317     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 318
 319 {-
 320 instance Arbitrary PackedString where
 321     arbitrary = P.pack `fmap` arbitrary
 322     coarbitrary s = coarbitrary (P.unpack s)
 323 -}
 324
 325 -- | /O(n)/ Equality on the 'ByteString' type.
 326 eq :: ByteString -> ByteString -> Bool
 327 eq a b = (compareBytes a b) == EQ
 328 {-# INLINE eq #-}
 329
 330 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 331 compareBytes :: ByteString -> ByteString -> Ordering
 332 compareBytes (PS _ _ 0) (PS _ _ 0)       = EQ    -- short cut for empty strings
 333 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
 334     withForeignPtr x1 $ \p1 ->
 335     withForeignPtr x2 $ \p2 -> do
 336         i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
 337         return $ case i `compare` 0 of
 338                     EQ  -> l1 `compare` l2
 339                     x   -> x
 340 {-# INLINE compareBytes #-}
 341
 342 {-
 343 --
 344 -- About 4x slower over 32M
 345 --
 346 compareBytes :: ByteString -> ByteString -> Ordering
 347 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 348     withForeignPtr fp1 $ \p1 ->
 349         withForeignPtr fp2 $ \p2 ->
 350             cmp (p1 `plusPtr` off1)
 351                 (p2 `plusPtr` off2) 0 len1 len2
 352
 353 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 354 STRICT5(cmp)
 355 cmp p1 p2 n len1 len2
 356       | n == len1 = if n == len2 then return EQ else return LT
 357       | n == len2 = return GT
 358       | otherwise = do
 359           (a :: Word8) <- peekByteOff p1 n
 360           (b :: Word8) <- peekByteOff p2 n
 361           case a `compare` b of
 362                 EQ -> cmp p1 p2 (n+1) len1 len2
 363                 LT -> return LT
 364                 GT -> return GT
 365 {-# INLINE compareBytes #-}
 366 -}
 367
 368 -- -----------------------------------------------------------------------------
 369 -- Introducing and eliminating 'ByteString's
 370
 371 -- | /O(1)/ The empty 'ByteString'
 372 empty :: ByteString
 373 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 374 {-# NOINLINE empty #-}
 375
 376 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 377 packByte :: Word8 -> ByteString
 378 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
 379     withForeignPtr fp $ \p -> poke p c
 380     return $ PS fp 0 1
 381 {-# NOINLINE packByte #-}
 382
 383 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 384 --
 385 -- For applications with large numbers of string literals, pack can be a
 386 -- bottleneck. In such cases, consider using packAddress (GHC only).
 387 pack :: [Word8] -> ByteString
 388
 389 #if !defined(__GLASGOW_HASKELL__)
 390
 391 pack str = create (P.length str) $ \p -> go p str
 392     where
 393         go _ []     = return ()
 394         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 395
 396 #else /* hack away */
 397
 398 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 399     where
 400         go _ _ []        = return ()
 401         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 402
 403         writeByte p i c = ST $ \s# ->
 404             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 405
 406 #endif
 407
 408 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 409 unpack :: ByteString -> [Word8]
 410
 411 #if !defined(__GLASGOW_HASKELL__)
 412
 413 unpack (PS _  _ 0) = []
 414 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 415         go (p `plusPtr` s) (l - 1) []
 416     where
 417         STRICT3(go)
 418         go p 0 acc = peek p          >>= \e -> return (e : acc)
 419         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 420 {-# INLINE unpack #-}
 421
 422 #else
 423
 424 unpack ps = build (unpackFoldr ps)
 425 {-# INLINE unpack #-}
 426
 427 unpackList :: ByteString -> [Word8]
 428 unpackList (PS fp off len) = withPtr fp $ \p -> do
 429     let STRICT3(loop)
 430         loop _ (-1) acc = return acc
 431         loop q n acc = do
 432            a <- peekByteOff q n
 433            loop q (n-1) (a : acc)
 434     loop (p `plusPtr` off) (len-1) []
 435
 436 {-# RULES
 437 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 438  #-}
 439
 440 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 441 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 442     let STRICT3(loop)
 443         loop _ (-1) acc = return acc
 444         loop q n    acc = do
 445            a <- peekByteOff q n
 446            loop q (n-1) (a `f` acc)
 447     loop (p `plusPtr` off) (len-1) ch
 448 {-# INLINE [0] unpackFoldr #-}
 449
 450 #endif
 451
 452 ------------------------------------------------------------------------
 453
 454 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 455 -- conversion function
 456 packWith :: (a -> Word8) -> [a] -> ByteString
 457 packWith k str = create (P.length str) $ \p -> go p str
 458     where
 459         STRICT2(go)
 460         go _ []     = return ()
 461         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 462 {-# INLINE packWith #-}
 463 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 464
 465 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 466 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 467 unpackWith _ (PS _  _ 0) = []
 468 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 469         go (p `plusPtr` s) (l - 1) []
 470     where
 471         STRICT3(go)
 472         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 473         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 474 {-# INLINE unpackWith #-}
 475 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 476
 477 -- ---------------------------------------------------------------------
 478 -- Basic interface
 479
 480 -- | /O(1)/ Test whether a ByteString is empty.
 481 null :: ByteString -> Bool
 482 null (PS _ _ l) = l == 0
 483 {-# INLINE null #-}
 484
 485 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 486 length :: ByteString -> Int
 487 length (PS _ _ l) = l
 488 {-# INLINE length #-}
 489
 490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 491 -- complexity, as it requires a memcpy.
 492 cons :: Word8 -> ByteString -> ByteString
 493 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 494         memcpy (p `plusPtr` 1) (f `plusPtr` s) l
 495         poke p c
 496 {-# INLINE cons #-}
 497
 498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 499 snoc :: ByteString -> Word8 -> ByteString
 500 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 501         memcpy p (f `plusPtr` s) l
 502         poke (p `plusPtr` l) c
 503 {-# INLINE snoc #-}
 504
 505 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 506 head :: ByteString -> Word8
 507 head ps@(PS x s _)
 508     | null ps   = errorEmptyList "head"
 509     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 510 {-# INLINE head #-}
 511
 512 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 513 tail :: ByteString -> ByteString
 514 tail (PS p s l)
 515     | l <= 0    = errorEmptyList "tail"
 516     | otherwise = PS p (s+1) (l-1)
 517 {-# INLINE tail #-}
 518
 519 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 520 last :: ByteString -> Word8
 521 last ps@(PS x s l)
 522     | null ps   = errorEmptyList "last"
 523     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 524 {-# INLINE last #-}
 525
 526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 527 init :: ByteString -> ByteString
 528 init (PS p s l)
 529     | l <= 0    = errorEmptyList "init"
 530     | otherwise = PS p s (l-1)
 531 {-# INLINE init #-}
 532
 533 -- | /O(n)/ Append two ByteStrings
 534 append :: ByteString -> ByteString -> ByteString
 535 append xs ys | null xs   = ys
 536              | null ys   = xs
 537              | otherwise = concat [xs,ys]
 538 {-# INLINE append #-}
 539
 540 {-
 541 --
 542 -- About 30% faster, but allocating in a big chunk isn't good for memory use
 543 --
 544 append :: ByteString -> ByteString -> ByteString
 545 append xs@(PS ffp s l) ys@(PS fgp t m)
 546     | null xs   = ys
 547     | null ys   = xs
 548     | otherwise = create len $ \ptr ->
 549         withForeignPtr ffp $ \fp ->
 550         withForeignPtr fgp $ \gp -> do
 551             memcpy ptr               (fp `plusPtr` s) l
 552             memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
 553         where len = length xs + length ys
 554 -}
 555
 556 -- ---------------------------------------------------------------------
 557 -- Transformations
 558
 559 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 560 -- element of @xs@
 561 --
 562 map :: (Word8 -> Word8) -> ByteString -> ByteString
 563 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
 564     new_fp <- mallocByteString len
 565     withForeignPtr new_fp $ \new_p -> do
 566         map_ f (len-1) (p `plusPtr` start) new_p
 567         return (PS new_fp 0 len)
 568 {-# INLINE map #-}
 569
 570 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 571 STRICT4(map_)
 572 map_ f n p1 p2
 573    | n < 0 = return ()
 574    | otherwise = do
 575         x <- peekByteOff p1 n
 576         pokeByteOff p2 n (f x)
 577         map_ f (n-1) p1 p2
 578 {-# INLINE map_ #-}
 579
 580 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 581 reverse :: ByteString -> ByteString
 582 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 583         c_reverse p (f `plusPtr` s) l
 584
 585 {-
 586 reverse = pack . P.reverse . unpack
 587 -}
 588
 589 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 590 -- 'ByteString' and \`intersperses\' that byte between the elements of
 591 -- the 'ByteString'.  It is analogous to the intersperse function on
 592 -- Lists.
 593 intersperse :: Word8 -> ByteString -> ByteString
 594 intersperse c ps@(PS x s l)
 595     | length ps < 2  = ps
 596     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 597         c_intersperse p (f `plusPtr` s) l c
 598
 599 {-
 600 intersperse c = pack . List.intersperse c . unpack
 601 -}
 602
 603 -- | The 'transpose' function transposes the rows and columns of its
 604 -- 'ByteString' argument.
 605 transpose :: [ByteString] -> [ByteString]
 606 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 607
 608 -- ---------------------------------------------------------------------
 609 -- Reducing 'ByteString's
 610
 611 -- | 'foldl', applied to a binary operator, a starting value (typically
 612 -- the left-identity of the operator), and a ByteString, reduces the
 613 -- ByteString using the binary operator, from left to right.
 614 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 615 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 616         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 617     where
 618         STRICT3(lgo)
 619         lgo z p q | p == q    = return z
 620                   | otherwise = do c <- peek p
 621                                    lgo (f z c) (p `plusPtr` 1) q
 622
 623 -- | 'foldr', applied to a binary operator, a starting value
 624 -- (typically the right-identity of the operator), and a ByteString,
 625 -- reduces the ByteString using the binary operator, from right to left.
 626 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 627 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 628         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 629     where
 630         STRICT2(go)
 631         go p q | p == q    = return z
 632                | otherwise = do c  <- peek p
 633                                 ws <- go (p `plusPtr` 1) q
 634                                 return $ c `k` ws
 635
 636 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 637 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 638 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 639 foldl1 f ps
 640     | null ps   = errorEmptyList "foldl1"
 641     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 642
 643 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 644 -- and thus must be applied to non-empty 'ByteString's
 645 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 646 foldr1 f ps
 647     | null ps        = errorEmptyList "foldr1"
 648     | otherwise      = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
 649
 650 -- ---------------------------------------------------------------------
 651 -- Special folds
 652
 653 -- | /O(n)/ Concatenate a list of ByteStrings.
 654 concat :: [ByteString] -> ByteString
 655 concat []     = empty
 656 concat [ps]   = ps
 657 concat xs     = inlinePerformIO $ do
 658     let start_size = 1024
 659     p <- mallocArray start_size
 660     f p 0 1024 xs
 661
 662     where f ptr len _ [] = do
 663                 ptr' <- reallocArray ptr (len+1)
 664                 poke (ptr' `plusPtr` len) (0::Word8)    -- XXX so CStrings work
 665                 fp   <- newForeignFreePtr ptr'
 666                 return $ PS fp 0 len
 667
 668           f ptr len to_go pss@(PS p s l:pss')
 669            | l <= to_go = do withForeignPtr p $ \pf ->
 670                                  memcpy (ptr `plusPtr` len)
 671                                           (pf `plusPtr` s) l
 672                              f ptr (len + l) (to_go - l) pss'
 673
 674            | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
 675                             ptr' <- reallocArray ptr new_total
 676                             f ptr' len (new_total - len) pss
 677
 678 -- | Map a function over a 'ByteString' and concatenate the results
 679 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 680 concatMap f = foldr (append . f) empty
 681
 682 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 683 -- any element of the 'ByteString' satisfies the predicate.
 684 any :: (Word8 -> Bool) -> ByteString -> Bool
 685 any _ (PS _ _ 0) = False
 686 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 687         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 688     where
 689         STRICT2(go)
 690         go p q | p == q    = return False
 691                | otherwise = do c <- peek p
 692                                 if f c then return True
 693                                        else go (p `plusPtr` 1) q
 694
 695 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 696 -- if all elements of the 'ByteString' satisfy the predicate.
 697 all :: (Word8 -> Bool) -> ByteString -> Bool
 698 all _ (PS _ _ 0) = True
 699 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 700         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 701     where
 702         STRICT2(go)
 703         go p q | p == q     = return True  -- end of list
 704                | otherwise  = do c <- peek p
 705                                  if f c
 706                                     then go (p `plusPtr` 1) q
 707                                     else return False
 708
 709 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 710 maximum :: ByteString -> Word8
 711 maximum xs@(PS x s l)
 712     | null xs   = errorEmptyList "maximum"
 713     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 714                     return $ c_maximum (p `plusPtr` s) l
 715 {-# INLINE maximum #-}
 716
 717 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 718 minimum :: ByteString -> Word8
 719 minimum xs@(PS x s l)
 720     | null xs   = errorEmptyList "minimum"
 721     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 722                     return $ c_minimum (p `plusPtr` s) l
 723 {-# INLINE minimum #-}
 724
 725 {-
 726 maximum xs@(PS x s l)
 727     | null xs   = errorEmptyList "maximum"
 728     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 729                         w <- peek p
 730                         maximum_ (p `plusPtr` s) 0 l w
 731
 732 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 733 STRICT4(maximum_)
 734 maximum_ ptr n m c
 735     | n >= m    = return c
 736     | otherwise = do w <- peekByteOff ptr n
 737                      maximum_ ptr (n+1) m (if w > c then w else c)
 738
 739 minimum xs@(PS x s l)
 740     | null xs   = errorEmptyList "minimum"
 741     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 742                         w <- peek p
 743                         minimum_ (p `plusPtr` s) 0 l w
 744
 745 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 746 STRICT4(minimum_)
 747 minimum_ ptr n m c
 748     | n >= m    = return c
 749     | otherwise = do w <- peekByteOff ptr n
 750                      minimum_ ptr (n+1) m (if w < c then w else c)
 751 -}
 752
 753 -- | /O(n)/ map Word8 functions, provided with the index at each position
 754 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 755 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 756     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 757   where
 758     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 759     STRICT4(go)
 760     go n f t p | f == p    = return ()
 761                | otherwise = do w <- peek f
 762                                 ((poke t) . k n) w
 763                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 764
 765 -- ---------------------------------------------------------------------
 766 -- Unfolds and replicates
 767
 768 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 769 -- the value of every element. The following holds:
 770 --
 771 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 772 --
 773 -- This implemenation uses @memset(3)@
 774 replicate :: Int -> Word8 -> ByteString
 775 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 776
 777 {-
 778 -- About 5x slower
 779 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 780     where
 781         STRICT2(go)
 782         go _   0 = return w
 783         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 784 -}
 785
 786 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 787 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 788 -- the element and returns 'Nothing' if it is done producing the
 789 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 790 -- prepending to the ByteString and @b@ is used as the next element in a
 791 -- recursive call.
 792 --
 793 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 794 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 795 -- final size of the ByteString as an argument. 'cons' can then be
 796 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 797 -- linear complexity. The depth of the recursion is limited to this
 798 -- size, but may be less. For lazy, infinite unfoldr, use
 799 -- 'Data.List.unfoldr' (from 'Data.List').
 800 --
 801 -- Examples:
 802 --
 803 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 804 --
 805 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 806 --
 807 -- > unfoldrN n == take n $ List.unfoldr
 808 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 809 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 810     where
 811         STRICT3(go)
 812         go q c n | n == i    = return n      -- stop if we reach `i'
 813                  | otherwise = case f c of
 814                                    Nothing        -> return n
 815                                    Just (a,new_c) -> do
 816                                         poke q a
 817                                         go (q `plusPtr` 1) new_c (n+1)
 818
 819 -- ---------------------------------------------------------------------
 820 -- Substrings
 821
 822 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 823 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 824 take :: Int -> ByteString -> ByteString
 825 take n ps@(PS x s l)
 826     | n < 0     = empty
 827     | n >= l    = ps
 828     | otherwise = PS x s n
 829 {-# INLINE take #-}
 830
 831 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 832 -- elements, or @[]@ if @n > 'length' xs@.
 833 drop  :: Int -> ByteString -> ByteString
 834 drop n ps@(PS x s l)
 835     | n <= 0    = ps
 836     | n >  l    = empty
 837     | otherwise = PS x (s+n) (l-n)
 838 {-# INLINE drop #-}
 839
 840 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 841 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 842 splitAt  n ps  = (take n ps, drop n ps)
 843 {-# INLINE splitAt #-}
 844
 845 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 846 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 847 -- satisfy @p@.
 848 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 849 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 850 {-# INLINE takeWhile #-}
 851
 852 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 853 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 854 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 855 {-# INLINE dropWhile #-}
 856
 857 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 858 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 859 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 860 {-# INLINE break #-}
 861
 862 -- | 'breakByte' breaks its ByteString argument at the first occurence
 863 -- of the specified byte. It is more efficient than 'break' as it is
 864 -- implemented with @memchr(3)@. I.e.
 865 --
 866 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 867 --
 868 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 869 breakByte c p = case elemIndex c p of
 870     Nothing -> (p,empty)
 871     Just n  -> (take n p, drop n p)
 872 {-# INLINE breakByte #-}
 873
 874 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 875 -- occurence of @w@. It behaves like 'break', except the delimiter is
 876 -- not returned, and @Nothing@ is returned if the delimiter is not in
 877 -- the ByteString. I.e.
 878 --
 879 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 880 --
 881 -- > breakFirst c xs ==
 882 -- > let (x,y) = break (== c) xs
 883 -- > in if null y then Nothing else Just (x, drop 1 y))
 884 --
 885 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 886 breakFirst c p = case elemIndex c p of
 887    Nothing -> Nothing
 888    Just n -> Just (take n p, drop (n+1) p)
 889 {-# INLINE breakFirst #-}
 890
 891 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 892 -- ByteString.
 893 --
 894 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 895 --
 896 -- and the following are equivalent:
 897 --
 898 -- > breakLast 'c' "abcdef"
 899 -- > let (x,y) = break (=='c') (reverse "abcdef")
 900 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 901 --
 902 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 903 breakLast c p = case elemIndexLast c p of
 904     Nothing -> Nothing
 905     Just n -> Just (take n p, drop (n+1) p)
 906 {-# INLINE breakLast #-}
 907
 908 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 909 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 910 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 911 span  p ps = break (not . p) ps
 912 {-# INLINE span #-}
 913
 914 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 915 -- We have
 916 --
 917 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 918 --
 919 -- and
 920 --
 921 -- > spanEnd (not . isSpace) ps
 922 -- >    ==
 923 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 924 --
 925 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 926 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 927
 928 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 929 -- separators, where the predicate returns True for a separator element.
 930 -- The resulting components do not contain the separators.  Two adjacent
 931 -- separators result in an empty component in the output.  eg.
 932 --
 933 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 934 -- > splitWith (=='a') []        == []
 935 --
 936 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 937
 938 #if defined(__GLASGOW_HASKELL__)
 939 splitWith _pred (PS _  _   0) = []
 940 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
 941   where pred# c# = pred_ (W8# c#)
 942
 943         splitWith' pred' off' len' fp' = withPtr fp $ \p ->
 944             splitLoop pred' p 0 off' len' fp'
 945
 946         splitLoop :: (Word# -> Bool)
 947                   -> Ptr Word8
 948                   -> Int -> Int -> Int
 949                   -> ForeignPtr Word8
 950                   -> IO [ByteString]
 951
 952         splitLoop pred' p idx' off' len' fp'
 953             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
 954             | idx' >= len'  = return [PS fp' off' idx']
 955             | otherwise = do
 956                 w <- peekElemOff p (off'+idx')
 957                 if pred' (case w of W8# w# -> w#)
 958                    then return (PS fp' off' idx' :
 959                               splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
 960                    else splitLoop pred' p (idx'+1) off' len' fp'
 961 {-# INLINE splitWith #-}
 962
 963 #else
 964 splitWith _ (PS _ _ 0) = []
 965 splitWith p ps = splitWith' p ps
 966     where
 967         STRICT2(splitWith')
 968         splitWith' q qs = if null rest then [chunk]
 969                                        else chunk : splitWith' q (unsafeTail rest)
 970             where (chunk,rest) = break q qs
 971 #endif
 972
 973 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 974 -- argument, consuming the delimiter. I.e.
 975 --
 976 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 977 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
 978 -- > split 'x'  "x"          == ["",""]
 979 --
 980 -- and
 981 --
 982 -- > join [c] . split c == id
 983 -- > split == splitWith . (==)
 984 --
 985 -- As for all splitting functions in this library, this function does
 986 -- not copy the substrings, it just constructs new 'ByteStrings' that
 987 -- are slices of the original.
 988 --
 989 split :: Word8 -> ByteString -> [ByteString]
 990 split _ (PS _ _ 0) = []
 991 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 992     let ptr = p `plusPtr` s
 993
 994         STRICT1(loop)
 995         loop n = do
 996             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
 997             if q == nullPtr
 998                 then return [PS x (s+n) (l-n)]
 999                 else do let i = q `minusPtr` ptr
1000                         ls <- loop (i+1)
1001                         return $! PS x (s+n) (i-n) : ls
1002     loop 0
1003 {-# INLINE split #-}
1004
1005 {-
1006 -- slower. but stays inside Haskell.
1007 split _ (PS _  _   0) = []
1008 split (W8# w#) (PS fp off len) = splitWith' off len fp
1009     where
1010         splitWith' off' len' fp' = withPtr fp $ \p ->
1011             splitLoop p 0 off' len' fp'
1012
1013         splitLoop :: Ptr Word8
1014                   -> Int -> Int -> Int
1015                   -> ForeignPtr Word8
1016                   -> IO [ByteString]
1017
1018         STRICT5(splitLoop)
1019         splitLoop p idx' off' len' fp'
1020             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1021             | idx' >= len'  = return [PS fp' off' idx']
1022             | otherwise = do
1023                 (W8# x#) <- peekElemOff p (off'+idx')
1024                 if word2Int# w# ==# word2Int# x#
1025                    then return (PS fp' off' idx' :
1026                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1027                    else splitLoop p (idx'+1) off' len' fp'
1028 -}
1029
1030 -- | Like 'splitWith', except that sequences of adjacent separators are
1031 -- treated as a single separator. eg.
1032 --
1033 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1034 --
1035 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1036 tokens f = P.filter (not.null) . splitWith f
1037
1038 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1039 -- 'ByteString's and concatenates the list after interspersing the first
1040 -- argument between each element of the list.
1041 join :: ByteString -> [ByteString] -> ByteString
1042 join filler pss = concat (splice pss)
1043     where
1044         splice []  = []
1045         splice [x] = [x]
1046         splice (x:y:xs) = x:filler:splice (y:xs)
1047
1048 --
1049 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1050 -- with a char. Around 4 times faster than the generalised join.
1051 --
1052 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1053 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1054     withForeignPtr ffp $ \fp ->
1055     withForeignPtr fgp $ \gp -> do
1056         memcpy ptr (fp `plusPtr` s) l
1057         poke (ptr `plusPtr` l) c
1058         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1059     where
1060       len = length f + length g + 1
1061 {-# INLINE joinWithByte #-}
1062
1063 -- ---------------------------------------------------------------------
1064 -- Indexing ByteStrings
1065
1066 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1067 index :: ByteString -> Int -> Word8
1068 index ps n
1069     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1070     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1071                                 ++ ", length = " ++ show (length ps)
1072     | otherwise      = ps `unsafeIndex` n
1073 {-# INLINE index #-}
1074
1075 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1076 -- element in the given 'ByteString' which is equal to the query
1077 -- element, or 'Nothing' if there is no such element.
1078 -- This implementation uses memchr(3).
1079 elemIndex :: Word8 -> ByteString -> Maybe Int
1080 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1081     let p' = p `plusPtr` s
1082         q  = memchr p' c (fromIntegral l)
1083     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1084 {-# INLINE elemIndex #-}
1085
1086 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1087 -- element in the given 'ByteString' which is equal to the query
1088 -- element, or 'Nothing' if there is no such element. The following
1089 -- holds:
1090 --
1091 -- > elemIndexLast c xs ==
1092 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1093 --
1094 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1095 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1096     go (p `plusPtr` s) (l-1)
1097   where
1098     STRICT2(go)
1099     go p i | i < 0     = return Nothing
1100            | otherwise = do ch' <- peekByteOff p i
1101                             if ch == ch'
1102                                 then return $ Just i
1103                                 else go p (i-1)
1104 {-# INLINE elemIndexLast #-}
1105
1106 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1107 -- the indices of all elements equal to the query element, in ascending order.
1108 -- This implementation uses memchr(3).
1109 elemIndices :: Word8 -> ByteString -> [Int]
1110 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1111     let ptr = p `plusPtr` s
1112
1113         STRICT1(loop)
1114         loop n = do
1115                 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1116                 if q == nullPtr
1117                     then return []
1118                     else do let i = q `minusPtr` ptr
1119                             ls <- loop (i+1)
1120                             return $! i:ls
1121     loop 0
1122
1123 {-
1124 -- much slower
1125 elemIndices :: Word8 -> ByteString -> [Int]
1126 elemIndices c ps = loop 0 ps
1127    where STRICT2(loop)
1128          loop _ ps' | null ps'            = []
1129          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1130                     | otherwise           = loop (n+1) (unsafeTail ps')
1131 -}
1132
1133 -- | count returns the number of times its argument appears in the ByteString
1134 --
1135 -- > count = length . elemIndices
1136 --
1137 -- But more efficiently than using length on the intermediate list.
1138 count :: Word8 -> ByteString -> Int
1139 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1140     return $ c_count (p `plusPtr` s) (fromIntegral m) w
1141 {-# INLINE count #-}
1142
1143 {-
1144 --
1145 -- around 30% slower
1146 --
1147 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1148      go (p `plusPtr` s) (fromIntegral m) 0
1149     where
1150         go :: Ptr Word8 -> CSize -> Int -> IO Int
1151         STRICT3(go)
1152         go p l i = do
1153             let q = memchr p w l
1154             if q == nullPtr
1155                 then return i
1156                 else do let k = fromIntegral $ q `minusPtr` p
1157                         go (q `plusPtr` 1) (l-k-1) (i+1)
1158 -}
1159
1160 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1161 -- returns the index of the first element in the ByteString
1162 -- satisfying the predicate.
1163 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1164 findIndex = (listToMaybe .) . findIndices
1165
1166 -- | The 'findIndices' function extends 'findIndex', by returning the
1167 -- indices of all elements satisfying the predicate, in ascending order.
1168 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1169 findIndices p ps = loop 0 ps
1170    where
1171      STRICT2(loop)
1172      loop _ qs | null qs           = []
1173      loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1174                | otherwise         =     loop (n+1) (unsafeTail qs)
1175
1176 -- ---------------------------------------------------------------------
1177 -- Searching ByteStrings
1178
1179 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1180 elem :: Word8 -> ByteString -> Bool
1181 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1182 {-# INLINE elem #-}
1183
1184 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1185 notElem :: Word8 -> ByteString -> Bool
1186 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1187 {-# INLINE notElem #-}
1188
1189 --
1190 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1191 -- case of filtering a single byte. It is more efficient to use
1192 -- /filterByte/ in this case.
1193 --
1194 -- > filterByte == filter . (==)
1195 --
1196 -- filterByte is around 10x faster, and uses much less space, than its
1197 -- filter equivalent
1198 filterByte :: Word8 -> ByteString -> ByteString
1199 filterByte w ps = replicate (count w ps) w
1200
1201 {-
1202 -- slower than the replicate version
1203
1204 filterByte ch ps@(PS x s l)
1205     | null ps   = ps
1206     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1207         t <- go (f `plusPtr` s) p l
1208         return (t `minusPtr` p) -- actual length
1209     where
1210         STRICT3(go)
1211         go _ t 0 = return t
1212         go f t e = do w <- peek f
1213                       if w == ch
1214                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1215                         else             go (f `plusPtr` 1) t               (e-1)
1216 -}
1217
1218 --
1219 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1220 -- case of filtering a single byte out of a list. It is more efficient
1221 -- to use /filterNotByte/ in this case.
1222 --
1223 -- > filterNotByte == filter . (/=)
1224 --
1225 -- filterNotByte is around 3x faster, and uses much less space, than its
1226 -- filter equivalent
1227 filterNotByte :: Word8 -> ByteString -> ByteString
1228 filterNotByte ch ps@(PS x s l)
1229     | null ps   = ps
1230     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1231         t <- go (f `plusPtr` s) p l
1232         return (t `minusPtr` p) -- actual length
1233     where
1234         STRICT3(go)
1235         go _ t 0 = return t
1236         go f t e = do w <- peek f
1237                       if w /= ch
1238                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1239                         else             go (f `plusPtr` 1) t               (e-1)
1240
1241 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1242 -- returns a ByteString containing those characters that satisfy the
1243 -- predicate.
1244 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1245 filter k ps@(PS x s l)
1246     | null ps   = ps
1247     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1248         t <- go (f `plusPtr` s) p l
1249         return (t `minusPtr` p) -- actual length
1250     where
1251         STRICT3(go)
1252         go _ t 0 = return t
1253         go f t e = do w <- peek f
1254                       if k w
1255                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1256                         else             go (f `plusPtr` 1) t               (e - 1)
1257
1258 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1259
1260 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1261 -- and returns the first element in matching the predicate, or 'Nothing'
1262 -- if there is no such element.
1263 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1264 find p ps = case filter p ps of
1265     q | null q -> Nothing
1266       | otherwise -> Just (unsafeHead q)
1267
1268 -- ---------------------------------------------------------------------
1269 -- Searching for substrings
1270
1271 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1272 -- iff the first is a prefix of the second.
1273 isPrefixOf :: ByteString -> ByteString -> Bool
1274 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1275     | l1 == 0   = True
1276     | l2 < l1   = False
1277     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1278         withForeignPtr x2 $ \p2 -> do
1279             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1280             return (i == 0)
1281
1282 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1283 -- iff the first is a suffix of the second.
1284 --
1285 -- The following holds:
1286 --
1287 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1288 --
1289 -- However, the real implemenation uses memcmp to compare the end of the
1290 -- string only, with no reverse required..
1291 isSuffixOf :: ByteString -> ByteString -> Bool
1292 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1293     | l1 == 0   = True
1294     | l2 < l1   = False
1295     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1296         withForeignPtr x2 $ \p2 -> do
1297             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1298             return (i == 0)
1299
1300 -- | Check whether one string is a substring of another. @isSubstringOf
1301 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1302 isSubstringOf :: ByteString -- ^ String to search for.
1303               -> ByteString -- ^ String to search in.
1304               -> Bool
1305 isSubstringOf p s = not $ P.null $ findSubstrings p s
1306
1307 -- | Get the first index of a substring in another string,
1308 --   or 'Nothing' if the string is not found.
1309 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1310 findSubstring :: ByteString -- ^ String to search for.
1311               -> ByteString -- ^ String to seach in.
1312               -> Maybe Int
1313 findSubstring = (listToMaybe .) . findSubstrings
1314
1315 -- | Find the indexes of all (possibly overlapping) occurances of a
1316 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1317 -- string matching algorithm.
1318 findSubstrings :: ByteString -- ^ String to search for.
1319                -> ByteString -- ^ String to seach in.
1320                -> [Int]
1321
1322 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1323   where
1324       patc x = pat `unsafeIndex` x
1325       strc x = str `unsafeIndex` x
1326
1327       -- maybe we should make kmpNext a UArray before using it in search?
1328       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1329       kmpNextL p _ | null p = []
1330       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1331                          ps = unsafeTail p
1332                          x = if not (null ps) && unsafeHead ps == patc j'
1333                                 then kmpNext Array.! j' else j'
1334                         in x:kmpNextL ps j'
1335       search i j = match ++ rest -- i: position in string, j: position in pattern
1336         where match = if j == m then [(i - j)] else []
1337               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1338       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1339                | otherwise = j
1340
1341 -- ---------------------------------------------------------------------
1342 -- Zipping
1343
1344 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1345 -- corresponding pairs of bytes. If one input ByteString is short,
1346 -- excess elements of the longer ByteString are discarded. This is
1347 -- equivalent to a pair of 'unpack' operations.
1348 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1349 zip ps qs
1350     | null ps || null qs = []
1351     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1352
1353 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1354 -- the first argument, instead of a tupling function.  For example,
1355 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1356 -- corresponding sums.
1357 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1358 zipWith f ps qs
1359     | null ps || null qs = []
1360     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1361
1362 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1363 -- ByteStrings. Note that this performs two 'pack' operations.
1364 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1365 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1366 {-# INLINE unzip #-}
1367
1368 -- ---------------------------------------------------------------------
1369 -- Special lists
1370
1371 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1372 inits :: ByteString -> [ByteString]
1373 inits (PS x s l) = [PS x s n | n <- [0..l]]
1374
1375 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1376 tails :: ByteString -> [ByteString]
1377 tails p | null p    = [empty]
1378         | otherwise = p : tails (unsafeTail p)
1379
1380 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1381
1382 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1383 elems :: ByteString -> [ByteString]
1384 elems (PS _ _ 0) = []
1385 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1386 {-# INLINE elems #-}
1387
1388 -- ---------------------------------------------------------------------
1389 -- ** Ordered 'ByteString's
1390
1391 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1392 sort :: ByteString -> ByteString
1393 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1394         memcpy p (f `plusPtr` s) l
1395         c_qsort p l -- inplace
1396
1397 {-
1398 sort = pack . List.sort . unpack
1399 -}
1400
1401 -- ---------------------------------------------------------------------
1402 --
1403 -- Extensions to the basic interface
1404 --
1405
1406 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1407 -- check for the empty case, so there is an obligation on the programmer
1408 -- to provide a proof that the ByteString is non-empty.
1409 unsafeHead :: ByteString -> Word8
1410 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1411 {-# INLINE unsafeHead #-}
1412
1413 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1414 -- check for the empty case. As with 'unsafeHead', the programmer must
1415 -- provide a separate proof that the ByteString is non-empty.
1416 unsafeTail :: ByteString -> ByteString
1417 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1418 {-# INLINE unsafeTail #-}
1419
1420 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1421 -- This omits the bounds check, which means there is an accompanying
1422 -- obligation on the programmer to ensure the bounds are checked in some
1423 -- other way.
1424 unsafeIndex :: ByteString -> Int -> Word8
1425 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1426 {-# INLINE unsafeIndex #-}
1427
1428 -- ---------------------------------------------------------------------
1429 -- Low level constructors
1430
1431 #if defined(__GLASGOW_HASKELL__)
1432 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1433 -- Addr\# (an arbitrary machine address assumed to point outside the
1434 -- garbage-collected heap) into a @ByteString@. A much faster way to
1435 -- create an Addr\# is with an unboxed string literal, than to pack a
1436 -- boxed string. A unboxed string literal is compiled to a static @char
1437 -- []@ by GHC. Establishing the length of the string requires a call to
1438 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1439 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1440 -- if you know the length of the string statically.
1441 --
1442 -- An example:
1443 --
1444 -- > literalFS = packAddress "literal"#
1445 --
1446 packAddress :: Addr# -> ByteString
1447 packAddress addr# = inlinePerformIO $ do
1448     p <- newForeignPtr_ cstr
1449     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1450   where
1451     cstr = Ptr addr#
1452 {-# INLINE packAddress #-}
1453
1454 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1455 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1456 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1457 -- 'Addr\#' to the string, and the length of the string. Make sure the
1458 -- length is correct, otherwise use the safer 'packAddress' (where the
1459 -- length will be calculated once at runtime).
1460 unsafePackAddress :: Int -> Addr# -> ByteString
1461 unsafePackAddress len addr# = inlinePerformIO $ do
1462     p <- newForeignPtr_ cstr
1463     return $ PS p 0 len
1464     where cstr = Ptr addr#
1465
1466 #endif
1467
1468 -- | /O(1)/ Build a ByteString from a ForeignPtr
1469 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1470 fromForeignPtr fp l = PS fp 0 l
1471
1472 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1473 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1474 toForeignPtr (PS ps s l) = (ps, s, l)
1475
1476 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1477 -- current 'ByteString' from any larger string it was created from, as
1478 -- an 'Int'.
1479 skipIndex :: ByteString -> Int
1480 skipIndex (PS _ s _) = s
1481 {-# INLINE skipIndex #-}
1482
1483 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1484 -- finalizer associated to it. The ByteString length is calculated using
1485 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1486 packCString :: CString -> ByteString
1487 packCString cstr = inlinePerformIO $ do
1488     fp <- newForeignPtr_ (castPtr cstr)
1489     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1490
1491 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1492 -- have /no/ finalizer associated with it. This operation has /O(1)/
1493 -- complexity as we already know the final size, so no /strlen(3)/ is
1494 -- required.
1495 packCStringLen :: CStringLen -> ByteString
1496 packCStringLen (ptr,len) = inlinePerformIO $ do
1497     fp <- newForeignPtr_ (castPtr ptr)
1498     return $ PS fp 0 (fromIntegral len)
1499
1500 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1501 -- have a @free(3)@ finalizer associated to it.
1502 packMallocCString :: CString -> ByteString
1503 packMallocCString cstr = inlinePerformIO $ do
1504     fp <- newForeignFreePtr (castPtr cstr)
1505     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1506
1507 #if defined(__GLASGOW_HASKELL__)
1508 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1509 -- length, and an IO action representing a finalizer. This function is
1510 -- not available on Hugs.
1511 --
1512 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1513 packCStringFinalizer p l f = do
1514     fp <- FC.newForeignPtr p f
1515     return $ PS fp 0 l
1516
1517 -- | Explicitly run the finaliser associated with a 'ByteString'.
1518 -- Further references to this value may generate invalid memory
1519 -- references. This operation is unsafe, as there may be other
1520 -- 'ByteStrings' referring to the same underlying pages. If you use
1521 -- this, you need to have a proof of some kind that all 'ByteString's
1522 -- ever generated from the underlying byte array are no longer live.
1523 unsafeFinalize :: ByteString -> IO ()
1524 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1525
1526 #endif
1527
1528 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1529 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1530 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1531 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1532     where
1533       alloc = withForeignPtr ps $ \p -> do
1534                 buf <- c_malloc (fromIntegral l+1)
1535                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1536                 poke (buf `plusPtr` l) (0::Word8)
1537                 return $ castPtr buf
1538
1539 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1540 -- Warning: modifying the @CString@ will affect the @ByteString@.
1541 -- Why is this function unsafe? It relies on the null byte at the end of
1542 -- the ByteString to be there. This is /not/ the case if your ByteString
1543 -- has been spliced from a larger string (i.e. with take or drop).
1544 -- Unless you can guarantee the null byte, you should use the safe
1545 -- version, which will copy the string first.
1546 --
1547 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1548 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1549
1550 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1551 --   This is mainly useful to allow the rest of the data pointed
1552 --   to by the 'ByteString' to be garbage collected, for example
1553 --   if a large string has been read in, and only a small part of it
1554 --   is needed in the rest of the program.
1555 copy :: ByteString -> ByteString
1556 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1557
1558 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1559 -- CString is going to be deallocated from C land.
1560 copyCString :: CString -> ByteString
1561 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1562
1563 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1564 copyCStringLen :: CStringLen -> ByteString
1565 copyCStringLen (cstr, len) = inlinePerformIO $ do
1566     fp <- mallocForeignPtrArray (len+1)
1567     withForeignPtr fp $ \p -> do
1568         memcpy p (castPtr cstr) len
1569         poke (p `plusPtr` len) (0 :: Word8)
1570     return $! PS fp 0 len
1571
1572 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1573 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1574 -- This is analogous to unsafeUseAsCString, and comes with the same
1575 -- safety requirements.
1576 --
1577 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1578 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1579
1580 -- | Given the maximum size needed and a function to make the contents
1581 -- of a ByteString, generate makes the 'ByteString'. The generating
1582 -- function is required to return the actual final size (<= the maximum
1583 -- size), and the resulting byte array is realloced to this size.  The
1584 -- string is padded at the end with a null byte.
1585 --
1586 -- generate is the main mechanism for creating custom, efficient
1587 -- ByteString functions, using Haskell or C functions to fill the space.
1588 --
1589 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1590 generate i f = do
1591     p <- mallocArray i
1592     i' <- f p
1593     p' <- reallocArray p (i'+1)
1594     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1595     fp <- newForeignFreePtr p'
1596     return $ PS fp 0 i'
1597
1598 -- ---------------------------------------------------------------------
1599 -- line IO
1600
1601 #if defined(__GLASGOW_HASKELL__)
1602
1603 -- | getLine, read a line from stdin.
1604 getLine :: IO ByteString
1605 getLine = hGetLine stdin
1606
1607 -- | hGetLine. read a ByteString from a handle
1608 hGetLine :: Handle -> IO ByteString
1609 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1610     case haBufferMode handle_ of
1611        NoBuffering -> error "no buffering"
1612        _other      -> hGetLineBuffered handle_
1613
1614  where
1615     hGetLineBuffered handle_ = do
1616         let ref = haBuffer handle_
1617         buf <- readIORef ref
1618         hGetLineBufferedLoop handle_ ref buf 0 []
1619
1620     hGetLineBufferedLoop handle_ ref
1621             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1622         len `seq` do
1623         off <- findEOL r w raw
1624         let new_len = len + off - r
1625         xs <- mkPS raw r off
1626
1627       -- if eol == True, then off is the offset of the '\n'
1628       -- otherwise off == w and the buffer is now empty.
1629         if off /= w
1630             then do if (w == off + 1)
1631                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1632                             else writeIORef ref buf{ bufRPtr = off + 1 }
1633                     mkBigPS new_len (xs:xss)
1634             else do
1635                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1636                                     buf{ bufWPtr=0, bufRPtr=0 }
1637                  case maybe_buf of
1638                     -- Nothing indicates we caught an EOF, and we may have a
1639                     -- partial line to return.
1640                     Nothing -> do
1641                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1642                          if new_len > 0
1643                             then mkBigPS new_len (xs:xss)
1644                             else ioe_EOF
1645                     Just new_buf ->
1646                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1647
1648     -- find the end-of-line character, if there is one
1649     findEOL r w raw
1650         | r == w = return w
1651         | otherwise =  do
1652             (c,r') <- readCharFromBuffer raw r
1653             if c == '\n'
1654                 then return r -- NB. not r': don't include the '\n'
1655                 else findEOL r' w raw
1656
1657     maybeFillReadBuffer fd is_line is_stream buf = catch
1658         (do buf' <- fillReadBuffer fd is_line is_stream buf
1659             return (Just buf'))
1660         (\e -> if isEOFError e then return Nothing else ioError e)
1661
1662 -- TODO, rewrite to use normal memcpy
1663 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1664 mkPS buf start end = do
1665     let len = end - start
1666     fp <- mallocByteString (len `quot` 8)
1667     withForeignPtr fp $ \p -> do
1668         memcpy_ptr_baoff p buf start (fromIntegral len)
1669         return (PS fp 0 len)
1670
1671 mkBigPS :: Int -> [ByteString] -> IO ByteString
1672 mkBigPS _ [ps] = return ps
1673 mkBigPS _ pss = return $! concat (P.reverse pss)
1674
1675 #endif
1676
1677 -- ---------------------------------------------------------------------
1678 -- Block IO
1679
1680 -- | Outputs a 'ByteString' to the specified 'Handle'.
1681 hPut :: Handle -> ByteString -> IO ()
1682 hPut _ (PS _ _ 0)  = return ()
1683 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1684 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1685
1686 -- | Write a ByteString to stdout
1687 putStr :: ByteString -> IO ()
1688 putStr = hPut stdout
1689
1690 -- | Write a ByteString to stdout, appending a newline byte
1691 putStrLn :: ByteString -> IO ()
1692 putStrLn ps = hPut stdout ps >> hPut stdout nl
1693     where nl = packByte 0x0a
1694
1695 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1696 -- is far more efficient than reading the characters into a 'String'
1697 -- and then using 'pack'.
1698 hGet :: Handle -> Int -> IO ByteString
1699 hGet _ 0 = return empty
1700 hGet h i = do fp <- mallocByteString i
1701               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1702               return $ PS fp 0 l
1703
1704 #if defined(__GLASGOW_HASKELL__)
1705 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1706 -- waiting for data to become available, instead it returns only whatever data
1707 -- is available.
1708 hGetNonBlocking :: Handle -> Int -> IO ByteString
1709 hGetNonBlocking _ 0 = return empty
1710 hGetNonBlocking h i = do
1711     fp <- mallocByteString i
1712     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1713     return $ PS fp 0 l
1714 #endif
1715
1716 -- | Read entire handle contents into a 'ByteString'.
1717 --
1718 -- As with 'hGet', the string representation in the file is assumed to
1719 -- be ISO-8859-1.
1720 --
1721 hGetContents :: Handle -> IO ByteString
1722 hGetContents h = do
1723     let start_size = 1024
1724     p <- mallocArray start_size
1725     i <- hGetBuf h p start_size
1726     if i < start_size
1727         then do p' <- reallocArray p i
1728                 fp <- newForeignFreePtr p'
1729                 return $ PS fp 0 i
1730         else f p start_size
1731     where
1732         f p s = do
1733             let s' = 2 * s
1734             p' <- reallocArray p s'
1735             i  <- hGetBuf h (p' `plusPtr` s) s
1736             if i < s
1737                 then do let i' = s + i
1738                         p'' <- reallocArray p' i'
1739                         fp  <- newForeignFreePtr p''
1740                         return $ PS fp 0 i'
1741                 else f p' s'
1742
1743 -- | getContents. Equivalent to hGetContents stdin
1744 getContents :: IO ByteString
1745 getContents = hGetContents stdin
1746
1747 -- | Read an entire file directly into a 'ByteString'.  This is far more
1748 -- efficient than reading the characters into a 'String' and then using
1749 -- 'pack'.  It also may be more efficient than opening the file and
1750 -- reading it using hGet.
1751 readFile :: FilePath -> IO ByteString
1752 readFile f = do
1753     h <- openBinaryFile f ReadMode
1754     l <- hFileSize h
1755     s <- hGet h $ fromIntegral l
1756     hClose h
1757     return s
1758
1759 -- | Write a 'ByteString' to a file.
1760 writeFile :: FilePath -> ByteString -> IO ()
1761 writeFile f ps = do
1762     h <- openBinaryFile f WriteMode
1763     hPut h ps
1764     hClose h
1765
1766 {-
1767 --
1768 -- Disable until we can move it into a portable .hsc file
1769 --
1770
1771 -- | Like readFile, this reads an entire file directly into a
1772 -- 'ByteString', but it is even more efficient.  It involves directly
1773 -- mapping the file to memory.  This has the advantage that the contents
1774 -- of the file never need to be copied.  Also, under memory pressure the
1775 -- page may simply be discarded, while in the case of readFile it would
1776 -- need to be written to swap.  If you read many small files, mmapFile
1777 -- will be less memory-efficient than readFile, since each mmapFile
1778 -- takes up a separate page of memory.  Also, you can run into bus
1779 -- errors if the file is modified.  As with 'readFile', the string
1780 -- representation in the file is assumed to be ISO-8859-1.
1781 --
1782 -- On systems without mmap, this is the same as a readFile.
1783 --
1784 mmapFile :: FilePath -> IO ByteString
1785 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1786
1787 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1788 mmap f = do
1789     h <- openBinaryFile f ReadMode
1790     l <- fromIntegral `fmap` hFileSize h
1791     -- Don't bother mmaping small files because each mmapped file takes up
1792     -- at least one full VM block.
1793     if l < mmap_limit
1794        then do thefp <- mallocByteString l
1795                withForeignPtr thefp $ \p-> hGetBuf h p l
1796                hClose h
1797                return (thefp, l)
1798        else do
1799                -- unix only :(
1800                fd <- fromIntegral `fmap` handleToFd h
1801                p  <- my_mmap l fd
1802                fp <- if p == nullPtr
1803                      then do thefp <- mallocByteString l
1804                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1805                              return thefp
1806                      else do
1807                           -- The munmap leads to crashes on OpenBSD.
1808                           -- maybe there's a use after unmap in there somewhere?
1809 #if !defined(__OpenBSD__)
1810                              let unmap = c_munmap p l >> return ()
1811 #else
1812                              let unmap = return ()
1813 #endif
1814                              fp <- FC.newForeignPtr p unmap
1815                              return fp
1816                c_close fd
1817                hClose h
1818                return (fp, l)
1819     where mmap_limit = 16*1024
1820 -}
1821
1822 #if defined(__GLASGOW_HASKELL__)
1823 --
1824 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1825 --
1826 getArgs :: IO [ByteString]
1827 getArgs =
1828   alloca $ \ p_argc ->
1829   alloca $ \ p_argv -> do
1830     getProgArgv p_argc p_argv
1831     p    <- fromIntegral `fmap` peek p_argc
1832     argv <- peek p_argv
1833     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1834 #endif
1835
1836 -- ---------------------------------------------------------------------
1837 -- Internal utilities
1838
1839 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1840 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1841 -- convenience for ByteString construction.
1842 w2c :: Word8 -> Char
1843 #if !defined(__GLASGOW_HASKELL__)
1844 w2c = chr . fromIntegral
1845 #else
1846 w2c = unsafeChr . fromIntegral
1847 #endif
1848 {-# INLINE w2c #-}
1849
1850 c2w :: Char -> Word8
1851 c2w = fromIntegral . ord
1852 {-# INLINE c2w #-}
1853
1854 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1855 -- is padded with a null byte.
1856 mallocByteString :: Int -> IO (ForeignPtr Word8)
1857 mallocByteString l = do
1858     fp <- mallocForeignPtrArray (l+1)
1859     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1860     return fp
1861
1862 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1863 -- argument gives the final size of the ByteString. Unlike 'generate'
1864 -- the ByteString is no reallocated if the final size is less than the
1865 -- estimated size.
1866 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1867 create l write_ptr = inlinePerformIO $ do
1868     fp <- mallocByteString (l+1)
1869     withForeignPtr fp $ \p -> write_ptr p
1870     return $ PS fp 0 l
1871 {-# INLINE create #-}
1872
1873 -- | Perform an operation with a temporary ByteString
1874 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1875 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1876 {-# INLINE withPtr #-}
1877
1878 -- Common up near identical calls to `error' to reduce the number
1879 -- constant strings created when compiled:
1880 errorEmptyList :: String -> a
1881 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1882 {-# INLINE errorEmptyList #-}
1883
1884 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1885 -- of the string if no element is found, rather than Nothing.
1886 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1887 STRICT2(findIndexOrEnd)
1888 findIndexOrEnd f ps
1889     | null ps           = 0
1890     | f (unsafeHead ps) = 0
1891     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
1892 {-# INLINE findIndexOrEnd #-}
1893
1894 -- Find from the end of the string using predicate
1895 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1896 STRICT2(findFromEndUntil)
1897 findFromEndUntil f ps@(PS x s l) =
1898     if null ps then 0
1899     else if f (last ps) then l
1900          else findFromEndUntil f (PS x s (l-1))
1901
1902 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1903 -- it exposes lots of things to further inlining
1904 --
1905 {-# INLINE inlinePerformIO #-}
1906 inlinePerformIO :: IO a -> a
1907 #if defined(__GLASGOW_HASKELL__)
1908 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1909 #else
1910 inlinePerformIO = unsafePerformIO
1911 #endif
1912
1913 {-# INLINE newForeignFreePtr #-}
1914 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1915 #if defined(__GLASGOW_HASKELL__)
1916 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1917 #else
1918 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1919 #endif
1920
1921 -- ---------------------------------------------------------------------
1922 --
1923 -- Standard C functions
1924 --
1925
1926 foreign import ccall unsafe "string.h strlen" c_strlen
1927     :: CString -> CInt
1928
1929 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1930     :: CInt -> IO (Ptr Word8)
1931
1932 foreign import ccall unsafe "static stdlib.h free" c_free
1933     :: Ptr Word8 -> IO ()
1934
1935 #if !defined(__GLASGOW_HASKELL__)
1936 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1937     :: FunPtr (Ptr Word8 -> IO ())
1938 #endif
1939
1940 foreign import ccall unsafe "string.h memset" memset
1941     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1942
1943 foreign import ccall unsafe "string.h memchr" memchr
1944     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1945
1946 foreign import ccall unsafe "string.h memcmp" memcmp
1947     :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1948
1949 foreign import ccall unsafe "string.h memcpy" memcpy
1950     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1951
1952 -- ---------------------------------------------------------------------
1953 --
1954 -- Uses our C code
1955 --
1956
1957 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1958     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1959
1960 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1961     :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1962
1963 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1964     :: Ptr Word8 -> Int -> Word8
1965
1966 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1967     :: Ptr Word8 -> Int -> Word8
1968
1969 foreign import ccall unsafe "static fpstring.h count" c_count
1970     :: Ptr Word8 -> Int -> Word8 -> Int
1971
1972 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1973     :: Ptr Word8 -> Int -> IO ()
1974
1975 -- ---------------------------------------------------------------------
1976 -- MMap
1977
1978 {-
1979 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
1980     :: Int -> Int -> IO (Ptr Word8)
1981
1982 foreign import ccall unsafe "static unistd.h close" c_close
1983     :: Int -> IO Int
1984
1985 #  if !defined(__OpenBSD__)
1986 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
1987     :: Ptr Word8 -> Int -> IO Int
1988 #  endif
1989 -}
1990
1991 -- ---------------------------------------------------------------------
1992 -- Internal GHC Haskell magic
1993
1994 #if defined(__GLASGOW_HASKELL__)
1995 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1996     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
1997
1998 foreign import ccall unsafe "__hscore_memcpy_src_off"
1999    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2000 #endif