Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 -- License     : BSD-style
  10 --
  11 -- Maintainer  : dons@cse.unsw.edu.au
  12 -- Stability   : experimental
  13 -- Portability : portable, requires ffi and cpp
  14 -- Tested with : GHC 6.4.1 and Hugs March 2005
  15 --
  16
  17 --
  18 -- | A time and space-efficient implementation of byte vectors using
  19 -- packed Word8 arrays, suitable for high performance use, both in terms
  20 -- of large data quantities, or high speed requirements. Byte vectors
  21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
  22 -- be passed between C and Haskell with little effort.
  23 --
  24 -- This module is intended to be imported @qualified@, to avoid name
  25 -- clashes with Prelude functions.  eg.
  26 --
  27 -- > import qualified Data.ByteString as B
  28 --
  29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  30 -- UArray by Simon Marlow. Rewritten to support slices and use
  31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  32 --
  33
  34 module Data.ByteString (
  35
  36         -- * The @ByteString@ type
  37         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  38
  39         -- * Introducing and eliminating 'ByteString's
  40         empty,                  -- :: ByteString
  41         packByte,               -- :: Word8   -> ByteString
  42         pack,                   -- :: [Word8] -> ByteString
  43         unpack,                 -- :: ByteString -> [Word8]
  44         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  45         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  46
  47         -- * Basic interface
  48         cons,                   -- :: Word8 -> ByteString -> ByteString
  49         snoc,                   -- :: Word8 -> ByteString -> ByteString
  50         null,                   -- :: ByteString -> Bool
  51         length,                 -- :: ByteString -> Int
  52         head,                   -- :: ByteString -> Word8
  53         tail,                   -- :: ByteString -> ByteString
  54         last,                   -- :: ByteString -> Word8
  55         init,                   -- :: ByteString -> ByteString
  56         append,                 -- :: ByteString -> ByteString -> ByteString
  57
  58         -- * Special ByteStrings
  59         inits,                  -- :: ByteString -> [ByteString]
  60         tails,                  -- :: ByteString -> [ByteString]
  61         elems,                  -- :: ByteString -> [ByteString]
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         reverse,                -- :: ByteString -> ByteString
  66         intersperse,            -- :: Word8 -> ByteString -> ByteString
  67         transpose,              -- :: [ByteString] -> [ByteString]
  68
  69         -- * Reducing 'ByteString's
  70         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  71         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  72         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  73         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74
  75         -- ** Special folds
  76         concat,                 -- :: [ByteString] -> ByteString
  77         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  78         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  79         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  80         maximum,                -- :: ByteString -> Word8
  81         minimum,                -- :: ByteString -> Word8
  82         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  83
  84         -- * Generating and unfolding ByteStrings
  85         replicate,              -- :: Int -> Word8 -> ByteString
  86         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  87
  88         -- * Substrings
  89
  90         -- ** Breaking strings
  91         take,                   -- :: Int -> ByteString -> ByteString
  92         drop,                   -- :: Int -> ByteString -> ByteString
  93         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  94         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  95         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  96         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  97         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  98         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  99
 100         -- ** Breaking and dropping on specific bytes
 101         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 102         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 103         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 104         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 105
 106         -- ** Breaking into many substrings
 107         split,                  -- :: Word8 -> ByteString -> [ByteString]
 108         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 109         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 110         group,                  -- :: ByteString -> [ByteString]
 111         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 112
 113         -- ** Joining strings
 114         join,                   -- :: ByteString -> [ByteString] -> ByteString
 115         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 116
 117         -- * Indexing ByteStrings
 118         index,                  -- :: ByteString -> Int -> Word8
 119         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 120         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 121         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 122         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 123         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 124         count,                  -- :: Word8 -> ByteString -> Int
 125
 126         -- * Ordered ByteStrings
 127         sort,                   -- :: ByteString -> ByteString
 128
 129         -- * Searching ByteStrings
 130
 131         -- ** Searching by equality
 132         -- | These functions use memchr(3) to efficiently search the ByteString
 133
 134         elem,                   -- :: Word8 -> ByteString -> Bool
 135         notElem,                -- :: Word8 -> ByteString -> Bool
 136         filterByte,             -- :: Word8 -> ByteString -> ByteString
 137         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 138
 139         -- ** Searching with a predicate
 140         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 141         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 142
 143         -- ** Prefixes and suffixes
 144         -- | These functions use memcmp(3) to efficiently compare substrings
 145         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 146         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 147
 148         -- ** Search for arbitrary substrings
 149         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 150         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 151         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 152
 153         -- * Zipping and unzipping ByteStrings
 154         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 155         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 156         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 157
 158         -- * Unchecked access
 159         unsafeHead,             -- :: ByteString -> Word8
 160         unsafeTail,             -- :: ByteString -> ByteString
 161         unsafeIndex,            -- :: ByteString -> Int -> Word8
 162
 163         -- * Low level introduction and elimination
 164         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 165         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 166         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 167         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 168         skipIndex,              -- :: ByteString -> Int
 169
 170         -- ** Packing CStrings and pointers
 171         packCString,            -- :: CString -> ByteString
 172         packCStringLen,         -- :: CString -> ByteString
 173         packMallocCString,      -- :: CString -> ByteString
 174
 175 #if defined(__GLASGOW_HASKELL__)
 176         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 177         packAddress,            -- :: Addr# -> ByteString
 178         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 179         unsafeFinalize,         -- :: ByteString -> IO ()
 180 #endif
 181
 182         -- ** Using ByteStrings as CStrings
 183         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 184         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 185         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 186
 187         -- ** Copying ByteStrings
 188         -- | These functions perform memcpy(3) operations
 189         copy,                   -- :: ByteString -> ByteString
 190         copyCString,            -- :: CString -> ByteString
 191         copyCStringLen,         -- :: CStringLen -> ByteString
 192
 193         -- * I\/O with @ByteString@s
 194
 195         -- ** Standard input and output
 196
 197 #if defined(__GLASGOW_HASKELL__)
 198         getLine,                -- :: IO ByteString
 199 #endif
 200         getContents,            -- :: IO ByteString
 201         putStr,                 -- :: ByteString -> IO ()
 202         putStrLn,               -- :: ByteString -> IO ()
 203
 204         -- ** Files
 205         readFile,               -- :: FilePath -> IO ByteString
 206         writeFile,              -- :: FilePath -> ByteString -> IO ()
 207 --      mmapFile,               -- :: FilePath -> IO ByteString
 208
 209         -- ** I\/O with Handles
 210 #if defined(__GLASGOW_HASKELL__)
 211         getArgs,                -- :: IO [ByteString]
 212         hGetLine,               -- :: Handle -> IO ByteString
 213         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 214 #endif
 215         hGetContents,           -- :: Handle -> IO ByteString
 216         hGet,                   -- :: Handle -> Int -> IO ByteString
 217         hPut,                   -- :: Handle -> ByteString -> IO ()
 218
 219 #if defined(__GLASGOW_HASKELL__)
 220         -- * Miscellaneous
 221         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 222 #endif
 223
 224   ) where
 225
 226 import qualified Prelude as P
 227 import Prelude hiding           (reverse,head,tail,last,init,null
 228                                 ,length,map,lines,foldl,foldr,unlines
 229                                 ,concat,any,take,drop,splitAt,takeWhile
 230                                 ,dropWhile,span,break,elem,filter,maximum
 231                                 ,minimum,all,concatMap,foldl1,foldr1
 232                                 ,readFile,writeFile,replicate
 233                                 ,getContents,getLine,putStr,putStrLn
 234                                 ,zip,zipWith,unzip,notElem)
 235
 236 import qualified Data.List as List
 237
 238 import Data.Char
 239 import Data.Word                (Word8)
 240 import Data.Maybe               (listToMaybe)
 241 import Data.Array               (listArray)
 242 import qualified Data.Array as Array ((!))
 243
 244 -- Control.Exception.bracket not available in yhc or nhc
 245 import Control.Exception        (bracket)
 246 import Control.Monad            (when)
 247
 248 import Foreign.C.String         (CString, CStringLen)
 249 import Foreign.C.Types          (CSize, CInt)
 250 import Foreign.ForeignPtr
 251 import Foreign.Marshal.Array
 252 import Foreign.Ptr
 253 import Foreign.Storable         (Storable(..))
 254
 255 -- hGetBuf and hPutBuf not available in yhc or nhc
 256 import System.IO                (stdin,stdout,hClose,hFileSize
 257                                 ,hGetBuf,hPutBuf,openBinaryFile
 258                                 ,Handle,IOMode(..))
 259
 260 #if !defined(__GLASGOW_HASKELL__)
 261 import System.IO.Unsafe
 262 #endif
 263
 264 #if defined(__GLASGOW_HASKELL__)
 265
 266 import Data.Generics            (Data(..), Typeable(..))
 267
 268 import System.IO                (hGetBufNonBlocking)
 269 import System.IO.Error          (isEOFError)
 270
 271 import Foreign.Marshal          (alloca)
 272 import qualified Foreign.Concurrent as FC (newForeignPtr)
 273
 274 import GHC.Handle
 275 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 276 import GHC.Base                 (build, unsafeChr)
 277 import GHC.Word hiding (Word8)
 278 import GHC.Ptr                  (Ptr(..))
 279 import GHC.ST                   (ST(..))
 280 import GHC.IOBase
 281
 282 #endif
 283
 284 -- CFILES stuff is Hugs only
 285 {-# CFILES cbits/fpstring.c #-}
 286
 287 -- -----------------------------------------------------------------------------
 288 --
 289 -- Useful macros, until we have bang patterns
 290 --
 291
 292 #define STRICT1(f) f a | a `seq` False = undefined
 293 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 294 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 295 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 296 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 297
 298 -- -----------------------------------------------------------------------------
 299
 300 -- | A space-efficient representation of a Word8 vector, supporting many
 301 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 302 --
 303 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 304 --
 305 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 306                      {-# UNPACK #-} !Int
 307                      {-# UNPACK #-} !Int
 308
 309 #if defined(__GLASGOW_HASKELL__)
 310     deriving (Data, Typeable)
 311 #endif
 312
 313 instance Eq  ByteString
 314     where (==)    = eq
 315
 316 instance Ord ByteString
 317     where compare = compareBytes
 318
 319 instance Show ByteString where
 320     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 321
 322 instance Read ByteString where
 323     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 324
 325 {-
 326 instance Arbitrary PackedString where
 327     arbitrary = P.pack `fmap` arbitrary
 328     coarbitrary s = coarbitrary (P.unpack s)
 329 -}
 330
 331 -- | /O(n)/ Equality on the 'ByteString' type.
 332 eq :: ByteString -> ByteString -> Bool
 333 eq a b = (compareBytes a b) == EQ
 334 {-# INLINE eq #-}
 335
 336 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 337 compareBytes :: ByteString -> ByteString -> Ordering
 338 compareBytes (PS _ _ 0) (PS _ _ 0)       = EQ    -- short cut for empty strings
 339 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
 340     withForeignPtr x1 $ \p1 ->
 341     withForeignPtr x2 $ \p2 -> do
 342         i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
 343         return $ case i `compare` 0 of
 344                     EQ  -> l1 `compare` l2
 345                     x   -> x
 346 {-# INLINE compareBytes #-}
 347
 348 {-
 349 --
 350 -- About 4x slower over 32M
 351 --
 352 compareBytes :: ByteString -> ByteString -> Ordering
 353 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 354     withForeignPtr fp1 $ \p1 ->
 355         withForeignPtr fp2 $ \p2 ->
 356             cmp (p1 `plusPtr` off1)
 357                 (p2 `plusPtr` off2) 0 len1 len2
 358
 359 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 360 STRICT5(cmp)
 361 cmp p1 p2 n len1 len2
 362       | n == len1 = if n == len2 then return EQ else return LT
 363       | n == len2 = return GT
 364       | otherwise = do
 365           (a :: Word8) <- peekByteOff p1 n
 366           (b :: Word8) <- peekByteOff p2 n
 367           case a `compare` b of
 368                 EQ -> cmp p1 p2 (n+1) len1 len2
 369                 LT -> return LT
 370                 GT -> return GT
 371 {-# INLINE compareBytes #-}
 372 -}
 373
 374 -- -----------------------------------------------------------------------------
 375 -- Introducing and eliminating 'ByteString's
 376
 377 -- | /O(1)/ The empty 'ByteString'
 378 empty :: ByteString
 379 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 380 {-# NOINLINE empty #-}
 381
 382 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 383 packByte :: Word8 -> ByteString
 384 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
 385     withForeignPtr fp $ \p -> poke p c
 386     return $ PS fp 0 1
 387 {-# NOINLINE packByte #-}
 388
 389 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 390 --
 391 -- For applications with large numbers of string literals, pack can be a
 392 -- bottleneck. In such cases, consider using packAddress (GHC only).
 393 pack :: [Word8] -> ByteString
 394
 395 #if !defined(__GLASGOW_HASKELL__)
 396
 397 pack str = create (P.length str) $ \p -> go p str
 398     where
 399         go _ []     = return ()
 400         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 401
 402 #else /* hack away */
 403
 404 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 405     where
 406         go _ _ []        = return ()
 407         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 408
 409         writeByte p i c = ST $ \s# ->
 410             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 411
 412 #endif
 413
 414 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 415 unpack :: ByteString -> [Word8]
 416
 417 #if !defined(__GLASGOW_HASKELL__)
 418
 419 unpack (PS _  _ 0) = []
 420 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 421         go (p `plusPtr` s) (l - 1) []
 422     where
 423         STRICT3(go)
 424         go p 0 acc = peek p          >>= \e -> return (e : acc)
 425         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 426 {-# INLINE unpack #-}
 427
 428 #else
 429
 430 unpack ps = build (unpackFoldr ps)
 431 {-# INLINE unpack #-}
 432
 433 unpackList :: ByteString -> [Word8]
 434 unpackList (PS fp off len) = withPtr fp $ \p -> do
 435     let STRICT3(loop)
 436         loop _ (-1) acc = return acc
 437         loop q n acc = do
 438            a <- peekByteOff q n
 439            loop q (n-1) (a : acc)
 440     loop (p `plusPtr` off) (len-1) []
 441
 442 {-# RULES
 443 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 444  #-}
 445
 446 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 447 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 448     let STRICT3(loop)
 449         loop _ (-1) acc = return acc
 450         loop q n    acc = do
 451            a <- peekByteOff q n
 452            loop q (n-1) (a `f` acc)
 453     loop (p `plusPtr` off) (len-1) ch
 454 {-# INLINE [0] unpackFoldr #-}
 455
 456 #endif
 457
 458 ------------------------------------------------------------------------
 459
 460 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 461 -- conversion function
 462 packWith :: (a -> Word8) -> [a] -> ByteString
 463 packWith k str = create (P.length str) $ \p -> go p str
 464     where
 465         STRICT2(go)
 466         go _ []     = return ()
 467         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 468 {-# INLINE packWith #-}
 469 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 470
 471 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 472 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 473 unpackWith _ (PS _  _ 0) = []
 474 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 475         go (p `plusPtr` s) (l - 1) []
 476     where
 477         STRICT3(go)
 478         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 479         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 480 {-# INLINE unpackWith #-}
 481 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 482
 483 -- ---------------------------------------------------------------------
 484 -- Basic interface
 485
 486 -- | /O(1)/ Test whether a ByteString is empty.
 487 null :: ByteString -> Bool
 488 null (PS _ _ l) = l == 0
 489 {-# INLINE null #-}
 490
 491 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 492 length :: ByteString -> Int
 493 length (PS _ _ l) = l
 494 {-# INLINE length #-}
 495
 496 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 497 -- complexity, as it requires a memcpy.
 498 cons :: Word8 -> ByteString -> ByteString
 499 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 500         memcpy (p `plusPtr` 1) (f `plusPtr` s) l
 501         poke p c
 502 {-# INLINE cons #-}
 503
 504 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 505 snoc :: ByteString -> Word8 -> ByteString
 506 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 507         memcpy p (f `plusPtr` s) l
 508         poke (p `plusPtr` l) c
 509 {-# INLINE snoc #-}
 510
 511 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 512 head :: ByteString -> Word8
 513 head ps@(PS x s _)
 514     | null ps   = errorEmptyList "head"
 515     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 516 {-# INLINE head #-}
 517
 518 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 519 tail :: ByteString -> ByteString
 520 tail (PS p s l)
 521     | l <= 0    = errorEmptyList "tail"
 522     | otherwise = PS p (s+1) (l-1)
 523 {-# INLINE tail #-}
 524
 525 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 526 last :: ByteString -> Word8
 527 last ps@(PS x s l)
 528     | null ps   = errorEmptyList "last"
 529     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 530 {-# INLINE last #-}
 531
 532 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 533 init :: ByteString -> ByteString
 534 init (PS p s l)
 535     | l <= 0    = errorEmptyList "init"
 536     | otherwise = PS p s (l-1)
 537 {-# INLINE init #-}
 538
 539 -- | /O(n)/ Append two ByteStrings
 540 append :: ByteString -> ByteString -> ByteString
 541 append xs ys | null xs   = ys
 542              | null ys   = xs
 543              | otherwise = concat [xs,ys]
 544 {-# INLINE append #-}
 545
 546 {-
 547 --
 548 -- About 30% faster, but allocating in a big chunk isn't good for memory use
 549 --
 550 append :: ByteString -> ByteString -> ByteString
 551 append xs@(PS ffp s l) ys@(PS fgp t m)
 552     | null xs   = ys
 553     | null ys   = xs
 554     | otherwise = create len $ \ptr ->
 555         withForeignPtr ffp $ \fp ->
 556         withForeignPtr fgp $ \gp -> do
 557             memcpy ptr               (fp `plusPtr` s) l
 558             memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
 559         where len = length xs + length ys
 560 -}
 561
 562 -- ---------------------------------------------------------------------
 563 -- Transformations
 564
 565 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 566 -- element of @xs@
 567 --
 568 map :: (Word8 -> Word8) -> ByteString -> ByteString
 569 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
 570     new_fp <- mallocByteString len
 571     withForeignPtr new_fp $ \new_p -> do
 572         map_ f (len-1) (p `plusPtr` start) new_p
 573         return (PS new_fp 0 len)
 574 {-# INLINE map #-}
 575
 576 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 577 STRICT4(map_)
 578 map_ f n p1 p2
 579    | n < 0 = return ()
 580    | otherwise = do
 581         x <- peekByteOff p1 n
 582         pokeByteOff p2 n (f x)
 583         map_ f (n-1) p1 p2
 584 {-# INLINE map_ #-}
 585
 586 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 587 reverse :: ByteString -> ByteString
 588 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 589         c_reverse p (f `plusPtr` s) l
 590
 591 {-
 592 reverse = pack . P.reverse . unpack
 593 -}
 594
 595 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 596 -- 'ByteString' and \`intersperses\' that byte between the elements of
 597 -- the 'ByteString'.  It is analogous to the intersperse function on
 598 -- Lists.
 599 intersperse :: Word8 -> ByteString -> ByteString
 600 intersperse c ps@(PS x s l)
 601     | length ps < 2  = ps
 602     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 603         c_intersperse p (f `plusPtr` s) l c
 604
 605 {-
 606 intersperse c = pack . List.intersperse c . unpack
 607 -}
 608
 609 -- | The 'transpose' function transposes the rows and columns of its
 610 -- 'ByteString' argument.
 611 transpose :: [ByteString] -> [ByteString]
 612 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 613
 614 -- ---------------------------------------------------------------------
 615 -- Reducing 'ByteString's
 616
 617 -- | 'foldl', applied to a binary operator, a starting value (typically
 618 -- the left-identity of the operator), and a ByteString, reduces the
 619 -- ByteString using the binary operator, from left to right.
 620 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 621 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 622         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 623     where
 624         STRICT3(lgo)
 625         lgo z p q | p == q    = return z
 626                   | otherwise = do c <- peek p
 627                                    lgo (f z c) (p `plusPtr` 1) q
 628
 629 -- | 'foldr', applied to a binary operator, a starting value
 630 -- (typically the right-identity of the operator), and a ByteString,
 631 -- reduces the ByteString using the binary operator, from right to left.
 632 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 633 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 634         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 635     where
 636         STRICT2(go)
 637         go p q | p == q    = return z
 638                | otherwise = do c  <- peek p
 639                                 ws <- go (p `plusPtr` 1) q
 640                                 return $ c `k` ws
 641
 642 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 643 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 644 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 645 foldl1 f ps
 646     | null ps   = errorEmptyList "foldl1"
 647     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 648
 649 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 650 -- and thus must be applied to non-empty 'ByteString's
 651 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 652 foldr1 f ps
 653     | null ps        = errorEmptyList "foldr1"
 654     | otherwise      = foldr f (last ps) (init ps)
 655
 656 -- ---------------------------------------------------------------------
 657 -- Special folds
 658
 659 -- | /O(n)/ Concatenate a list of ByteStrings.
 660 concat :: [ByteString] -> ByteString
 661 concat []     = empty
 662 concat [ps]   = ps
 663 concat xs     = inlinePerformIO $ do
 664     let start_size = 1024
 665     p <- mallocArray start_size
 666     f p 0 1024 xs
 667
 668     where f ptr len _ [] = do
 669                 ptr' <- reallocArray ptr (len+1)
 670                 poke (ptr' `plusPtr` len) (0::Word8)    -- XXX so CStrings work
 671                 fp   <- newForeignFreePtr ptr'
 672                 return $ PS fp 0 len
 673
 674           f ptr len to_go pss@(PS p s l:pss')
 675            | l <= to_go = do withForeignPtr p $ \pf ->
 676                                  memcpy (ptr `plusPtr` len)
 677                                           (pf `plusPtr` s) l
 678                              f ptr (len + l) (to_go - l) pss'
 679
 680            | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
 681                             ptr' <- reallocArray ptr new_total
 682                             f ptr' len (new_total - len) pss
 683
 684 -- | Map a function over a 'ByteString' and concatenate the results
 685 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 686 concatMap f = foldr (append . f) empty
 687
 688 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 689 -- any element of the 'ByteString' satisfies the predicate.
 690 any :: (Word8 -> Bool) -> ByteString -> Bool
 691 any _ (PS _ _ 0) = False
 692 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 693         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 694     where
 695         STRICT2(go)
 696         go p q | p == q    = return False
 697                | otherwise = do c <- peek p
 698                                 if f c then return True
 699                                        else go (p `plusPtr` 1) q
 700
 701 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 702 -- if all elements of the 'ByteString' satisfy the predicate.
 703 all :: (Word8 -> Bool) -> ByteString -> Bool
 704 all _ (PS _ _ 0) = True
 705 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 706         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 707     where
 708         STRICT2(go)
 709         go p q | p == q     = return True  -- end of list
 710                | otherwise  = do c <- peek p
 711                                  if f c
 712                                     then go (p `plusPtr` 1) q
 713                                     else return False
 714
 715 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 716 maximum :: ByteString -> Word8
 717 maximum xs@(PS x s l)
 718     | null xs   = errorEmptyList "maximum"
 719     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 720                     return $ c_maximum (p `plusPtr` s) l
 721 {-# INLINE maximum #-}
 722
 723 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 724 minimum :: ByteString -> Word8
 725 minimum xs@(PS x s l)
 726     | null xs   = errorEmptyList "minimum"
 727     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 728                     return $ c_minimum (p `plusPtr` s) l
 729 {-# INLINE minimum #-}
 730
 731 {-
 732 maximum xs@(PS x s l)
 733     | null xs   = errorEmptyList "maximum"
 734     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 735                         w <- peek p
 736                         maximum_ (p `plusPtr` s) 0 l w
 737
 738 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 739 STRICT4(maximum_)
 740 maximum_ ptr n m c
 741     | n >= m    = return c
 742     | otherwise = do w <- peekByteOff ptr n
 743                      maximum_ ptr (n+1) m (if w > c then w else c)
 744
 745 minimum xs@(PS x s l)
 746     | null xs   = errorEmptyList "minimum"
 747     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 748                         w <- peek p
 749                         minimum_ (p `plusPtr` s) 0 l w
 750
 751 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 752 STRICT4(minimum_)
 753 minimum_ ptr n m c
 754     | n >= m    = return c
 755     | otherwise = do w <- peekByteOff ptr n
 756                      minimum_ ptr (n+1) m (if w < c then w else c)
 757 -}
 758
 759 -- | /O(n)/ map Word8 functions, provided with the index at each position
 760 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 761 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 762     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 763   where
 764     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 765     STRICT4(go)
 766     go n f t p | f == p    = return ()
 767                | otherwise = do w <- peek f
 768                                 ((poke t) . k n) w
 769                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 770
 771 -- ---------------------------------------------------------------------
 772 -- Unfolds and replicates
 773
 774 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 775 -- the value of every element. The following holds:
 776 --
 777 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 778 --
 779 -- This implemenation uses @memset(3)@
 780 replicate :: Int -> Word8 -> ByteString
 781 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 782
 783 {-
 784 -- About 5x slower
 785 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 786     where
 787         STRICT2(go)
 788         go _   0 = return w
 789         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 790 -}
 791
 792 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 793 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 794 -- the element and returns 'Nothing' if it is done producing the
 795 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 796 -- prepending to the ByteString and @b@ is used as the next element in a
 797 -- recursive call.
 798 --
 799 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 800 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 801 -- final size of the ByteString as an argument. 'cons' can then be
 802 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 803 -- linear complexity. The depth of the recursion is limited to this
 804 -- size, but may be less. For lazy, infinite unfoldr, use
 805 -- 'Data.List.unfoldr' (from 'Data.List').
 806 --
 807 -- Examples:
 808 --
 809 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 810 --
 811 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 812 --
 813 -- > unfoldrN n == take n $ List.unfoldr
 814 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 815 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 816     where
 817         STRICT3(go)
 818         go q c n | n == i    = return n      -- stop if we reach `i'
 819                  | otherwise = case f c of
 820                                    Nothing        -> return n
 821                                    Just (a,new_c) -> do
 822                                         poke q a
 823                                         go (q `plusPtr` 1) new_c (n+1)
 824
 825 -- ---------------------------------------------------------------------
 826 -- Substrings
 827
 828 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 829 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 830 take :: Int -> ByteString -> ByteString
 831 take n ps@(PS x s l)
 832     | n < 0     = empty
 833     | n >= l    = ps
 834     | otherwise = PS x s n
 835 {-# INLINE take #-}
 836
 837 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 838 -- elements, or @[]@ if @n > 'length' xs@.
 839 drop  :: Int -> ByteString -> ByteString
 840 drop n ps@(PS x s l)
 841     | n <= 0    = ps
 842     | n >  l    = empty
 843     | otherwise = PS x (s+n) (l-n)
 844 {-# INLINE drop #-}
 845
 846 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 847 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 848 splitAt  n ps  = (take n ps, drop n ps)
 849 {-# INLINE splitAt #-}
 850
 851 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 852 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 853 -- satisfy @p@.
 854 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 855 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 856 {-# INLINE takeWhile #-}
 857
 858 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 859 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 860 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 861 {-# INLINE dropWhile #-}
 862
 863 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 864 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 865 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 866 {-# INLINE break #-}
 867
 868 -- | 'breakByte' breaks its ByteString argument at the first occurence
 869 -- of the specified byte. It is more efficient than 'break' as it is
 870 -- implemented with @memchr(3)@. I.e.
 871 --
 872 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 873 --
 874 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 875 breakByte c p = case elemIndex c p of
 876     Nothing -> (p,empty)
 877     Just n  -> (take n p, drop n p)
 878 {-# INLINE breakByte #-}
 879
 880 -- | 'spanByte' breaks its ByteString argument at the first
 881 -- occurence of a byte other than its argument. It is more efficient
 882 -- than 'span (==)'
 883 --
 884 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 885 --
 886 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 887 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 888     go (p `plusPtr` s) 0
 889   where
 890     STRICT2(go)
 891     go p i | i >= l    = return (ps, empty)
 892            | otherwise = do c' <- peekByteOff p i
 893                             if c /= c'
 894                                 then return (take i ps, drop i ps)
 895                                 else go p (i+1)
 896 {-# INLINE spanByte #-}
 897
 898 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 899 -- occurence of @w@. It behaves like 'break', except the delimiter is
 900 -- not returned, and @Nothing@ is returned if the delimiter is not in
 901 -- the ByteString. I.e.
 902 --
 903 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 904 --
 905 -- > breakFirst c xs ==
 906 -- > let (x,y) = break (== c) xs
 907 -- > in if null y then Nothing else Just (x, drop 1 y))
 908 --
 909 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 910 breakFirst c p = case elemIndex c p of
 911    Nothing -> Nothing
 912    Just n -> Just (take n p, drop (n+1) p)
 913 {-# INLINE breakFirst #-}
 914
 915 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 916 -- ByteString.
 917 --
 918 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 919 --
 920 -- and the following are equivalent:
 921 --
 922 -- > breakLast 'c' "abcdef"
 923 -- > let (x,y) = break (=='c') (reverse "abcdef")
 924 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 925 --
 926 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 927 breakLast c p = case elemIndexLast c p of
 928     Nothing -> Nothing
 929     Just n -> Just (take n p, drop (n+1) p)
 930 {-# INLINE breakLast #-}
 931
 932 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 933 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 934 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 935 span p ps = break (not . p) ps
 936 {-# INLINE span #-}
 937
 938 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 939 -- We have
 940 --
 941 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 942 --
 943 -- and
 944 --
 945 -- > spanEnd (not . isSpace) ps
 946 -- >    ==
 947 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 948 --
 949 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 950 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 951
 952 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 953 -- separators, where the predicate returns True for a separator element.
 954 -- The resulting components do not contain the separators.  Two adjacent
 955 -- separators result in an empty component in the output.  eg.
 956 --
 957 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 958 -- > splitWith (=='a') []        == []
 959 --
 960 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 961
 962 #if defined(__GLASGOW_HASKELL__)
 963 splitWith _pred (PS _  _   0) = []
 964 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
 965   where pred# c# = pred_ (W8# c#)
 966
 967         splitWith' pred' off' len' fp' = withPtr fp $ \p ->
 968             splitLoop pred' p 0 off' len' fp'
 969
 970         splitLoop :: (Word# -> Bool)
 971                   -> Ptr Word8
 972                   -> Int -> Int -> Int
 973                   -> ForeignPtr Word8
 974                   -> IO [ByteString]
 975
 976         splitLoop pred' p idx' off' len' fp'
 977             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
 978             | idx' >= len'  = return [PS fp' off' idx']
 979             | otherwise = do
 980                 w <- peekElemOff p (off'+idx')
 981                 if pred' (case w of W8# w# -> w#)
 982                    then return (PS fp' off' idx' :
 983                               splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
 984                    else splitLoop pred' p (idx'+1) off' len' fp'
 985 {-# INLINE splitWith #-}
 986
 987 #else
 988 splitWith _ (PS _ _ 0) = []
 989 splitWith p ps = splitWith' p ps
 990     where
 991         STRICT2(splitWith')
 992         splitWith' q qs = if null rest then [chunk]
 993                                        else chunk : splitWith' q (unsafeTail rest)
 994             where (chunk,rest) = break q qs
 995 #endif
 996
 997 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 998 -- argument, consuming the delimiter. I.e.
 999 --
1000 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1001 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1002 -- > split 'x'  "x"          == ["",""]
1003 --
1004 -- and
1005 --
1006 -- > join [c] . split c == id
1007 -- > split == splitWith . (==)
1008 --
1009 -- As for all splitting functions in this library, this function does
1010 -- not copy the substrings, it just constructs new 'ByteStrings' that
1011 -- are slices of the original.
1012 --
1013 split :: Word8 -> ByteString -> [ByteString]
1014 split _ (PS _ _ 0) = []
1015 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1016     let ptr = p `plusPtr` s
1017
1018         STRICT1(loop)
1019         loop n = do
1020             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1021             if q == nullPtr
1022                 then return [PS x (s+n) (l-n)]
1023                 else do let i = q `minusPtr` ptr
1024                         ls <- loop (i+1)
1025                         return $! PS x (s+n) (i-n) : ls
1026     loop 0
1027 {-# INLINE split #-}
1028
1029 {-
1030 -- slower. but stays inside Haskell.
1031 split _ (PS _  _   0) = []
1032 split (W8# w#) (PS fp off len) = splitWith' off len fp
1033     where
1034         splitWith' off' len' fp' = withPtr fp $ \p ->
1035             splitLoop p 0 off' len' fp'
1036
1037         splitLoop :: Ptr Word8
1038                   -> Int -> Int -> Int
1039                   -> ForeignPtr Word8
1040                   -> IO [ByteString]
1041
1042         STRICT5(splitLoop)
1043         splitLoop p idx' off' len' fp'
1044             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1045             | idx' >= len'  = return [PS fp' off' idx']
1046             | otherwise = do
1047                 (W8# x#) <- peekElemOff p (off'+idx')
1048                 if word2Int# w# ==# word2Int# x#
1049                    then return (PS fp' off' idx' :
1050                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1051                    else splitLoop p (idx'+1) off' len' fp'
1052 -}
1053
1054 -- | Like 'splitWith', except that sequences of adjacent separators are
1055 -- treated as a single separator. eg.
1056 --
1057 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1058 --
1059 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1060 tokens f = P.filter (not.null) . splitWith f
1061
1062 -- | The 'group' function takes a ByteString and returns a list of
1063 -- ByteStrings such that the concatenation of the result is equal to the
1064 -- argument.  Moreover, each sublist in the result contains only equal
1065 -- elements.  For example,
1066 --
1067 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1068 --
1069 -- It is a special case of 'groupBy', which allows the programmer to
1070 -- supply their own equality test. It is about 40% faster than
1071 -- /groupBy (==)/
1072 group :: ByteString -> [ByteString]
1073 group xs
1074     | null xs   = []
1075     | otherwise = ys : group zs
1076     where
1077         (ys, zs) = spanByte (unsafeHead xs) xs
1078
1079 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1080 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1081 groupBy k xs
1082     | null xs   = []
1083     | otherwise = take n xs : groupBy k (drop n xs)
1084     where
1085         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1086
1087 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1088 -- 'ByteString's and concatenates the list after interspersing the first
1089 -- argument between each element of the list.
1090 join :: ByteString -> [ByteString] -> ByteString
1091 join filler pss = concat (splice pss)
1092     where
1093         splice []  = []
1094         splice [x] = [x]
1095         splice (x:y:xs) = x:filler:splice (y:xs)
1096
1097 --
1098 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1099 -- with a char. Around 4 times faster than the generalised join.
1100 --
1101 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1102 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1103     withForeignPtr ffp $ \fp ->
1104     withForeignPtr fgp $ \gp -> do
1105         memcpy ptr (fp `plusPtr` s) l
1106         poke (ptr `plusPtr` l) c
1107         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1108     where
1109       len = length f + length g + 1
1110 {-# INLINE joinWithByte #-}
1111
1112 -- ---------------------------------------------------------------------
1113 -- Indexing ByteStrings
1114
1115 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1116 index :: ByteString -> Int -> Word8
1117 index ps n
1118     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1119     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1120                                 ++ ", length = " ++ show (length ps)
1121     | otherwise      = ps `unsafeIndex` n
1122 {-# INLINE index #-}
1123
1124 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1125 -- element in the given 'ByteString' which is equal to the query
1126 -- element, or 'Nothing' if there is no such element.
1127 -- This implementation uses memchr(3).
1128 elemIndex :: Word8 -> ByteString -> Maybe Int
1129 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1130     let p' = p `plusPtr` s
1131         q  = memchr p' c (fromIntegral l)
1132     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1133 {-# INLINE elemIndex #-}
1134
1135 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1136 -- element in the given 'ByteString' which is equal to the query
1137 -- element, or 'Nothing' if there is no such element. The following
1138 -- holds:
1139 --
1140 -- > elemIndexLast c xs ==
1141 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1142 --
1143 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1144 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1145     go (p `plusPtr` s) (l-1)
1146   where
1147     STRICT2(go)
1148     go p i | i < 0     = return Nothing
1149            | otherwise = do ch' <- peekByteOff p i
1150                             if ch == ch'
1151                                 then return $ Just i
1152                                 else go p (i-1)
1153 {-# INLINE elemIndexLast #-}
1154
1155 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1156 -- the indices of all elements equal to the query element, in ascending order.
1157 -- This implementation uses memchr(3).
1158 elemIndices :: Word8 -> ByteString -> [Int]
1159 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1160     let ptr = p `plusPtr` s
1161
1162         STRICT1(loop)
1163         loop n = do
1164                 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1165                 if q == nullPtr
1166                     then return []
1167                     else do let i = q `minusPtr` ptr
1168                             ls <- loop (i+1)
1169                             return $! i:ls
1170     loop 0
1171
1172 {-
1173 -- much slower
1174 elemIndices :: Word8 -> ByteString -> [Int]
1175 elemIndices c ps = loop 0 ps
1176    where STRICT2(loop)
1177          loop _ ps' | null ps'            = []
1178          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1179                     | otherwise           = loop (n+1) (unsafeTail ps')
1180 -}
1181
1182 -- | count returns the number of times its argument appears in the ByteString
1183 --
1184 -- > count = length . elemIndices
1185 --
1186 -- But more efficiently than using length on the intermediate list.
1187 count :: Word8 -> ByteString -> Int
1188 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1189     return $ c_count (p `plusPtr` s) (fromIntegral m) w
1190 {-# INLINE count #-}
1191
1192 {-
1193 --
1194 -- around 30% slower
1195 --
1196 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1197      go (p `plusPtr` s) (fromIntegral m) 0
1198     where
1199         go :: Ptr Word8 -> CSize -> Int -> IO Int
1200         STRICT3(go)
1201         go p l i = do
1202             let q = memchr p w l
1203             if q == nullPtr
1204                 then return i
1205                 else do let k = fromIntegral $ q `minusPtr` p
1206                         go (q `plusPtr` 1) (l-k-1) (i+1)
1207 -}
1208
1209 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1210 -- returns the index of the first element in the ByteString
1211 -- satisfying the predicate.
1212 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1213 findIndex = (listToMaybe .) . findIndices
1214
1215 -- | The 'findIndices' function extends 'findIndex', by returning the
1216 -- indices of all elements satisfying the predicate, in ascending order.
1217 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1218 findIndices p ps = loop 0 ps
1219    where
1220      STRICT2(loop)
1221      loop _ qs | null qs           = []
1222      loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1223                | otherwise         =     loop (n+1) (unsafeTail qs)
1224
1225 -- ---------------------------------------------------------------------
1226 -- Searching ByteStrings
1227
1228 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1229 elem :: Word8 -> ByteString -> Bool
1230 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1231 {-# INLINE elem #-}
1232
1233 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1234 notElem :: Word8 -> ByteString -> Bool
1235 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1236 {-# INLINE notElem #-}
1237
1238 --
1239 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1240 -- case of filtering a single byte. It is more efficient to use
1241 -- /filterByte/ in this case.
1242 --
1243 -- > filterByte == filter . (==)
1244 --
1245 -- filterByte is around 10x faster, and uses much less space, than its
1246 -- filter equivalent
1247 filterByte :: Word8 -> ByteString -> ByteString
1248 filterByte w ps = replicate (count w ps) w
1249
1250 {-
1251 -- slower than the replicate version
1252
1253 filterByte ch ps@(PS x s l)
1254     | null ps   = ps
1255     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1256         t <- go (f `plusPtr` s) p l
1257         return (t `minusPtr` p) -- actual length
1258     where
1259         STRICT3(go)
1260         go _ t 0 = return t
1261         go f t e = do w <- peek f
1262                       if w == ch
1263                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1264                         else             go (f `plusPtr` 1) t               (e-1)
1265 -}
1266
1267 --
1268 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1269 -- case of filtering a single byte out of a list. It is more efficient
1270 -- to use /filterNotByte/ in this case.
1271 --
1272 -- > filterNotByte == filter . (/=)
1273 --
1274 -- filterNotByte is around 3x faster, and uses much less space, than its
1275 -- filter equivalent
1276 filterNotByte :: Word8 -> ByteString -> ByteString
1277 filterNotByte ch ps@(PS x s l)
1278     | null ps   = ps
1279     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1280         t <- go (f `plusPtr` s) p l
1281         return (t `minusPtr` p) -- actual length
1282     where
1283         STRICT3(go)
1284         go _ t 0 = return t
1285         go f t e = do w <- peek f
1286                       if w /= ch
1287                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1288                         else             go (f `plusPtr` 1) t               (e-1)
1289
1290 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1291 -- returns a ByteString containing those characters that satisfy the
1292 -- predicate.
1293 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1294 filter k ps@(PS x s l)
1295     | null ps   = ps
1296     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1297         t <- go (f `plusPtr` s) p l
1298         return (t `minusPtr` p) -- actual length
1299     where
1300         STRICT3(go)
1301         go _ t 0 = return t
1302         go f t e = do w <- peek f
1303                       if k w
1304                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1305                         else             go (f `plusPtr` 1) t               (e - 1)
1306
1307 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1308
1309 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1310 -- and returns the first element in matching the predicate, or 'Nothing'
1311 -- if there is no such element.
1312 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1313 find p ps = case filter p ps of
1314     q | null q -> Nothing
1315       | otherwise -> Just (unsafeHead q)
1316
1317 -- ---------------------------------------------------------------------
1318 -- Searching for substrings
1319
1320 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1321 -- iff the first is a prefix of the second.
1322 isPrefixOf :: ByteString -> ByteString -> Bool
1323 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1324     | l1 == 0   = True
1325     | l2 < l1   = False
1326     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1327         withForeignPtr x2 $ \p2 -> do
1328             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1329             return (i == 0)
1330
1331 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1332 -- iff the first is a suffix of the second.
1333 --
1334 -- The following holds:
1335 --
1336 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1337 --
1338 -- However, the real implemenation uses memcmp to compare the end of the
1339 -- string only, with no reverse required..
1340 isSuffixOf :: ByteString -> ByteString -> Bool
1341 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1342     | l1 == 0   = True
1343     | l2 < l1   = False
1344     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1345         withForeignPtr x2 $ \p2 -> do
1346             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1347             return (i == 0)
1348
1349 -- | Check whether one string is a substring of another. @isSubstringOf
1350 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1351 isSubstringOf :: ByteString -- ^ String to search for.
1352               -> ByteString -- ^ String to search in.
1353               -> Bool
1354 isSubstringOf p s = not $ P.null $ findSubstrings p s
1355
1356 -- | Get the first index of a substring in another string,
1357 --   or 'Nothing' if the string is not found.
1358 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1359 findSubstring :: ByteString -- ^ String to search for.
1360               -> ByteString -- ^ String to seach in.
1361               -> Maybe Int
1362 findSubstring = (listToMaybe .) . findSubstrings
1363
1364 -- | Find the indexes of all (possibly overlapping) occurances of a
1365 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1366 -- string matching algorithm.
1367 findSubstrings :: ByteString -- ^ String to search for.
1368                -> ByteString -- ^ String to seach in.
1369                -> [Int]
1370
1371 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1372   where
1373       patc x = pat `unsafeIndex` x
1374       strc x = str `unsafeIndex` x
1375
1376       -- maybe we should make kmpNext a UArray before using it in search?
1377       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1378       kmpNextL p _ | null p = []
1379       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1380                          ps = unsafeTail p
1381                          x = if not (null ps) && unsafeHead ps == patc j'
1382                                 then kmpNext Array.! j' else j'
1383                         in x:kmpNextL ps j'
1384       search i j = match ++ rest -- i: position in string, j: position in pattern
1385         where match = if j == m then [(i - j)] else []
1386               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1387       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1388                | otherwise = j
1389
1390 -- ---------------------------------------------------------------------
1391 -- Zipping
1392
1393 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1394 -- corresponding pairs of bytes. If one input ByteString is short,
1395 -- excess elements of the longer ByteString are discarded. This is
1396 -- equivalent to a pair of 'unpack' operations.
1397 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1398 zip ps qs
1399     | null ps || null qs = []
1400     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1401
1402 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1403 -- the first argument, instead of a tupling function.  For example,
1404 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1405 -- corresponding sums.
1406 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1407 zipWith f ps qs
1408     | null ps || null qs = []
1409     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1410
1411 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1412 -- ByteStrings. Note that this performs two 'pack' operations.
1413 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1414 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1415 {-# INLINE unzip #-}
1416
1417 -- ---------------------------------------------------------------------
1418 -- Special lists
1419
1420 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1421 inits :: ByteString -> [ByteString]
1422 inits (PS x s l) = [PS x s n | n <- [0..l]]
1423
1424 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1425 tails :: ByteString -> [ByteString]
1426 tails p | null p    = [empty]
1427         | otherwise = p : tails (unsafeTail p)
1428
1429 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1430
1431 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1432 elems :: ByteString -> [ByteString]
1433 elems (PS _ _ 0) = []
1434 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1435 {-# INLINE elems #-}
1436
1437 -- ---------------------------------------------------------------------
1438 -- ** Ordered 'ByteString's
1439
1440 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1441 sort :: ByteString -> ByteString
1442 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1443
1444     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1445     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1446
1447     let STRICT2(go)
1448         go 256 _   = return ()
1449         go i   ptr = do n <- peekElemOff arr i
1450                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1451                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1452     go 0 p
1453
1454 -- "countEach counts str l" counts the number of occurences of each Word8 in
1455 -- str, and stores the result in counts.
1456 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1457 STRICT3(countEach)
1458 countEach counts str l = go 0
1459  where
1460     STRICT1(go)
1461     go i | i == l    = return ()
1462          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1463                           x <- peekElemOff counts k
1464                           pokeElemOff counts k (x + 1)
1465                           go (i + 1)
1466
1467 {-
1468 sort :: ByteString -> ByteString
1469 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1470         memcpy p (f `plusPtr` s) l
1471         c_qsort p l -- inplace
1472 -}
1473
1474 {-
1475 sort = pack . List.sort . unpack
1476 -}
1477
1478 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1479 --
1480 -- Try some linear sorts: radix, counting
1481 -- Or mergesort.
1482 --
1483 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1484 -- sortBy f ps = undefined
1485
1486 -- ---------------------------------------------------------------------
1487 --
1488 -- Extensions to the basic interface
1489 --
1490
1491 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1492 -- check for the empty case, so there is an obligation on the programmer
1493 -- to provide a proof that the ByteString is non-empty.
1494 unsafeHead :: ByteString -> Word8
1495 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1496 {-# INLINE unsafeHead #-}
1497
1498 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1499 -- check for the empty case. As with 'unsafeHead', the programmer must
1500 -- provide a separate proof that the ByteString is non-empty.
1501 unsafeTail :: ByteString -> ByteString
1502 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1503 {-# INLINE unsafeTail #-}
1504
1505 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1506 -- This omits the bounds check, which means there is an accompanying
1507 -- obligation on the programmer to ensure the bounds are checked in some
1508 -- other way.
1509 unsafeIndex :: ByteString -> Int -> Word8
1510 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1511 {-# INLINE unsafeIndex #-}
1512
1513 -- ---------------------------------------------------------------------
1514 -- Low level constructors
1515
1516 #if defined(__GLASGOW_HASKELL__)
1517 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1518 -- Addr\# (an arbitrary machine address assumed to point outside the
1519 -- garbage-collected heap) into a @ByteString@. A much faster way to
1520 -- create an Addr\# is with an unboxed string literal, than to pack a
1521 -- boxed string. A unboxed string literal is compiled to a static @char
1522 -- []@ by GHC. Establishing the length of the string requires a call to
1523 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1524 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1525 -- if you know the length of the string statically.
1526 --
1527 -- An example:
1528 --
1529 -- > literalFS = packAddress "literal"#
1530 --
1531 packAddress :: Addr# -> ByteString
1532 packAddress addr# = inlinePerformIO $ do
1533     p <- newForeignPtr_ cstr
1534     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1535   where
1536     cstr = Ptr addr#
1537 {-# INLINE packAddress #-}
1538
1539 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1540 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1541 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1542 -- 'Addr\#' to the string, and the length of the string. Make sure the
1543 -- length is correct, otherwise use the safer 'packAddress' (where the
1544 -- length will be calculated once at runtime).
1545 unsafePackAddress :: Int -> Addr# -> ByteString
1546 unsafePackAddress len addr# = inlinePerformIO $ do
1547     p <- newForeignPtr_ cstr
1548     return $ PS p 0 len
1549     where cstr = Ptr addr#
1550
1551 #endif
1552
1553 -- | /O(1)/ Build a ByteString from a ForeignPtr
1554 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1555 fromForeignPtr fp l = PS fp 0 l
1556
1557 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1558 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1559 toForeignPtr (PS ps s l) = (ps, s, l)
1560
1561 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1562 -- current 'ByteString' from any larger string it was created from, as
1563 -- an 'Int'.
1564 skipIndex :: ByteString -> Int
1565 skipIndex (PS _ s _) = s
1566 {-# INLINE skipIndex #-}
1567
1568 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1569 -- finalizer associated to it. The ByteString length is calculated using
1570 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1571 packCString :: CString -> ByteString
1572 packCString cstr = inlinePerformIO $ do
1573     fp <- newForeignPtr_ (castPtr cstr)
1574     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1575
1576 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1577 -- have /no/ finalizer associated with it. This operation has /O(1)/
1578 -- complexity as we already know the final size, so no /strlen(3)/ is
1579 -- required.
1580 packCStringLen :: CStringLen -> ByteString
1581 packCStringLen (ptr,len) = inlinePerformIO $ do
1582     fp <- newForeignPtr_ (castPtr ptr)
1583     return $ PS fp 0 (fromIntegral len)
1584
1585 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1586 -- have a @free(3)@ finalizer associated to it.
1587 packMallocCString :: CString -> ByteString
1588 packMallocCString cstr = inlinePerformIO $ do
1589     fp <- newForeignFreePtr (castPtr cstr)
1590     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1591
1592 #if defined(__GLASGOW_HASKELL__)
1593 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1594 -- length, and an IO action representing a finalizer. This function is
1595 -- not available on Hugs.
1596 --
1597 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1598 packCStringFinalizer p l f = do
1599     fp <- FC.newForeignPtr p f
1600     return $ PS fp 0 l
1601
1602 -- | Explicitly run the finaliser associated with a 'ByteString'.
1603 -- Further references to this value may generate invalid memory
1604 -- references. This operation is unsafe, as there may be other
1605 -- 'ByteStrings' referring to the same underlying pages. If you use
1606 -- this, you need to have a proof of some kind that all 'ByteString's
1607 -- ever generated from the underlying byte array are no longer live.
1608 unsafeFinalize :: ByteString -> IO ()
1609 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1610
1611 #endif
1612
1613 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1614 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1615 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1616 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1617     where
1618       alloc = withForeignPtr ps $ \p -> do
1619                 buf <- c_malloc (fromIntegral l+1)
1620                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1621                 poke (buf `plusPtr` l) (0::Word8)
1622                 return $ castPtr buf
1623
1624 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1625 -- Warning: modifying the @CString@ will affect the @ByteString@.
1626 -- Why is this function unsafe? It relies on the null byte at the end of
1627 -- the ByteString to be there. This is /not/ the case if your ByteString
1628 -- has been spliced from a larger string (i.e. with take or drop).
1629 -- Unless you can guarantee the null byte, you should use the safe
1630 -- version, which will copy the string first.
1631 --
1632 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1633 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1634
1635 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1636 --   This is mainly useful to allow the rest of the data pointed
1637 --   to by the 'ByteString' to be garbage collected, for example
1638 --   if a large string has been read in, and only a small part of it
1639 --   is needed in the rest of the program.
1640 copy :: ByteString -> ByteString
1641 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1642
1643 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1644 -- CString is going to be deallocated from C land.
1645 copyCString :: CString -> ByteString
1646 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1647
1648 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1649 copyCStringLen :: CStringLen -> ByteString
1650 copyCStringLen (cstr, len) = inlinePerformIO $ do
1651     fp <- mallocForeignPtrArray (len+1)
1652     withForeignPtr fp $ \p -> do
1653         memcpy p (castPtr cstr) len
1654         poke (p `plusPtr` len) (0 :: Word8)
1655     return $! PS fp 0 len
1656
1657 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1658 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1659 -- This is analogous to unsafeUseAsCString, and comes with the same
1660 -- safety requirements.
1661 --
1662 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1663 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1664
1665 -- | Given the maximum size needed and a function to make the contents
1666 -- of a ByteString, generate makes the 'ByteString'. The generating
1667 -- function is required to return the actual final size (<= the maximum
1668 -- size), and the resulting byte array is realloced to this size.  The
1669 -- string is padded at the end with a null byte.
1670 --
1671 -- generate is the main mechanism for creating custom, efficient
1672 -- ByteString functions, using Haskell or C functions to fill the space.
1673 --
1674 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1675 generate i f = do
1676     p <- mallocArray i
1677     i' <- f p
1678     p' <- reallocArray p (i'+1)
1679     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1680     fp <- newForeignFreePtr p'
1681     return $ PS fp 0 i'
1682
1683 -- ---------------------------------------------------------------------
1684 -- line IO
1685
1686 #if defined(__GLASGOW_HASKELL__)
1687
1688 -- | getLine, read a line from stdin.
1689 getLine :: IO ByteString
1690 getLine = hGetLine stdin
1691
1692 -- | hGetLine. read a ByteString from a handle
1693 hGetLine :: Handle -> IO ByteString
1694 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1695     case haBufferMode handle_ of
1696        NoBuffering -> error "no buffering"
1697        _other      -> hGetLineBuffered handle_
1698
1699  where
1700     hGetLineBuffered handle_ = do
1701         let ref = haBuffer handle_
1702         buf <- readIORef ref
1703         hGetLineBufferedLoop handle_ ref buf 0 []
1704
1705     hGetLineBufferedLoop handle_ ref
1706             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1707         len `seq` do
1708         off <- findEOL r w raw
1709         let new_len = len + off - r
1710         xs <- mkPS raw r off
1711
1712       -- if eol == True, then off is the offset of the '\n'
1713       -- otherwise off == w and the buffer is now empty.
1714         if off /= w
1715             then do if (w == off + 1)
1716                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1717                             else writeIORef ref buf{ bufRPtr = off + 1 }
1718                     mkBigPS new_len (xs:xss)
1719             else do
1720                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1721                                     buf{ bufWPtr=0, bufRPtr=0 }
1722                  case maybe_buf of
1723                     -- Nothing indicates we caught an EOF, and we may have a
1724                     -- partial line to return.
1725                     Nothing -> do
1726                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1727                          if new_len > 0
1728                             then mkBigPS new_len (xs:xss)
1729                             else ioe_EOF
1730                     Just new_buf ->
1731                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1732
1733     -- find the end-of-line character, if there is one
1734     findEOL r w raw
1735         | r == w = return w
1736         | otherwise =  do
1737             (c,r') <- readCharFromBuffer raw r
1738             if c == '\n'
1739                 then return r -- NB. not r': don't include the '\n'
1740                 else findEOL r' w raw
1741
1742     maybeFillReadBuffer fd is_line is_stream buf = catch
1743         (do buf' <- fillReadBuffer fd is_line is_stream buf
1744             return (Just buf'))
1745         (\e -> if isEOFError e then return Nothing else ioError e)
1746
1747 -- TODO, rewrite to use normal memcpy
1748 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1749 mkPS buf start end = do
1750     let len = end - start
1751     fp <- mallocByteString len
1752     withForeignPtr fp $ \p -> do
1753         memcpy_ptr_baoff p buf start (fromIntegral len)
1754         return (PS fp 0 len)
1755
1756 mkBigPS :: Int -> [ByteString] -> IO ByteString
1757 mkBigPS _ [ps] = return ps
1758 mkBigPS _ pss = return $! concat (P.reverse pss)
1759
1760 #endif
1761
1762 -- ---------------------------------------------------------------------
1763 -- Block IO
1764
1765 -- | Outputs a 'ByteString' to the specified 'Handle'.
1766 hPut :: Handle -> ByteString -> IO ()
1767 hPut _ (PS _ _ 0)  = return ()
1768 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1769 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1770
1771 -- | Write a ByteString to stdout
1772 putStr :: ByteString -> IO ()
1773 putStr = hPut stdout
1774
1775 -- | Write a ByteString to stdout, appending a newline byte
1776 putStrLn :: ByteString -> IO ()
1777 putStrLn ps = hPut stdout ps >> hPut stdout nl
1778     where nl = packByte 0x0a
1779
1780 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1781 -- is far more efficient than reading the characters into a 'String'
1782 -- and then using 'pack'.
1783 hGet :: Handle -> Int -> IO ByteString
1784 hGet _ 0 = return empty
1785 hGet h i = do fp <- mallocByteString i
1786               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1787               return $ PS fp 0 l
1788
1789 #if defined(__GLASGOW_HASKELL__)
1790 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1791 -- waiting for data to become available, instead it returns only whatever data
1792 -- is available.
1793 hGetNonBlocking :: Handle -> Int -> IO ByteString
1794 hGetNonBlocking _ 0 = return empty
1795 hGetNonBlocking h i = do
1796     fp <- mallocByteString i
1797     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1798     return $ PS fp 0 l
1799 #endif
1800
1801 -- | Read entire handle contents into a 'ByteString'.
1802 --
1803 -- As with 'hGet', the string representation in the file is assumed to
1804 -- be ISO-8859-1.
1805 --
1806 hGetContents :: Handle -> IO ByteString
1807 hGetContents h = do
1808     let start_size = 1024
1809     p <- mallocArray start_size
1810     i <- hGetBuf h p start_size
1811     if i < start_size
1812         then do p' <- reallocArray p i
1813                 fp <- newForeignFreePtr p'
1814                 return $ PS fp 0 i
1815         else f p start_size
1816     where
1817         f p s = do
1818             let s' = 2 * s
1819             p' <- reallocArray p s'
1820             i  <- hGetBuf h (p' `plusPtr` s) s
1821             if i < s
1822                 then do let i' = s + i
1823                         p'' <- reallocArray p' i'
1824                         fp  <- newForeignFreePtr p''
1825                         return $ PS fp 0 i'
1826                 else f p' s'
1827
1828 -- | getContents. Equivalent to hGetContents stdin
1829 getContents :: IO ByteString
1830 getContents = hGetContents stdin
1831
1832 -- | Read an entire file directly into a 'ByteString'.  This is far more
1833 -- efficient than reading the characters into a 'String' and then using
1834 -- 'pack'.  It also may be more efficient than opening the file and
1835 -- reading it using hGet.
1836 readFile :: FilePath -> IO ByteString
1837 readFile f = do
1838     h <- openBinaryFile f ReadMode
1839     l <- hFileSize h
1840     s <- hGet h $ fromIntegral l
1841     hClose h
1842     return s
1843
1844 -- | Write a 'ByteString' to a file.
1845 writeFile :: FilePath -> ByteString -> IO ()
1846 writeFile f ps = do
1847     h <- openBinaryFile f WriteMode
1848     hPut h ps
1849     hClose h
1850
1851 {-
1852 --
1853 -- Disable until we can move it into a portable .hsc file
1854 --
1855
1856 -- | Like readFile, this reads an entire file directly into a
1857 -- 'ByteString', but it is even more efficient.  It involves directly
1858 -- mapping the file to memory.  This has the advantage that the contents
1859 -- of the file never need to be copied.  Also, under memory pressure the
1860 -- page may simply be discarded, while in the case of readFile it would
1861 -- need to be written to swap.  If you read many small files, mmapFile
1862 -- will be less memory-efficient than readFile, since each mmapFile
1863 -- takes up a separate page of memory.  Also, you can run into bus
1864 -- errors if the file is modified.  As with 'readFile', the string
1865 -- representation in the file is assumed to be ISO-8859-1.
1866 --
1867 -- On systems without mmap, this is the same as a readFile.
1868 --
1869 mmapFile :: FilePath -> IO ByteString
1870 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1871
1872 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1873 mmap f = do
1874     h <- openBinaryFile f ReadMode
1875     l <- fromIntegral `fmap` hFileSize h
1876     -- Don't bother mmaping small files because each mmapped file takes up
1877     -- at least one full VM block.
1878     if l < mmap_limit
1879        then do thefp <- mallocByteString l
1880                withForeignPtr thefp $ \p-> hGetBuf h p l
1881                hClose h
1882                return (thefp, l)
1883        else do
1884                -- unix only :(
1885                fd <- fromIntegral `fmap` handleToFd h
1886                p  <- my_mmap l fd
1887                fp <- if p == nullPtr
1888                      then do thefp <- mallocByteString l
1889                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1890                              return thefp
1891                      else do
1892                           -- The munmap leads to crashes on OpenBSD.
1893                           -- maybe there's a use after unmap in there somewhere?
1894 #if !defined(__OpenBSD__)
1895                              let unmap = c_munmap p l >> return ()
1896 #else
1897                              let unmap = return ()
1898 #endif
1899                              fp <- FC.newForeignPtr p unmap
1900                              return fp
1901                c_close fd
1902                hClose h
1903                return (fp, l)
1904     where mmap_limit = 16*1024
1905 -}
1906
1907 #if defined(__GLASGOW_HASKELL__)
1908 --
1909 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1910 --
1911 getArgs :: IO [ByteString]
1912 getArgs =
1913   alloca $ \ p_argc ->
1914   alloca $ \ p_argv -> do
1915     getProgArgv p_argc p_argv
1916     p    <- fromIntegral `fmap` peek p_argc
1917     argv <- peek p_argv
1918     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1919 #endif
1920
1921 -- ---------------------------------------------------------------------
1922 -- Internal utilities
1923
1924 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1925 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1926 -- convenience for ByteString construction.
1927 w2c :: Word8 -> Char
1928 #if !defined(__GLASGOW_HASKELL__)
1929 w2c = chr . fromIntegral
1930 #else
1931 w2c = unsafeChr . fromIntegral
1932 #endif
1933 {-# INLINE w2c #-}
1934
1935 c2w :: Char -> Word8
1936 c2w = fromIntegral . ord
1937 {-# INLINE c2w #-}
1938
1939 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1940 -- is padded with a null byte.
1941 mallocByteString :: Int -> IO (ForeignPtr Word8)
1942 mallocByteString l = do
1943     fp <- mallocForeignPtrArray (l+1)
1944     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1945     return fp
1946
1947 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1948 -- argument gives the final size of the ByteString. Unlike 'generate'
1949 -- the ByteString is not reallocated if the final size is less than the
1950 -- estimated size. Also, unlike 'generate' ByteString's created this way
1951 -- are managed on the Haskell heap.
1952 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1953 create l write_ptr = inlinePerformIO $ do
1954     fp <- mallocByteString (l+1)
1955     withForeignPtr fp $ \p -> write_ptr p
1956     return $ PS fp 0 l
1957 {-# INLINE create #-}
1958
1959 -- | Perform an operation with a temporary ByteString
1960 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1961 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1962 {-# INLINE withPtr #-}
1963
1964 -- Common up near identical calls to `error' to reduce the number
1965 -- constant strings created when compiled:
1966 errorEmptyList :: String -> a
1967 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1968 {-# INLINE errorEmptyList #-}
1969
1970 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1971 -- of the string if no element is found, rather than Nothing.
1972 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1973 STRICT2(findIndexOrEnd)
1974 findIndexOrEnd f ps
1975     | null ps           = 0
1976     | f (unsafeHead ps) = 0
1977     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
1978 {-# INLINE findIndexOrEnd #-}
1979
1980 -- Find from the end of the string using predicate
1981 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1982 STRICT2(findFromEndUntil)
1983 findFromEndUntil f ps@(PS x s l) =
1984     if null ps then 0
1985     else if f (last ps) then l
1986          else findFromEndUntil f (PS x s (l-1))
1987
1988 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1989 -- it exposes lots of things to further inlining
1990 --
1991 {-# INLINE inlinePerformIO #-}
1992 inlinePerformIO :: IO a -> a
1993 #if defined(__GLASGOW_HASKELL__)
1994 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1995 #else
1996 inlinePerformIO = unsafePerformIO
1997 #endif
1998
1999 {-# INLINE newForeignFreePtr #-}
2000 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2001 #if defined(__GLASGOW_HASKELL__)
2002 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2003 #else
2004 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2005 #endif
2006
2007 -- ---------------------------------------------------------------------
2008 --
2009 -- Standard C functions
2010 --
2011
2012 foreign import ccall unsafe "string.h strlen" c_strlen
2013     :: CString -> CInt
2014
2015 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2016     :: CInt -> IO (Ptr Word8)
2017
2018 foreign import ccall unsafe "static stdlib.h free" c_free
2019     :: Ptr Word8 -> IO ()
2020
2021 #if !defined(__GLASGOW_HASKELL__)
2022 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2023     :: FunPtr (Ptr Word8 -> IO ())
2024 #endif
2025
2026 foreign import ccall unsafe "string.h memset" memset
2027     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2028
2029 foreign import ccall unsafe "string.h memchr" memchr
2030     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2031
2032 foreign import ccall unsafe "string.h memcmp" memcmp
2033     :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2034
2035 foreign import ccall unsafe "string.h memcpy" memcpy
2036     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2037
2038 -- ---------------------------------------------------------------------
2039 --
2040 -- Uses our C code
2041 --
2042
2043 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2044     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2045
2046 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2047     :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2048
2049 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2050     :: Ptr Word8 -> Int -> Word8
2051
2052 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2053     :: Ptr Word8 -> Int -> Word8
2054
2055 foreign import ccall unsafe "static fpstring.h count" c_count
2056     :: Ptr Word8 -> Int -> Word8 -> Int
2057
2058 -- ---------------------------------------------------------------------
2059 -- MMap
2060
2061 {-
2062 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2063     :: Int -> Int -> IO (Ptr Word8)
2064
2065 foreign import ccall unsafe "static unistd.h close" c_close
2066     :: Int -> IO Int
2067
2068 #  if !defined(__OpenBSD__)
2069 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2070     :: Ptr Word8 -> Int -> IO Int
2071 #  endif
2072 -}
2073
2074 -- ---------------------------------------------------------------------
2075 -- Internal GHC Haskell magic
2076
2077 #if defined(__GLASGOW_HASKELL__)
2078 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2079     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2080
2081 foreign import ccall unsafe "__hscore_memcpy_src_off"
2082    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2083 #endif