Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with Prelude functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         packByte,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: ByteString -> Word8 -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57         head,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         last,                   -- :: ByteString -> Word8
  60         init,                   -- :: ByteString -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62
  63         -- * Special ByteStrings
  64         inits,                  -- :: ByteString -> [ByteString]
  65         tails,                  -- :: ByteString -> [ByteString]
  66         elems,                  -- :: ByteString -> [ByteString]
  67
  68         -- * Transformating ByteStrings
  69         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  70         reverse,                -- :: ByteString -> ByteString
  71         intersperse,            -- :: Word8 -> ByteString -> ByteString
  72         transpose,              -- :: [ByteString] -> [ByteString]
  73         map',                   -- :: (Word8 -> Word8) -> ByteString -> ByteString
  74
  75         -- * Reducing 'ByteString's
  76         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  77         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  78         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  80         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  81
  82         -- ** Special folds
  83         concat,                 -- :: [ByteString] -> ByteString
  84         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  85         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  86         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  87         maximum,                -- :: ByteString -> Word8
  88         minimum,                -- :: ByteString -> Word8
  89         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  90
  91         -- * Generating and unfolding ByteStrings
  92         replicate,              -- :: Int -> Word8 -> ByteString
  93         unfoldrN,               -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  94
  95         -- * Substrings
  96
  97         -- ** Breaking strings
  98         take,                   -- :: Int -> ByteString -> ByteString
  99         drop,                   -- :: Int -> ByteString -> ByteString
 100         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 101         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 102         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 103         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 104         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 105         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 106
 107         -- ** Breaking and dropping on specific bytes
 108         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 109         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 110         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 111         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 112
 113         -- ** Breaking into many substrings
 114         split,                  -- :: Word8 -> ByteString -> [ByteString]
 115         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 116         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 117         group,                  -- :: ByteString -> [ByteString]
 118         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 119
 120         -- ** Joining strings
 121         join,                   -- :: ByteString -> [ByteString] -> ByteString
 122         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 123
 124         -- * Indexing ByteStrings
 125         index,                  -- :: ByteString -> Int -> Word8
 126         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 127         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 128         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 129         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 130         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 131         count,                  -- :: Word8 -> ByteString -> Int
 132
 133         -- * Ordered ByteStrings
 134         sort,                   -- :: ByteString -> ByteString
 135
 136         -- * Searching ByteStrings
 137
 138         -- ** Searching by equality
 139         -- | These functions use memchr(3) to efficiently search the ByteString
 140
 141         elem,                   -- :: Word8 -> ByteString -> Bool
 142         notElem,                -- :: Word8 -> ByteString -> Bool
 143         filterByte,             -- :: Word8 -> ByteString -> ByteString
 144         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 145
 146         -- ** Searching with a predicate
 147         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 148         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 149         filter',                -- :: (Word8 -> Bool) -> ByteString -> ByteString
 150
 151         -- ** Prefixes and suffixes
 152         -- | These functions use memcmp(3) to efficiently compare substrings
 153         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 154         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 155
 156         -- ** Search for arbitrary substrings
 157         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 158         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 159         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 160
 161         -- * Zipping and unzipping ByteStrings
 162         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 163         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 164         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 165
 166         -- * Unchecked access
 167         unsafeHead,             -- :: ByteString -> Word8
 168         unsafeTail,             -- :: ByteString -> ByteString
 169         unsafeIndex,            -- :: ByteString -> Int -> Word8
 170
 171         -- * Low level introduction and elimination
 172         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 173         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 174         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 175         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 176         skipIndex,              -- :: ByteString -> Int
 177
 178         -- ** Packing CStrings and pointers
 179         packCString,            -- :: CString -> ByteString
 180         packCStringLen,         -- :: CString -> ByteString
 181         packMallocCString,      -- :: CString -> ByteString
 182
 183 #if defined(__GLASGOW_HASKELL__)
 184         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 185         packAddress,            -- :: Addr# -> ByteString
 186         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 187         unsafeFinalize,         -- :: ByteString -> IO ()
 188 #endif
 189
 190         -- ** Using ByteStrings as CStrings
 191         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 192         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 193         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 194
 195         -- ** Copying ByteStrings
 196         -- | These functions perform memcpy(3) operations
 197         copy,                   -- :: ByteString -> ByteString
 198         copyCString,            -- :: CString -> ByteString
 199         copyCStringLen,         -- :: CStringLen -> ByteString
 200
 201         -- * I\/O with @ByteString@s
 202
 203         -- ** Standard input and output
 204
 205 #if defined(__GLASGOW_HASKELL__)
 206         getLine,                -- :: IO ByteString
 207 #endif
 208         getContents,            -- :: IO ByteString
 209         putStr,                 -- :: ByteString -> IO ()
 210         putStrLn,               -- :: ByteString -> IO ()
 211
 212         -- ** Files
 213         readFile,               -- :: FilePath -> IO ByteString
 214         writeFile,              -- :: FilePath -> ByteString -> IO ()
 215 --      mmapFile,               -- :: FilePath -> IO ByteString
 216
 217         -- ** I\/O with Handles
 218 #if defined(__GLASGOW_HASKELL__)
 219         getArgs,                -- :: IO [ByteString]
 220         hGetLine,               -- :: Handle -> IO ByteString
 221         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 222 #endif
 223         hGetContents,           -- :: Handle -> IO ByteString
 224         hGet,                   -- :: Handle -> Int -> IO ByteString
 225         hPut,                   -- :: Handle -> ByteString -> IO ()
 226
 227         -- * Fusion utilities
 228 #if defined(__GLASGOW_HASKELL__)
 229         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 230 #endif
 231
 232         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 233         loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL,
 234
 235   ) where
 236
 237 import qualified Prelude as P
 238 import Prelude hiding           (reverse,head,tail,last,init,null
 239                                 ,length,map,lines,foldl,foldr,unlines
 240                                 ,concat,any,take,drop,splitAt,takeWhile
 241                                 ,dropWhile,span,break,elem,filter,maximum
 242                                 ,minimum,all,concatMap,foldl1,foldr1
 243                                 ,readFile,writeFile,replicate
 244                                 ,getContents,getLine,putStr,putStrLn
 245                                 ,zip,zipWith,unzip,notElem)
 246
 247 import qualified Data.List as List
 248
 249 import Data.Char
 250 import Data.Word                (Word8)
 251 import Data.Maybe               (listToMaybe)
 252 import Data.Array               (listArray)
 253 import qualified Data.Array as Array ((!))
 254
 255 -- Control.Exception.bracket not available in yhc or nhc
 256 import Control.Exception        (bracket)
 257 import Control.Monad            (when)
 258
 259 import Foreign.C.String         (CString, CStringLen)
 260 import Foreign.C.Types          (CSize, CInt)
 261 import Foreign.ForeignPtr
 262 import Foreign.Marshal.Array
 263 import Foreign.Ptr
 264 import Foreign.Storable         (Storable(..))
 265
 266 -- hGetBuf and hPutBuf not available in yhc or nhc
 267 import System.IO                (stdin,stdout,hClose,hFileSize
 268                                 ,hGetBuf,hPutBuf,openBinaryFile
 269                                 ,Handle,IOMode(..))
 270
 271 #if !defined(__GLASGOW_HASKELL__)
 272 import System.IO.Unsafe
 273 #endif
 274
 275 #if defined(__GLASGOW_HASKELL__)
 276
 277 import Data.Generics            (Data(..), Typeable(..))
 278
 279 import System.IO                (hGetBufNonBlocking)
 280 import System.IO.Error          (isEOFError)
 281
 282 import Foreign.Marshal          (alloca)
 283 import qualified Foreign.Concurrent as FC (newForeignPtr)
 284
 285 import GHC.Handle
 286 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 287 import GHC.Base                 (build, unsafeChr)
 288 import GHC.Word hiding (Word8)
 289 import GHC.Ptr                  (Ptr(..))
 290 import GHC.ST                   (ST(..))
 291 import GHC.IOBase
 292
 293 #endif
 294
 295 -- CFILES stuff is Hugs only
 296 {-# CFILES cbits/fpstring.c #-}
 297
 298 -- -----------------------------------------------------------------------------
 299 --
 300 -- Useful macros, until we have bang patterns
 301 --
 302
 303 #define STRICT1(f) f a | a `seq` False = undefined
 304 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 305 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 306 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 307 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 308
 309 -- -----------------------------------------------------------------------------
 310
 311 -- | A space-efficient representation of a Word8 vector, supporting many
 312 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 313 --
 314 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 315 --
 316 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 317                      {-# UNPACK #-} !Int
 318                      {-# UNPACK #-} !Int
 319
 320 #if defined(__GLASGOW_HASKELL__)
 321     deriving (Data, Typeable)
 322 #endif
 323
 324 instance Eq  ByteString
 325     where (==)    = eq
 326
 327 instance Ord ByteString
 328     where compare = compareBytes
 329
 330 instance Show ByteString where
 331     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 332
 333 instance Read ByteString where
 334     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 335
 336 {-
 337 instance Arbitrary PackedString where
 338     arbitrary = P.pack `fmap` arbitrary
 339     coarbitrary s = coarbitrary (P.unpack s)
 340 -}
 341
 342 -- | /O(n)/ Equality on the 'ByteString' type.
 343 eq :: ByteString -> ByteString -> Bool
 344 eq a@(PS p s l) b@(PS p' s' l')
 345     | l /= l'            = False    -- short cut on length
 346     | p == p' && s == s' = True     -- short cut for the same string
 347     | otherwise          = compareBytes a b == EQ
 348 {-# INLINE eq #-}
 349
 350 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 351 compareBytes :: ByteString -> ByteString -> Ordering
 352 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 353     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 354     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 355     | otherwise                         = inlinePerformIO $
 356         withForeignPtr x1 $ \p1 ->
 357         withForeignPtr x2 $ \p2 -> do
 358             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 359             return $ case i `compare` 0 of
 360                         EQ  -> l1 `compare` l2
 361                         x   -> x
 362 {-# INLINE compareBytes #-}
 363
 364 {-
 365 --
 366 -- About 4x slower over 32M
 367 --
 368 compareBytes :: ByteString -> ByteString -> Ordering
 369 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 370     withForeignPtr fp1 $ \p1 ->
 371         withForeignPtr fp2 $ \p2 ->
 372             cmp (p1 `plusPtr` off1)
 373                 (p2 `plusPtr` off2) 0 len1 len2
 374
 375 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 376 STRICT5(cmp)
 377 cmp p1 p2 n len1 len2
 378       | n == len1 = if n == len2 then return EQ else return LT
 379       | n == len2 = return GT
 380       | otherwise = do
 381           (a :: Word8) <- peekByteOff p1 n
 382           (b :: Word8) <- peekByteOff p2 n
 383           case a `compare` b of
 384                 EQ -> cmp p1 p2 (n+1) len1 len2
 385                 LT -> return LT
 386                 GT -> return GT
 387 {-# INLINE compareBytes #-}
 388 -}
 389
 390 -- -----------------------------------------------------------------------------
 391 -- Introducing and eliminating 'ByteString's
 392
 393 -- | /O(1)/ The empty 'ByteString'
 394 empty :: ByteString
 395 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 396 {-# NOINLINE empty #-}
 397
 398 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 399 packByte :: Word8 -> ByteString
 400 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
 401     withForeignPtr fp $ \p -> poke p c
 402     return $ PS fp 0 1
 403 {-# INLINE packByte #-}
 404
 405 --
 406 -- XXX The unsafePerformIO is critical!
 407 --
 408 -- Otherwise:
 409 --
 410 --  packByte 255 `compare` packByte 127
 411 --
 412 -- is compiled to:
 413 --
 414 --  case mallocByteString 2 of
 415 --      ForeignPtr f internals ->
 416 --           case writeWord8OffAddr# f 0 255 of _ ->
 417 --           case writeWord8OffAddr# f 0 127 of _ ->
 418 --           case eqAddr# f f of
 419 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 420 --                                        (GHC.Prim.plusAddr# f 0)
 421 --
 422 --
 423
 424 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 425 --
 426 -- For applications with large numbers of string literals, pack can be a
 427 -- bottleneck. In such cases, consider using packAddress (GHC only).
 428 pack :: [Word8] -> ByteString
 429
 430 #if !defined(__GLASGOW_HASKELL__)
 431
 432 pack str = create (P.length str) $ \p -> go p str
 433     where
 434         go _ []     = return ()
 435         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 436
 437 #else /* hack away */
 438
 439 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 440     where
 441         go _ _ []        = return ()
 442         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 443
 444         writeByte p i c = ST $ \s# ->
 445             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 446
 447 #endif
 448
 449 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 450 unpack :: ByteString -> [Word8]
 451
 452 #if !defined(__GLASGOW_HASKELL__)
 453
 454 unpack (PS _  _ 0) = []
 455 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 456         go (p `plusPtr` s) (l - 1) []
 457     where
 458         STRICT3(go)
 459         go p 0 acc = peek p          >>= \e -> return (e : acc)
 460         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 461 {-# INLINE unpack #-}
 462
 463 #else
 464
 465 unpack ps = build (unpackFoldr ps)
 466 {-# INLINE unpack #-}
 467
 468 unpackList :: ByteString -> [Word8]
 469 unpackList (PS fp off len) = withPtr fp $ \p -> do
 470     let STRICT3(loop)
 471         loop _ (-1) acc = return acc
 472         loop q n acc = do
 473            a <- peekByteOff q n
 474            loop q (n-1) (a : acc)
 475     loop (p `plusPtr` off) (len-1) []
 476
 477 {-# RULES
 478 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 479  #-}
 480
 481 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 482 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 483     let STRICT3(loop)
 484         loop _ (-1) acc = return acc
 485         loop q n    acc = do
 486            a <- peekByteOff q n
 487            loop q (n-1) (a `f` acc)
 488     loop (p `plusPtr` off) (len-1) ch
 489 {-# INLINE [0] unpackFoldr #-}
 490
 491 #endif
 492
 493 ------------------------------------------------------------------------
 494
 495 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 496 -- conversion function
 497 packWith :: (a -> Word8) -> [a] -> ByteString
 498 packWith k str = create (P.length str) $ \p -> go p str
 499     where
 500         STRICT2(go)
 501         go _ []     = return ()
 502         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 503 {-# INLINE packWith #-}
 504 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 505
 506 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 507 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 508 unpackWith _ (PS _  _ 0) = []
 509 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 510         go (p `plusPtr` s) (l - 1) []
 511     where
 512         STRICT3(go)
 513         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 514         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 515 {-# INLINE unpackWith #-}
 516 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 517
 518 -- ---------------------------------------------------------------------
 519 -- Basic interface
 520
 521 -- | /O(1)/ Test whether a ByteString is empty.
 522 null :: ByteString -> Bool
 523 null (PS _ _ l) = l == 0
 524 {-# INLINE null #-}
 525
 526 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 527 length :: ByteString -> Int
 528 length (PS _ _ l) = l
 529
 530 #if defined(__GLASGOW_HASKELL__)
 531 {-# INLINE [1] length #-}
 532 #endif
 533
 534 {-#
 535
 536 -- Translate length into a loop.
 537 -- Performace ok, but allocates too much, so disable for now.
 538
 539   "length/loop" forall f acc s .
 540   length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
 541
 542   #-}
 543
 544 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 545 -- complexity, as it requires a memcpy.
 546 cons :: Word8 -> ByteString -> ByteString
 547 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 548         poke p c
 549         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 550 {-# INLINE cons #-}
 551
 552 -- todo fuse
 553
 554 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 555 snoc :: ByteString -> Word8 -> ByteString
 556 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 557         memcpy p (f `plusPtr` s) (fromIntegral l)
 558         poke (p `plusPtr` l) c
 559 {-# INLINE snoc #-}
 560
 561 -- todo fuse
 562
 563 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 564 head :: ByteString -> Word8
 565 head ps@(PS x s _)
 566     | null ps   = errorEmptyList "head"
 567     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 568 {-# INLINE head #-}
 569
 570 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 571 tail :: ByteString -> ByteString
 572 tail (PS p s l)
 573     | l <= 0    = errorEmptyList "tail"
 574     | otherwise = PS p (s+1) (l-1)
 575 {-# INLINE tail #-}
 576
 577 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 578 last :: ByteString -> Word8
 579 last ps@(PS x s l)
 580     | null ps   = errorEmptyList "last"
 581     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 582 {-# INLINE last #-}
 583
 584 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 585 init :: ByteString -> ByteString
 586 init (PS p s l)
 587     | l <= 0    = errorEmptyList "init"
 588     | otherwise = PS p s (l-1)
 589 {-# INLINE init #-}
 590
 591 -- | /O(n)/ Append two ByteStrings
 592 append :: ByteString -> ByteString -> ByteString
 593 append xs ys | null xs   = ys
 594              | null ys   = xs
 595              | otherwise = concat [xs,ys]
 596 {-# INLINE append #-}
 597
 598 -- ---------------------------------------------------------------------
 599 -- Transformations
 600
 601 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 602 -- element of @xs@. This function is subject to array fusion.
 603 map :: (Word8 -> Word8) -> ByteString -> ByteString
 604 map f = loopArr . loopU (mapEFL f) noAL
 605 {-# INLINE map #-}
 606
 607 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 608 -- slightly faster for one-shot cases.
 609 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 610 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 611     np <- mallocByteString (len+1)
 612     withForeignPtr np $ \p -> do
 613         map_ 0 (a `plusPtr` s) p
 614         return (PS np 0 len)
 615   where
 616
 617     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 618     STRICT3(map_)
 619     map_ n p1 p2
 620        | n >= len = return ()
 621        | otherwise = do
 622             x <- peekByteOff p1 n
 623             pokeByteOff p2 n (f x)
 624             map_ (n+1) p1 p2
 625 {-# INLINE map' #-}
 626
 627 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 628 reverse :: ByteString -> ByteString
 629 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 630         c_reverse p (f `plusPtr` s) (fromIntegral l)
 631
 632 {-
 633 reverse = pack . P.reverse . unpack
 634 -}
 635
 636 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 637 -- 'ByteString' and \`intersperses\' that byte between the elements of
 638 -- the 'ByteString'.  It is analogous to the intersperse function on
 639 -- Lists.
 640 intersperse :: Word8 -> ByteString -> ByteString
 641 intersperse c ps@(PS x s l)
 642     | length ps < 2  = ps
 643     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 644         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 645
 646 {-
 647 intersperse c = pack . List.intersperse c . unpack
 648 -}
 649
 650 -- | The 'transpose' function transposes the rows and columns of its
 651 -- 'ByteString' argument.
 652 transpose :: [ByteString] -> [ByteString]
 653 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 654
 655 -- ---------------------------------------------------------------------
 656 -- Reducing 'ByteString's
 657
 658 -- | 'foldl', applied to a binary operator, a starting value (typically
 659 -- the left-identity of the operator), and a ByteString, reduces the
 660 -- ByteString using the binary operator, from left to right.
 661 -- This function is subject to array fusion.
 662 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 663 foldl f z = loopAcc . loopU (foldEFL f) z
 664 {-# INLINE foldl #-}
 665
 666 {-
 667 --
 668 -- About twice as fast with 6.4.1, but not fuseable
 669 -- A simple fold . map is enough to make it worth while.
 670 --
 671 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 672         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 673     where
 674         STRICT3(lgo)
 675         lgo z p q | p == q    = return z
 676                   | otherwise = do c <- peek p
 677                                    lgo (f z c) (p `plusPtr` 1) q
 678 -}
 679
 680 -- | 'foldl\'' is like foldl, but strict in the accumulator.
 681 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 682 foldl' f z = loopAcc . loopU (foldEFL' f) z
 683 {-# INLINE foldl' #-}
 684
 685 -- | 'foldr', applied to a binary operator, a starting value
 686 -- (typically the right-identity of the operator), and a ByteString,
 687 -- reduces the ByteString using the binary operator, from right to left.
 688 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 689 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 690         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 691     where
 692         STRICT2(go)
 693         go p q | p == q    = return z
 694                | otherwise = do c  <- peek p
 695                                 ws <- go (p `plusPtr` 1) q
 696                                 return $ c `k` ws
 697
 698 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 699 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 700 -- This function is subject to array fusion.
 701 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 702 foldl1 f ps
 703     | null ps   = errorEmptyList "foldl1"
 704     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 705
 706 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 707 -- and thus must be applied to non-empty 'ByteString's
 708 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 709 foldr1 f ps
 710     | null ps        = errorEmptyList "foldr1"
 711     | otherwise      = foldr f (last ps) (init ps)
 712
 713 -- ---------------------------------------------------------------------
 714 -- Special folds
 715
 716 -- | /O(n)/ Concatenate a list of ByteStrings.
 717 concat :: [ByteString] -> ByteString
 718 concat []     = empty
 719 concat [ps]   = ps
 720 concat xs     = create len $ \ptr -> go xs ptr
 721   where len = P.sum . P.map length $ xs
 722         STRICT2(go)
 723         go []            _   = return ()
 724         go (PS p s l:ps) ptr = do
 725                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 726                 go ps (ptr `plusPtr` l)
 727
 728 -- | Map a function over a 'ByteString' and concatenate the results
 729 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 730 concatMap f = foldr (append . f) empty
 731
 732 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 733 -- any element of the 'ByteString' satisfies the predicate.
 734 any :: (Word8 -> Bool) -> ByteString -> Bool
 735 any _ (PS _ _ 0) = False
 736 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 737         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 738     where
 739         STRICT2(go)
 740         go p q | p == q    = return False
 741                | otherwise = do c <- peek p
 742                                 if f c then return True
 743                                        else go (p `plusPtr` 1) q
 744
 745 -- todo fuse
 746
 747 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 748 -- if all elements of the 'ByteString' satisfy the predicate.
 749 all :: (Word8 -> Bool) -> ByteString -> Bool
 750 all _ (PS _ _ 0) = True
 751 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 752         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 753     where
 754         STRICT2(go)
 755         go p q | p == q     = return True  -- end of list
 756                | otherwise  = do c <- peek p
 757                                  if f c
 758                                     then go (p `plusPtr` 1) q
 759                                     else return False
 760 -- todo fuse
 761
 762 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 763 maximum :: ByteString -> Word8
 764 maximum xs@(PS x s l)
 765     | null xs   = errorEmptyList "maximum"
 766     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 767                     return $ c_maximum (p `plusPtr` s) (fromIntegral l)
 768 {-# INLINE maximum #-}
 769
 770 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 771 minimum :: ByteString -> Word8
 772 minimum xs@(PS x s l)
 773     | null xs   = errorEmptyList "minimum"
 774     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 775                     return $ c_minimum (p `plusPtr` s) (fromIntegral l)
 776 {-# INLINE minimum #-}
 777
 778 -- fusion is too slow here (10x)
 779
 780 {-
 781 maximum xs@(PS x s l)
 782     | null xs   = errorEmptyList "maximum"
 783     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 784                         w <- peek p
 785                         maximum_ (p `plusPtr` s) 0 l w
 786
 787 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 788 STRICT4(maximum_)
 789 maximum_ ptr n m c
 790     | n >= m    = return c
 791     | otherwise = do w <- peekByteOff ptr n
 792                      maximum_ ptr (n+1) m (if w > c then w else c)
 793
 794 minimum xs@(PS x s l)
 795     | null xs   = errorEmptyList "minimum"
 796     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 797                         w <- peek p
 798                         minimum_ (p `plusPtr` s) 0 l w
 799
 800 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 801 STRICT4(minimum_)
 802 minimum_ ptr n m c
 803     | n >= m    = return c
 804     | otherwise = do w <- peekByteOff ptr n
 805                      minimum_ ptr (n+1) m (if w < c then w else c)
 806 -}
 807
 808 -- | /O(n)/ map Word8 functions, provided with the index at each position
 809 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 810 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 811     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 812   where
 813     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 814     STRICT4(go)
 815     go n f t p | f == p    = return ()
 816                | otherwise = do w <- peek f
 817                                 ((poke t) . k n) w
 818                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 819
 820 -- ---------------------------------------------------------------------
 821 -- Unfolds and replicates
 822
 823 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 824 -- the value of every element. The following holds:
 825 --
 826 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 827 --
 828 -- This implemenation uses @memset(3)@
 829 replicate :: Int -> Word8 -> ByteString
 830 replicate w c | w <= 0    = empty
 831               | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 832
 833 {-
 834 -- About 5x slower
 835 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 836     where
 837         STRICT2(go)
 838         go _   0 = return w
 839         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 840 -}
 841
 842 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 843 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 844 -- the element and returns 'Nothing' if it is done producing the
 845 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 846 -- prepending to the ByteString and @b@ is used as the next element in a
 847 -- recursive call.
 848 --
 849 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 850 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 851 -- final size of the ByteString as an argument. 'cons' can then be
 852 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 853 -- linear complexity. The depth of the recursion is limited to this
 854 -- size, but may be less. For lazy, infinite unfoldr, use
 855 -- 'Data.List.unfoldr' (from 'Data.List').
 856 --
 857 -- Examples:
 858 --
 859 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 860 --
 861 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 862 --
 863 -- > unfoldrN n == take n $ List.unfoldr
 864 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
 865 unfoldrN i f w
 866     | i <= 0    = empty
 867     | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
 868     where
 869         STRICT3(go)
 870         go q c n | n == i    = return n      -- stop if we reach `i'
 871                  | otherwise = case f c of
 872                                    Nothing        -> return n
 873                                    Just (a,new_c) -> do
 874                                         poke q a
 875                                         go (q `plusPtr` 1) new_c (n+1)
 876
 877 -- ---------------------------------------------------------------------
 878 -- Substrings
 879
 880 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 881 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 882 take :: Int -> ByteString -> ByteString
 883 take n ps@(PS x s l)
 884     | n < 0     = empty
 885     | n >= l    = ps
 886     | otherwise = PS x s n
 887 {-# INLINE take #-}
 888
 889 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 890 -- elements, or @[]@ if @n > 'length' xs@.
 891 drop  :: Int -> ByteString -> ByteString
 892 drop n ps@(PS x s l)
 893     | n <= 0    = ps
 894     | n >  l    = empty
 895     | otherwise = PS x (s+n) (l-n)
 896 {-# INLINE drop #-}
 897
 898 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 899 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 900 splitAt n ps  = (take n ps, drop n ps)
 901 {-# INLINE splitAt #-}
 902
 903 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 904 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 905 -- satisfy @p@.
 906 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 907 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 908 {-# INLINE takeWhile #-}
 909
 910 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 911 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 912 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 913 {-# INLINE dropWhile #-}
 914
 915 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 916 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 917 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 918 {-# INLINE break #-}
 919
 920 -- | 'breakByte' breaks its ByteString argument at the first occurence
 921 -- of the specified byte. It is more efficient than 'break' as it is
 922 -- implemented with @memchr(3)@. I.e.
 923 --
 924 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 925 --
 926 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 927 breakByte c p = case elemIndex c p of
 928     Nothing -> (p,empty)
 929     Just n  -> (take n p, drop n p)
 930 {-# INLINE breakByte #-}
 931
 932 -- | 'spanByte' breaks its ByteString argument at the first
 933 -- occurence of a byte other than its argument. It is more efficient
 934 -- than 'span (==)'
 935 --
 936 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 937 --
 938 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 939 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 940     go (p `plusPtr` s) 0
 941   where
 942     STRICT2(go)
 943     go p i | i >= l    = return (ps, empty)
 944            | otherwise = do c' <- peekByteOff p i
 945                             if c /= c'
 946                                 then return (take i ps, drop i ps)
 947                                 else go p (i+1)
 948 {-# INLINE spanByte #-}
 949
 950 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 951 -- occurence of @w@. It behaves like 'break', except the delimiter is
 952 -- not returned, and @Nothing@ is returned if the delimiter is not in
 953 -- the ByteString. I.e.
 954 --
 955 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 956 --
 957 -- > breakFirst c xs ==
 958 -- > let (x,y) = break (== c) xs
 959 -- > in if null y then Nothing else Just (x, drop 1 y))
 960 --
 961 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 962 breakFirst c p = case elemIndex c p of
 963    Nothing -> Nothing
 964    Just n -> Just (take n p, drop (n+1) p)
 965 {-# INLINE breakFirst #-}
 966
 967 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 968 -- ByteString.
 969 --
 970 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 971 --
 972 -- and the following are equivalent:
 973 --
 974 -- > breakLast 'c' "abcdef"
 975 -- > let (x,y) = break (=='c') (reverse "abcdef")
 976 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 977 --
 978 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 979 breakLast c p = case elemIndexLast c p of
 980     Nothing -> Nothing
 981     Just n -> Just (take n p, drop (n+1) p)
 982 {-# INLINE breakLast #-}
 983
 984 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 985 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 986 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 987 span p ps = break (not . p) ps
 988 {-# INLINE span #-}
 989
 990 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 991 -- We have
 992 --
 993 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 994 --
 995 -- and
 996 --
 997 -- > spanEnd (not . isSpace) ps
 998 -- >    ==
 999 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1000 --
1001 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1002 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1003
1004 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1005 -- separators, where the predicate returns True for a separator element.
1006 -- The resulting components do not contain the separators.  Two adjacent
1007 -- separators result in an empty component in the output.  eg.
1008 --
1009 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1010 -- > splitWith (=='a') []        == []
1011 --
1012 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1013
1014 #if defined(__GLASGOW_HASKELL__)
1015 splitWith _pred (PS _  _   0) = []
1016 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1017   where pred# c# = pred_ (W8# c#)
1018
1019         STRICT4(splitWith0)
1020         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1021             splitLoop pred' p 0 off' len' fp'
1022
1023         splitLoop :: (Word# -> Bool)
1024                   -> Ptr Word8
1025                   -> Int -> Int -> Int
1026                   -> ForeignPtr Word8
1027                   -> IO [ByteString]
1028
1029         splitLoop pred' p idx' off' len' fp'
1030             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1031             | idx' >= len'  = return [PS fp' off' idx']
1032             | otherwise = do
1033                 w <- peekElemOff p (off'+idx')
1034                 if pred' (case w of W8# w# -> w#)
1035                    then return (PS fp' off' idx' :
1036                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1037                    else splitLoop pred' p (idx'+1) off' len' fp'
1038 {-# INLINE splitWith #-}
1039
1040 #else
1041 splitWith _ (PS _ _ 0) = []
1042 splitWith p ps = loop p ps
1043     where
1044         STRICT2(loop)
1045         loop q qs = if null rest then [chunk]
1046                                  else chunk : loop q (unsafeTail rest)
1047             where (chunk,rest) = break q qs
1048 #endif
1049
1050 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1051 -- argument, consuming the delimiter. I.e.
1052 --
1053 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1054 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1055 -- > split 'x'  "x"          == ["",""]
1056 --
1057 -- and
1058 --
1059 -- > join [c] . split c == id
1060 -- > split == splitWith . (==)
1061 --
1062 -- As for all splitting functions in this library, this function does
1063 -- not copy the substrings, it just constructs new 'ByteStrings' that
1064 -- are slices of the original.
1065 --
1066 split :: Word8 -> ByteString -> [ByteString]
1067 split _ (PS _ _ 0) = []
1068 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1069     let ptr = p `plusPtr` s
1070
1071         STRICT1(loop)
1072         loop n = do
1073             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1074             if q == nullPtr
1075                 then [PS x (s+n) (l-n)]
1076                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1077
1078     return (loop 0)
1079 {-# INLINE split #-}
1080
1081 {-
1082 -- slower. but stays inside Haskell.
1083 split _ (PS _  _   0) = []
1084 split (W8# w#) (PS fp off len) = splitWith' off len fp
1085     where
1086         splitWith' off' len' fp' = withPtr fp $ \p ->
1087             splitLoop p 0 off' len' fp'
1088
1089         splitLoop :: Ptr Word8
1090                   -> Int -> Int -> Int
1091                   -> ForeignPtr Word8
1092                   -> IO [ByteString]
1093
1094         STRICT5(splitLoop)
1095         splitLoop p idx' off' len' fp'
1096             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1097             | idx' >= len'  = return [PS fp' off' idx']
1098             | otherwise = do
1099                 (W8# x#) <- peekElemOff p (off'+idx')
1100                 if word2Int# w# ==# word2Int# x#
1101                    then return (PS fp' off' idx' :
1102                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1103                    else splitLoop p (idx'+1) off' len' fp'
1104 -}
1105
1106 -- | Like 'splitWith', except that sequences of adjacent separators are
1107 -- treated as a single separator. eg.
1108 --
1109 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1110 --
1111 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1112 tokens f = P.filter (not.null) . splitWith f
1113 {-# INLINE tokens #-}
1114
1115 -- | The 'group' function takes a ByteString and returns a list of
1116 -- ByteStrings such that the concatenation of the result is equal to the
1117 -- argument.  Moreover, each sublist in the result contains only equal
1118 -- elements.  For example,
1119 --
1120 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1121 --
1122 -- It is a special case of 'groupBy', which allows the programmer to
1123 -- supply their own equality test. It is about 40% faster than
1124 -- /groupBy (==)/
1125 group :: ByteString -> [ByteString]
1126 group xs
1127     | null xs   = []
1128     | otherwise = ys : group zs
1129     where
1130         (ys, zs) = spanByte (unsafeHead xs) xs
1131
1132 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1133 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1134 groupBy k xs
1135     | null xs   = []
1136     | otherwise = take n xs : groupBy k (drop n xs)
1137     where
1138         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1139
1140 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1141 -- 'ByteString's and concatenates the list after interspersing the first
1142 -- argument between each element of the list.
1143 join :: ByteString -> [ByteString] -> ByteString
1144 join filler pss = concat (splice pss)
1145     where
1146         splice []  = []
1147         splice [x] = [x]
1148         splice (x:y:xs) = x:filler:splice (y:xs)
1149 {-# INLINE join #-}
1150
1151 --
1152 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1153 -- with a char. Around 4 times faster than the generalised join.
1154 --
1155 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1156 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1157     withForeignPtr ffp $ \fp ->
1158     withForeignPtr fgp $ \gp -> do
1159         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1160         poke (ptr `plusPtr` l) c
1161         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1162     where
1163       len = length f + length g + 1
1164 {-# INLINE joinWithByte #-}
1165
1166 -- ---------------------------------------------------------------------
1167 -- Indexing ByteStrings
1168
1169 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1170 index :: ByteString -> Int -> Word8
1171 index ps n
1172     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1173     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1174                                          ++ ", length = " ++ show (length ps))
1175     | otherwise      = ps `unsafeIndex` n
1176 {-# INLINE index #-}
1177
1178 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1179 -- element in the given 'ByteString' which is equal to the query
1180 -- element, or 'Nothing' if there is no such element.
1181 -- This implementation uses memchr(3).
1182 elemIndex :: Word8 -> ByteString -> Maybe Int
1183 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1184     let p' = p `plusPtr` s
1185         q  = memchr p' c (fromIntegral l)
1186     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1187 {-# INLINE elemIndex #-}
1188
1189 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1190 -- element in the given 'ByteString' which is equal to the query
1191 -- element, or 'Nothing' if there is no such element. The following
1192 -- holds:
1193 --
1194 -- > elemIndexLast c xs ==
1195 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1196 --
1197 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1198 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1199     go (p `plusPtr` s) (l-1)
1200   where
1201     STRICT2(go)
1202     go p i | i < 0     = return Nothing
1203            | otherwise = do ch' <- peekByteOff p i
1204                             if ch == ch'
1205                                 then return $ Just i
1206                                 else go p (i-1)
1207 {-# INLINE elemIndexLast #-}
1208
1209 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1210 -- the indices of all elements equal to the query element, in ascending order.
1211 -- This implementation uses memchr(3).
1212 elemIndices :: Word8 -> ByteString -> [Int]
1213 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1214     let ptr = p `plusPtr` s
1215
1216         STRICT1(loop)
1217         loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1218                  in if q == nullPtr
1219                         then []
1220                         else let i = q `minusPtr` ptr
1221                              in i : loop (i+1)
1222     return (loop 0)
1223 {-# INLINE elemIndices #-}
1224
1225 {-
1226 -- much slower
1227 elemIndices :: Word8 -> ByteString -> [Int]
1228 elemIndices c ps = loop 0 ps
1229    where STRICT2(loop)
1230          loop _ ps' | null ps'            = []
1231          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1232                     | otherwise           = loop (n+1) (unsafeTail ps')
1233 -}
1234
1235 -- | count returns the number of times its argument appears in the ByteString
1236 --
1237 -- > count = length . elemIndices
1238 --
1239 -- But more efficiently than using length on the intermediate list.
1240 count :: Word8 -> ByteString -> Int
1241 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1242     return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1243 {-# INLINE count #-}
1244
1245 {-
1246 --
1247 -- around 30% slower
1248 --
1249 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1250      go (p `plusPtr` s) (fromIntegral m) 0
1251     where
1252         go :: Ptr Word8 -> CSize -> Int -> IO Int
1253         STRICT3(go)
1254         go p l i = do
1255             let q = memchr p w l
1256             if q == nullPtr
1257                 then return i
1258                 else do let k = fromIntegral $ q `minusPtr` p
1259                         go (q `plusPtr` 1) (l-k-1) (i+1)
1260 -}
1261
1262 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1263 -- returns the index of the first element in the ByteString
1264 -- satisfying the predicate.
1265 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1266 findIndex k ps@(PS x s l)
1267     | null ps   = Nothing
1268     | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1269   where
1270     STRICT2(go)
1271     go ptr n | n >= l    = return Nothing
1272              | otherwise = do w <- peek ptr
1273                               if k w
1274                                 then return (Just n)
1275                                 else go (ptr `plusPtr` 1) (n+1)
1276 {-# INLINE findIndex #-}
1277
1278 -- | The 'findIndices' function extends 'findIndex', by returning the
1279 -- indices of all elements satisfying the predicate, in ascending order.
1280 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1281 findIndices p ps = loop 0 ps
1282    where
1283      STRICT2(loop)
1284      loop n qs | null qs           = []
1285                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1286                | otherwise         =     loop (n+1) (unsafeTail qs)
1287
1288 -- ---------------------------------------------------------------------
1289 -- Searching ByteStrings
1290
1291 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1292 elem :: Word8 -> ByteString -> Bool
1293 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1294 {-# INLINE elem #-}
1295
1296 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1297 notElem :: Word8 -> ByteString -> Bool
1298 notElem c ps = not (elem c ps)
1299 {-# INLINE notElem #-}
1300
1301 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1302 -- returns a ByteString containing those characters that satisfy the
1303 -- predicate. This function is subject to array fusion.
1304 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1305 filter p  = loopArr . loopU (filterEFL p) noAL
1306 {-# INLINE filter #-}
1307
1308 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1309 -- around 2x faster for some one-shot applications.
1310 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1311 filter' k ps@(PS x s l)
1312     | null ps   = ps
1313     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1314         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1315         return (t `minusPtr` p) -- actual length
1316     where
1317         STRICT3(go)
1318         go f t end | f == end  = return t
1319                    | otherwise = do
1320                         w <- peek f
1321                         if k w
1322                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1323                             else             go (f `plusPtr` 1) t               end
1324 {-# INLINE filter' #-}
1325
1326 --
1327 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1328 -- case of filtering a single byte. It is more efficient to use
1329 -- /filterByte/ in this case.
1330 --
1331 -- > filterByte == filter . (==)
1332 --
1333 -- filterByte is around 10x faster, and uses much less space, than its
1334 -- filter equivalent
1335 filterByte :: Word8 -> ByteString -> ByteString
1336 filterByte w ps = replicate (count w ps) w
1337 {-# INLINE filterByte #-}
1338
1339 --
1340 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1341 -- case of filtering a single byte out of a list. It is more efficient
1342 -- to use /filterNotByte/ in this case.
1343 --
1344 -- > filterNotByte == filter . (/=)
1345 --
1346 -- filterNotByte is around 2x faster than its filter equivalent.
1347 filterNotByte :: Word8 -> ByteString -> ByteString
1348 filterNotByte w = filter' (/= w)
1349 {-# INLINE filterNotByte #-}
1350
1351 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1352 -- and returns the first element in matching the predicate, or 'Nothing'
1353 -- if there is no such element.
1354 --
1355 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1356 --
1357 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1358 find f p = case findIndex f p of
1359                     Just n -> Just (p `unsafeIndex` n)
1360                     _      -> Nothing
1361 {-# INLINE find #-}
1362
1363 {-
1364 --
1365 -- fuseable, but we don't want to walk the whole array.
1366 --
1367 find k = foldl findEFL Nothing
1368     where findEFL a@(Just _) _ = a
1369           findEFL _          c | k c       = Just c
1370                                | otherwise = Nothing
1371 -}
1372
1373 -- ---------------------------------------------------------------------
1374 -- Searching for substrings
1375
1376 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1377 -- iff the first is a prefix of the second.
1378 isPrefixOf :: ByteString -> ByteString -> Bool
1379 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1380     | l1 == 0   = True
1381     | l2 < l1   = False
1382     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1383         withForeignPtr x2 $ \p2 -> do
1384             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1385             return (i == 0)
1386
1387 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1388 -- iff the first is a suffix of the second.
1389 --
1390 -- The following holds:
1391 --
1392 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1393 --
1394 -- However, the real implemenation uses memcmp to compare the end of the
1395 -- string only, with no reverse required..
1396 isSuffixOf :: ByteString -> ByteString -> Bool
1397 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1398     | l1 == 0   = True
1399     | l2 < l1   = False
1400     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1401         withForeignPtr x2 $ \p2 -> do
1402             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1403             return (i == 0)
1404
1405 -- | Check whether one string is a substring of another. @isSubstringOf
1406 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1407 isSubstringOf :: ByteString -- ^ String to search for.
1408               -> ByteString -- ^ String to search in.
1409               -> Bool
1410 isSubstringOf p s = not $ P.null $ findSubstrings p s
1411
1412 -- | Get the first index of a substring in another string,
1413 --   or 'Nothing' if the string is not found.
1414 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1415 findSubstring :: ByteString -- ^ String to search for.
1416               -> ByteString -- ^ String to seach in.
1417               -> Maybe Int
1418 findSubstring = (listToMaybe .) . findSubstrings
1419
1420 -- | Find the indexes of all (possibly overlapping) occurances of a
1421 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1422 -- string matching algorithm.
1423 findSubstrings :: ByteString -- ^ String to search for.
1424                -> ByteString -- ^ String to seach in.
1425                -> [Int]
1426
1427 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1428   where
1429       patc x = pat `unsafeIndex` x
1430       strc x = str `unsafeIndex` x
1431
1432       -- maybe we should make kmpNext a UArray before using it in search?
1433       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1434       kmpNextL p _ | null p = []
1435       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1436                          ps = unsafeTail p
1437                          x = if not (null ps) && unsafeHead ps == patc j'
1438                                 then kmpNext Array.! j' else j'
1439                         in x:kmpNextL ps j'
1440       search i j = match ++ rest -- i: position in string, j: position in pattern
1441         where match = if j == m then [(i - j)] else []
1442               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1443       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1444                | otherwise = j
1445
1446 -- ---------------------------------------------------------------------
1447 -- Zipping
1448
1449 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1450 -- corresponding pairs of bytes. If one input ByteString is short,
1451 -- excess elements of the longer ByteString are discarded. This is
1452 -- equivalent to a pair of 'unpack' operations.
1453 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1454 zip ps qs
1455     | null ps || null qs = []
1456     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1457
1458 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1459 -- the first argument, instead of a tupling function.  For example,
1460 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1461 -- corresponding sums.
1462 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1463 zipWith f ps qs
1464     | null ps || null qs = []
1465     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1466
1467 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1468 -- ByteStrings. Note that this performs two 'pack' operations.
1469 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1470 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1471 {-# INLINE unzip #-}
1472
1473 -- ---------------------------------------------------------------------
1474 -- Special lists
1475
1476 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1477 inits :: ByteString -> [ByteString]
1478 inits (PS x s l) = [PS x s n | n <- [0..l]]
1479
1480 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1481 tails :: ByteString -> [ByteString]
1482 tails p | null p    = [empty]
1483         | otherwise = p : tails (unsafeTail p)
1484
1485 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1486
1487 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1488 elems :: ByteString -> [ByteString]
1489 elems (PS _ _ 0) = []
1490 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1491 {-# INLINE elems #-}
1492
1493 -- ---------------------------------------------------------------------
1494 -- ** Ordered 'ByteString's
1495
1496 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1497 sort :: ByteString -> ByteString
1498 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1499
1500     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1501     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1502
1503     let STRICT2(go)
1504         go 256 _   = return ()
1505         go i   ptr = do n <- peekElemOff arr i
1506                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1507                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1508     go 0 p
1509
1510 -- "countEach counts str l" counts the number of occurences of each Word8 in
1511 -- str, and stores the result in counts.
1512 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1513 STRICT3(countEach)
1514 countEach counts str l = go 0
1515  where
1516     STRICT1(go)
1517     go i | i == l    = return ()
1518          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1519                           x <- peekElemOff counts k
1520                           pokeElemOff counts k (x + 1)
1521                           go (i + 1)
1522
1523 {-
1524 sort :: ByteString -> ByteString
1525 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1526         memcpy p (f `plusPtr` s) l
1527         c_qsort p l -- inplace
1528 -}
1529
1530 {-
1531 sort = pack . List.sort . unpack
1532 -}
1533
1534 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1535 --
1536 -- Try some linear sorts: radix, counting
1537 -- Or mergesort.
1538 --
1539 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1540 -- sortBy f ps = undefined
1541
1542 -- ---------------------------------------------------------------------
1543 --
1544 -- Extensions to the basic interface
1545 --
1546
1547 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1548 -- check for the empty case, so there is an obligation on the programmer
1549 -- to provide a proof that the ByteString is non-empty.
1550 unsafeHead :: ByteString -> Word8
1551 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1552 {-# INLINE unsafeHead #-}
1553
1554 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1555 -- check for the empty case. As with 'unsafeHead', the programmer must
1556 -- provide a separate proof that the ByteString is non-empty.
1557 unsafeTail :: ByteString -> ByteString
1558 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1559 {-# INLINE unsafeTail #-}
1560
1561 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1562 -- This omits the bounds check, which means there is an accompanying
1563 -- obligation on the programmer to ensure the bounds are checked in some
1564 -- other way.
1565 unsafeIndex :: ByteString -> Int -> Word8
1566 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1567 {-# INLINE unsafeIndex #-}
1568
1569 -- ---------------------------------------------------------------------
1570 -- Low level constructors
1571
1572 #if defined(__GLASGOW_HASKELL__)
1573 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1574 -- Addr\# (an arbitrary machine address assumed to point outside the
1575 -- garbage-collected heap) into a @ByteString@. A much faster way to
1576 -- create an Addr\# is with an unboxed string literal, than to pack a
1577 -- boxed string. A unboxed string literal is compiled to a static @char
1578 -- []@ by GHC. Establishing the length of the string requires a call to
1579 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1580 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1581 -- if you know the length of the string statically.
1582 --
1583 -- An example:
1584 --
1585 -- > literalFS = packAddress "literal"#
1586 --
1587 packAddress :: Addr# -> ByteString
1588 packAddress addr# = inlinePerformIO $ do
1589     p <- newForeignPtr_ cstr
1590     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1591   where
1592     cstr = Ptr addr#
1593 {-# INLINE packAddress #-}
1594
1595 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1596 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1597 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1598 -- 'Addr\#' to the string, and the length of the string. Make sure the
1599 -- length is correct, otherwise use the safer 'packAddress' (where the
1600 -- length will be calculated once at runtime).
1601 unsafePackAddress :: Int -> Addr# -> ByteString
1602 unsafePackAddress len addr# = inlinePerformIO $ do
1603     p <- newForeignPtr_ cstr
1604     return $ PS p 0 len
1605     where cstr = Ptr addr#
1606
1607 #endif
1608
1609 -- | /O(1)/ Build a ByteString from a ForeignPtr
1610 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1611 fromForeignPtr fp l = PS fp 0 l
1612
1613 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1614 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1615 toForeignPtr (PS ps s l) = (ps, s, l)
1616
1617 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1618 -- current 'ByteString' from any larger string it was created from, as
1619 -- an 'Int'.
1620 skipIndex :: ByteString -> Int
1621 skipIndex (PS _ s _) = s
1622 {-# INLINE skipIndex #-}
1623
1624 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1625 -- finalizer associated to it. The ByteString length is calculated using
1626 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1627 packCString :: CString -> ByteString
1628 packCString cstr = inlinePerformIO $ do
1629     fp <- newForeignPtr_ (castPtr cstr)
1630     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1631
1632 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1633 -- have /no/ finalizer associated with it. This operation has /O(1)/
1634 -- complexity as we already know the final size, so no /strlen(3)/ is
1635 -- required.
1636 packCStringLen :: CStringLen -> ByteString
1637 packCStringLen (ptr,len) = inlinePerformIO $ do
1638     fp <- newForeignPtr_ (castPtr ptr)
1639     return $ PS fp 0 (fromIntegral len)
1640
1641 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1642 -- have a @free(3)@ finalizer associated to it.
1643 packMallocCString :: CString -> ByteString
1644 packMallocCString cstr = inlinePerformIO $ do
1645     fp <- newForeignFreePtr (castPtr cstr)
1646     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1647
1648 #if defined(__GLASGOW_HASKELL__)
1649 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1650 -- length, and an IO action representing a finalizer. This function is
1651 -- not available on Hugs.
1652 --
1653 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1654 packCStringFinalizer p l f = do
1655     fp <- FC.newForeignPtr p f
1656     return $ PS fp 0 l
1657
1658 -- | Explicitly run the finaliser associated with a 'ByteString'.
1659 -- Further references to this value may generate invalid memory
1660 -- references. This operation is unsafe, as there may be other
1661 -- 'ByteStrings' referring to the same underlying pages. If you use
1662 -- this, you need to have a proof of some kind that all 'ByteString's
1663 -- ever generated from the underlying byte array are no longer live.
1664 unsafeFinalize :: ByteString -> IO ()
1665 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1666
1667 #endif
1668
1669 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1670 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1671 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1672 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1673     where
1674       alloc = withForeignPtr ps $ \p -> do
1675                 buf <- c_malloc (fromIntegral l+1)
1676                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1677                 poke (buf `plusPtr` l) (0::Word8)
1678                 return $ castPtr buf
1679
1680 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1681 -- Warning: modifying the @CString@ will affect the @ByteString@.
1682 -- Why is this function unsafe? It relies on the null byte at the end of
1683 -- the ByteString to be there. This is /not/ the case if your ByteString
1684 -- has been spliced from a larger string (i.e. with take or drop).
1685 -- Unless you can guarantee the null byte, you should use the safe
1686 -- version, which will copy the string first.
1687 --
1688 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1689 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1690
1691 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1692 --   This is mainly useful to allow the rest of the data pointed
1693 --   to by the 'ByteString' to be garbage collected, for example
1694 --   if a large string has been read in, and only a small part of it
1695 --   is needed in the rest of the program.
1696 copy :: ByteString -> ByteString
1697 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1698     memcpy p (f `plusPtr` s) (fromIntegral l)
1699
1700 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1701 -- CString is going to be deallocated from C land.
1702 copyCString :: CString -> ByteString
1703 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1704
1705 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1706 copyCStringLen :: CStringLen -> ByteString
1707 copyCStringLen (cstr, len) = inlinePerformIO $ do
1708     fp <- mallocForeignPtrArray (len+1)
1709     withForeignPtr fp $ \p -> do
1710         memcpy p (castPtr cstr) (fromIntegral len)
1711         poke (p `plusPtr` len) (0 :: Word8)
1712     return $! PS fp 0 len
1713
1714 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1715 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1716 -- This is analogous to unsafeUseAsCString, and comes with the same
1717 -- safety requirements.
1718 --
1719 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1720 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1721
1722 -- | Given the maximum size needed and a function to make the contents
1723 -- of a ByteString, generate makes the 'ByteString'. The generating
1724 -- function is required to return the actual final size (<= the maximum
1725 -- size), and the resulting byte array is realloced to this size.  The
1726 -- string is padded at the end with a null byte.
1727 --
1728 -- generate is the main mechanism for creating custom, efficient
1729 -- ByteString functions, using Haskell or C functions to fill the space.
1730 --
1731 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1732 generate i f = do
1733     fp      <- mallocByteString i
1734     (ptr,n) <- withForeignPtr fp $ \p -> do
1735         i' <- f p
1736         if i' == i
1737             then return (fp,i')
1738             else do fp_ <- mallocByteString i'      -- realloc
1739                     withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1740                     return (fp_,i')
1741     return (PS ptr 0 n)
1742
1743 {-
1744 --
1745 -- On the C malloc heap. Less fun.
1746 --
1747 generate i f = do
1748     p <- mallocArray (i+1)
1749     i' <- f p
1750     p' <- reallocArray p (i'+1)
1751     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1752     fp <- newForeignFreePtr p'
1753     return $ PS fp 0 i'
1754 -}
1755
1756 -- ---------------------------------------------------------------------
1757 -- line IO
1758
1759 #if defined(__GLASGOW_HASKELL__)
1760
1761 -- | getLine, read a line from stdin.
1762 getLine :: IO ByteString
1763 getLine = hGetLine stdin
1764
1765 -- | hGetLine. read a ByteString from a handle
1766 hGetLine :: Handle -> IO ByteString
1767 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1768     case haBufferMode handle_ of
1769        NoBuffering -> error "no buffering"
1770        _other      -> hGetLineBuffered handle_
1771
1772  where
1773     hGetLineBuffered handle_ = do
1774         let ref = haBuffer handle_
1775         buf <- readIORef ref
1776         hGetLineBufferedLoop handle_ ref buf 0 []
1777
1778     hGetLineBufferedLoop handle_ ref
1779             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1780         len `seq` do
1781         off <- findEOL r w raw
1782         let new_len = len + off - r
1783         xs <- mkPS raw r off
1784
1785       -- if eol == True, then off is the offset of the '\n'
1786       -- otherwise off == w and the buffer is now empty.
1787         if off /= w
1788             then do if (w == off + 1)
1789                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1790                             else writeIORef ref buf{ bufRPtr = off + 1 }
1791                     mkBigPS new_len (xs:xss)
1792             else do
1793                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1794                                     buf{ bufWPtr=0, bufRPtr=0 }
1795                  case maybe_buf of
1796                     -- Nothing indicates we caught an EOF, and we may have a
1797                     -- partial line to return.
1798                     Nothing -> do
1799                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1800                          if new_len > 0
1801                             then mkBigPS new_len (xs:xss)
1802                             else ioe_EOF
1803                     Just new_buf ->
1804                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1805
1806     -- find the end-of-line character, if there is one
1807     findEOL r w raw
1808         | r == w = return w
1809         | otherwise =  do
1810             (c,r') <- readCharFromBuffer raw r
1811             if c == '\n'
1812                 then return r -- NB. not r': don't include the '\n'
1813                 else findEOL r' w raw
1814
1815     maybeFillReadBuffer fd is_line is_stream buf = catch
1816         (do buf' <- fillReadBuffer fd is_line is_stream buf
1817             return (Just buf'))
1818         (\e -> if isEOFError e then return Nothing else ioError e)
1819
1820 -- TODO, rewrite to use normal memcpy
1821 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1822 mkPS buf start end = do
1823     let len = end - start
1824     fp <- mallocByteString len
1825     withForeignPtr fp $ \p -> do
1826         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1827         return (PS fp 0 len)
1828
1829 mkBigPS :: Int -> [ByteString] -> IO ByteString
1830 mkBigPS _ [ps] = return ps
1831 mkBigPS _ pss = return $! concat (P.reverse pss)
1832
1833 #endif
1834
1835 -- ---------------------------------------------------------------------
1836 -- Block IO
1837
1838 -- | Outputs a 'ByteString' to the specified 'Handle'.
1839 hPut :: Handle -> ByteString -> IO ()
1840 hPut _ (PS _ _ 0)  = return ()
1841 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1842 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1843
1844 -- | Write a ByteString to stdout
1845 putStr :: ByteString -> IO ()
1846 putStr = hPut stdout
1847
1848 -- | Write a ByteString to stdout, appending a newline byte
1849 putStrLn :: ByteString -> IO ()
1850 putStrLn ps = hPut stdout ps >> hPut stdout nl
1851     where nl = packByte 0x0a
1852
1853 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1854 -- is far more efficient than reading the characters into a 'String'
1855 -- and then using 'pack'.
1856 hGet :: Handle -> Int -> IO ByteString
1857 hGet _ 0 = return empty
1858 hGet h i = do fp <- mallocByteString i
1859               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1860               return $ PS fp 0 l
1861
1862 #if defined(__GLASGOW_HASKELL__)
1863 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1864 -- waiting for data to become available, instead it returns only whatever data
1865 -- is available.
1866 hGetNonBlocking :: Handle -> Int -> IO ByteString
1867 hGetNonBlocking _ 0 = return empty
1868 hGetNonBlocking h i = do
1869     fp <- mallocByteString i
1870     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1871     return $ PS fp 0 l
1872 #endif
1873
1874 -- | Read entire handle contents into a 'ByteString'.
1875 --
1876 -- As with 'hGet', the string representation in the file is assumed to
1877 -- be ISO-8859-1.
1878 --
1879 hGetContents :: Handle -> IO ByteString
1880 hGetContents h = do
1881     let start_size = 1024
1882     p <- mallocArray start_size
1883     i <- hGetBuf h p start_size
1884     if i < start_size
1885         then do p' <- reallocArray p i
1886                 fp <- newForeignFreePtr p'
1887                 return $ PS fp 0 i
1888         else f p start_size
1889     where
1890         f p s = do
1891             let s' = 2 * s
1892             p' <- reallocArray p s'
1893             i  <- hGetBuf h (p' `plusPtr` s) s
1894             if i < s
1895                 then do let i' = s + i
1896                         p'' <- reallocArray p' i'
1897                         fp  <- newForeignFreePtr p''
1898                         return $ PS fp 0 i'
1899                 else f p' s'
1900
1901 -- | getContents. Equivalent to hGetContents stdin
1902 getContents :: IO ByteString
1903 getContents = hGetContents stdin
1904
1905 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1906 -- efficient than reading the characters into a 'String' and then using
1907 -- 'pack'.  It also may be more efficient than opening the file and
1908 -- reading it using hGet.
1909 readFile :: FilePath -> IO ByteString
1910 readFile f = do
1911     h <- openBinaryFile f ReadMode
1912     l <- hFileSize h
1913     s <- hGet h $ fromIntegral l
1914     hClose h
1915     return s
1916
1917 -- | Write a 'ByteString' to a file.
1918 writeFile :: FilePath -> ByteString -> IO ()
1919 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1920     (\h -> hPut h ps)
1921
1922 {-
1923 --
1924 -- Disable until we can move it into a portable .hsc file
1925 --
1926
1927 -- | Like readFile, this reads an entire file directly into a
1928 -- 'ByteString', but it is even more efficient.  It involves directly
1929 -- mapping the file to memory.  This has the advantage that the contents
1930 -- of the file never need to be copied.  Also, under memory pressure the
1931 -- page may simply be discarded, while in the case of readFile it would
1932 -- need to be written to swap.  If you read many small files, mmapFile
1933 -- will be less memory-efficient than readFile, since each mmapFile
1934 -- takes up a separate page of memory.  Also, you can run into bus
1935 -- errors if the file is modified.  As with 'readFile', the string
1936 -- representation in the file is assumed to be ISO-8859-1.
1937 --
1938 -- On systems without mmap, this is the same as a readFile.
1939 --
1940 mmapFile :: FilePath -> IO ByteString
1941 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1942
1943 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1944 mmap f = do
1945     h <- openBinaryFile f ReadMode
1946     l <- fromIntegral `fmap` hFileSize h
1947     -- Don't bother mmaping small files because each mmapped file takes up
1948     -- at least one full VM block.
1949     if l < mmap_limit
1950        then do thefp <- mallocByteString l
1951                withForeignPtr thefp $ \p-> hGetBuf h p l
1952                hClose h
1953                return (thefp, l)
1954        else do
1955                -- unix only :(
1956                fd <- fromIntegral `fmap` handleToFd h
1957                p  <- my_mmap l fd
1958                fp <- if p == nullPtr
1959                      then do thefp <- mallocByteString l
1960                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1961                              return thefp
1962                      else do
1963                           -- The munmap leads to crashes on OpenBSD.
1964                           -- maybe there's a use after unmap in there somewhere?
1965 #if !defined(__OpenBSD__)
1966                              let unmap = c_munmap p l >> return ()
1967 #else
1968                              let unmap = return ()
1969 #endif
1970                              fp <- FC.newForeignPtr p unmap
1971                              return fp
1972                c_close fd
1973                hClose h
1974                return (fp, l)
1975     where mmap_limit = 16*1024
1976 -}
1977
1978 #if defined(__GLASGOW_HASKELL__)
1979 --
1980 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1981 --
1982 getArgs :: IO [ByteString]
1983 getArgs =
1984   alloca $ \ p_argc ->
1985   alloca $ \ p_argv -> do
1986     getProgArgv p_argc p_argv
1987     p    <- fromIntegral `fmap` peek p_argc
1988     argv <- peek p_argv
1989     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1990 #endif
1991
1992 -- ---------------------------------------------------------------------
1993 -- Internal utilities
1994
1995 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1996 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1997 -- convenience for ByteString construction.
1998 w2c :: Word8 -> Char
1999 #if !defined(__GLASGOW_HASKELL__)
2000 w2c = chr . fromIntegral
2001 #else
2002 w2c = unsafeChr . fromIntegral
2003 #endif
2004 {-# INLINE w2c #-}
2005
2006 c2w :: Char -> Word8
2007 c2w = fromIntegral . ord
2008 {-# INLINE c2w #-}
2009
2010 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2011 -- is padded with a null byte.
2012 mallocByteString :: Int -> IO (ForeignPtr Word8)
2013 mallocByteString l = do
2014     fp <- mallocForeignPtrArray (l+1)
2015     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2016     return fp
2017
2018 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2019 -- argument gives the final size of the ByteString. Unlike 'generate'
2020 -- the ByteString is not reallocated if the final size is less than the
2021 -- estimated size. Also, unlike 'generate' ByteString's created this way
2022 -- are managed on the Haskell heap.
2023 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2024 create l write_ptr = inlinePerformIO $ do
2025     fp <- mallocByteString (l+1)
2026     withForeignPtr fp $ \p -> write_ptr p
2027     return $ PS fp 0 l
2028 {-# INLINE create #-}
2029
2030 -- | Perform an operation with a temporary ByteString
2031 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2032 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2033 {-# INLINE withPtr #-}
2034
2035 -- Common up near identical calls to `error' to reduce the number
2036 -- constant strings created when compiled:
2037 errorEmptyList :: String -> a
2038 errorEmptyList fun = moduleError fun "empty ByteString"
2039 {-# NOINLINE errorEmptyList #-}
2040
2041 moduleError :: String -> String -> a
2042 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2043 {-# NOINLINE moduleError #-}
2044
2045 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2046 -- of the string if no element is found, rather than Nothing.
2047 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2048 STRICT2(findIndexOrEnd)
2049 findIndexOrEnd f ps
2050     | null ps           = 0
2051     | f (unsafeHead ps) = 0
2052     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
2053 {-# INLINE findIndexOrEnd #-}
2054
2055 -- Find from the end of the string using predicate
2056 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2057 STRICT2(findFromEndUntil)
2058 findFromEndUntil f ps@(PS x s l) =
2059     if null ps then 0
2060     else if f (last ps) then l
2061          else findFromEndUntil f (PS x s (l-1))
2062
2063 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2064 -- it exposes lots of things to further inlining
2065 --
2066 {-# INLINE inlinePerformIO #-}
2067 inlinePerformIO :: IO a -> a
2068 #if defined(__GLASGOW_HASKELL__)
2069 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2070 #else
2071 inlinePerformIO = unsafePerformIO
2072 #endif
2073
2074 {-# INLINE newForeignFreePtr #-}
2075 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2076 #if defined(__GLASGOW_HASKELL__)
2077 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2078 #else
2079 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2080 #endif
2081
2082 -- ---------------------------------------------------------------------
2083 --
2084 -- Standard C functions
2085 --
2086
2087 foreign import ccall unsafe "string.h strlen" c_strlen
2088     :: CString -> CInt
2089
2090 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2091     :: CInt -> IO (Ptr Word8)
2092
2093 foreign import ccall unsafe "static stdlib.h free" c_free
2094     :: Ptr Word8 -> IO ()
2095
2096 #if !defined(__GLASGOW_HASKELL__)
2097 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2098     :: FunPtr (Ptr Word8 -> IO ())
2099 #endif
2100
2101 foreign import ccall unsafe "string.h memset" memset
2102     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2103
2104 foreign import ccall unsafe "string.h memchr" memchr
2105     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2106
2107 foreign import ccall unsafe "string.h memcmp" memcmp
2108     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2109
2110 foreign import ccall unsafe "string.h memcpy" memcpy
2111     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2112
2113 -- ---------------------------------------------------------------------
2114 --
2115 -- Uses our C code
2116 --
2117
2118 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2119     :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2120
2121 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2122     :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2123
2124 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2125     :: Ptr Word8 -> CInt -> Word8
2126
2127 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2128     :: Ptr Word8 -> CInt -> Word8
2129
2130 foreign import ccall unsafe "static fpstring.h count" c_count
2131     :: Ptr Word8 -> CInt -> Word8 -> CInt
2132
2133 -- ---------------------------------------------------------------------
2134 -- MMap
2135
2136 {-
2137 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2138     :: Int -> Int -> IO (Ptr Word8)
2139
2140 foreign import ccall unsafe "static unistd.h close" c_close
2141     :: Int -> IO Int
2142
2143 #  if !defined(__OpenBSD__)
2144 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2145     :: Ptr Word8 -> Int -> IO Int
2146 #  endif
2147 -}
2148
2149 -- ---------------------------------------------------------------------
2150 -- Internal GHC Haskell magic
2151
2152 #if defined(__GLASGOW_HASKELL__)
2153 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2154     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2155
2156 foreign import ccall unsafe "__hscore_memcpy_src_off"
2157    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2158 #endif
2159
2160 -- ---------------------------------------------------------------------
2161 --
2162 -- Functional array fusion for ByteStrings.
2163 --
2164 -- From the Data Parallel Haskell project,
2165 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2166 --
2167
2168 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2169 -- the fact that no bottoms of this type are ever constructed; hence, we can
2170 -- assume @(_ :: NoAL) `seq` x = x@.
2171 --
2172 data NoAL = NoAL
2173
2174 -- | Special forms of loop arguments
2175 --
2176 -- * These are common special cases for the three function arguments of gen
2177 --   and loop; we give them special names to make it easier to trigger RULES
2178 --   applying in the special cases represented by these arguments.  The
2179 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2180 --   two simplifier phases.
2181 --
2182 -- * In the case where the accumulator is not needed, it is better to always
2183 --   explicitly return a value `()', rather than just copy the input to the
2184 --   output, as the former gives GHC better local information.
2185 --
2186
2187 -- | Element function expressing a mapping only
2188 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2189 mapEFL f = \_ e -> (noAL, (Just $ f e))
2190 #if defined(__GLASGOW_HASKELL__)
2191 {-# INLINE [1] mapEFL #-}
2192 #endif
2193
2194 -- | Element function implementing a filter function only
2195 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2196 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2197 #if defined(__GLASGOW_HASKELL__)
2198 {-# INLINE [1] filterEFL #-}
2199 #endif
2200
2201 -- |Element function expressing a reduction only
2202 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2203 foldEFL f = \a e -> (f a e, Nothing)
2204 #if defined(__GLASGOW_HASKELL__)
2205 {-# INLINE [1] foldEFL #-}
2206 #endif
2207
2208 -- | A strict foldEFL.
2209 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2210 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2211 #if defined(__GLASGOW_HASKELL__)
2212 {-# INLINE [1] foldEFL' #-}
2213 #endif
2214
2215 -- | No accumulator
2216 noAL :: NoAL
2217 noAL = NoAL
2218 #if defined(__GLASGOW_HASKELL__)
2219 {-# INLINE [1] noAL #-}
2220 #endif
2221
2222 -- | Projection functions that are fusion friendly (as in, we determine when
2223 -- they are inlined)
2224 loopArr :: (ByteString, acc) -> ByteString
2225 loopArr (arr, _) = arr
2226 #if defined(__GLASGOW_HASKELL__)
2227 {-# INLINE [1] loopArr #-}
2228 #endif
2229
2230 loopAcc :: (ByteString, acc) -> acc
2231 loopAcc (_, acc) = acc
2232 #if defined(__GLASGOW_HASKELL__)
2233 {-# INLINE [1] loopAcc #-}
2234 #endif
2235
2236 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2237 loopSndAcc (arr, (_, acc)) = (arr, acc)
2238 #if defined(__GLASGOW_HASKELL__)
2239 {-# INLINE [1] loopSndAcc #-}
2240 #endif
2241
2242 ------------------------------------------------------------------------
2243
2244 --
2245 -- size, and then percentage.
2246 --
2247
2248 -- | Iteration over over ByteStrings
2249 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2250       -> acc                                   -- ^ initial acc value
2251       -> ByteString                            -- ^ input ByteString
2252       -> (ByteString, acc)
2253
2254 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2255     fp          <- mallocByteString i
2256     (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2257         (acc, i') <- go (a `plusPtr` s) p start
2258         if i' == i
2259             then return (fp,i',acc)                 -- no realloc for map
2260             else do fp_ <- mallocByteString i'      -- realloc
2261                     withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2262                     return (fp_,i',acc)
2263
2264     return (PS ptr 0 n, acc)
2265   where
2266     go p ma = trans 0 0
2267         where
2268             STRICT3(trans)
2269             trans a_off ma_off acc
2270                 | a_off >= i = return (acc, ma_off)
2271                 | otherwise  = do
2272                     x <- peekByteOff p a_off
2273                     let (acc', oe) = f acc x
2274                     ma_off' <- case oe of
2275                         Nothing  -> return ma_off
2276                         Just e   -> do pokeByteOff ma ma_off e
2277                                        return $ ma_off + 1
2278                     trans (a_off+1) ma_off' acc'
2279
2280 #if defined(__GLASGOW_HASKELL__)
2281 {-# INLINE [1] loopU #-}
2282 #endif
2283
2284 infixr 9 `fuseEFL`
2285
2286 -- |Fuse to flat loop functions
2287 fuseEFL :: (a1 -> Word8  -> (a1, Maybe Word8))
2288         -> (a2 -> Word8  -> (a2, Maybe Word8))
2289         -> (a1, a2)
2290         -> Word8
2291         -> ((a1, a2), Maybe Word8)
2292 fuseEFL f g (acc1, acc2) e1 =
2293     case f acc1 e1 of
2294         (acc1', Nothing) -> ((acc1', acc2), Nothing)
2295         (acc1', Just e2) ->
2296             case g acc2 e2 of
2297                 (acc2', res) -> ((acc1', acc2'), res)
2298
2299 {-# RULES
2300
2301 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2302   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2303     loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2304
2305 "loopArr/loopSndAcc" forall x.
2306   loopArr (loopSndAcc x) = loopArr x
2307
2308 "seq/NoAL" forall (u::NoAL) e.
2309   u `seq` e = e
2310
2311   #-}
2312