Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with "Prelude" functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         singleton,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: ByteString -> Word8 -> ByteString
  55         append,                 -- :: ByteString -> ByteString -> ByteString
  56         head,                   -- :: ByteString -> Word8
  57         last,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         init,                   -- :: ByteString -> ByteString
  60         null,                   -- :: ByteString -> Bool
  61         length,                 -- :: ByteString -> Int
  62
  63         -- * Transformating ByteStrings
  64         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  65         map',                   -- :: (Word8 -> Word8) -> ByteString -> ByteString
  66         reverse,                -- :: ByteString -> ByteString
  67         intersperse,            -- :: Word8 -> ByteString -> ByteString
  68         transpose,              -- :: [ByteString] -> [ByteString]
  69
  70         -- * Reducing 'ByteString's (folds)
  71         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  72         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  73         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  74         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  75         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  76         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77
  78         -- ** Special folds
  79         concat,                 -- :: [ByteString] -> ByteString
  80         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  81         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  82         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  83         maximum,                -- :: ByteString -> Word8
  84         minimum,                -- :: ByteString -> Word8
  85
  86         -- * Building ByteStrings
  87         -- ** Scans
  88         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  89         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  90
  91         -- ** Accumulating maps
  92         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  93 --      mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  94         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  95
  96         -- ** Unfolding ByteStrings
  97         replicate,              -- :: Int -> Word8 -> ByteString
  98         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  99         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
 100
 101         -- * Substrings
 102
 103         -- ** Breaking strings
 104         take,                   -- :: Int -> ByteString -> ByteString
 105         drop,                   -- :: Int -> ByteString -> ByteString
 106         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 107         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 108         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 109         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 110         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 111         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         group,                  -- :: ByteString -> [ByteString]
 113         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 114         inits,                  -- :: ByteString -> [ByteString]
 115         tails,                  -- :: ByteString -> [ByteString]
 116
 117         -- ** Breaking and dropping on specific bytes
 118         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 119         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 120
 121         -- ** Breaking into many substrings
 122         split,                  -- :: Word8 -> ByteString -> [ByteString]
 123         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 124         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 125
 126         -- ** Joining strings
 127         join,                   -- :: ByteString -> [ByteString] -> ByteString
 128         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 129
 130         -- * Predicates
 131         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 132         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 133
 134         -- ** Search for arbitrary substrings
 135         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 136         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 137         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 138
 139         -- * Searching ByteStrings
 140
 141         -- ** Searching by equality
 142         -- | These functions use memchr(3) to efficiently search the ByteString
 143         elem,                   -- :: Word8 -> ByteString -> Bool
 144         notElem,                -- :: Word8 -> ByteString -> Bool
 145         filterByte,             -- :: Word8 -> ByteString -> ByteString
 146         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 147
 148         -- ** Searching with a predicate
 149         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 150         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 151         filter',                -- :: (Word8 -> Bool) -> ByteString -> ByteString
 152 --      partition               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 153
 154         -- * Indexing ByteStrings
 155         index,                  -- :: ByteString -> Int -> Word8
 156         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 157         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 158         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
 159         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 160         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 161         count,                  -- :: Word8 -> ByteString -> Int
 162         findIndexOrEnd,         -- :: (Word8 -> Bool) -> ByteString -> Int
 163
 164         -- * Zipping and unzipping ByteStrings
 165         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 166         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 167         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 168
 169         -- * Ordered ByteStrings
 170         sort,                   -- :: ByteString -> ByteString
 171
 172         -- * Unchecked access
 173         unsafeHead,             -- :: ByteString -> Word8
 174         unsafeTail,             -- :: ByteString -> ByteString
 175         unsafeIndex,            -- :: ByteString -> Int -> Word8
 176         unsafeTake,             -- :: Int -> ByteString -> ByteString
 177         unsafeDrop,             -- :: Int -> ByteString -> ByteString
 178
 179         -- * Low level introduction and elimination
 180         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 181         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 182         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 183         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 184         skipIndex,              -- :: ByteString -> Int
 185
 186         -- ** Packing CStrings and pointers
 187         packCString,            -- :: CString -> ByteString
 188         packCStringLen,         -- :: CString -> ByteString
 189         packMallocCString,      -- :: CString -> ByteString
 190
 191 #if defined(__GLASGOW_HASKELL__)
 192         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 193         packAddress,            -- :: Addr# -> ByteString
 194         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 195         unsafeFinalize,         -- :: ByteString -> IO ()
 196 #endif
 197
 198         -- ** Using ByteStrings as CStrings
 199         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 200         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 201         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 202
 203         -- ** Copying ByteStrings
 204         -- | These functions perform memcpy(3) operations
 205         copy,                   -- :: ByteString -> ByteString
 206         copyCString,            -- :: CString -> ByteString
 207         copyCStringLen,         -- :: CStringLen -> ByteString
 208
 209         -- * I\/O with 'ByteString's
 210
 211         -- ** Standard input and output
 212
 213 #if defined(__GLASGOW_HASKELL__)
 214         getLine,                -- :: IO ByteString
 215 #endif
 216         getContents,            -- :: IO ByteString
 217         putStr,                 -- :: ByteString -> IO ()
 218         putStrLn,               -- :: ByteString -> IO ()
 219
 220         -- ** Files
 221         readFile,               -- :: FilePath -> IO ByteString
 222         writeFile,              -- :: FilePath -> ByteString -> IO ()
 223 --      mmapFile,               -- :: FilePath -> IO ByteString
 224
 225         -- ** I\/O with Handles
 226 #if defined(__GLASGOW_HASKELL__)
 227         getArgs,                -- :: IO [ByteString]
 228         hGetLine,               -- :: Handle -> IO ByteString
 229         hGetLines,              -- :: Handle -> IO [ByteString]
 230         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 231 #endif
 232         hGetContents,           -- :: Handle -> IO ByteString
 233         hGet,                   -- :: Handle -> Int -> IO ByteString
 234         hPut,                   -- :: Handle -> ByteString -> IO ()
 235
 236         -- * Fusion utilities
 237 #if defined(__GLASGOW_HASKELL__)
 238         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 239 #endif
 240
 241         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 242         loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL,
 243         mapAccumEFL, mapIndexEFL,
 244
 245   ) where
 246
 247 import qualified Prelude as P
 248 import Prelude hiding           (reverse,head,tail,last,init,null
 249                                 ,length,map,lines,foldl,foldr,unlines
 250                                 ,concat,any,take,drop,splitAt,takeWhile
 251                                 ,dropWhile,span,break,elem,filter,maximum
 252                                 ,minimum,all,concatMap,foldl1,foldr1
 253                                 ,scanl,scanl1,readFile,writeFile,replicate
 254                                 ,getContents,getLine,putStr,putStrLn
 255                                 ,zip,zipWith,unzip,notElem)
 256
 257 import qualified Data.List as List
 258
 259 import Data.Char
 260 import Data.Word                (Word8)
 261 import Data.Maybe               (listToMaybe)
 262 import Data.Array               (listArray)
 263 import qualified Data.Array as Array ((!))
 264
 265 -- Control.Exception.bracket not available in yhc or nhc
 266 import Control.Exception        (bracket, assert)
 267 import Control.Monad            (when)
 268
 269 import Foreign.C.String         (CString, CStringLen)
 270 import Foreign.C.Types          (CSize,CInt)
 271 import Foreign.ForeignPtr
 272 import Foreign.Marshal.Array
 273 import Foreign.Ptr
 274 import Foreign.Storable         (Storable(..))
 275
 276 -- hGetBuf and hPutBuf not available in yhc or nhc
 277 import System.IO                (stdin,stdout,hClose,hFileSize
 278                                 ,hGetBuf,hPutBuf,openBinaryFile
 279                                 ,Handle,IOMode(..))
 280
 281 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
 282
 283 #if !defined(__GLASGOW_HASKELL__)
 284 import System.IO.Unsafe
 285 #endif
 286
 287 #if defined(__GLASGOW_HASKELL__)
 288
 289 import Data.Generics            (Data(..), Typeable(..))
 290
 291 import System.IO                (hGetBufNonBlocking)
 292 import System.IO.Error          (isEOFError)
 293
 294 import Foreign.Marshal          (alloca)
 295 import qualified Foreign.Concurrent as FC (newForeignPtr)
 296
 297 import GHC.Handle
 298 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 299 import GHC.Base                 (build, unsafeChr)
 300 import GHC.Word hiding (Word8)
 301 import GHC.Ptr                  (Ptr(..))
 302 import GHC.ST                   (ST(..))
 303 import GHC.IOBase
 304
 305 #endif
 306
 307 -- CFILES stuff is Hugs only
 308 {-# CFILES cbits/fpstring.c #-}
 309
 310 -- -----------------------------------------------------------------------------
 311 --
 312 -- Useful macros, until we have bang patterns
 313 --
 314
 315 #define STRICT1(f) f a | a `seq` False = undefined
 316 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 317 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 318 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 319 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 320
 321 -- -----------------------------------------------------------------------------
 322
 323 -- | A space-efficient representation of a Word8 vector, supporting many
 324 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 325 --
 326 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 327 --
 328 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 329                      {-# UNPACK #-} !Int
 330                      {-# UNPACK #-} !Int
 331
 332 #if defined(__GLASGOW_HASKELL__)
 333     deriving (Data, Typeable)
 334 #endif
 335
 336 instance Eq  ByteString
 337     where (==)    = eq
 338
 339 instance Ord ByteString
 340     where compare = compareBytes
 341
 342 instance Show ByteString where
 343     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 344
 345 instance Read ByteString where
 346     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 347
 348 instance Monoid ByteString where
 349     mempty  = empty
 350     mappend = append
 351     mconcat = concat
 352
 353 {-
 354 instance Arbitrary PackedString where
 355     arbitrary = P.pack `fmap` arbitrary
 356     coarbitrary s = coarbitrary (P.unpack s)
 357 -}
 358
 359 -- | /O(n)/ Equality on the 'ByteString' type.
 360 eq :: ByteString -> ByteString -> Bool
 361 eq a@(PS p s l) b@(PS p' s' l')
 362     | l /= l'            = False    -- short cut on length
 363     | p == p' && s == s' = True     -- short cut for the same string
 364     | otherwise          = compareBytes a b == EQ
 365 {-# INLINE eq #-}
 366
 367 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 368 compareBytes :: ByteString -> ByteString -> Ordering
 369 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 370     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 371     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 372     | otherwise                         = inlinePerformIO $
 373         withForeignPtr x1 $ \p1 ->
 374         withForeignPtr x2 $ \p2 -> do
 375             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 376             return $ case i `compare` 0 of
 377                         EQ  -> l1 `compare` l2
 378                         x   -> x
 379 {-# INLINE compareBytes #-}
 380
 381 {-
 382 --
 383 -- About 4x slower over 32M
 384 --
 385 compareBytes :: ByteString -> ByteString -> Ordering
 386 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 387     withForeignPtr fp1 $ \p1 ->
 388         withForeignPtr fp2 $ \p2 ->
 389             cmp (p1 `plusPtr` off1)
 390                 (p2 `plusPtr` off2) 0 len1 len2
 391
 392 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 393 STRICT5(cmp)
 394 cmp p1 p2 n len1 len2
 395       | n == len1 = if n == len2 then return EQ else return LT
 396       | n == len2 = return GT
 397       | otherwise = do
 398           (a :: Word8) <- peekByteOff p1 n
 399           (b :: Word8) <- peekByteOff p2 n
 400           case a `compare` b of
 401                 EQ -> cmp p1 p2 (n+1) len1 len2
 402                 LT -> return LT
 403                 GT -> return GT
 404 {-# INLINE compareBytes #-}
 405 -}
 406
 407 -- -----------------------------------------------------------------------------
 408 -- Introducing and eliminating 'ByteString's
 409
 410 -- | /O(1)/ The empty 'ByteString'
 411 empty :: ByteString
 412 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 413 {-# NOINLINE empty #-}
 414
 415 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 416 singleton :: Word8 -> ByteString
 417 singleton c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
 418     withForeignPtr fp $ \p -> poke p c
 419     return $ PS fp 0 1
 420 {-# INLINE singleton #-}
 421
 422 --
 423 -- XXX The unsafePerformIO is critical!
 424 --
 425 -- Otherwise:
 426 --
 427 --  singleton 255 `compare` singleton 127
 428 --
 429 -- is compiled to:
 430 --
 431 --  case mallocByteString 2 of
 432 --      ForeignPtr f internals ->
 433 --           case writeWord8OffAddr# f 0 255 of _ ->
 434 --           case writeWord8OffAddr# f 0 127 of _ ->
 435 --           case eqAddr# f f of
 436 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 437 --                                        (GHC.Prim.plusAddr# f 0)
 438 --
 439 --
 440
 441 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 442 --
 443 -- For applications with large numbers of string literals, pack can be a
 444 -- bottleneck. In such cases, consider using packAddress (GHC only).
 445 pack :: [Word8] -> ByteString
 446
 447 #if !defined(__GLASGOW_HASKELL__)
 448
 449 pack str = create (P.length str) $ \p -> go p str
 450     where
 451         go _ []     = return ()
 452         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 453
 454 #else /* hack away */
 455
 456 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 457     where
 458         go _ _ []        = return ()
 459         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 460
 461         writeByte p i c = ST $ \s# ->
 462             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 463
 464 #endif
 465
 466 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 467 unpack :: ByteString -> [Word8]
 468
 469 #if !defined(__GLASGOW_HASKELL__)
 470
 471 unpack (PS _  _ 0) = []
 472 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 473         go (p `plusPtr` s) (l - 1) []
 474     where
 475         STRICT3(go)
 476         go p 0 acc = peek p          >>= \e -> return (e : acc)
 477         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 478 {-# INLINE unpack #-}
 479
 480 #else
 481
 482 unpack ps = build (unpackFoldr ps)
 483 {-# INLINE unpack #-}
 484
 485 unpackList :: ByteString -> [Word8]
 486 unpackList (PS fp off len) = withPtr fp $ \p -> do
 487     let STRICT3(loop)
 488         loop _ (-1) acc = return acc
 489         loop q n acc = do
 490            a <- peekByteOff q n
 491            loop q (n-1) (a : acc)
 492     loop (p `plusPtr` off) (len-1) []
 493
 494 {-# RULES
 495 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 496  #-}
 497
 498 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 499 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 500     let STRICT3(loop)
 501         loop _ (-1) acc = return acc
 502         loop q n    acc = do
 503            a <- peekByteOff q n
 504            loop q (n-1) (a `f` acc)
 505     loop (p `plusPtr` off) (len-1) ch
 506 {-# INLINE [0] unpackFoldr #-}
 507
 508 -- TODO just use normal foldr here.
 509
 510 #endif
 511
 512 ------------------------------------------------------------------------
 513
 514 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 515 -- conversion function
 516 packWith :: (a -> Word8) -> [a] -> ByteString
 517 packWith k str = create (P.length str) $ \p -> go p str
 518     where
 519         STRICT2(go)
 520         go _ []     = return ()
 521         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 522 {-# INLINE packWith #-}
 523 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 524
 525 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 526 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 527 unpackWith _ (PS _  _ 0) = []
 528 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 529         go (p `plusPtr` s) (l - 1) []
 530     where
 531         STRICT3(go)
 532         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 533         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 534 {-# INLINE unpackWith #-}
 535 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 536
 537 -- ---------------------------------------------------------------------
 538 -- Basic interface
 539
 540 -- | /O(1)/ Test whether a ByteString is empty.
 541 null :: ByteString -> Bool
 542 null (PS _ _ l) = l == 0
 543 {-# INLINE null #-}
 544
 545 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 546 length :: ByteString -> Int
 547 length (PS _ _ l) = l
 548
 549 #if defined(__GLASGOW_HASKELL__)
 550 {-# INLINE [1] length #-}
 551 #endif
 552
 553 {-#
 554
 555 -- Translate length into a loop.
 556 -- Performace ok, but allocates too much, so disable for now.
 557
 558   "length/loop" forall f acc s .
 559   length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
 560
 561   #-}
 562
 563 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 564 -- complexity, as it requires a memcpy.
 565 cons :: Word8 -> ByteString -> ByteString
 566 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 567         poke p c
 568         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 569 {-# INLINE cons #-}
 570
 571 -- todo fuse
 572
 573 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 574 snoc :: ByteString -> Word8 -> ByteString
 575 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 576         memcpy p (f `plusPtr` s) (fromIntegral l)
 577         poke (p `plusPtr` l) c
 578 {-# INLINE snoc #-}
 579
 580 -- todo fuse
 581
 582 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 583 head :: ByteString -> Word8
 584 head ps@(PS x s _)
 585     | null ps   = errorEmptyList "head"
 586     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 587 {-# INLINE head #-}
 588
 589 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 590 tail :: ByteString -> ByteString
 591 tail (PS p s l)
 592     | l <= 0    = errorEmptyList "tail"
 593     | otherwise = PS p (s+1) (l-1)
 594 {-# INLINE tail #-}
 595
 596 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 597 last :: ByteString -> Word8
 598 last ps@(PS x s l)
 599     | null ps   = errorEmptyList "last"
 600     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 601 {-# INLINE last #-}
 602
 603 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 604 init :: ByteString -> ByteString
 605 init (PS p s l)
 606     | l <= 0    = errorEmptyList "init"
 607     | otherwise = PS p s (l-1)
 608 {-# INLINE init #-}
 609
 610 -- | /O(n)/ Append two ByteStrings
 611 append :: ByteString -> ByteString -> ByteString
 612 append xs ys | null xs   = ys
 613              | null ys   = xs
 614              | otherwise = concat [xs,ys]
 615 {-# INLINE append #-}
 616
 617 -- ---------------------------------------------------------------------
 618 -- Transformations
 619
 620 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 621 -- element of @xs@. This function is subject to array fusion.
 622 map :: (Word8 -> Word8) -> ByteString -> ByteString
 623 map f = loopArr . loopU (mapEFL f) noAL
 624 {-# INLINE map #-}
 625
 626 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 627 -- slightly faster for one-shot cases.
 628 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 629 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 630     np <- mallocByteString (len+1)
 631     withForeignPtr np $ \p -> do
 632         map_ 0 (a `plusPtr` s) p
 633         return (PS np 0 len)
 634   where
 635
 636     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 637     STRICT3(map_)
 638     map_ n p1 p2
 639        | n >= len = return ()
 640        | otherwise = do
 641             x <- peekByteOff p1 n
 642             pokeByteOff p2 n (f x)
 643             map_ (n+1) p1 p2
 644 {-# INLINE map' #-}
 645
 646 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 647 reverse :: ByteString -> ByteString
 648 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 649         c_reverse p (f `plusPtr` s) (fromIntegral l)
 650
 651 {-
 652 reverse = pack . P.reverse . unpack
 653 -}
 654
 655 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 656 -- 'ByteString' and \`intersperses\' that byte between the elements of
 657 -- the 'ByteString'.  It is analogous to the intersperse function on
 658 -- Lists.
 659 intersperse :: Word8 -> ByteString -> ByteString
 660 intersperse c ps@(PS x s l)
 661     | length ps < 2  = ps
 662     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 663         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 664
 665 {-
 666 intersperse c = pack . List.intersperse c . unpack
 667 -}
 668
 669 -- | The 'transpose' function transposes the rows and columns of its
 670 -- 'ByteString' argument.
 671 transpose :: [ByteString] -> [ByteString]
 672 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 673
 674 -- ---------------------------------------------------------------------
 675 -- Reducing 'ByteString's
 676
 677 -- | 'foldl', applied to a binary operator, a starting value (typically
 678 -- the left-identity of the operator), and a ByteString, reduces the
 679 -- ByteString using the binary operator, from left to right.
 680 -- This function is subject to array fusion.
 681 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 682 foldl f z = loopAcc . loopU (foldEFL f) z
 683 {-# INLINE foldl #-}
 684
 685 {-
 686 --
 687 -- About twice as fast with 6.4.1, but not fuseable
 688 -- A simple fold . map is enough to make it worth while.
 689 --
 690 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 691         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 692     where
 693         STRICT3(lgo)
 694         lgo z p q | p == q    = return z
 695                   | otherwise = do c <- peek p
 696                                    lgo (f z c) (p `plusPtr` 1) q
 697 -}
 698
 699 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
 700 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 701 foldl' f z = loopAcc . loopU (foldEFL' f) z
 702 {-# INLINE foldl' #-}
 703
 704 -- | 'foldr', applied to a binary operator, a starting value
 705 -- (typically the right-identity of the operator), and a ByteString,
 706 -- reduces the ByteString using the binary operator, from right to left.
 707 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 708 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 709         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 710     where
 711         STRICT2(go)
 712         go p q | p == q    = return z
 713                | otherwise = do c  <- peek p
 714                                 ws <- go (p `plusPtr` 1) q
 715                                 return $ c `k` ws
 716
 717 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 718 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 719 -- This function is subject to array fusion.
 720 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 721 foldl1 f ps
 722     | null ps   = errorEmptyList "foldl1"
 723     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 724
 725 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
 726 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 727 foldl1' f ps
 728     | null ps   = errorEmptyList "foldl1'"
 729     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 730
 731 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 732 -- and thus must be applied to non-empty 'ByteString's
 733 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 734 foldr1 f ps
 735     | null ps        = errorEmptyList "foldr1"
 736     | otherwise      = foldr f (last ps) (init ps)
 737
 738 -- ---------------------------------------------------------------------
 739 -- Special folds
 740
 741 -- | /O(n)/ Concatenate a list of ByteStrings.
 742 concat :: [ByteString] -> ByteString
 743 concat []     = empty
 744 concat [ps]   = ps
 745 concat xs     = create len $ \ptr -> go xs ptr
 746   where len = P.sum . P.map length $ xs
 747         STRICT2(go)
 748         go []            _   = return ()
 749         go (PS p s l:ps) ptr = do
 750                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 751                 go ps (ptr `plusPtr` l)
 752
 753 -- | Map a function over a 'ByteString' and concatenate the results
 754 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 755 concatMap f = foldr (append . f) empty
 756 -- A silly function for ByteStrings anyway.
 757
 758 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 759 -- any element of the 'ByteString' satisfies the predicate.
 760 any :: (Word8 -> Bool) -> ByteString -> Bool
 761 any _ (PS _ _ 0) = False
 762 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 763         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 764     where
 765         STRICT2(go)
 766         go p q | p == q    = return False
 767                | otherwise = do c <- peek p
 768                                 if f c then return True
 769                                        else go (p `plusPtr` 1) q
 770
 771 -- todo fuse
 772
 773 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 774 -- if all elements of the 'ByteString' satisfy the predicate.
 775 all :: (Word8 -> Bool) -> ByteString -> Bool
 776 all _ (PS _ _ 0) = True
 777 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 778         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 779     where
 780         STRICT2(go)
 781         go p q | p == q     = return True  -- end of list
 782                | otherwise  = do c <- peek p
 783                                  if f c
 784                                     then go (p `plusPtr` 1) q
 785                                     else return False
 786 -- todo fuse
 787
 788 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 789 maximum :: ByteString -> Word8
 790 maximum xs@(PS x s l)
 791     | null xs   = errorEmptyList "maximum"
 792     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 793                     return $ c_maximum (p `plusPtr` s) (fromIntegral l)
 794 {-# INLINE maximum #-}
 795
 796 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 797 minimum :: ByteString -> Word8
 798 minimum xs@(PS x s l)
 799     | null xs   = errorEmptyList "minimum"
 800     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 801                     return $ c_minimum (p `plusPtr` s) (fromIntegral l)
 802 {-# INLINE minimum #-}
 803
 804 -- fusion is too slow here (10x)
 805
 806 {-
 807 maximum xs@(PS x s l)
 808     | null xs   = errorEmptyList "maximum"
 809     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 810                         w <- peek p
 811                         maximum_ (p `plusPtr` s) 0 l w
 812
 813 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 814 STRICT4(maximum_)
 815 maximum_ ptr n m c
 816     | n >= m    = return c
 817     | otherwise = do w <- peekByteOff ptr n
 818                      maximum_ ptr (n+1) m (if w > c then w else c)
 819
 820 minimum xs@(PS x s l)
 821     | null xs   = errorEmptyList "minimum"
 822     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 823                         w <- peek p
 824                         minimum_ (p `plusPtr` s) 0 l w
 825
 826 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 827 STRICT4(minimum_)
 828 minimum_ ptr n m c
 829     | n >= m    = return c
 830     | otherwise = do w <- peekByteOff ptr n
 831                      minimum_ ptr (n+1) m (if w < c then w else c)
 832 -}
 833
 834 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 835 mapAccumL f z = loopU (mapAccumEFL f) z
 836
 837 --mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
 838
 839 -- | /O(n)/ map Word8 functions, provided with the index at each position
 840 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 841 mapIndexed f = loopArr . loopU (mapIndexEFL f) 0
 842
 843 -- ---------------------------------------------------------------------
 844 -- Building ByteStrings
 845
 846 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 847 -- reduced values from the left. This function will fuse.
 848 --
 849 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 850 --
 851 -- Note that
 852 --
 853 -- > last (scanl f z xs) == foldl f z xs.
 854 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 855 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space
 856 {-# INLINE scanl #-}
 857
 858 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 859 -- This function will fuse.
 860 --
 861 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 862 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 863 scanl1 f ps
 864     | null ps   = empty
 865     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 866 {-# INLINE scanl1 #-}
 867
 868 -- ---------------------------------------------------------------------
 869 -- Unfolds and replicates
 870
 871 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 872 -- the value of every element. The following holds:
 873 --
 874 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 875 --
 876 -- This implemenation uses @memset(3)@
 877 replicate :: Int -> Word8 -> ByteString
 878 replicate w c | w <= 0    = empty
 879               | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 880
 881 {-
 882 -- About 5x slower
 883 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 884     where
 885         STRICT2(go)
 886         go _   0 = return w
 887         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 888 -}
 889
 890 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
 891 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
 892 -- ByteString from a seed value.  The function takes the element and
 893 -- returns 'Nothing' if it is done producing the ByteString or returns
 894 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
 895 -- and @b@ is the seed value for further production.
 896 --
 897 -- Examples:
 898 --
 899 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
 900 -- > == pack [0, 1, 2, 3, 4, 5]
 901 --
 902 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
 903 unfoldr f = concat . unfoldChunk 32 64
 904   where unfoldChunk n n' x =
 905           case unfoldrN n f x of
 906             (s, Nothing) -> s : []
 907             (s, Just x') -> s : unfoldChunk n' (n+n') x'
 908
 909 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
 910 -- value.  However, the length of the result is limited by the first
 911 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
 912 -- when the maximum length of the result is known.
 913 --
 914 -- The following equation relates 'unfoldrN' and 'unfoldr':
 915 --
 916 -- > unfoldrN n f s == take n (unfoldr f s)
 917 --
 918 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 919 unfoldrN i f x0
 920     | i < 0     = (empty, Just x0)
 921     | otherwise = inlinePerformIO $ do
 922                     fp <- mallocByteString i
 923                     withForeignPtr fp (\p -> go fp p x0 0)
 924   where STRICT4(go)
 925         go fp p x n =
 926           case f x of
 927             Nothing      -> let s = copy (PS fp 0 n)
 928                              in s `seq` return (s, Nothing)
 929             Just (w,x')
 930              | n == i    -> return (PS fp 0 i, Just x)
 931              | otherwise -> do poke p w
 932                                go fp (p `plusPtr` 1) x' (n+1)
 933
 934 -- ---------------------------------------------------------------------
 935 -- Substrings
 936
 937 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 938 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 939 take :: Int -> ByteString -> ByteString
 940 take n ps@(PS x s l)
 941     | n <= 0    = empty
 942     | n >= l    = ps
 943     | otherwise = PS x s n
 944 {-# INLINE take #-}
 945
 946 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 947 -- elements, or @[]@ if @n > 'length' xs@.
 948 drop  :: Int -> ByteString -> ByteString
 949 drop n ps@(PS x s l)
 950     | n <= 0    = ps
 951     | n >= l    = empty
 952     | otherwise = PS x (s+n) (l-n)
 953 {-# INLINE drop #-}
 954
 955 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 956 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 957 splitAt n ps@(PS x s l)
 958     | n <= 0    = (empty, ps)
 959     | n >= l    = (ps, empty)
 960     | otherwise = (PS x s n, PS x (s+n) (l-n))
 961 {-# INLINE splitAt #-}
 962
 963 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 964 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 965 -- satisfy @p@.
 966 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 967 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 968 {-# INLINE takeWhile #-}
 969
 970 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 971 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 972 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
 973 {-# INLINE dropWhile #-}
 974
 975 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 976 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 977 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
 978 {-# INLINE break #-}
 979
 980 -- | 'breakByte' breaks its ByteString argument at the first occurence
 981 -- of the specified byte. It is more efficient than 'break' as it is
 982 -- implemented with @memchr(3)@. I.e.
 983 --
 984 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 985 --
 986 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 987 breakByte c p = case elemIndex c p of
 988     Nothing -> (p,empty)
 989     Just n  -> (unsafeTake n p, unsafeDrop n p)
 990 {-# INLINE breakByte #-}
 991
 992 -- | 'spanByte' breaks its ByteString argument at the first
 993 -- occurence of a byte other than its argument. It is more efficient
 994 -- than 'span (==)'
 995 --
 996 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 997 --
 998 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 999 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1000     go (p `plusPtr` s) 0
1001   where
1002     STRICT2(go)
1003     go p i | i >= l    = return (ps, empty)
1004            | otherwise = do c' <- peekByteOff p i
1005                             if c /= c'
1006                                 then return (unsafeTake i ps, unsafeDrop i ps)
1007                                 else go p (i+1)
1008 {-# INLINE spanByte #-}
1009
1010 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1011 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1012 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1013 span p ps = break (not . p) ps
1014 {-# INLINE span #-}
1015
1016 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1017 -- We have
1018 --
1019 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1020 --
1021 -- and
1022 --
1023 -- > spanEnd (not . isSpace) ps
1024 -- >    ==
1025 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1026 --
1027 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1028 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1029
1030 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1031 -- separators, where the predicate returns True for a separator element.
1032 -- The resulting components do not contain the separators.  Two adjacent
1033 -- separators result in an empty component in the output.  eg.
1034 --
1035 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1036 -- > splitWith (=='a') []        == []
1037 --
1038 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1039
1040 #if defined(__GLASGOW_HASKELL__)
1041 splitWith _pred (PS _  _   0) = []
1042 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1043   where pred# c# = pred_ (W8# c#)
1044
1045         STRICT4(splitWith0)
1046         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1047             splitLoop pred' p 0 off' len' fp'
1048
1049         splitLoop :: (Word# -> Bool)
1050                   -> Ptr Word8
1051                   -> Int -> Int -> Int
1052                   -> ForeignPtr Word8
1053                   -> IO [ByteString]
1054
1055         splitLoop pred' p idx' off' len' fp'
1056             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1057             | idx' >= len'  = return [PS fp' off' idx']
1058             | otherwise = do
1059                 w <- peekElemOff p (off'+idx')
1060                 if pred' (case w of W8# w# -> w#)
1061                    then return (PS fp' off' idx' :
1062                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1063                    else splitLoop pred' p (idx'+1) off' len' fp'
1064 {-# INLINE splitWith #-}
1065
1066 #else
1067 splitWith _ (PS _ _ 0) = []
1068 splitWith p ps = loop p ps
1069     where
1070         STRICT2(loop)
1071         loop q qs = if null rest then [chunk]
1072                                  else chunk : loop q (unsafeTail rest)
1073             where (chunk,rest) = break q qs
1074 #endif
1075
1076 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1077 -- argument, consuming the delimiter. I.e.
1078 --
1079 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1080 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1081 -- > split 'x'  "x"          == ["",""]
1082 --
1083 -- and
1084 --
1085 -- > join [c] . split c == id
1086 -- > split == splitWith . (==)
1087 --
1088 -- As for all splitting functions in this library, this function does
1089 -- not copy the substrings, it just constructs new 'ByteStrings' that
1090 -- are slices of the original.
1091 --
1092 split :: Word8 -> ByteString -> [ByteString]
1093 split _ (PS _ _ 0) = []
1094 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1095     let ptr = p `plusPtr` s
1096
1097         STRICT1(loop)
1098         loop n = do
1099             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1100             if q == nullPtr
1101                 then [PS x (s+n) (l-n)]
1102                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1103
1104     return (loop 0)
1105 {-# INLINE split #-}
1106
1107 {-
1108 -- slower. but stays inside Haskell.
1109 split _ (PS _  _   0) = []
1110 split (W8# w#) (PS fp off len) = splitWith' off len fp
1111     where
1112         splitWith' off' len' fp' = withPtr fp $ \p ->
1113             splitLoop p 0 off' len' fp'
1114
1115         splitLoop :: Ptr Word8
1116                   -> Int -> Int -> Int
1117                   -> ForeignPtr Word8
1118                   -> IO [ByteString]
1119
1120         STRICT5(splitLoop)
1121         splitLoop p idx' off' len' fp'
1122             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1123             | idx' >= len'  = return [PS fp' off' idx']
1124             | otherwise = do
1125                 (W8# x#) <- peekElemOff p (off'+idx')
1126                 if word2Int# w# ==# word2Int# x#
1127                    then return (PS fp' off' idx' :
1128                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1129                    else splitLoop p (idx'+1) off' len' fp'
1130 -}
1131
1132 -- | Like 'splitWith', except that sequences of adjacent separators are
1133 -- treated as a single separator. eg.
1134 --
1135 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1136 --
1137 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1138 tokens f = P.filter (not.null) . splitWith f
1139 {-# INLINE tokens #-}
1140
1141 -- | The 'group' function takes a ByteString and returns a list of
1142 -- ByteStrings such that the concatenation of the result is equal to the
1143 -- argument.  Moreover, each sublist in the result contains only equal
1144 -- elements.  For example,
1145 --
1146 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1147 --
1148 -- It is a special case of 'groupBy', which allows the programmer to
1149 -- supply their own equality test. It is about 40% faster than
1150 -- /groupBy (==)/
1151 group :: ByteString -> [ByteString]
1152 group xs
1153     | null xs   = []
1154     | otherwise = ys : group zs
1155     where
1156         (ys, zs) = spanByte (unsafeHead xs) xs
1157
1158 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1159 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1160 groupBy k xs
1161     | null xs   = []
1162     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1163     where
1164         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1165
1166 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1167 -- 'ByteString's and concatenates the list after interspersing the first
1168 -- argument between each element of the list.
1169 join :: ByteString -> [ByteString] -> ByteString
1170 join s = concat . (List.intersperse s)
1171 {-# INLINE join #-}
1172
1173 --
1174 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1175 -- with a char. Around 4 times faster than the generalised join.
1176 --
1177 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1178 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1179     withForeignPtr ffp $ \fp ->
1180     withForeignPtr fgp $ \gp -> do
1181         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1182         poke (ptr `plusPtr` l) c
1183         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1184     where
1185       len = length f + length g + 1
1186 {-# INLINE joinWithByte #-}
1187
1188 -- ---------------------------------------------------------------------
1189 -- Indexing ByteStrings
1190
1191 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1192 index :: ByteString -> Int -> Word8
1193 index ps n
1194     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1195     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1196                                          ++ ", length = " ++ show (length ps))
1197     | otherwise      = ps `unsafeIndex` n
1198 {-# INLINE index #-}
1199
1200 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1201 -- element in the given 'ByteString' which is equal to the query
1202 -- element, or 'Nothing' if there is no such element.
1203 -- This implementation uses memchr(3).
1204 elemIndex :: Word8 -> ByteString -> Maybe Int
1205 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1206     let p' = p `plusPtr` s
1207         q  = memchr p' c (fromIntegral l)
1208     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1209 {-# INLINE elemIndex #-}
1210
1211 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1212 -- element in the given 'ByteString' which is equal to the query
1213 -- element, or 'Nothing' if there is no such element. The following
1214 -- holds:
1215 --
1216 -- > elemIndexEnd c xs ==
1217 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1218 --
1219 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1220 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1221     go (p `plusPtr` s) (l-1)
1222   where
1223     STRICT2(go)
1224     go p i | i < 0     = return Nothing
1225            | otherwise = do ch' <- peekByteOff p i
1226                             if ch == ch'
1227                                 then return $ Just i
1228                                 else go p (i-1)
1229 {-# INLINE elemIndexEnd #-}
1230
1231 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1232 -- the indices of all elements equal to the query element, in ascending order.
1233 -- This implementation uses memchr(3).
1234 elemIndices :: Word8 -> ByteString -> [Int]
1235 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1236     let ptr = p `plusPtr` s
1237
1238         STRICT1(loop)
1239         loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1240                  in if q == nullPtr
1241                         then []
1242                         else let i = q `minusPtr` ptr
1243                              in i : loop (i+1)
1244     return (loop 0)
1245 {-# INLINE elemIndices #-}
1246
1247 {-
1248 -- much slower
1249 elemIndices :: Word8 -> ByteString -> [Int]
1250 elemIndices c ps = loop 0 ps
1251    where STRICT2(loop)
1252          loop _ ps' | null ps'            = []
1253          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1254                     | otherwise           = loop (n+1) (unsafeTail ps')
1255 -}
1256
1257 -- | count returns the number of times its argument appears in the ByteString
1258 --
1259 -- > count = length . elemIndices
1260 --
1261 -- But more efficiently than using length on the intermediate list.
1262 count :: Word8 -> ByteString -> Int
1263 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1264     return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1265 {-# INLINE count #-}
1266
1267 {-
1268 --
1269 -- around 30% slower
1270 --
1271 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1272      go (p `plusPtr` s) (fromIntegral m) 0
1273     where
1274         go :: Ptr Word8 -> CSize -> Int -> IO Int
1275         STRICT3(go)
1276         go p l i = do
1277             let q = memchr p w l
1278             if q == nullPtr
1279                 then return i
1280                 else do let k = fromIntegral $ q `minusPtr` p
1281                         go (q `plusPtr` 1) (l-k-1) (i+1)
1282 -}
1283
1284 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1285 -- returns the index of the first element in the ByteString
1286 -- satisfying the predicate.
1287 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1288 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1289   where
1290     STRICT2(go)
1291     go ptr n | n >= l    = return Nothing
1292              | otherwise = do w <- peek ptr
1293                               if k w
1294                                 then return (Just n)
1295                                 else go (ptr `plusPtr` 1) (n+1)
1296 {-# INLINE findIndex #-}
1297
1298 -- | The 'findIndices' function extends 'findIndex', by returning the
1299 -- indices of all elements satisfying the predicate, in ascending order.
1300 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1301 findIndices p ps = loop 0 ps
1302    where
1303      STRICT2(loop)
1304      loop n qs | null qs           = []
1305                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1306                | otherwise         =     loop (n+1) (unsafeTail qs)
1307
1308 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1309 -- of the string if no element is found, rather than Nothing.
1310 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1311 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1312   where
1313     STRICT2(go)
1314     go ptr n | n >= l    = return l
1315              | otherwise = do w <- peek ptr
1316                               if k w
1317                                 then return n
1318                                 else go (ptr `plusPtr` 1) (n+1)
1319 {-# INLINE findIndexOrEnd #-}
1320
1321 -- ---------------------------------------------------------------------
1322 -- Searching ByteStrings
1323
1324 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1325 elem :: Word8 -> ByteString -> Bool
1326 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1327 {-# INLINE elem #-}
1328
1329 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1330 notElem :: Word8 -> ByteString -> Bool
1331 notElem c ps = not (elem c ps)
1332 {-# INLINE notElem #-}
1333
1334 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1335 -- returns a ByteString containing those characters that satisfy the
1336 -- predicate. This function is subject to array fusion.
1337 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1338 filter p  = loopArr . loopU (filterEFL p) noAL
1339 {-# INLINE filter #-}
1340
1341 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1342 -- around 2x faster for some one-shot applications.
1343 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1344 filter' k ps@(PS x s l)
1345     | null ps   = ps
1346     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1347         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1348         return (t `minusPtr` p) -- actual length
1349     where
1350         STRICT3(go)
1351         go f t end | f == end  = return t
1352                    | otherwise = do
1353                         w <- peek f
1354                         if k w
1355                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1356                             else             go (f `plusPtr` 1) t               end
1357 {-# INLINE filter' #-}
1358
1359 --
1360 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1361 -- case of filtering a single byte. It is more efficient to use
1362 -- /filterByte/ in this case.
1363 --
1364 -- > filterByte == filter . (==)
1365 --
1366 -- filterByte is around 10x faster, and uses much less space, than its
1367 -- filter equivalent
1368 filterByte :: Word8 -> ByteString -> ByteString
1369 filterByte w ps = replicate (count w ps) w
1370 {-# INLINE filterByte #-}
1371
1372 --
1373 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1374 -- case of filtering a single byte out of a list. It is more efficient
1375 -- to use /filterNotByte/ in this case.
1376 --
1377 -- > filterNotByte == filter . (/=)
1378 --
1379 -- filterNotByte is around 2x faster than its filter equivalent.
1380 filterNotByte :: Word8 -> ByteString -> ByteString
1381 filterNotByte w = filter' (/= w)
1382 {-# INLINE filterNotByte #-}
1383
1384 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1385 -- and returns the first element in matching the predicate, or 'Nothing'
1386 -- if there is no such element.
1387 --
1388 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1389 --
1390 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1391 find f p = case findIndex f p of
1392                     Just n -> Just (p `unsafeIndex` n)
1393                     _      -> Nothing
1394 {-# INLINE find #-}
1395
1396 {-
1397 --
1398 -- fuseable, but we don't want to walk the whole array.
1399 --
1400 find k = foldl findEFL Nothing
1401     where findEFL a@(Just _) _ = a
1402           findEFL _          c | k c       = Just c
1403                                | otherwise = Nothing
1404 -}
1405
1406 -- ---------------------------------------------------------------------
1407 -- Searching for substrings
1408
1409 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1410 -- iff the first is a prefix of the second.
1411 isPrefixOf :: ByteString -> ByteString -> Bool
1412 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1413     | l1 == 0   = True
1414     | l2 < l1   = False
1415     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1416         withForeignPtr x2 $ \p2 -> do
1417             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1418             return (i == 0)
1419
1420 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1421 -- iff the first is a suffix of the second.
1422 --
1423 -- The following holds:
1424 --
1425 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1426 --
1427 -- However, the real implemenation uses memcmp to compare the end of the
1428 -- string only, with no reverse required..
1429 isSuffixOf :: ByteString -> ByteString -> Bool
1430 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1431     | l1 == 0   = True
1432     | l2 < l1   = False
1433     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1434         withForeignPtr x2 $ \p2 -> do
1435             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1436             return (i == 0)
1437
1438 -- | Check whether one string is a substring of another. @isSubstringOf
1439 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1440 isSubstringOf :: ByteString -- ^ String to search for.
1441               -> ByteString -- ^ String to search in.
1442               -> Bool
1443 isSubstringOf p s = not $ P.null $ findSubstrings p s
1444
1445 -- | Get the first index of a substring in another string,
1446 --   or 'Nothing' if the string is not found.
1447 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1448 findSubstring :: ByteString -- ^ String to search for.
1449               -> ByteString -- ^ String to seach in.
1450               -> Maybe Int
1451 findSubstring = (listToMaybe .) . findSubstrings
1452
1453 -- | Find the indexes of all (possibly overlapping) occurances of a
1454 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1455 -- string matching algorithm.
1456 findSubstrings :: ByteString -- ^ String to search for.
1457                -> ByteString -- ^ String to seach in.
1458                -> [Int]
1459
1460 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1461   where
1462       patc x = pat `unsafeIndex` x
1463       strc x = str `unsafeIndex` x
1464
1465       -- maybe we should make kmpNext a UArray before using it in search?
1466       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1467       kmpNextL p _ | null p = []
1468       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1469                          ps = unsafeTail p
1470                          x = if not (null ps) && unsafeHead ps == patc j'
1471                                 then kmpNext Array.! j' else j'
1472                         in x:kmpNextL ps j'
1473       search i j = match ++ rest -- i: position in string, j: position in pattern
1474         where match = if j == m then [(i - j)] else []
1475               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1476       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1477                | otherwise = j
1478
1479 -- ---------------------------------------------------------------------
1480 -- Zipping
1481
1482 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1483 -- corresponding pairs of bytes. If one input ByteString is short,
1484 -- excess elements of the longer ByteString are discarded. This is
1485 -- equivalent to a pair of 'unpack' operations.
1486 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1487 zip ps qs
1488     | null ps || null qs = []
1489     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1490
1491 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1492 -- the first argument, instead of a tupling function.  For example,
1493 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1494 -- corresponding sums.
1495 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1496 zipWith f ps qs
1497     | null ps || null qs = []
1498     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1499
1500 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1501 -- ByteStrings. Note that this performs two 'pack' operations.
1502 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1503 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1504 {-# INLINE unzip #-}
1505
1506 -- ---------------------------------------------------------------------
1507 -- Special lists
1508
1509 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1510 inits :: ByteString -> [ByteString]
1511 inits (PS x s l) = [PS x s n | n <- [0..l]]
1512
1513 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1514 tails :: ByteString -> [ByteString]
1515 tails p | null p    = [empty]
1516         | otherwise = p : tails (unsafeTail p)
1517
1518 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1519
1520 -- ---------------------------------------------------------------------
1521 -- ** Ordered 'ByteString's
1522
1523 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1524 sort :: ByteString -> ByteString
1525 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1526
1527     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1528     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1529
1530     let STRICT2(go)
1531         go 256 _   = return ()
1532         go i   ptr = do n <- peekElemOff arr i
1533                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1534                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1535     go 0 p
1536
1537 -- "countEach counts str l" counts the number of occurences of each Word8 in
1538 -- str, and stores the result in counts.
1539 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1540 STRICT3(countEach)
1541 countEach counts str l = go 0
1542  where
1543     STRICT1(go)
1544     go i | i == l    = return ()
1545          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1546                           x <- peekElemOff counts k
1547                           pokeElemOff counts k (x + 1)
1548                           go (i + 1)
1549
1550 {-
1551 sort :: ByteString -> ByteString
1552 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1553         memcpy p (f `plusPtr` s) l
1554         c_qsort p l -- inplace
1555 -}
1556
1557 {-
1558 sort = pack . List.sort . unpack
1559 -}
1560
1561 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1562 --
1563 -- Try some linear sorts: radix, counting
1564 -- Or mergesort.
1565 --
1566 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1567 -- sortBy f ps = undefined
1568
1569 -- ---------------------------------------------------------------------
1570 --
1571 -- Extensions to the basic interface
1572 --
1573
1574 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1575 -- check for the empty case, so there is an obligation on the programmer
1576 -- to provide a proof that the ByteString is non-empty.
1577 unsafeHead :: ByteString -> Word8
1578 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1579 {-# INLINE unsafeHead #-}
1580
1581 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1582 -- check for the empty case. As with 'unsafeHead', the programmer must
1583 -- provide a separate proof that the ByteString is non-empty.
1584 unsafeTail :: ByteString -> ByteString
1585 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1586 {-# INLINE unsafeTail #-}
1587
1588 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1589 -- This omits the bounds check, which means there is an accompanying
1590 -- obligation on the programmer to ensure the bounds are checked in some
1591 -- other way.
1592 unsafeIndex :: ByteString -> Int -> Word8
1593 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1594 {-# INLINE unsafeIndex #-}
1595
1596 -- | A variety of 'take' which omits the checks on @n@ so there is an
1597 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1598 unsafeTake :: Int -> ByteString -> ByteString
1599 unsafeTake n (PS x s l) =
1600   assert (0 <= n && n <= l) $ PS x s n
1601 {-# INLINE unsafeTake #-}
1602
1603 -- | A variety of 'drop' which omits the checks on @n@ so there is an
1604 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1605 unsafeDrop  :: Int -> ByteString -> ByteString
1606 unsafeDrop n (PS x s l) =
1607   assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
1608 {-# INLINE unsafeDrop #-}
1609
1610 -- ---------------------------------------------------------------------
1611 -- Low level constructors
1612
1613 #if defined(__GLASGOW_HASKELL__)
1614 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1615 -- Addr\# (an arbitrary machine address assumed to point outside the
1616 -- garbage-collected heap) into a @ByteString@. A much faster way to
1617 -- create an Addr\# is with an unboxed string literal, than to pack a
1618 -- boxed string. A unboxed string literal is compiled to a static @char
1619 -- []@ by GHC. Establishing the length of the string requires a call to
1620 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1621 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1622 -- if you know the length of the string statically.
1623 --
1624 -- An example:
1625 --
1626 -- > literalFS = packAddress "literal"#
1627 --
1628 packAddress :: Addr# -> ByteString
1629 packAddress addr# = inlinePerformIO $ do
1630     p <- newForeignPtr_ cstr
1631     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1632   where
1633     cstr = Ptr addr#
1634 {-# INLINE packAddress #-}
1635
1636 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1637 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1638 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1639 -- 'Addr\#' to the string, and the length of the string. Make sure the
1640 -- length is correct, otherwise use the safer 'packAddress' (where the
1641 -- length will be calculated once at runtime).
1642 unsafePackAddress :: Int -> Addr# -> ByteString
1643 unsafePackAddress len addr# = inlinePerformIO $ do
1644     p <- newForeignPtr_ cstr
1645     return $ PS p 0 len
1646     where cstr = Ptr addr#
1647
1648 #endif
1649
1650 -- | /O(1)/ Build a ByteString from a ForeignPtr
1651 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1652 fromForeignPtr fp l = PS fp 0 l
1653
1654 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1655 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1656 toForeignPtr (PS ps s l) = (ps, s, l)
1657
1658 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1659 -- current 'ByteString' from any larger string it was created from, as
1660 -- an 'Int'.
1661 skipIndex :: ByteString -> Int
1662 skipIndex (PS _ s _) = s
1663 {-# INLINE skipIndex #-}
1664
1665 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1666 -- finalizer associated to it. The ByteString length is calculated using
1667 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1668 packCString :: CString -> ByteString
1669 packCString cstr = inlinePerformIO $ do
1670     fp <- newForeignPtr_ (castPtr cstr)
1671     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1672
1673 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1674 -- have /no/ finalizer associated with it. This operation has /O(1)/
1675 -- complexity as we already know the final size, so no /strlen(3)/ is
1676 -- required.
1677 packCStringLen :: CStringLen -> ByteString
1678 packCStringLen (ptr,len) = inlinePerformIO $ do
1679     fp <- newForeignPtr_ (castPtr ptr)
1680     return $ PS fp 0 (fromIntegral len)
1681
1682 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1683 -- have a @free(3)@ finalizer associated to it.
1684 packMallocCString :: CString -> ByteString
1685 packMallocCString cstr = inlinePerformIO $ do
1686     fp <- newForeignFreePtr (castPtr cstr)
1687     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1688
1689 #if defined(__GLASGOW_HASKELL__)
1690 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1691 -- length, and an IO action representing a finalizer. This function is
1692 -- not available on Hugs.
1693 --
1694 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1695 packCStringFinalizer p l f = do
1696     fp <- FC.newForeignPtr p f
1697     return $ PS fp 0 l
1698
1699 -- | Explicitly run the finaliser associated with a 'ByteString'.
1700 -- Further references to this value may generate invalid memory
1701 -- references. This operation is unsafe, as there may be other
1702 -- 'ByteStrings' referring to the same underlying pages. If you use
1703 -- this, you need to have a proof of some kind that all 'ByteString's
1704 -- ever generated from the underlying byte array are no longer live.
1705 unsafeFinalize :: ByteString -> IO ()
1706 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1707
1708 #endif
1709
1710 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1711 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1712 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1713 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1714     where
1715       alloc = withForeignPtr ps $ \p -> do
1716                 buf <- c_malloc (fromIntegral l+1)
1717                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1718                 poke (buf `plusPtr` l) (0::Word8)
1719                 return $ castPtr buf
1720
1721 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1722 -- Warning: modifying the @CString@ will affect the @ByteString@.
1723 -- Why is this function unsafe? It relies on the null byte at the end of
1724 -- the ByteString to be there. This is /not/ the case if your ByteString
1725 -- has been spliced from a larger string (i.e. with take or drop).
1726 -- Unless you can guarantee the null byte, you should use the safe
1727 -- version, which will copy the string first.
1728 --
1729 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1730 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1731
1732 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1733 --   This is mainly useful to allow the rest of the data pointed
1734 --   to by the 'ByteString' to be garbage collected, for example
1735 --   if a large string has been read in, and only a small part of it
1736 --   is needed in the rest of the program.
1737 copy :: ByteString -> ByteString
1738 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1739     memcpy p (f `plusPtr` s) (fromIntegral l)
1740
1741 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1742 -- CString is going to be deallocated from C land.
1743 copyCString :: CString -> IO ByteString
1744 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1745
1746 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1747 copyCStringLen :: CStringLen -> IO ByteString
1748 copyCStringLen (cstr, len) = do
1749     fp <- mallocForeignPtrArray (len+1)
1750     withForeignPtr fp $ \p -> do
1751         memcpy p (castPtr cstr) (fromIntegral len)
1752         poke (p `plusPtr` len) (0 :: Word8)
1753     return $! PS fp 0 len
1754
1755 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1756 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1757 -- This is analogous to unsafeUseAsCString, and comes with the same
1758 -- safety requirements.
1759 --
1760 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1761 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1762
1763 -- | Given the maximum size needed and a function to make the contents
1764 -- of a ByteString, generate makes the 'ByteString'. The generating
1765 -- function is required to return the actual final size (<= the maximum
1766 -- size), and the resulting byte array is realloced to this size.  The
1767 -- string is padded at the end with a null byte.
1768 --
1769 -- generate is the main mechanism for creating custom, efficient
1770 -- ByteString functions, using Haskell or C functions to fill the space.
1771 --
1772 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1773 generate i f = do
1774     fp      <- mallocByteString i
1775     (ptr,n) <- withForeignPtr fp $ \p -> do
1776         i' <- f p
1777         if i' == i
1778             then return (fp,i')
1779             else do fp_ <- mallocByteString i'      -- realloc
1780                     withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1781                     return (fp_,i')
1782     return (PS ptr 0 n)
1783
1784 {-
1785 --
1786 -- On the C malloc heap. Less fun.
1787 --
1788 generate i f = do
1789     p <- mallocArray (i+1)
1790     i' <- f p
1791     p' <- reallocArray p (i'+1)
1792     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1793     fp <- newForeignFreePtr p'
1794     return $ PS fp 0 i'
1795 -}
1796
1797 -- ---------------------------------------------------------------------
1798 -- line IO
1799
1800 #if defined(__GLASGOW_HASKELL__)
1801
1802 -- | getLine, read a line from stdin.
1803 getLine :: IO ByteString
1804 getLine = hGetLine stdin
1805
1806 -- | Lazily construct a list of lines of ByteStrings. This will be much
1807 -- better on memory consumption than using lines =<< getContents.
1808 hGetLines :: Handle -> IO [ByteString]
1809 hGetLines h = go
1810     where
1811         go = unsafeInterleaveIO $ do
1812                 ms <- catch (hGetLine h >>= return . Just)
1813                             (\_ -> return Nothing)
1814                 case ms of
1815                     Nothing -> return []
1816                     Just s  -> do ss <- go
1817                                   return (s:ss)
1818
1819 -- | hGetLine. read a ByteString from a handle
1820 hGetLine :: Handle -> IO ByteString
1821 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1822     case haBufferMode handle_ of
1823        NoBuffering -> error "no buffering"
1824        _other      -> hGetLineBuffered handle_
1825
1826  where
1827     hGetLineBuffered handle_ = do
1828         let ref = haBuffer handle_
1829         buf <- readIORef ref
1830         hGetLineBufferedLoop handle_ ref buf 0 []
1831
1832     hGetLineBufferedLoop handle_ ref
1833             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1834         len `seq` do
1835         off <- findEOL r w raw
1836         let new_len = len + off - r
1837         xs <- mkPS raw r off
1838
1839       -- if eol == True, then off is the offset of the '\n'
1840       -- otherwise off == w and the buffer is now empty.
1841         if off /= w
1842             then do if (w == off + 1)
1843                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1844                             else writeIORef ref buf{ bufRPtr = off + 1 }
1845                     mkBigPS new_len (xs:xss)
1846             else do
1847                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1848                                     buf{ bufWPtr=0, bufRPtr=0 }
1849                  case maybe_buf of
1850                     -- Nothing indicates we caught an EOF, and we may have a
1851                     -- partial line to return.
1852                     Nothing -> do
1853                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1854                          if new_len > 0
1855                             then mkBigPS new_len (xs:xss)
1856                             else ioe_EOF
1857                     Just new_buf ->
1858                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1859
1860     -- find the end-of-line character, if there is one
1861     findEOL r w raw
1862         | r == w = return w
1863         | otherwise =  do
1864             (c,r') <- readCharFromBuffer raw r
1865             if c == '\n'
1866                 then return r -- NB. not r': don't include the '\n'
1867                 else findEOL r' w raw
1868
1869     maybeFillReadBuffer fd is_line is_stream buf = catch
1870         (do buf' <- fillReadBuffer fd is_line is_stream buf
1871             return (Just buf'))
1872         (\e -> if isEOFError e then return Nothing else ioError e)
1873
1874 -- TODO, rewrite to use normal memcpy
1875 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1876 mkPS buf start end = do
1877     let len = end - start
1878     fp <- mallocByteString len
1879     withForeignPtr fp $ \p -> do
1880         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1881         return (PS fp 0 len)
1882
1883 mkBigPS :: Int -> [ByteString] -> IO ByteString
1884 mkBigPS _ [ps] = return ps
1885 mkBigPS _ pss = return $! concat (P.reverse pss)
1886
1887 #endif
1888
1889 -- ---------------------------------------------------------------------
1890 -- Block IO
1891
1892 -- | Outputs a 'ByteString' to the specified 'Handle'.
1893 hPut :: Handle -> ByteString -> IO ()
1894 hPut _ (PS _ _ 0)  = return ()
1895 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1896 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1897
1898 -- | Write a ByteString to stdout
1899 putStr :: ByteString -> IO ()
1900 putStr = hPut stdout
1901
1902 -- | Write a ByteString to stdout, appending a newline byte
1903 putStrLn :: ByteString -> IO ()
1904 putStrLn ps = hPut stdout ps >> hPut stdout nl
1905     where nl = singleton 0x0a
1906
1907 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1908 -- is far more efficient than reading the characters into a 'String'
1909 -- and then using 'pack'.
1910 hGet :: Handle -> Int -> IO ByteString
1911 hGet _ 0 = return empty
1912 hGet h i = do fp <- mallocByteString i
1913               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1914               return $ PS fp 0 l
1915
1916 #if defined(__GLASGOW_HASKELL__)
1917 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1918 -- waiting for data to become available, instead it returns only whatever data
1919 -- is available.
1920 hGetNonBlocking :: Handle -> Int -> IO ByteString
1921 hGetNonBlocking _ 0 = return empty
1922 hGetNonBlocking h i = do
1923     fp <- mallocByteString i
1924     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1925     return $ PS fp 0 l
1926 #endif
1927
1928 -- | Read entire handle contents into a 'ByteString'.
1929 -- This function reads chunks at a time, doubling the chunksize on each
1930 -- read. The final buffer is then realloced to the appropriate size. For
1931 -- files > half of available memory, this may lead to memory exhaustion.
1932 -- Consider using 'readFile' in this case.
1933 --
1934 -- As with 'hGet', the string representation in the file is assumed to
1935 -- be ISO-8859-1.
1936 --
1937 hGetContents :: Handle -> IO ByteString
1938 hGetContents h = do
1939     let start_size = 1024
1940     p <- mallocArray start_size
1941     i <- hGetBuf h p start_size
1942     if i < start_size
1943         then do p' <- reallocArray p i
1944                 fp <- newForeignFreePtr p'
1945                 return $ PS fp 0 i
1946         else f p start_size
1947     where
1948         f p s = do
1949             let s' = 2 * s
1950             p' <- reallocArray p s'
1951             i  <- hGetBuf h (p' `plusPtr` s) s
1952             if i < s
1953                 then do let i' = s + i
1954                         p'' <- reallocArray p' i'
1955                         fp  <- newForeignFreePtr p''
1956                         return $ PS fp 0 i'
1957                 else f p' s'
1958
1959 -- | getContents. Equivalent to hGetContents stdin
1960 getContents :: IO ByteString
1961 getContents = hGetContents stdin
1962
1963 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1964 -- efficient than reading the characters into a 'String' and then using
1965 -- 'pack'.  It also may be more efficient than opening the file and
1966 -- reading it using hGet.
1967 readFile :: FilePath -> IO ByteString
1968 readFile f = do
1969     h <- openBinaryFile f ReadMode
1970     l <- hFileSize h
1971     s <- hGet h $ fromIntegral l
1972     hClose h
1973     return s
1974
1975 -- | Write a 'ByteString' to a file.
1976 writeFile :: FilePath -> ByteString -> IO ()
1977 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1978     (\h -> hPut h ps)
1979
1980 {-
1981 --
1982 -- Disable until we can move it into a portable .hsc file
1983 --
1984
1985 -- | Like readFile, this reads an entire file directly into a
1986 -- 'ByteString', but it is even more efficient.  It involves directly
1987 -- mapping the file to memory.  This has the advantage that the contents
1988 -- of the file never need to be copied.  Also, under memory pressure the
1989 -- page may simply be discarded, while in the case of readFile it would
1990 -- need to be written to swap.  If you read many small files, mmapFile
1991 -- will be less memory-efficient than readFile, since each mmapFile
1992 -- takes up a separate page of memory.  Also, you can run into bus
1993 -- errors if the file is modified.  As with 'readFile', the string
1994 -- representation in the file is assumed to be ISO-8859-1.
1995 --
1996 -- On systems without mmap, this is the same as a readFile.
1997 --
1998 mmapFile :: FilePath -> IO ByteString
1999 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
2000
2001 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
2002 mmap f = do
2003     h <- openBinaryFile f ReadMode
2004     l <- fromIntegral `fmap` hFileSize h
2005     -- Don't bother mmaping small files because each mmapped file takes up
2006     -- at least one full VM block.
2007     if l < mmap_limit
2008        then do thefp <- mallocByteString l
2009                withForeignPtr thefp $ \p-> hGetBuf h p l
2010                hClose h
2011                return (thefp, l)
2012        else do
2013                -- unix only :(
2014                fd <- fromIntegral `fmap` handleToFd h
2015                p  <- my_mmap l fd
2016                fp <- if p == nullPtr
2017                      then do thefp <- mallocByteString l
2018                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
2019                              return thefp
2020                      else do
2021                           -- The munmap leads to crashes on OpenBSD.
2022                           -- maybe there's a use after unmap in there somewhere?
2023 #if !defined(__OpenBSD__)
2024                              let unmap = c_munmap p l >> return ()
2025 #else
2026                              let unmap = return ()
2027 #endif
2028                              fp <- FC.newForeignPtr p unmap
2029                              return fp
2030                c_close fd
2031                hClose h
2032                return (fp, l)
2033     where mmap_limit = 16*1024
2034 -}
2035
2036 #if defined(__GLASGOW_HASKELL__)
2037 --
2038 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
2039 --
2040 getArgs :: IO [ByteString]
2041 getArgs =
2042   alloca $ \ p_argc ->
2043   alloca $ \ p_argv -> do
2044     getProgArgv p_argc p_argv
2045     p    <- fromIntegral `fmap` peek p_argc
2046     argv <- peek p_argv
2047     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
2048 #endif
2049
2050 -- ---------------------------------------------------------------------
2051 -- Internal utilities
2052
2053 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
2054 -- silently truncate to 8 bits Chars > '\255'. They are provided as
2055 -- convenience for ByteString construction.
2056 w2c :: Word8 -> Char
2057 #if !defined(__GLASGOW_HASKELL__)
2058 w2c = chr . fromIntegral
2059 #else
2060 w2c = unsafeChr . fromIntegral
2061 #endif
2062 {-# INLINE w2c #-}
2063
2064 c2w :: Char -> Word8
2065 c2w = fromIntegral . ord
2066 {-# INLINE c2w #-}
2067
2068 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2069 -- is padded with a null byte.
2070 mallocByteString :: Int -> IO (ForeignPtr Word8)
2071 mallocByteString l = do
2072     fp <- mallocForeignPtrArray (l+1)
2073     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2074     return fp
2075
2076 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2077 -- argument gives the final size of the ByteString. Unlike 'generate'
2078 -- the ByteString is not reallocated if the final size is less than the
2079 -- estimated size. Also, unlike 'generate' ByteString's created this way
2080 -- are managed on the Haskell heap.
2081 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2082 create l write_ptr = inlinePerformIO $ do
2083     fp <- mallocByteString (l+1)
2084     withForeignPtr fp $ \p -> write_ptr p
2085     return $ PS fp 0 l
2086 {-# INLINE create #-}
2087
2088 -- | Perform an operation with a temporary ByteString
2089 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2090 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2091 {-# INLINE withPtr #-}
2092
2093 -- Common up near identical calls to `error' to reduce the number
2094 -- constant strings created when compiled:
2095 errorEmptyList :: String -> a
2096 errorEmptyList fun = moduleError fun "empty ByteString"
2097 {-# NOINLINE errorEmptyList #-}
2098
2099 moduleError :: String -> String -> a
2100 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2101 {-# NOINLINE moduleError #-}
2102
2103 -- Find from the end of the string using predicate
2104 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2105 STRICT2(findFromEndUntil)
2106 findFromEndUntil f ps@(PS x s l) =
2107     if null ps then 0
2108     else if f (last ps) then l
2109          else findFromEndUntil f (PS x s (l-1))
2110
2111 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2112 -- it exposes lots of things to further inlining
2113 --
2114 {-# INLINE inlinePerformIO #-}
2115 inlinePerformIO :: IO a -> a
2116 #if defined(__GLASGOW_HASKELL__)
2117 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2118 #else
2119 inlinePerformIO = unsafePerformIO
2120 #endif
2121
2122 {-# INLINE newForeignFreePtr #-}
2123 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2124 #if defined(__GLASGOW_HASKELL__)
2125 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2126 #else
2127 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2128 #endif
2129
2130 -- ---------------------------------------------------------------------
2131 --
2132 -- Standard C functions
2133 --
2134
2135 foreign import ccall unsafe "string.h strlen" c_strlen
2136     :: CString -> CInt
2137
2138 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2139     :: CInt -> IO (Ptr Word8)
2140
2141 foreign import ccall unsafe "static stdlib.h free" c_free
2142     :: Ptr Word8 -> IO ()
2143
2144 #if !defined(__GLASGOW_HASKELL__)
2145 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2146     :: FunPtr (Ptr Word8 -> IO ())
2147 #endif
2148
2149 foreign import ccall unsafe "string.h memset" memset
2150     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2151
2152 foreign import ccall unsafe "string.h memchr" memchr
2153     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2154
2155 foreign import ccall unsafe "string.h memcmp" memcmp
2156     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2157
2158 foreign import ccall unsafe "string.h memcpy" memcpy
2159     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2160
2161 -- ---------------------------------------------------------------------
2162 --
2163 -- Uses our C code
2164 --
2165
2166 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2167     :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2168
2169 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2170     :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2171
2172 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2173     :: Ptr Word8 -> CInt -> Word8
2174
2175 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2176     :: Ptr Word8 -> CInt -> Word8
2177
2178 foreign import ccall unsafe "static fpstring.h count" c_count
2179     :: Ptr Word8 -> CInt -> Word8 -> CInt
2180
2181 -- ---------------------------------------------------------------------
2182 -- MMap
2183
2184 {-
2185 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2186     :: Int -> Int -> IO (Ptr Word8)
2187
2188 foreign import ccall unsafe "static unistd.h close" c_close
2189     :: Int -> IO Int
2190
2191 #  if !defined(__OpenBSD__)
2192 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2193     :: Ptr Word8 -> Int -> IO Int
2194 #  endif
2195 -}
2196
2197 -- ---------------------------------------------------------------------
2198 -- Internal GHC Haskell magic
2199
2200 #if defined(__GLASGOW_HASKELL__)
2201 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2202     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2203
2204 foreign import ccall unsafe "__hscore_memcpy_src_off"
2205    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2206 #endif
2207
2208 -- ---------------------------------------------------------------------
2209 --
2210 -- Functional array fusion for ByteStrings.
2211 --
2212 -- From the Data Parallel Haskell project,
2213 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2214 --
2215
2216 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2217 -- the fact that no bottoms of this type are ever constructed; hence, we can
2218 -- assume @(_ :: NoAL) `seq` x = x@.
2219 --
2220 data NoAL = NoAL
2221
2222 -- | Special forms of loop arguments
2223 --
2224 -- * These are common special cases for the three function arguments of gen
2225 --   and loop; we give them special names to make it easier to trigger RULES
2226 --   applying in the special cases represented by these arguments.  The
2227 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2228 --   two simplifier phases.
2229 --
2230 -- * In the case where the accumulator is not needed, it is better to always
2231 --   explicitly return a value `()', rather than just copy the input to the
2232 --   output, as the former gives GHC better local information.
2233 --
2234
2235 -- | Element function expressing a mapping only
2236 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2237 mapEFL f = \_ e -> (noAL, (Just $ f e))
2238 #if defined(__GLASGOW_HASKELL__)
2239 {-# INLINE [1] mapEFL #-}
2240 #endif
2241
2242 -- | Element function implementing a filter function only
2243 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2244 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2245 #if defined(__GLASGOW_HASKELL__)
2246 {-# INLINE [1] filterEFL #-}
2247 #endif
2248
2249 -- |Element function expressing a reduction only
2250 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2251 foldEFL f = \a e -> (f a e, Nothing)
2252 #if defined(__GLASGOW_HASKELL__)
2253 {-# INLINE [1] foldEFL #-}
2254 #endif
2255
2256 -- | A strict foldEFL.
2257 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2258 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2259 #if defined(__GLASGOW_HASKELL__)
2260 {-# INLINE [1] foldEFL' #-}
2261 #endif
2262
2263 -- | Element function expressing a prefix reduction only
2264 --
2265 scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8)
2266 scanEFL f = \a e -> (f a e, Just a)
2267 #if defined(__GLASGOW_HASKELL__)
2268 {-# INLINE [1] scanEFL #-}
2269 #endif
2270
2271 -- | Element function implementing a map and fold
2272 --
2273 mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> acc -> Word8 -> (acc, Maybe Word8)
2274 mapAccumEFL f = \a e -> case f a e of (a', e') -> (a', Just e')
2275 #if defined(__GLASGOW_HASKELL__)
2276 {-# INLINE [1] mapAccumEFL #-}
2277 #endif
2278
2279 -- | Element function implementing a map with index
2280 --
2281 mapIndexEFL :: (Int -> Word8 -> Word8) -> Int -> Word8 -> (Int, Maybe Word8)
2282 mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i', Just $ f i e)
2283 #if defined(__GLASGOW_HASKELL__)
2284 {-# INLINE [1] mapIndexEFL #-}
2285 #endif
2286
2287 -- | No accumulator
2288 noAL :: NoAL
2289 noAL = NoAL
2290 #if defined(__GLASGOW_HASKELL__)
2291 {-# INLINE [1] noAL #-}
2292 #endif
2293
2294 -- | Projection functions that are fusion friendly (as in, we determine when
2295 -- they are inlined)
2296 loopArr :: (acc, byteString) -> byteString
2297 loopArr (_, arr) = arr
2298 #if defined(__GLASGOW_HASKELL__)
2299 {-# INLINE [1] loopArr #-}
2300 #endif
2301
2302 loopAcc :: (acc, byteString) -> acc
2303 loopAcc (acc, _) = acc
2304 #if defined(__GLASGOW_HASKELL__)
2305 {-# INLINE [1] loopAcc #-}
2306 #endif
2307
2308 loopSndAcc :: ((acc1, acc2), byteString) -> (acc2, byteString)
2309 loopSndAcc ((_, acc), arr) = (acc, arr)
2310 #if defined(__GLASGOW_HASKELL__)
2311 {-# INLINE [1] loopSndAcc #-}
2312 #endif
2313
2314 ------------------------------------------------------------------------
2315
2316 --
2317 -- size, and then percentage.
2318 --
2319
2320 -- | Iteration over over ByteStrings
2321 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2322       -> acc                                   -- ^ initial acc value
2323       -> ByteString                            -- ^ input ByteString
2324       -> (acc, ByteString)
2325
2326 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2327     fp          <- mallocByteString i
2328     (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2329         (acc, i') <- go (a `plusPtr` s) p start
2330         if i' == i
2331             then return (fp,i',acc)                 -- no realloc for map
2332             else do fp_ <- mallocByteString i'      -- realloc
2333                     withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2334                     return (fp_,i',acc)
2335
2336     return (acc, PS ptr 0 n)
2337   where
2338     go p ma = trans 0 0
2339         where
2340             STRICT3(trans)
2341             trans a_off ma_off acc
2342                 | a_off >= i = return (acc, ma_off)
2343                 | otherwise  = do
2344                     x <- peekByteOff p a_off
2345                     let (acc', oe) = f acc x
2346                     ma_off' <- case oe of
2347                         Nothing  -> return ma_off
2348                         Just e   -> do pokeByteOff ma ma_off e
2349                                        return $ ma_off + 1
2350                     trans (a_off+1) ma_off' acc'
2351
2352 #if defined(__GLASGOW_HASKELL__)
2353 {-# INLINE [1] loopU #-}
2354 #endif
2355
2356 infixr 9 `fuseEFL`
2357
2358 -- |Fuse to flat loop functions
2359 fuseEFL :: (a1 -> Word8  -> (a1, Maybe Word8))
2360         -> (a2 -> Word8  -> (a2, Maybe Word8))
2361         -> (a1, a2)
2362         -> Word8
2363         -> ((a1, a2), Maybe Word8)
2364 fuseEFL f g (acc1, acc2) e1 =
2365     case f acc1 e1 of
2366         (acc1', Nothing) -> ((acc1', acc2), Nothing)
2367         (acc1', Just e2) ->
2368             case g acc2 e2 of
2369                 (acc2', res) -> ((acc1', acc2'), res)
2370
2371 {-# RULES
2372
2373 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2374   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2375     loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2376
2377 "loopArr/loopSndAcc" forall x.
2378   loopArr (loopSndAcc x) = loopArr x
2379
2380 "seq/NoAL" forall (u::NoAL) e.
2381   u `seq` e = e
2382
2383   #-}
2384