Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --                       (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with Prelude functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         packByte,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: Word8 -> ByteString -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57         head,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         last,                   -- :: ByteString -> Word8
  60         init,                   -- :: ByteString -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62
  63         -- * Special ByteStrings
  64         inits,                  -- :: ByteString -> [ByteString]
  65         tails,                  -- :: ByteString -> [ByteString]
  66         elems,                  -- :: ByteString -> [ByteString]
  67
  68         -- * Transformating ByteStrings
  69         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  70         reverse,                -- :: ByteString -> ByteString
  71         intersperse,            -- :: Word8 -> ByteString -> ByteString
  72         transpose,              -- :: [ByteString] -> [ByteString]
  73
  74         -- * Reducing 'ByteString's
  75         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  76         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  77         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  78         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79
  80         -- ** Special folds
  81         concat,                 -- :: [ByteString] -> ByteString
  82         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  83         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  84         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  85         maximum,                -- :: ByteString -> Word8
  86         minimum,                -- :: ByteString -> Word8
  87         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  88
  89         -- * Generating and unfolding ByteStrings
  90         replicate,              -- :: Int -> Word8 -> ByteString
  91         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  92
  93         -- * Substrings
  94
  95         -- ** Breaking strings
  96         take,                   -- :: Int -> ByteString -> ByteString
  97         drop,                   -- :: Int -> ByteString -> ByteString
  98         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  99         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 100         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 101         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 102         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 103         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 104
 105         -- ** Breaking and dropping on specific bytes
 106         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 107         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 108         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 109         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 110
 111         -- ** Breaking into many substrings
 112         split,                  -- :: Word8 -> ByteString -> [ByteString]
 113         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 114         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117
 118         -- ** Joining strings
 119         join,                   -- :: ByteString -> [ByteString] -> ByteString
 120         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 121
 122         -- * Indexing ByteStrings
 123         index,                  -- :: ByteString -> Int -> Word8
 124         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 125         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 126         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 127         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 128         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 129         count,                  -- :: Word8 -> ByteString -> Int
 130
 131         -- * Ordered ByteStrings
 132         sort,                   -- :: ByteString -> ByteString
 133
 134         -- * Searching ByteStrings
 135
 136         -- ** Searching by equality
 137         -- | These functions use memchr(3) to efficiently search the ByteString
 138
 139         elem,                   -- :: Word8 -> ByteString -> Bool
 140         notElem,                -- :: Word8 -> ByteString -> Bool
 141         filterByte,             -- :: Word8 -> ByteString -> ByteString
 142         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 143
 144         -- ** Searching with a predicate
 145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 146         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 147
 148         -- ** Prefixes and suffixes
 149         -- | These functions use memcmp(3) to efficiently compare substrings
 150         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 151         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 152
 153         -- ** Search for arbitrary substrings
 154         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 155         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 156         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 157
 158         -- * Zipping and unzipping ByteStrings
 159         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 160         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 161         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 162
 163         -- * Unchecked access
 164         unsafeHead,             -- :: ByteString -> Word8
 165         unsafeTail,             -- :: ByteString -> ByteString
 166         unsafeIndex,            -- :: ByteString -> Int -> Word8
 167
 168         -- * Low level introduction and elimination
 169         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 170         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 171         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 172         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 173         skipIndex,              -- :: ByteString -> Int
 174
 175         -- ** Packing CStrings and pointers
 176         packCString,            -- :: CString -> ByteString
 177         packCStringLen,         -- :: CString -> ByteString
 178         packMallocCString,      -- :: CString -> ByteString
 179
 180 #if defined(__GLASGOW_HASKELL__)
 181         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 182         packAddress,            -- :: Addr# -> ByteString
 183         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 184         unsafeFinalize,         -- :: ByteString -> IO ()
 185 #endif
 186
 187         -- ** Using ByteStrings as CStrings
 188         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 189         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 190         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 191
 192         -- ** Copying ByteStrings
 193         -- | These functions perform memcpy(3) operations
 194         copy,                   -- :: ByteString -> ByteString
 195         copyCString,            -- :: CString -> ByteString
 196         copyCStringLen,         -- :: CStringLen -> ByteString
 197
 198         -- * I\/O with @ByteString@s
 199
 200         -- ** Standard input and output
 201
 202 #if defined(__GLASGOW_HASKELL__)
 203         getLine,                -- :: IO ByteString
 204 #endif
 205         getContents,            -- :: IO ByteString
 206         putStr,                 -- :: ByteString -> IO ()
 207         putStrLn,               -- :: ByteString -> IO ()
 208
 209         -- ** Files
 210         readFile,               -- :: FilePath -> IO ByteString
 211         writeFile,              -- :: FilePath -> ByteString -> IO ()
 212 --      mmapFile,               -- :: FilePath -> IO ByteString
 213
 214         -- ** I\/O with Handles
 215 #if defined(__GLASGOW_HASKELL__)
 216         getArgs,                -- :: IO [ByteString]
 217         hGetLine,               -- :: Handle -> IO ByteString
 218         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 219 #endif
 220         hGetContents,           -- :: Handle -> IO ByteString
 221         hGet,                   -- :: Handle -> Int -> IO ByteString
 222         hPut,                   -- :: Handle -> ByteString -> IO ()
 223
 224         -- * Fusion utilities
 225 #if defined(__GLASGOW_HASKELL__)
 226         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 227 #endif
 228
 229         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 230         loopU, mapEFL, filterEFL, foldEFL,
 231         filterF, mapF
 232
 233   ) where
 234
 235 import qualified Prelude as P
 236 import Prelude hiding           (reverse,head,tail,last,init,null
 237                                 ,length,map,lines,foldl,foldr,unlines
 238                                 ,concat,any,take,drop,splitAt,takeWhile
 239                                 ,dropWhile,span,break,elem,filter,maximum
 240                                 ,minimum,all,concatMap,foldl1,foldr1
 241                                 ,readFile,writeFile,replicate
 242                                 ,getContents,getLine,putStr,putStrLn
 243                                 ,zip,zipWith,unzip,notElem)
 244
 245 import qualified Data.List as List
 246
 247 import Data.Char
 248 import Data.Word                (Word8)
 249 import Data.Maybe               (listToMaybe)
 250 import Data.Array               (listArray)
 251 import qualified Data.Array as Array ((!))
 252
 253 -- Control.Exception.bracket not available in yhc or nhc
 254 import Control.Exception        (bracket)
 255 import Control.Monad            (when)
 256
 257 import Foreign.C.String         (CString, CStringLen)
 258 import Foreign.C.Types          (CSize, CInt)
 259 import Foreign.ForeignPtr
 260 import Foreign.Marshal.Array
 261 import Foreign.Ptr
 262 import Foreign.Storable         (Storable(..))
 263
 264 -- hGetBuf and hPutBuf not available in yhc or nhc
 265 import System.IO                (stdin,stdout,hClose,hFileSize
 266                                 ,hGetBuf,hPutBuf,openBinaryFile
 267                                 ,Handle,IOMode(..))
 268
 269 #if !defined(__GLASGOW_HASKELL__)
 270 import System.IO.Unsafe
 271 #endif
 272
 273 #if defined(__GLASGOW_HASKELL__)
 274
 275 import Data.Generics            (Data(..), Typeable(..))
 276
 277 import System.IO                (hGetBufNonBlocking)
 278 import System.IO.Error          (isEOFError)
 279
 280 import Foreign.Marshal          (alloca)
 281 import qualified Foreign.Concurrent as FC (newForeignPtr)
 282
 283 import GHC.Handle
 284 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 285 import GHC.Base                 (build, unsafeChr)
 286 import GHC.Word hiding (Word8)
 287 import GHC.Ptr                  (Ptr(..))
 288 import GHC.ST                   (ST(..))
 289 import GHC.IOBase
 290
 291 #endif
 292
 293 -- CFILES stuff is Hugs only
 294 {-# CFILES cbits/fpstring.c #-}
 295
 296 -- -----------------------------------------------------------------------------
 297 --
 298 -- Useful macros, until we have bang patterns
 299 --
 300
 301 #define STRICT1(f) f a | a `seq` False = undefined
 302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 306
 307 -- -----------------------------------------------------------------------------
 308
 309 -- | A space-efficient representation of a Word8 vector, supporting many
 310 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 311 --
 312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 313 --
 314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 315                      {-# UNPACK #-} !Int
 316                      {-# UNPACK #-} !Int
 317
 318 #if defined(__GLASGOW_HASKELL__)
 319     deriving (Data, Typeable)
 320 #endif
 321
 322 instance Eq  ByteString
 323     where (==)    = eq
 324
 325 instance Ord ByteString
 326     where compare = compareBytes
 327
 328 instance Show ByteString where
 329     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 330
 331 instance Read ByteString where
 332     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 333
 334 {-
 335 instance Arbitrary PackedString where
 336     arbitrary = P.pack `fmap` arbitrary
 337     coarbitrary s = coarbitrary (P.unpack s)
 338 -}
 339
 340 -- | /O(n)/ Equality on the 'ByteString' type.
 341 eq :: ByteString -> ByteString -> Bool
 342 eq a@(PS p s l) b@(PS p' s' l')
 343     | l /= l'            = False    -- short cut on length
 344     | p == p' && s == s' = True     -- short cut for the same string
 345     | otherwise          = compareBytes a b == EQ
 346 {-# INLINE eq #-}
 347
 348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 349 compareBytes :: ByteString -> ByteString -> Ordering
 350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 351     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 352     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 353     | otherwise                         = inlinePerformIO $
 354         withForeignPtr x1 $ \p1 ->
 355         withForeignPtr x2 $ \p2 -> do
 356             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
 357             return $ case i `compare` 0 of
 358                         EQ  -> l1 `compare` l2
 359                         x   -> x
 360 {-# INLINE compareBytes #-}
 361
 362 {-
 363 --
 364 -- About 4x slower over 32M
 365 --
 366 compareBytes :: ByteString -> ByteString -> Ordering
 367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 368     withForeignPtr fp1 $ \p1 ->
 369         withForeignPtr fp2 $ \p2 ->
 370             cmp (p1 `plusPtr` off1)
 371                 (p2 `plusPtr` off2) 0 len1 len2
 372
 373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 374 STRICT5(cmp)
 375 cmp p1 p2 n len1 len2
 376       | n == len1 = if n == len2 then return EQ else return LT
 377       | n == len2 = return GT
 378       | otherwise = do
 379           (a :: Word8) <- peekByteOff p1 n
 380           (b :: Word8) <- peekByteOff p2 n
 381           case a `compare` b of
 382                 EQ -> cmp p1 p2 (n+1) len1 len2
 383                 LT -> return LT
 384                 GT -> return GT
 385 {-# INLINE compareBytes #-}
 386 -}
 387
 388 -- -----------------------------------------------------------------------------
 389 -- Introducing and eliminating 'ByteString's
 390
 391 -- | /O(1)/ The empty 'ByteString'
 392 empty :: ByteString
 393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 394 {-# NOINLINE empty #-}
 395
 396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 397 packByte :: Word8 -> ByteString
 398 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
 399     withForeignPtr fp $ \p -> poke p c
 400     return $ PS fp 0 1
 401 {-# NOINLINE packByte #-}
 402
 403 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 404 --
 405 -- For applications with large numbers of string literals, pack can be a
 406 -- bottleneck. In such cases, consider using packAddress (GHC only).
 407 pack :: [Word8] -> ByteString
 408
 409 #if !defined(__GLASGOW_HASKELL__)
 410
 411 pack str = create (P.length str) $ \p -> go p str
 412     where
 413         go _ []     = return ()
 414         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 415
 416 #else /* hack away */
 417
 418 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 419     where
 420         go _ _ []        = return ()
 421         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 422
 423         writeByte p i c = ST $ \s# ->
 424             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 425
 426 #endif
 427
 428 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 429 unpack :: ByteString -> [Word8]
 430
 431 #if !defined(__GLASGOW_HASKELL__)
 432
 433 unpack (PS _  _ 0) = []
 434 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 435         go (p `plusPtr` s) (l - 1) []
 436     where
 437         STRICT3(go)
 438         go p 0 acc = peek p          >>= \e -> return (e : acc)
 439         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 440 {-# INLINE unpack #-}
 441
 442 #else
 443
 444 unpack ps = build (unpackFoldr ps)
 445 {-# INLINE unpack #-}
 446
 447 unpackList :: ByteString -> [Word8]
 448 unpackList (PS fp off len) = withPtr fp $ \p -> do
 449     let STRICT3(loop)
 450         loop _ (-1) acc = return acc
 451         loop q n acc = do
 452            a <- peekByteOff q n
 453            loop q (n-1) (a : acc)
 454     loop (p `plusPtr` off) (len-1) []
 455
 456 {-# RULES
 457 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 458  #-}
 459
 460 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 461 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 462     let STRICT3(loop)
 463         loop _ (-1) acc = return acc
 464         loop q n    acc = do
 465            a <- peekByteOff q n
 466            loop q (n-1) (a `f` acc)
 467     loop (p `plusPtr` off) (len-1) ch
 468 {-# INLINE [0] unpackFoldr #-}
 469
 470 #endif
 471
 472 ------------------------------------------------------------------------
 473
 474 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 475 -- conversion function
 476 packWith :: (a -> Word8) -> [a] -> ByteString
 477 packWith k str = create (P.length str) $ \p -> go p str
 478     where
 479         STRICT2(go)
 480         go _ []     = return ()
 481         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 482 {-# INLINE packWith #-}
 483 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 484
 485 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 486 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 487 unpackWith _ (PS _  _ 0) = []
 488 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 489         go (p `plusPtr` s) (l - 1) []
 490     where
 491         STRICT3(go)
 492         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 493         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 494 {-# INLINE unpackWith #-}
 495 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 496
 497 -- ---------------------------------------------------------------------
 498 -- Basic interface
 499
 500 -- | /O(1)/ Test whether a ByteString is empty.
 501 null :: ByteString -> Bool
 502 null (PS _ _ l) = l == 0
 503 {-# INLINE null #-}
 504
 505 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 506 length :: ByteString -> Int
 507 length (PS _ _ l) = l
 508 {-# INLINE length #-}
 509
 510 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 511 -- complexity, as it requires a memcpy.
 512 cons :: Word8 -> ByteString -> ByteString
 513 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 514         memcpy (p `plusPtr` 1) (f `plusPtr` s) l
 515         poke p c
 516 {-# INLINE cons #-}
 517
 518 -- todo fuse
 519
 520 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 521 snoc :: ByteString -> Word8 -> ByteString
 522 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 523         memcpy p (f `plusPtr` s) l
 524         poke (p `plusPtr` l) c
 525 {-# INLINE snoc #-}
 526
 527 -- todo fuse
 528
 529 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 530 head :: ByteString -> Word8
 531 head ps@(PS x s _)
 532     | null ps   = errorEmptyList "head"
 533     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 534 {-# INLINE head #-}
 535
 536 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 537 tail :: ByteString -> ByteString
 538 tail (PS p s l)
 539     | l <= 0    = errorEmptyList "tail"
 540     | otherwise = PS p (s+1) (l-1)
 541 {-# INLINE tail #-}
 542
 543 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 544 last :: ByteString -> Word8
 545 last ps@(PS x s l)
 546     | null ps   = errorEmptyList "last"
 547     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 548 {-# INLINE last #-}
 549
 550 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 551 init :: ByteString -> ByteString
 552 init (PS p s l)
 553     | l <= 0    = errorEmptyList "init"
 554     | otherwise = PS p s (l-1)
 555 {-# INLINE init #-}
 556
 557 -- | /O(n)/ Append two ByteStrings
 558 append :: ByteString -> ByteString -> ByteString
 559 append xs ys | null xs   = ys
 560              | null ys   = xs
 561              | otherwise = concat [xs,ys]
 562 {-# INLINE append #-}
 563
 564 {-
 565 --
 566 -- About 30% faster, but allocating in a big chunk isn't good for memory use
 567 --
 568 append :: ByteString -> ByteString -> ByteString
 569 append xs@(PS ffp s l) ys@(PS fgp t m)
 570     | null xs   = ys
 571     | null ys   = xs
 572     | otherwise = create len $ \ptr ->
 573         withForeignPtr ffp $ \fp ->
 574         withForeignPtr fgp $ \gp -> do
 575             memcpy ptr               (fp `plusPtr` s) l
 576             memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
 577         where len = length xs + length ys
 578 -}
 579
 580 -- ---------------------------------------------------------------------
 581 -- Transformations
 582
 583 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 584 -- element of @xs@. This function is subject to array fusion.
 585 map :: (Word8 -> Word8) -> ByteString -> ByteString
 586 map f = loopArr . loopU (mapEFL f) noAL
 587 {-# INLINE map #-}
 588
 589 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 590 -- slightly faster for one-shot cases.
 591 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
 592 STRICT2(mapF)
 593 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 594     np <- mallocByteString (len+1)
 595     withForeignPtr np $ \p -> do
 596         map_ 0 (a `plusPtr` s) p
 597         return (PS np 0 len)
 598   where
 599     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 600     STRICT3(map_)
 601     map_ n p1 p2
 602        | n >= len = return ()
 603        | otherwise = do
 604             x <- peekByteOff p1 n
 605             pokeByteOff p2 n (f x)
 606             map_ (n+1) p1 p2
 607 {-# INLINE mapF #-}
 608
 609 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 610 reverse :: ByteString -> ByteString
 611 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 612         c_reverse p (f `plusPtr` s) l
 613
 614 {-
 615 reverse = pack . P.reverse . unpack
 616 -}
 617
 618 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 619 -- 'ByteString' and \`intersperses\' that byte between the elements of
 620 -- the 'ByteString'.  It is analogous to the intersperse function on
 621 -- Lists.
 622 intersperse :: Word8 -> ByteString -> ByteString
 623 intersperse c ps@(PS x s l)
 624     | length ps < 2  = ps
 625     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 626         c_intersperse p (f `plusPtr` s) l c
 627
 628 {-
 629 intersperse c = pack . List.intersperse c . unpack
 630 -}
 631
 632 -- | The 'transpose' function transposes the rows and columns of its
 633 -- 'ByteString' argument.
 634 transpose :: [ByteString] -> [ByteString]
 635 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 636
 637 -- ---------------------------------------------------------------------
 638 -- Reducing 'ByteString's
 639
 640 -- | 'foldl', applied to a binary operator, a starting value (typically
 641 -- the left-identity of the operator), and a ByteString, reduces the
 642 -- ByteString using the binary operator, from left to right.
 643 -- This function is subject to array fusion.
 644 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 645 foldl f z = loopAcc . loopU (foldEFL f) z
 646 {-# INLINE foldl #-}
 647
 648 {-
 649 --
 650 -- About twice as fast with 6.4.1, but not fuseable
 651 -- A simple fold . map is enough to make it worth while.
 652 --
 653 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 654         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 655     where
 656         STRICT3(lgo)
 657         lgo z p q | p == q    = return z
 658                   | otherwise = do c <- peek p
 659                                    lgo (f z c) (p `plusPtr` 1) q
 660 -}
 661
 662 -- | 'foldr', applied to a binary operator, a starting value
 663 -- (typically the right-identity of the operator), and a ByteString,
 664 -- reduces the ByteString using the binary operator, from right to left.
 665 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 666 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 667         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 668     where
 669         STRICT2(go)
 670         go p q | p == q    = return z
 671                | otherwise = do c  <- peek p
 672                                 ws <- go (p `plusPtr` 1) q
 673                                 return $ c `k` ws
 674
 675 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 676 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 677 -- This function is subject to array fusion.
 678 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 679 foldl1 f ps
 680     | null ps   = errorEmptyList "foldl1"
 681     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 682
 683 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 684 -- and thus must be applied to non-empty 'ByteString's
 685 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 686 foldr1 f ps
 687     | null ps        = errorEmptyList "foldr1"
 688     | otherwise      = foldr f (last ps) (init ps)
 689
 690 -- ---------------------------------------------------------------------
 691 -- Special folds
 692
 693 -- | /O(n)/ Concatenate a list of ByteStrings.
 694 concat :: [ByteString] -> ByteString
 695 concat []     = empty
 696 concat [ps]   = ps
 697 concat xs     = inlinePerformIO $ do
 698     let start_size = 1024
 699     p <- mallocArray start_size
 700     f p 0 1024 xs
 701
 702     where f ptr len _ [] = do
 703                 ptr' <- reallocArray ptr (len+1)
 704                 poke (ptr' `plusPtr` len) (0::Word8)    -- XXX so CStrings work
 705                 fp   <- newForeignFreePtr ptr'
 706                 return $ PS fp 0 len
 707
 708           f ptr len to_go pss@(PS p s l:pss')
 709            | l <= to_go = do withForeignPtr p $ \pf ->
 710                                  memcpy (ptr `plusPtr` len)
 711                                           (pf `plusPtr` s) l
 712                              f ptr (len + l) (to_go - l) pss'
 713
 714            | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
 715                             ptr' <- reallocArray ptr new_total
 716                             f ptr' len (new_total - len) pss
 717
 718 -- | Map a function over a 'ByteString' and concatenate the results
 719 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 720 concatMap f = foldr (append . f) empty
 721
 722 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 723 -- any element of the 'ByteString' satisfies the predicate.
 724 any :: (Word8 -> Bool) -> ByteString -> Bool
 725 any _ (PS _ _ 0) = False
 726 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 727         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 728     where
 729         STRICT2(go)
 730         go p q | p == q    = return False
 731                | otherwise = do c <- peek p
 732                                 if f c then return True
 733                                        else go (p `plusPtr` 1) q
 734
 735 -- todo fuse
 736
 737 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 738 -- if all elements of the 'ByteString' satisfy the predicate.
 739 all :: (Word8 -> Bool) -> ByteString -> Bool
 740 all _ (PS _ _ 0) = True
 741 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 742         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 743     where
 744         STRICT2(go)
 745         go p q | p == q     = return True  -- end of list
 746                | otherwise  = do c <- peek p
 747                                  if f c
 748                                     then go (p `plusPtr` 1) q
 749                                     else return False
 750 -- todo fuse
 751
 752 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 753 maximum :: ByteString -> Word8
 754 maximum xs@(PS x s l)
 755     | null xs   = errorEmptyList "maximum"
 756     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 757                     return $ c_maximum (p `plusPtr` s) l
 758 {-# INLINE maximum #-}
 759
 760 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 761 minimum :: ByteString -> Word8
 762 minimum xs@(PS x s l)
 763     | null xs   = errorEmptyList "minimum"
 764     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 765                     return $ c_minimum (p `plusPtr` s) l
 766 {-# INLINE minimum #-}
 767
 768 -- fusion is too slow here (10x)
 769
 770 {-
 771 maximum xs@(PS x s l)
 772     | null xs   = errorEmptyList "maximum"
 773     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 774                         w <- peek p
 775                         maximum_ (p `plusPtr` s) 0 l w
 776
 777 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 778 STRICT4(maximum_)
 779 maximum_ ptr n m c
 780     | n >= m    = return c
 781     | otherwise = do w <- peekByteOff ptr n
 782                      maximum_ ptr (n+1) m (if w > c then w else c)
 783
 784 minimum xs@(PS x s l)
 785     | null xs   = errorEmptyList "minimum"
 786     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 787                         w <- peek p
 788                         minimum_ (p `plusPtr` s) 0 l w
 789
 790 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 791 STRICT4(minimum_)
 792 minimum_ ptr n m c
 793     | n >= m    = return c
 794     | otherwise = do w <- peekByteOff ptr n
 795                      minimum_ ptr (n+1) m (if w < c then w else c)
 796 -}
 797
 798 -- | /O(n)/ map Word8 functions, provided with the index at each position
 799 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 800 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 801     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 802   where
 803     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 804     STRICT4(go)
 805     go n f t p | f == p    = return ()
 806                | otherwise = do w <- peek f
 807                                 ((poke t) . k n) w
 808                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 809
 810 -- ---------------------------------------------------------------------
 811 -- Unfolds and replicates
 812
 813 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 814 -- the value of every element. The following holds:
 815 --
 816 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 817 --
 818 -- This implemenation uses @memset(3)@
 819 replicate :: Int -> Word8 -> ByteString
 820 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 821
 822 {-
 823 -- About 5x slower
 824 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 825     where
 826         STRICT2(go)
 827         go _   0 = return w
 828         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 829 -}
 830
 831 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 832 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 833 -- the element and returns 'Nothing' if it is done producing the
 834 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 835 -- prepending to the ByteString and @b@ is used as the next element in a
 836 -- recursive call.
 837 --
 838 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 839 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 840 -- final size of the ByteString as an argument. 'cons' can then be
 841 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 842 -- linear complexity. The depth of the recursion is limited to this
 843 -- size, but may be less. For lazy, infinite unfoldr, use
 844 -- 'Data.List.unfoldr' (from 'Data.List').
 845 --
 846 -- Examples:
 847 --
 848 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 849 --
 850 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 851 --
 852 -- > unfoldrN n == take n $ List.unfoldr
 853 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 854 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 855     where
 856         STRICT3(go)
 857         go q c n | n == i    = return n      -- stop if we reach `i'
 858                  | otherwise = case f c of
 859                                    Nothing        -> return n
 860                                    Just (a,new_c) -> do
 861                                         poke q a
 862                                         go (q `plusPtr` 1) new_c (n+1)
 863
 864 -- ---------------------------------------------------------------------
 865 -- Substrings
 866
 867 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 868 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 869 take :: Int -> ByteString -> ByteString
 870 take n ps@(PS x s l)
 871     | n < 0     = empty
 872     | n >= l    = ps
 873     | otherwise = PS x s n
 874 {-# INLINE take #-}
 875
 876 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 877 -- elements, or @[]@ if @n > 'length' xs@.
 878 drop  :: Int -> ByteString -> ByteString
 879 drop n ps@(PS x s l)
 880     | n <= 0    = ps
 881     | n >  l    = empty
 882     | otherwise = PS x (s+n) (l-n)
 883 {-# INLINE drop #-}
 884
 885 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 886 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 887 splitAt  n ps  = (take n ps, drop n ps)
 888 {-# INLINE splitAt #-}
 889
 890 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 891 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 892 -- satisfy @p@.
 893 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 894 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 895 {-# INLINE takeWhile #-}
 896
 897 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 898 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 899 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 900 {-# INLINE dropWhile #-}
 901
 902 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 903 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 904 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 905 {-# INLINE break #-}
 906
 907 -- | 'breakByte' breaks its ByteString argument at the first occurence
 908 -- of the specified byte. It is more efficient than 'break' as it is
 909 -- implemented with @memchr(3)@. I.e.
 910 --
 911 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 912 --
 913 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 914 breakByte c p = case elemIndex c p of
 915     Nothing -> (p,empty)
 916     Just n  -> (take n p, drop n p)
 917 {-# INLINE breakByte #-}
 918
 919 -- | 'spanByte' breaks its ByteString argument at the first
 920 -- occurence of a byte other than its argument. It is more efficient
 921 -- than 'span (==)'
 922 --
 923 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 924 --
 925 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 926 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 927     go (p `plusPtr` s) 0
 928   where
 929     STRICT2(go)
 930     go p i | i >= l    = return (ps, empty)
 931            | otherwise = do c' <- peekByteOff p i
 932                             if c /= c'
 933                                 then return (take i ps, drop i ps)
 934                                 else go p (i+1)
 935 {-# INLINE spanByte #-}
 936
 937 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 938 -- occurence of @w@. It behaves like 'break', except the delimiter is
 939 -- not returned, and @Nothing@ is returned if the delimiter is not in
 940 -- the ByteString. I.e.
 941 --
 942 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 943 --
 944 -- > breakFirst c xs ==
 945 -- > let (x,y) = break (== c) xs
 946 -- > in if null y then Nothing else Just (x, drop 1 y))
 947 --
 948 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 949 breakFirst c p = case elemIndex c p of
 950    Nothing -> Nothing
 951    Just n -> Just (take n p, drop (n+1) p)
 952 {-# INLINE breakFirst #-}
 953
 954 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 955 -- ByteString.
 956 --
 957 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 958 --
 959 -- and the following are equivalent:
 960 --
 961 -- > breakLast 'c' "abcdef"
 962 -- > let (x,y) = break (=='c') (reverse "abcdef")
 963 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 964 --
 965 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 966 breakLast c p = case elemIndexLast c p of
 967     Nothing -> Nothing
 968     Just n -> Just (take n p, drop (n+1) p)
 969 {-# INLINE breakLast #-}
 970
 971 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 972 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 973 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 974 span p ps = break (not . p) ps
 975 {-# INLINE span #-}
 976
 977 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 978 -- We have
 979 --
 980 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 981 --
 982 -- and
 983 --
 984 -- > spanEnd (not . isSpace) ps
 985 -- >    ==
 986 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 987 --
 988 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 989 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 990
 991 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 992 -- separators, where the predicate returns True for a separator element.
 993 -- The resulting components do not contain the separators.  Two adjacent
 994 -- separators result in an empty component in the output.  eg.
 995 --
 996 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 997 -- > splitWith (=='a') []        == []
 998 --
 999 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1000
1001 #if defined(__GLASGOW_HASKELL__)
1002 splitWith _pred (PS _  _   0) = []
1003 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
1004   where pred# c# = pred_ (W8# c#)
1005
1006         splitWith' pred' off' len' fp' = withPtr fp $ \p ->
1007             splitLoop pred' p 0 off' len' fp'
1008
1009         splitLoop :: (Word# -> Bool)
1010                   -> Ptr Word8
1011                   -> Int -> Int -> Int
1012                   -> ForeignPtr Word8
1013                   -> IO [ByteString]
1014
1015         splitLoop pred' p idx' off' len' fp'
1016             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1017             | idx' >= len'  = return [PS fp' off' idx']
1018             | otherwise = do
1019                 w <- peekElemOff p (off'+idx')
1020                 if pred' (case w of W8# w# -> w#)
1021                    then return (PS fp' off' idx' :
1022                               splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
1023                    else splitLoop pred' p (idx'+1) off' len' fp'
1024 {-# INLINE splitWith #-}
1025
1026 #else
1027 splitWith _ (PS _ _ 0) = []
1028 splitWith p ps = splitWith' p ps
1029     where
1030         STRICT2(splitWith')
1031         splitWith' q qs = if null rest then [chunk]
1032                                        else chunk : splitWith' q (unsafeTail rest)
1033             where (chunk,rest) = break q qs
1034 #endif
1035
1036 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1037 -- argument, consuming the delimiter. I.e.
1038 --
1039 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1040 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1041 -- > split 'x'  "x"          == ["",""]
1042 --
1043 -- and
1044 --
1045 -- > join [c] . split c == id
1046 -- > split == splitWith . (==)
1047 --
1048 -- As for all splitting functions in this library, this function does
1049 -- not copy the substrings, it just constructs new 'ByteStrings' that
1050 -- are slices of the original.
1051 --
1052 split :: Word8 -> ByteString -> [ByteString]
1053 split _ (PS _ _ 0) = []
1054 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1055     let ptr = p `plusPtr` s
1056
1057         STRICT1(loop)
1058         loop n = do
1059             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1060             if q == nullPtr
1061                 then return [PS x (s+n) (l-n)]
1062                 else do let i = q `minusPtr` ptr
1063                         ls <- loop (i+1)
1064                         return $! PS x (s+n) (i-n) : ls
1065     loop 0
1066 {-# INLINE split #-}
1067
1068 {-
1069 -- slower. but stays inside Haskell.
1070 split _ (PS _  _   0) = []
1071 split (W8# w#) (PS fp off len) = splitWith' off len fp
1072     where
1073         splitWith' off' len' fp' = withPtr fp $ \p ->
1074             splitLoop p 0 off' len' fp'
1075
1076         splitLoop :: Ptr Word8
1077                   -> Int -> Int -> Int
1078                   -> ForeignPtr Word8
1079                   -> IO [ByteString]
1080
1081         STRICT5(splitLoop)
1082         splitLoop p idx' off' len' fp'
1083             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1084             | idx' >= len'  = return [PS fp' off' idx']
1085             | otherwise = do
1086                 (W8# x#) <- peekElemOff p (off'+idx')
1087                 if word2Int# w# ==# word2Int# x#
1088                    then return (PS fp' off' idx' :
1089                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1090                    else splitLoop p (idx'+1) off' len' fp'
1091 -}
1092
1093 -- | Like 'splitWith', except that sequences of adjacent separators are
1094 -- treated as a single separator. eg.
1095 --
1096 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1097 --
1098 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1099 tokens f = P.filter (not.null) . splitWith f
1100
1101 -- | The 'group' function takes a ByteString and returns a list of
1102 -- ByteStrings such that the concatenation of the result is equal to the
1103 -- argument.  Moreover, each sublist in the result contains only equal
1104 -- elements.  For example,
1105 --
1106 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1107 --
1108 -- It is a special case of 'groupBy', which allows the programmer to
1109 -- supply their own equality test. It is about 40% faster than
1110 -- /groupBy (==)/
1111 group :: ByteString -> [ByteString]
1112 group xs
1113     | null xs   = []
1114     | otherwise = ys : group zs
1115     where
1116         (ys, zs) = spanByte (unsafeHead xs) xs
1117
1118 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1119 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1120 groupBy k xs
1121     | null xs   = []
1122     | otherwise = take n xs : groupBy k (drop n xs)
1123     where
1124         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1125
1126 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1127 -- 'ByteString's and concatenates the list after interspersing the first
1128 -- argument between each element of the list.
1129 join :: ByteString -> [ByteString] -> ByteString
1130 join filler pss = concat (splice pss)
1131     where
1132         splice []  = []
1133         splice [x] = [x]
1134         splice (x:y:xs) = x:filler:splice (y:xs)
1135
1136 --
1137 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1138 -- with a char. Around 4 times faster than the generalised join.
1139 --
1140 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1141 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1142     withForeignPtr ffp $ \fp ->
1143     withForeignPtr fgp $ \gp -> do
1144         memcpy ptr (fp `plusPtr` s) l
1145         poke (ptr `plusPtr` l) c
1146         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1147     where
1148       len = length f + length g + 1
1149 {-# INLINE joinWithByte #-}
1150
1151 -- ---------------------------------------------------------------------
1152 -- Indexing ByteStrings
1153
1154 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1155 index :: ByteString -> Int -> Word8
1156 index ps n
1157     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1158     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1159                                 ++ ", length = " ++ show (length ps)
1160     | otherwise      = ps `unsafeIndex` n
1161 {-# INLINE index #-}
1162
1163 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1164 -- element in the given 'ByteString' which is equal to the query
1165 -- element, or 'Nothing' if there is no such element.
1166 -- This implementation uses memchr(3).
1167 elemIndex :: Word8 -> ByteString -> Maybe Int
1168 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1169     let p' = p `plusPtr` s
1170         q  = memchr p' c (fromIntegral l)
1171     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1172 {-# INLINE elemIndex #-}
1173
1174 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1175 -- element in the given 'ByteString' which is equal to the query
1176 -- element, or 'Nothing' if there is no such element. The following
1177 -- holds:
1178 --
1179 -- > elemIndexLast c xs ==
1180 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1181 --
1182 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1183 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1184     go (p `plusPtr` s) (l-1)
1185   where
1186     STRICT2(go)
1187     go p i | i < 0     = return Nothing
1188            | otherwise = do ch' <- peekByteOff p i
1189                             if ch == ch'
1190                                 then return $ Just i
1191                                 else go p (i-1)
1192 {-# INLINE elemIndexLast #-}
1193
1194 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1195 -- the indices of all elements equal to the query element, in ascending order.
1196 -- This implementation uses memchr(3).
1197 elemIndices :: Word8 -> ByteString -> [Int]
1198 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1199     let ptr = p `plusPtr` s
1200
1201         STRICT1(loop)
1202         loop n = do
1203                 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1204                 if q == nullPtr
1205                     then return []
1206                     else do let i = q `minusPtr` ptr
1207                             ls <- loop (i+1)
1208                             return $! i:ls
1209     loop 0
1210
1211 {-
1212 -- much slower
1213 elemIndices :: Word8 -> ByteString -> [Int]
1214 elemIndices c ps = loop 0 ps
1215    where STRICT2(loop)
1216          loop _ ps' | null ps'            = []
1217          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1218                     | otherwise           = loop (n+1) (unsafeTail ps')
1219 -}
1220
1221 -- | count returns the number of times its argument appears in the ByteString
1222 --
1223 -- > count = length . elemIndices
1224 --
1225 -- But more efficiently than using length on the intermediate list.
1226 count :: Word8 -> ByteString -> Int
1227 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1228     return $ c_count (p `plusPtr` s) (fromIntegral m) w
1229 {-# INLINE count #-}
1230
1231 {-
1232 --
1233 -- around 30% slower
1234 --
1235 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1236      go (p `plusPtr` s) (fromIntegral m) 0
1237     where
1238         go :: Ptr Word8 -> CSize -> Int -> IO Int
1239         STRICT3(go)
1240         go p l i = do
1241             let q = memchr p w l
1242             if q == nullPtr
1243                 then return i
1244                 else do let k = fromIntegral $ q `minusPtr` p
1245                         go (q `plusPtr` 1) (l-k-1) (i+1)
1246 -}
1247
1248 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1249 -- returns the index of the first element in the ByteString
1250 -- satisfying the predicate.
1251 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1252 findIndex = (listToMaybe .) . findIndices
1253
1254 -- | The 'findIndices' function extends 'findIndex', by returning the
1255 -- indices of all elements satisfying the predicate, in ascending order.
1256 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1257 findIndices p ps = loop 0 ps
1258    where
1259      STRICT2(loop)
1260      loop _ qs | null qs           = []
1261      loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1262                | otherwise         =     loop (n+1) (unsafeTail qs)
1263
1264 -- ---------------------------------------------------------------------
1265 -- Searching ByteStrings
1266
1267 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1268 elem :: Word8 -> ByteString -> Bool
1269 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1270 {-# INLINE elem #-}
1271
1272 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1273 notElem :: Word8 -> ByteString -> Bool
1274 notElem c ps = not (elem c ps)
1275 {-# INLINE notElem #-}
1276
1277 --
1278 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1279 -- case of filtering a single byte. It is more efficient to use
1280 -- /filterByte/ in this case.
1281 --
1282 -- > filterByte == filter . (==)
1283 --
1284 -- filterByte is around 10x faster, and uses much less space, than its
1285 -- filter equivalent
1286 filterByte :: Word8 -> ByteString -> ByteString
1287 filterByte w ps = replicate (count w ps) w
1288
1289 --
1290 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1291 -- case of filtering a single byte out of a list. It is more efficient
1292 -- to use /filterNotByte/ in this case.
1293 --
1294 -- > filterNotByte == filter . (/=)
1295 --
1296 -- filterNotByte is around 3x faster, and uses much less space, than its
1297 -- filter equivalent
1298 filterNotByte :: Word8 -> ByteString -> ByteString
1299 filterNotByte ch ps@(PS x s l)
1300     | null ps   = ps
1301     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1302         t <- go (f `plusPtr` s) p l
1303         return (t `minusPtr` p) -- actual length
1304     where
1305         STRICT3(go)
1306         go _ t 0 = return t
1307         go f t e = do w <- peek f
1308                       if w /= ch
1309                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1310                         else             go (f `plusPtr` 1) t               (e-1)
1311
1312 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1313 -- returns a ByteString containing those characters that satisfy the
1314 -- predicate. This function is subject to array fusion.
1315 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1316 filter p  = loopArr . loopU (filterEFL p) noAL
1317 {-# INLINE filter #-}
1318
1319 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1320 filterF k ps@(PS x s l)
1321     | null ps   = ps
1322     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1323         t <- go (f `plusPtr` s) p l
1324         return (t `minusPtr` p) -- actual length
1325     where
1326         STRICT3(go)
1327         go _ t 0 = return t
1328         go f t e = do w <- peek f
1329                       if k w
1330                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1331                         else             go (f `plusPtr` 1) t               (e - 1)
1332 {-# INLINE filterF #-}
1333
1334 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1335
1336 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1337 -- and returns the first element in matching the predicate, or 'Nothing'
1338 -- if there is no such element.
1339 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1340 find p ps = case filter p ps of
1341     q | null q -> Nothing
1342       | otherwise -> Just (unsafeHead q)
1343
1344 -- ---------------------------------------------------------------------
1345 -- Searching for substrings
1346
1347 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1348 -- iff the first is a prefix of the second.
1349 isPrefixOf :: ByteString -> ByteString -> Bool
1350 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1351     | l1 == 0   = True
1352     | l2 < l1   = False
1353     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1354         withForeignPtr x2 $ \p2 -> do
1355             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1356             return (i == 0)
1357
1358 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1359 -- iff the first is a suffix of the second.
1360 --
1361 -- The following holds:
1362 --
1363 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1364 --
1365 -- However, the real implemenation uses memcmp to compare the end of the
1366 -- string only, with no reverse required..
1367 isSuffixOf :: ByteString -> ByteString -> Bool
1368 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1369     | l1 == 0   = True
1370     | l2 < l1   = False
1371     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1372         withForeignPtr x2 $ \p2 -> do
1373             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1374             return (i == 0)
1375
1376 -- | Check whether one string is a substring of another. @isSubstringOf
1377 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1378 isSubstringOf :: ByteString -- ^ String to search for.
1379               -> ByteString -- ^ String to search in.
1380               -> Bool
1381 isSubstringOf p s = not $ P.null $ findSubstrings p s
1382
1383 -- | Get the first index of a substring in another string,
1384 --   or 'Nothing' if the string is not found.
1385 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1386 findSubstring :: ByteString -- ^ String to search for.
1387               -> ByteString -- ^ String to seach in.
1388               -> Maybe Int
1389 findSubstring = (listToMaybe .) . findSubstrings
1390
1391 -- | Find the indexes of all (possibly overlapping) occurances of a
1392 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1393 -- string matching algorithm.
1394 findSubstrings :: ByteString -- ^ String to search for.
1395                -> ByteString -- ^ String to seach in.
1396                -> [Int]
1397
1398 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1399   where
1400       patc x = pat `unsafeIndex` x
1401       strc x = str `unsafeIndex` x
1402
1403       -- maybe we should make kmpNext a UArray before using it in search?
1404       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1405       kmpNextL p _ | null p = []
1406       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1407                          ps = unsafeTail p
1408                          x = if not (null ps) && unsafeHead ps == patc j'
1409                                 then kmpNext Array.! j' else j'
1410                         in x:kmpNextL ps j'
1411       search i j = match ++ rest -- i: position in string, j: position in pattern
1412         where match = if j == m then [(i - j)] else []
1413               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1414       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1415                | otherwise = j
1416
1417 -- ---------------------------------------------------------------------
1418 -- Zipping
1419
1420 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1421 -- corresponding pairs of bytes. If one input ByteString is short,
1422 -- excess elements of the longer ByteString are discarded. This is
1423 -- equivalent to a pair of 'unpack' operations.
1424 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1425 zip ps qs
1426     | null ps || null qs = []
1427     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1428
1429 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1430 -- the first argument, instead of a tupling function.  For example,
1431 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1432 -- corresponding sums.
1433 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1434 zipWith f ps qs
1435     | null ps || null qs = []
1436     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1437
1438 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1439 -- ByteStrings. Note that this performs two 'pack' operations.
1440 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1441 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1442 {-# INLINE unzip #-}
1443
1444 -- ---------------------------------------------------------------------
1445 -- Special lists
1446
1447 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1448 inits :: ByteString -> [ByteString]
1449 inits (PS x s l) = [PS x s n | n <- [0..l]]
1450
1451 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1452 tails :: ByteString -> [ByteString]
1453 tails p | null p    = [empty]
1454         | otherwise = p : tails (unsafeTail p)
1455
1456 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1457
1458 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1459 elems :: ByteString -> [ByteString]
1460 elems (PS _ _ 0) = []
1461 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1462 {-# INLINE elems #-}
1463
1464 -- ---------------------------------------------------------------------
1465 -- ** Ordered 'ByteString's
1466
1467 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1468 sort :: ByteString -> ByteString
1469 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1470
1471     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1472     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1473
1474     let STRICT2(go)
1475         go 256 _   = return ()
1476         go i   ptr = do n <- peekElemOff arr i
1477                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1478                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1479     go 0 p
1480
1481 -- "countEach counts str l" counts the number of occurences of each Word8 in
1482 -- str, and stores the result in counts.
1483 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1484 STRICT3(countEach)
1485 countEach counts str l = go 0
1486  where
1487     STRICT1(go)
1488     go i | i == l    = return ()
1489          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1490                           x <- peekElemOff counts k
1491                           pokeElemOff counts k (x + 1)
1492                           go (i + 1)
1493
1494 {-
1495 sort :: ByteString -> ByteString
1496 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1497         memcpy p (f `plusPtr` s) l
1498         c_qsort p l -- inplace
1499 -}
1500
1501 {-
1502 sort = pack . List.sort . unpack
1503 -}
1504
1505 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1506 --
1507 -- Try some linear sorts: radix, counting
1508 -- Or mergesort.
1509 --
1510 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1511 -- sortBy f ps = undefined
1512
1513 -- ---------------------------------------------------------------------
1514 --
1515 -- Extensions to the basic interface
1516 --
1517
1518 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1519 -- check for the empty case, so there is an obligation on the programmer
1520 -- to provide a proof that the ByteString is non-empty.
1521 unsafeHead :: ByteString -> Word8
1522 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1523 {-# INLINE unsafeHead #-}
1524
1525 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1526 -- check for the empty case. As with 'unsafeHead', the programmer must
1527 -- provide a separate proof that the ByteString is non-empty.
1528 unsafeTail :: ByteString -> ByteString
1529 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1530 {-# INLINE unsafeTail #-}
1531
1532 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1533 -- This omits the bounds check, which means there is an accompanying
1534 -- obligation on the programmer to ensure the bounds are checked in some
1535 -- other way.
1536 unsafeIndex :: ByteString -> Int -> Word8
1537 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1538 {-# INLINE unsafeIndex #-}
1539
1540 -- ---------------------------------------------------------------------
1541 -- Low level constructors
1542
1543 #if defined(__GLASGOW_HASKELL__)
1544 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1545 -- Addr\# (an arbitrary machine address assumed to point outside the
1546 -- garbage-collected heap) into a @ByteString@. A much faster way to
1547 -- create an Addr\# is with an unboxed string literal, than to pack a
1548 -- boxed string. A unboxed string literal is compiled to a static @char
1549 -- []@ by GHC. Establishing the length of the string requires a call to
1550 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1551 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1552 -- if you know the length of the string statically.
1553 --
1554 -- An example:
1555 --
1556 -- > literalFS = packAddress "literal"#
1557 --
1558 packAddress :: Addr# -> ByteString
1559 packAddress addr# = inlinePerformIO $ do
1560     p <- newForeignPtr_ cstr
1561     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1562   where
1563     cstr = Ptr addr#
1564 {-# INLINE packAddress #-}
1565
1566 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1567 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1568 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1569 -- 'Addr\#' to the string, and the length of the string. Make sure the
1570 -- length is correct, otherwise use the safer 'packAddress' (where the
1571 -- length will be calculated once at runtime).
1572 unsafePackAddress :: Int -> Addr# -> ByteString
1573 unsafePackAddress len addr# = inlinePerformIO $ do
1574     p <- newForeignPtr_ cstr
1575     return $ PS p 0 len
1576     where cstr = Ptr addr#
1577
1578 #endif
1579
1580 -- | /O(1)/ Build a ByteString from a ForeignPtr
1581 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1582 fromForeignPtr fp l = PS fp 0 l
1583
1584 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1585 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1586 toForeignPtr (PS ps s l) = (ps, s, l)
1587
1588 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1589 -- current 'ByteString' from any larger string it was created from, as
1590 -- an 'Int'.
1591 skipIndex :: ByteString -> Int
1592 skipIndex (PS _ s _) = s
1593 {-# INLINE skipIndex #-}
1594
1595 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1596 -- finalizer associated to it. The ByteString length is calculated using
1597 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1598 packCString :: CString -> ByteString
1599 packCString cstr = inlinePerformIO $ do
1600     fp <- newForeignPtr_ (castPtr cstr)
1601     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1602
1603 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1604 -- have /no/ finalizer associated with it. This operation has /O(1)/
1605 -- complexity as we already know the final size, so no /strlen(3)/ is
1606 -- required.
1607 packCStringLen :: CStringLen -> ByteString
1608 packCStringLen (ptr,len) = inlinePerformIO $ do
1609     fp <- newForeignPtr_ (castPtr ptr)
1610     return $ PS fp 0 (fromIntegral len)
1611
1612 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1613 -- have a @free(3)@ finalizer associated to it.
1614 packMallocCString :: CString -> ByteString
1615 packMallocCString cstr = inlinePerformIO $ do
1616     fp <- newForeignFreePtr (castPtr cstr)
1617     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1618
1619 #if defined(__GLASGOW_HASKELL__)
1620 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1621 -- length, and an IO action representing a finalizer. This function is
1622 -- not available on Hugs.
1623 --
1624 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1625 packCStringFinalizer p l f = do
1626     fp <- FC.newForeignPtr p f
1627     return $ PS fp 0 l
1628
1629 -- | Explicitly run the finaliser associated with a 'ByteString'.
1630 -- Further references to this value may generate invalid memory
1631 -- references. This operation is unsafe, as there may be other
1632 -- 'ByteStrings' referring to the same underlying pages. If you use
1633 -- this, you need to have a proof of some kind that all 'ByteString's
1634 -- ever generated from the underlying byte array are no longer live.
1635 unsafeFinalize :: ByteString -> IO ()
1636 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1637
1638 #endif
1639
1640 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1641 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1642 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1643 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1644     where
1645       alloc = withForeignPtr ps $ \p -> do
1646                 buf <- c_malloc (fromIntegral l+1)
1647                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1648                 poke (buf `plusPtr` l) (0::Word8)
1649                 return $ castPtr buf
1650
1651 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1652 -- Warning: modifying the @CString@ will affect the @ByteString@.
1653 -- Why is this function unsafe? It relies on the null byte at the end of
1654 -- the ByteString to be there. This is /not/ the case if your ByteString
1655 -- has been spliced from a larger string (i.e. with take or drop).
1656 -- Unless you can guarantee the null byte, you should use the safe
1657 -- version, which will copy the string first.
1658 --
1659 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1660 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1661
1662 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1663 --   This is mainly useful to allow the rest of the data pointed
1664 --   to by the 'ByteString' to be garbage collected, for example
1665 --   if a large string has been read in, and only a small part of it
1666 --   is needed in the rest of the program.
1667 copy :: ByteString -> ByteString
1668 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1669
1670 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1671 -- CString is going to be deallocated from C land.
1672 copyCString :: CString -> ByteString
1673 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1674
1675 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1676 copyCStringLen :: CStringLen -> ByteString
1677 copyCStringLen (cstr, len) = inlinePerformIO $ do
1678     fp <- mallocForeignPtrArray (len+1)
1679     withForeignPtr fp $ \p -> do
1680         memcpy p (castPtr cstr) len
1681         poke (p `plusPtr` len) (0 :: Word8)
1682     return $! PS fp 0 len
1683
1684 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1685 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1686 -- This is analogous to unsafeUseAsCString, and comes with the same
1687 -- safety requirements.
1688 --
1689 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1690 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1691
1692 -- | Given the maximum size needed and a function to make the contents
1693 -- of a ByteString, generate makes the 'ByteString'. The generating
1694 -- function is required to return the actual final size (<= the maximum
1695 -- size), and the resulting byte array is realloced to this size.  The
1696 -- string is padded at the end with a null byte.
1697 --
1698 -- generate is the main mechanism for creating custom, efficient
1699 -- ByteString functions, using Haskell or C functions to fill the space.
1700 --
1701 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1702 generate i f = do
1703     p <- mallocArray (i+1)
1704     i' <- f p
1705     p' <- reallocArray p (i'+1)
1706     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1707     fp <- newForeignFreePtr p'
1708     return $ PS fp 0 i'
1709
1710 -- ---------------------------------------------------------------------
1711 -- line IO
1712
1713 #if defined(__GLASGOW_HASKELL__)
1714
1715 -- | getLine, read a line from stdin.
1716 getLine :: IO ByteString
1717 getLine = hGetLine stdin
1718
1719 -- | hGetLine. read a ByteString from a handle
1720 hGetLine :: Handle -> IO ByteString
1721 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1722     case haBufferMode handle_ of
1723        NoBuffering -> error "no buffering"
1724        _other      -> hGetLineBuffered handle_
1725
1726  where
1727     hGetLineBuffered handle_ = do
1728         let ref = haBuffer handle_
1729         buf <- readIORef ref
1730         hGetLineBufferedLoop handle_ ref buf 0 []
1731
1732     hGetLineBufferedLoop handle_ ref
1733             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1734         len `seq` do
1735         off <- findEOL r w raw
1736         let new_len = len + off - r
1737         xs <- mkPS raw r off
1738
1739       -- if eol == True, then off is the offset of the '\n'
1740       -- otherwise off == w and the buffer is now empty.
1741         if off /= w
1742             then do if (w == off + 1)
1743                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1744                             else writeIORef ref buf{ bufRPtr = off + 1 }
1745                     mkBigPS new_len (xs:xss)
1746             else do
1747                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1748                                     buf{ bufWPtr=0, bufRPtr=0 }
1749                  case maybe_buf of
1750                     -- Nothing indicates we caught an EOF, and we may have a
1751                     -- partial line to return.
1752                     Nothing -> do
1753                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1754                          if new_len > 0
1755                             then mkBigPS new_len (xs:xss)
1756                             else ioe_EOF
1757                     Just new_buf ->
1758                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1759
1760     -- find the end-of-line character, if there is one
1761     findEOL r w raw
1762         | r == w = return w
1763         | otherwise =  do
1764             (c,r') <- readCharFromBuffer raw r
1765             if c == '\n'
1766                 then return r -- NB. not r': don't include the '\n'
1767                 else findEOL r' w raw
1768
1769     maybeFillReadBuffer fd is_line is_stream buf = catch
1770         (do buf' <- fillReadBuffer fd is_line is_stream buf
1771             return (Just buf'))
1772         (\e -> if isEOFError e then return Nothing else ioError e)
1773
1774 -- TODO, rewrite to use normal memcpy
1775 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1776 mkPS buf start end = do
1777     let len = end - start
1778     fp <- mallocByteString len
1779     withForeignPtr fp $ \p -> do
1780         memcpy_ptr_baoff p buf start (fromIntegral len)
1781         return (PS fp 0 len)
1782
1783 mkBigPS :: Int -> [ByteString] -> IO ByteString
1784 mkBigPS _ [ps] = return ps
1785 mkBigPS _ pss = return $! concat (P.reverse pss)
1786
1787 #endif
1788
1789 -- ---------------------------------------------------------------------
1790 -- Block IO
1791
1792 -- | Outputs a 'ByteString' to the specified 'Handle'.
1793 hPut :: Handle -> ByteString -> IO ()
1794 hPut _ (PS _ _ 0)  = return ()
1795 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1796 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1797
1798 -- | Write a ByteString to stdout
1799 putStr :: ByteString -> IO ()
1800 putStr = hPut stdout
1801
1802 -- | Write a ByteString to stdout, appending a newline byte
1803 putStrLn :: ByteString -> IO ()
1804 putStrLn ps = hPut stdout ps >> hPut stdout nl
1805     where nl = packByte 0x0a
1806
1807 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1808 -- is far more efficient than reading the characters into a 'String'
1809 -- and then using 'pack'.
1810 hGet :: Handle -> Int -> IO ByteString
1811 hGet _ 0 = return empty
1812 hGet h i = do fp <- mallocByteString i
1813               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1814               return $ PS fp 0 l
1815
1816 #if defined(__GLASGOW_HASKELL__)
1817 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1818 -- waiting for data to become available, instead it returns only whatever data
1819 -- is available.
1820 hGetNonBlocking :: Handle -> Int -> IO ByteString
1821 hGetNonBlocking _ 0 = return empty
1822 hGetNonBlocking h i = do
1823     fp <- mallocByteString i
1824     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1825     return $ PS fp 0 l
1826 #endif
1827
1828 -- | Read entire handle contents into a 'ByteString'.
1829 --
1830 -- As with 'hGet', the string representation in the file is assumed to
1831 -- be ISO-8859-1.
1832 --
1833 hGetContents :: Handle -> IO ByteString
1834 hGetContents h = do
1835     let start_size = 1024
1836     p <- mallocArray start_size
1837     i <- hGetBuf h p start_size
1838     if i < start_size
1839         then do p' <- reallocArray p i
1840                 fp <- newForeignFreePtr p'
1841                 return $ PS fp 0 i
1842         else f p start_size
1843     where
1844         f p s = do
1845             let s' = 2 * s
1846             p' <- reallocArray p s'
1847             i  <- hGetBuf h (p' `plusPtr` s) s
1848             if i < s
1849                 then do let i' = s + i
1850                         p'' <- reallocArray p' i'
1851                         fp  <- newForeignFreePtr p''
1852                         return $ PS fp 0 i'
1853                 else f p' s'
1854
1855 -- | getContents. Equivalent to hGetContents stdin
1856 getContents :: IO ByteString
1857 getContents = hGetContents stdin
1858
1859 -- | Read an entire file directly into a 'ByteString'.  This is far more
1860 -- efficient than reading the characters into a 'String' and then using
1861 -- 'pack'.  It also may be more efficient than opening the file and
1862 -- reading it using hGet.
1863 readFile :: FilePath -> IO ByteString
1864 readFile f = do
1865     h <- openBinaryFile f ReadMode
1866     l <- hFileSize h
1867     s <- hGet h $ fromIntegral l
1868     hClose h
1869     return s
1870
1871 -- | Write a 'ByteString' to a file.
1872 writeFile :: FilePath -> ByteString -> IO ()
1873 writeFile f ps = do
1874     h <- openBinaryFile f WriteMode
1875     hPut h ps
1876     hClose h
1877
1878 {-
1879 --
1880 -- Disable until we can move it into a portable .hsc file
1881 --
1882
1883 -- | Like readFile, this reads an entire file directly into a
1884 -- 'ByteString', but it is even more efficient.  It involves directly
1885 -- mapping the file to memory.  This has the advantage that the contents
1886 -- of the file never need to be copied.  Also, under memory pressure the
1887 -- page may simply be discarded, while in the case of readFile it would
1888 -- need to be written to swap.  If you read many small files, mmapFile
1889 -- will be less memory-efficient than readFile, since each mmapFile
1890 -- takes up a separate page of memory.  Also, you can run into bus
1891 -- errors if the file is modified.  As with 'readFile', the string
1892 -- representation in the file is assumed to be ISO-8859-1.
1893 --
1894 -- On systems without mmap, this is the same as a readFile.
1895 --
1896 mmapFile :: FilePath -> IO ByteString
1897 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1898
1899 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1900 mmap f = do
1901     h <- openBinaryFile f ReadMode
1902     l <- fromIntegral `fmap` hFileSize h
1903     -- Don't bother mmaping small files because each mmapped file takes up
1904     -- at least one full VM block.
1905     if l < mmap_limit
1906        then do thefp <- mallocByteString l
1907                withForeignPtr thefp $ \p-> hGetBuf h p l
1908                hClose h
1909                return (thefp, l)
1910        else do
1911                -- unix only :(
1912                fd <- fromIntegral `fmap` handleToFd h
1913                p  <- my_mmap l fd
1914                fp <- if p == nullPtr
1915                      then do thefp <- mallocByteString l
1916                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1917                              return thefp
1918                      else do
1919                           -- The munmap leads to crashes on OpenBSD.
1920                           -- maybe there's a use after unmap in there somewhere?
1921 #if !defined(__OpenBSD__)
1922                              let unmap = c_munmap p l >> return ()
1923 #else
1924                              let unmap = return ()
1925 #endif
1926                              fp <- FC.newForeignPtr p unmap
1927                              return fp
1928                c_close fd
1929                hClose h
1930                return (fp, l)
1931     where mmap_limit = 16*1024
1932 -}
1933
1934 #if defined(__GLASGOW_HASKELL__)
1935 --
1936 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1937 --
1938 getArgs :: IO [ByteString]
1939 getArgs =
1940   alloca $ \ p_argc ->
1941   alloca $ \ p_argv -> do
1942     getProgArgv p_argc p_argv
1943     p    <- fromIntegral `fmap` peek p_argc
1944     argv <- peek p_argv
1945     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1946 #endif
1947
1948 -- ---------------------------------------------------------------------
1949 -- Internal utilities
1950
1951 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1952 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1953 -- convenience for ByteString construction.
1954 w2c :: Word8 -> Char
1955 #if !defined(__GLASGOW_HASKELL__)
1956 w2c = chr . fromIntegral
1957 #else
1958 w2c = unsafeChr . fromIntegral
1959 #endif
1960 {-# INLINE w2c #-}
1961
1962 c2w :: Char -> Word8
1963 c2w = fromIntegral . ord
1964 {-# INLINE c2w #-}
1965
1966 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1967 -- is padded with a null byte.
1968 mallocByteString :: Int -> IO (ForeignPtr Word8)
1969 mallocByteString l = do
1970     fp <- mallocForeignPtrArray (l+1)
1971     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1972     return fp
1973
1974 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1975 -- argument gives the final size of the ByteString. Unlike 'generate'
1976 -- the ByteString is not reallocated if the final size is less than the
1977 -- estimated size. Also, unlike 'generate' ByteString's created this way
1978 -- are managed on the Haskell heap.
1979 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1980 create l write_ptr = inlinePerformIO $ do
1981     fp <- mallocByteString (l+1)
1982     withForeignPtr fp $ \p -> write_ptr p
1983     return $ PS fp 0 l
1984 {-# INLINE create #-}
1985
1986 -- | Perform an operation with a temporary ByteString
1987 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1988 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1989 {-# INLINE withPtr #-}
1990
1991 -- Common up near identical calls to `error' to reduce the number
1992 -- constant strings created when compiled:
1993 errorEmptyList :: String -> a
1994 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1995 {-# INLINE errorEmptyList #-}
1996
1997 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1998 -- of the string if no element is found, rather than Nothing.
1999 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2000 STRICT2(findIndexOrEnd)
2001 findIndexOrEnd f ps
2002     | null ps           = 0
2003     | f (unsafeHead ps) = 0
2004     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
2005 {-# INLINE findIndexOrEnd #-}
2006
2007 -- Find from the end of the string using predicate
2008 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2009 STRICT2(findFromEndUntil)
2010 findFromEndUntil f ps@(PS x s l) =
2011     if null ps then 0
2012     else if f (last ps) then l
2013          else findFromEndUntil f (PS x s (l-1))
2014
2015 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2016 -- it exposes lots of things to further inlining
2017 --
2018 {-# INLINE inlinePerformIO #-}
2019 inlinePerformIO :: IO a -> a
2020 #if defined(__GLASGOW_HASKELL__)
2021 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2022 #else
2023 inlinePerformIO = unsafePerformIO
2024 #endif
2025
2026 {-# INLINE newForeignFreePtr #-}
2027 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2028 #if defined(__GLASGOW_HASKELL__)
2029 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2030 #else
2031 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2032 #endif
2033
2034 -- ---------------------------------------------------------------------
2035 --
2036 -- Standard C functions
2037 --
2038
2039 foreign import ccall unsafe "string.h strlen" c_strlen
2040     :: CString -> CInt
2041
2042 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2043     :: CInt -> IO (Ptr Word8)
2044
2045 foreign import ccall unsafe "static stdlib.h free" c_free
2046     :: Ptr Word8 -> IO ()
2047
2048 #if !defined(__GLASGOW_HASKELL__)
2049 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2050     :: FunPtr (Ptr Word8 -> IO ())
2051 #endif
2052
2053 foreign import ccall unsafe "string.h memset" memset
2054     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2055
2056 foreign import ccall unsafe "string.h memchr" memchr
2057     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2058
2059 foreign import ccall unsafe "string.h memcmp" memcmp
2060     :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2061
2062 foreign import ccall unsafe "string.h memcpy" memcpy
2063     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2064
2065 -- ---------------------------------------------------------------------
2066 --
2067 -- Uses our C code
2068 --
2069
2070 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2071     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2072
2073 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2074     :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2075
2076 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2077     :: Ptr Word8 -> Int -> Word8
2078
2079 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2080     :: Ptr Word8 -> Int -> Word8
2081
2082 foreign import ccall unsafe "static fpstring.h count" c_count
2083     :: Ptr Word8 -> Int -> Word8 -> Int
2084
2085 -- ---------------------------------------------------------------------
2086 -- MMap
2087
2088 {-
2089 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2090     :: Int -> Int -> IO (Ptr Word8)
2091
2092 foreign import ccall unsafe "static unistd.h close" c_close
2093     :: Int -> IO Int
2094
2095 #  if !defined(__OpenBSD__)
2096 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2097     :: Ptr Word8 -> Int -> IO Int
2098 #  endif
2099 -}
2100
2101 -- ---------------------------------------------------------------------
2102 -- Internal GHC Haskell magic
2103
2104 #if defined(__GLASGOW_HASKELL__)
2105 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2106     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2107
2108 foreign import ccall unsafe "__hscore_memcpy_src_off"
2109    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2110 #endif
2111
2112 -- ---------------------------------------------------------------------
2113 --
2114 -- Functional array fusion for ByteStrings.
2115 --
2116 -- From the Data Parallel Haskell project,
2117 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2118 --
2119
2120 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2121 -- the fact that no bottoms of this type are ever constructed; hence, we can
2122 -- assume @(_ :: NoAL) `seq` x = x@.
2123 --
2124 data NoAL = NoAL
2125
2126 -- | Special forms of loop arguments
2127 --
2128 -- * These are common special cases for the three function arguments of gen
2129 --   and loop; we give them special names to make it easier to trigger RULES
2130 --   applying in the special cases represented by these arguments.  The
2131 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2132 --   two simplifier phases.
2133 --
2134 -- * In the case where the accumulator is not needed, it is better to always
2135 --   explicitly return a value `()', rather than just copy the input to the
2136 --   output, as the former gives GHC better local information.
2137 --
2138
2139 -- | Element function expressing a mapping only
2140 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2141 mapEFL f = \_ e -> (noAL, (Just $ f e))
2142 {-# INLINE [1] mapEFL #-}
2143
2144 -- | Element function implementing a filter function only
2145 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2146 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2147 {-# INLINE [1] filterEFL #-}
2148
2149 -- |Element function expressing a reduction only
2150 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2151 foldEFL f = \a e -> (f a e, Nothing)
2152 {-# INLINE [1] foldEFL #-}
2153
2154 -- | No accumulator
2155 noAL :: NoAL
2156 noAL = NoAL
2157 {-# INLINE [1] noAL #-}
2158
2159 -- | Projection functions that are fusion friendly (as in, we determine when
2160 -- they are inlined)
2161 loopArr :: (ByteString, acc) -> ByteString
2162 loopArr (arr, _) = arr
2163 {-# INLINE [1] loopArr #-}
2164
2165 loopAcc :: (ByteString, acc) -> acc
2166 loopAcc (_, acc) = acc
2167 {-# INLINE [1] loopAcc #-}
2168
2169 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2170 loopSndAcc (arr, (_, acc)) = (arr, acc)
2171 {-# INLINE [1] loopSndAcc #-}
2172
2173 ------------------------------------------------------------------------
2174
2175 -- | Iteration over over ByteStrings
2176 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2177       -> acc                                   -- ^ initial acc value
2178       -> ByteString                            -- ^ input ByteString
2179       -> (ByteString, acc)
2180
2181 loopU f start (PS fp s i) = inlinePerformIO $ withForeignPtr fp $ \a -> do
2182     p <- mallocArray (i+1)
2183     (acc, i') <- go (a `plusPtr` s) p start
2184     p' <- if i == i' then return p else reallocArray p (i'+1) -- avoid realloc for maps
2185     poke (p' `plusPtr` i') (0::Word8)
2186     fp' <- newForeignFreePtr p'
2187     return (PS fp' 0 i', acc)
2188   where
2189     go p ma = trans 0 0
2190         where
2191             STRICT3(trans)
2192             trans a_off ma_off acc
2193                 | a_off >= i = return (acc, ma_off)
2194                 | otherwise  = do
2195                     x <- peekByteOff p a_off
2196                     let (acc', oe) = f acc x
2197                     ma_off' <- case oe of
2198                         Nothing  -> return ma_off
2199                         Just e   -> do pokeByteOff ma ma_off e
2200                                        return $ ma_off + 1
2201                     trans (a_off+1) ma_off' acc'
2202
2203 {-# INLINE [1] loopU #-}
2204
2205 {-# RULES
2206
2207 "array fusion!" forall em1 em2 start1 start2 arr.
2208   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2209     let em (acc1, acc2) e =
2210             case em1 acc1 e of
2211                 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2212                 (acc1', Just e') ->
2213                     case em2 acc2 e' of
2214                         (acc2', res) -> ((acc1', acc2'), res)
2215     in loopSndAcc (loopU em (start1, start2) arr)
2216
2217 "loopArr/loopSndAcc" forall x.
2218   loopArr (loopSndAcc x) = loopArr x
2219
2220 -- orphan?
2221 -- "seq/NoAL" forall (u::NoAL) e.
2222 --   u `seq` e = e
2223
2224  #-}
2225