Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with Prelude functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         packByte,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: Word8 -> ByteString -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57         head,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         last,                   -- :: ByteString -> Word8
  60         init,                   -- :: ByteString -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62
  63         -- * Special ByteStrings
  64         inits,                  -- :: ByteString -> [ByteString]
  65         tails,                  -- :: ByteString -> [ByteString]
  66         elems,                  -- :: ByteString -> [ByteString]
  67
  68         -- * Transformating ByteStrings
  69         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  70         reverse,                -- :: ByteString -> ByteString
  71         intersperse,            -- :: Word8 -> ByteString -> ByteString
  72         transpose,              -- :: [ByteString] -> [ByteString]
  73
  74         -- * Reducing 'ByteString's
  75         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  76         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  77         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  78         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79
  80         -- ** Special folds
  81         concat,                 -- :: [ByteString] -> ByteString
  82         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  83         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  84         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  85         maximum,                -- :: ByteString -> Word8
  86         minimum,                -- :: ByteString -> Word8
  87         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  88
  89         -- * Generating and unfolding ByteStrings
  90         replicate,              -- :: Int -> Word8 -> ByteString
  91         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  92
  93         -- * Substrings
  94
  95         -- ** Breaking strings
  96         take,                   -- :: Int -> ByteString -> ByteString
  97         drop,                   -- :: Int -> ByteString -> ByteString
  98         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  99         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 100         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 101         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 102         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 103         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 104
 105         -- ** Breaking and dropping on specific bytes
 106         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 107         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 108         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 109         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 110
 111         -- ** Breaking into many substrings
 112         split,                  -- :: Word8 -> ByteString -> [ByteString]
 113         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 114         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117
 118         -- ** Joining strings
 119         join,                   -- :: ByteString -> [ByteString] -> ByteString
 120         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 121
 122         -- * Indexing ByteStrings
 123         index,                  -- :: ByteString -> Int -> Word8
 124         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 125         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 126         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 127         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 128         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 129         count,                  -- :: Word8 -> ByteString -> Int
 130
 131         -- * Ordered ByteStrings
 132         sort,                   -- :: ByteString -> ByteString
 133
 134         -- * Searching ByteStrings
 135
 136         -- ** Searching by equality
 137         -- | These functions use memchr(3) to efficiently search the ByteString
 138
 139         elem,                   -- :: Word8 -> ByteString -> Bool
 140         notElem,                -- :: Word8 -> ByteString -> Bool
 141         filterByte,             -- :: Word8 -> ByteString -> ByteString
 142         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 143
 144         -- ** Searching with a predicate
 145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 146         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 147
 148         -- ** Prefixes and suffixes
 149         -- | These functions use memcmp(3) to efficiently compare substrings
 150         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 151         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 152
 153         -- ** Search for arbitrary substrings
 154         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 155         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 156         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 157
 158         -- * Zipping and unzipping ByteStrings
 159         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 160         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 161         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 162
 163         -- * Unchecked access
 164         unsafeHead,             -- :: ByteString -> Word8
 165         unsafeTail,             -- :: ByteString -> ByteString
 166         unsafeIndex,            -- :: ByteString -> Int -> Word8
 167
 168         -- * Low level introduction and elimination
 169         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 170         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 171         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 172         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 173         skipIndex,              -- :: ByteString -> Int
 174
 175         -- ** Packing CStrings and pointers
 176         packCString,            -- :: CString -> ByteString
 177         packCStringLen,         -- :: CString -> ByteString
 178         packMallocCString,      -- :: CString -> ByteString
 179
 180 #if defined(__GLASGOW_HASKELL__)
 181         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 182         packAddress,            -- :: Addr# -> ByteString
 183         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 184         unsafeFinalize,         -- :: ByteString -> IO ()
 185 #endif
 186
 187         -- ** Using ByteStrings as CStrings
 188         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 189         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 190         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 191
 192         -- ** Copying ByteStrings
 193         -- | These functions perform memcpy(3) operations
 194         copy,                   -- :: ByteString -> ByteString
 195         copyCString,            -- :: CString -> ByteString
 196         copyCStringLen,         -- :: CStringLen -> ByteString
 197
 198         -- * I\/O with @ByteString@s
 199
 200         -- ** Standard input and output
 201
 202 #if defined(__GLASGOW_HASKELL__)
 203         getLine,                -- :: IO ByteString
 204 #endif
 205         getContents,            -- :: IO ByteString
 206         putStr,                 -- :: ByteString -> IO ()
 207         putStrLn,               -- :: ByteString -> IO ()
 208
 209         -- ** Files
 210         readFile,               -- :: FilePath -> IO ByteString
 211         writeFile,              -- :: FilePath -> ByteString -> IO ()
 212 --      mmapFile,               -- :: FilePath -> IO ByteString
 213
 214         -- ** I\/O with Handles
 215 #if defined(__GLASGOW_HASKELL__)
 216         getArgs,                -- :: IO [ByteString]
 217         hGetLine,               -- :: Handle -> IO ByteString
 218         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 219 #endif
 220         hGetContents,           -- :: Handle -> IO ByteString
 221         hGet,                   -- :: Handle -> Int -> IO ByteString
 222         hPut,                   -- :: Handle -> ByteString -> IO ()
 223
 224         -- * Fusion utilities
 225 #if defined(__GLASGOW_HASKELL__)
 226         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 227 #endif
 228
 229         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 230         loopU, mapEFL, filterEFL, foldEFL, fuseEFL,
 231         filterF, mapF
 232
 233   ) where
 234
 235 import qualified Prelude as P
 236 import Prelude hiding           (reverse,head,tail,last,init,null
 237                                 ,length,map,lines,foldl,foldr,unlines
 238                                 ,concat,any,take,drop,splitAt,takeWhile
 239                                 ,dropWhile,span,break,elem,filter,maximum
 240                                 ,minimum,all,concatMap,foldl1,foldr1
 241                                 ,readFile,writeFile,replicate
 242                                 ,getContents,getLine,putStr,putStrLn
 243                                 ,zip,zipWith,unzip,notElem)
 244
 245 import qualified Data.List as List
 246
 247 import Data.Char
 248 import Data.Word                (Word8)
 249 import Data.Maybe               (listToMaybe)
 250 import Data.Array               (listArray)
 251 import qualified Data.Array as Array ((!))
 252
 253 -- Control.Exception.bracket not available in yhc or nhc
 254 import Control.Exception        (bracket)
 255 import Control.Monad            (when)
 256
 257 import Foreign.C.String         (CString, CStringLen)
 258 import Foreign.C.Types          (CSize, CInt)
 259 import Foreign.ForeignPtr
 260 import Foreign.Marshal.Array
 261 import Foreign.Ptr
 262 import Foreign.Storable         (Storable(..))
 263
 264 -- hGetBuf and hPutBuf not available in yhc or nhc
 265 import System.IO                (stdin,stdout,hClose,hFileSize
 266                                 ,hGetBuf,hPutBuf,openBinaryFile
 267                                 ,Handle,IOMode(..))
 268
 269 #if !defined(__GLASGOW_HASKELL__)
 270 import System.IO.Unsafe
 271 #endif
 272
 273 #if defined(__GLASGOW_HASKELL__)
 274
 275 import Data.Generics            (Data(..), Typeable(..))
 276
 277 import System.IO                (hGetBufNonBlocking)
 278 import System.IO.Error          (isEOFError)
 279
 280 import Foreign.Marshal          (alloca)
 281 import qualified Foreign.Concurrent as FC (newForeignPtr)
 282
 283 import GHC.Handle
 284 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 285 import GHC.Base                 (build, unsafeChr)
 286 import GHC.Word hiding (Word8)
 287 import GHC.Ptr                  (Ptr(..))
 288 import GHC.ST                   (ST(..))
 289 import GHC.IOBase
 290
 291 #endif
 292
 293 -- CFILES stuff is Hugs only
 294 {-# CFILES cbits/fpstring.c #-}
 295
 296 -- -----------------------------------------------------------------------------
 297 --
 298 -- Useful macros, until we have bang patterns
 299 --
 300
 301 #define STRICT1(f) f a | a `seq` False = undefined
 302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 306
 307 -- -----------------------------------------------------------------------------
 308
 309 -- | A space-efficient representation of a Word8 vector, supporting many
 310 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 311 --
 312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 313 --
 314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 315                      {-# UNPACK #-} !Int
 316                      {-# UNPACK #-} !Int
 317
 318 #if defined(__GLASGOW_HASKELL__)
 319     deriving (Data, Typeable)
 320 #endif
 321
 322 instance Eq  ByteString
 323     where (==)    = eq
 324
 325 instance Ord ByteString
 326     where compare = compareBytes
 327
 328 instance Show ByteString where
 329     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 330
 331 instance Read ByteString where
 332     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 333
 334 {-
 335 instance Arbitrary PackedString where
 336     arbitrary = P.pack `fmap` arbitrary
 337     coarbitrary s = coarbitrary (P.unpack s)
 338 -}
 339
 340 -- | /O(n)/ Equality on the 'ByteString' type.
 341 eq :: ByteString -> ByteString -> Bool
 342 eq a@(PS p s l) b@(PS p' s' l')
 343     | l /= l'            = False    -- short cut on length
 344     | p == p' && s == s' = True     -- short cut for the same string
 345     | otherwise          = compareBytes a b == EQ
 346 {-# INLINE eq #-}
 347
 348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 349 compareBytes :: ByteString -> ByteString -> Ordering
 350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 351     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 352     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 353     | otherwise                         = inlinePerformIO $
 354         withForeignPtr x1 $ \p1 ->
 355         withForeignPtr x2 $ \p2 -> do
 356             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 357             return $ case i `compare` 0 of
 358                         EQ  -> l1 `compare` l2
 359                         x   -> x
 360 {-# INLINE compareBytes #-}
 361
 362 {-
 363 --
 364 -- About 4x slower over 32M
 365 --
 366 compareBytes :: ByteString -> ByteString -> Ordering
 367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 368     withForeignPtr fp1 $ \p1 ->
 369         withForeignPtr fp2 $ \p2 ->
 370             cmp (p1 `plusPtr` off1)
 371                 (p2 `plusPtr` off2) 0 len1 len2
 372
 373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 374 STRICT5(cmp)
 375 cmp p1 p2 n len1 len2
 376       | n == len1 = if n == len2 then return EQ else return LT
 377       | n == len2 = return GT
 378       | otherwise = do
 379           (a :: Word8) <- peekByteOff p1 n
 380           (b :: Word8) <- peekByteOff p2 n
 381           case a `compare` b of
 382                 EQ -> cmp p1 p2 (n+1) len1 len2
 383                 LT -> return LT
 384                 GT -> return GT
 385 {-# INLINE compareBytes #-}
 386 -}
 387
 388 -- -----------------------------------------------------------------------------
 389 -- Introducing and eliminating 'ByteString's
 390
 391 -- | /O(1)/ The empty 'ByteString'
 392 empty :: ByteString
 393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 394 {-# NOINLINE empty #-}
 395
 396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 397 packByte :: Word8 -> ByteString
 398 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
 399     withForeignPtr fp $ \p -> poke p c
 400     return $ PS fp 0 1
 401 {-# INLINE packByte #-}
 402
 403 --
 404 -- XXX The unsafePerformIO is critical!
 405 --
 406 -- Otherwise:
 407 --
 408 --  packByte 255 `compare` packByte 127
 409 --
 410 -- is compiled to:
 411 --
 412 --  case mallocByteString 2 of
 413 --      ForeignPtr f internals ->
 414 --           case writeWord8OffAddr# f 0 255 of _ ->
 415 --           case writeWord8OffAddr# f 0 127 of _ ->
 416 --           case eqAddr# f f of
 417 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 418 --                                        (GHC.Prim.plusAddr# f 0)
 419 --
 420 --
 421
 422 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 423 --
 424 -- For applications with large numbers of string literals, pack can be a
 425 -- bottleneck. In such cases, consider using packAddress (GHC only).
 426 pack :: [Word8] -> ByteString
 427
 428 #if !defined(__GLASGOW_HASKELL__)
 429
 430 pack str = create (P.length str) $ \p -> go p str
 431     where
 432         go _ []     = return ()
 433         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 434
 435 #else /* hack away */
 436
 437 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 438     where
 439         go _ _ []        = return ()
 440         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 441
 442         writeByte p i c = ST $ \s# ->
 443             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 444
 445 #endif
 446
 447 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 448 unpack :: ByteString -> [Word8]
 449
 450 #if !defined(__GLASGOW_HASKELL__)
 451
 452 unpack (PS _  _ 0) = []
 453 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 454         go (p `plusPtr` s) (l - 1) []
 455     where
 456         STRICT3(go)
 457         go p 0 acc = peek p          >>= \e -> return (e : acc)
 458         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 459 {-# INLINE unpack #-}
 460
 461 #else
 462
 463 unpack ps = build (unpackFoldr ps)
 464 {-# INLINE unpack #-}
 465
 466 unpackList :: ByteString -> [Word8]
 467 unpackList (PS fp off len) = withPtr fp $ \p -> do
 468     let STRICT3(loop)
 469         loop _ (-1) acc = return acc
 470         loop q n acc = do
 471            a <- peekByteOff q n
 472            loop q (n-1) (a : acc)
 473     loop (p `plusPtr` off) (len-1) []
 474
 475 {-# RULES
 476 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 477  #-}
 478
 479 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 480 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 481     let STRICT3(loop)
 482         loop _ (-1) acc = return acc
 483         loop q n    acc = do
 484            a <- peekByteOff q n
 485            loop q (n-1) (a `f` acc)
 486     loop (p `plusPtr` off) (len-1) ch
 487 {-# INLINE [0] unpackFoldr #-}
 488
 489 #endif
 490
 491 ------------------------------------------------------------------------
 492
 493 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 494 -- conversion function
 495 packWith :: (a -> Word8) -> [a] -> ByteString
 496 packWith k str = create (P.length str) $ \p -> go p str
 497     where
 498         STRICT2(go)
 499         go _ []     = return ()
 500         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 501 {-# INLINE packWith #-}
 502 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 503
 504 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 505 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 506 unpackWith _ (PS _  _ 0) = []
 507 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 508         go (p `plusPtr` s) (l - 1) []
 509     where
 510         STRICT3(go)
 511         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 512         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 513 {-# INLINE unpackWith #-}
 514 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 515
 516 -- ---------------------------------------------------------------------
 517 -- Basic interface
 518
 519 -- | /O(1)/ Test whether a ByteString is empty.
 520 null :: ByteString -> Bool
 521 null (PS _ _ l) = l == 0
 522 {-# INLINE null #-}
 523
 524 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 525 length :: ByteString -> Int
 526 length (PS _ _ l) = l
 527 {-# INLINE length #-}
 528
 529 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 530 -- complexity, as it requires a memcpy.
 531 cons :: Word8 -> ByteString -> ByteString
 532 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 533         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 534         poke p c
 535 {-# INLINE cons #-}
 536
 537 -- todo fuse
 538
 539 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 540 snoc :: ByteString -> Word8 -> ByteString
 541 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 542         memcpy p (f `plusPtr` s) (fromIntegral l)
 543         poke (p `plusPtr` l) c
 544 {-# INLINE snoc #-}
 545
 546 -- todo fuse
 547
 548 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 549 head :: ByteString -> Word8
 550 head ps@(PS x s _)
 551     | null ps   = errorEmptyList "head"
 552     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 553 {-# INLINE head #-}
 554
 555 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 556 tail :: ByteString -> ByteString
 557 tail (PS p s l)
 558     | l <= 0    = errorEmptyList "tail"
 559     | otherwise = PS p (s+1) (l-1)
 560 {-# INLINE tail #-}
 561
 562 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 563 last :: ByteString -> Word8
 564 last ps@(PS x s l)
 565     | null ps   = errorEmptyList "last"
 566     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 567 {-# INLINE last #-}
 568
 569 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 570 init :: ByteString -> ByteString
 571 init (PS p s l)
 572     | l <= 0    = errorEmptyList "init"
 573     | otherwise = PS p s (l-1)
 574 {-# INLINE init #-}
 575
 576 -- | /O(n)/ Append two ByteStrings
 577 append :: ByteString -> ByteString -> ByteString
 578 append xs ys | null xs   = ys
 579              | null ys   = xs
 580              | otherwise = concat [xs,ys]
 581 {-# INLINE append #-}
 582
 583 -- ---------------------------------------------------------------------
 584 -- Transformations
 585
 586 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 587 -- element of @xs@. This function is subject to array fusion.
 588 map :: (Word8 -> Word8) -> ByteString -> ByteString
 589 map f = loopArr . loopU (mapEFL f) noAL
 590 {-# INLINE map #-}
 591
 592 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 593 -- slightly faster for one-shot cases.
 594 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
 595 STRICT2(mapF)
 596 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 597     np <- mallocByteString (len+1)
 598     withForeignPtr np $ \p -> do
 599         map_ 0 (a `plusPtr` s) p
 600         return (PS np 0 len)
 601   where
 602     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 603     STRICT3(map_)
 604     map_ n p1 p2
 605        | n >= len = return ()
 606        | otherwise = do
 607             x <- peekByteOff p1 n
 608             pokeByteOff p2 n (f x)
 609             map_ (n+1) p1 p2
 610 {-# INLINE mapF #-}
 611
 612 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 613 reverse :: ByteString -> ByteString
 614 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 615         c_reverse p (f `plusPtr` s) (fromIntegral l)
 616
 617 {-
 618 reverse = pack . P.reverse . unpack
 619 -}
 620
 621 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 622 -- 'ByteString' and \`intersperses\' that byte between the elements of
 623 -- the 'ByteString'.  It is analogous to the intersperse function on
 624 -- Lists.
 625 intersperse :: Word8 -> ByteString -> ByteString
 626 intersperse c ps@(PS x s l)
 627     | length ps < 2  = ps
 628     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 629         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 630
 631 {-
 632 intersperse c = pack . List.intersperse c . unpack
 633 -}
 634
 635 -- | The 'transpose' function transposes the rows and columns of its
 636 -- 'ByteString' argument.
 637 transpose :: [ByteString] -> [ByteString]
 638 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 639
 640 -- ---------------------------------------------------------------------
 641 -- Reducing 'ByteString's
 642
 643 -- | 'foldl', applied to a binary operator, a starting value (typically
 644 -- the left-identity of the operator), and a ByteString, reduces the
 645 -- ByteString using the binary operator, from left to right.
 646 -- This function is subject to array fusion.
 647 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 648 foldl f z = loopAcc . loopU (foldEFL f) z
 649 {-# INLINE foldl #-}
 650
 651 {-
 652 --
 653 -- About twice as fast with 6.4.1, but not fuseable
 654 -- A simple fold . map is enough to make it worth while.
 655 --
 656 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 657         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 658     where
 659         STRICT3(lgo)
 660         lgo z p q | p == q    = return z
 661                   | otherwise = do c <- peek p
 662                                    lgo (f z c) (p `plusPtr` 1) q
 663 -}
 664
 665 -- | 'foldr', applied to a binary operator, a starting value
 666 -- (typically the right-identity of the operator), and a ByteString,
 667 -- reduces the ByteString using the binary operator, from right to left.
 668 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 669 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 670         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 671     where
 672         STRICT2(go)
 673         go p q | p == q    = return z
 674                | otherwise = do c  <- peek p
 675                                 ws <- go (p `plusPtr` 1) q
 676                                 return $ c `k` ws
 677
 678 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 679 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 680 -- This function is subject to array fusion.
 681 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 682 foldl1 f ps
 683     | null ps   = errorEmptyList "foldl1"
 684     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 685
 686 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 687 -- and thus must be applied to non-empty 'ByteString's
 688 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 689 foldr1 f ps
 690     | null ps        = errorEmptyList "foldr1"
 691     | otherwise      = foldr f (last ps) (init ps)
 692
 693 -- ---------------------------------------------------------------------
 694 -- Special folds
 695
 696 -- | /O(n)/ Concatenate a list of ByteStrings.
 697 concat :: [ByteString] -> ByteString
 698 concat []     = empty
 699 concat [ps]   = ps
 700 concat xs     = create len $ \ptr -> go xs ptr
 701   where len = P.sum . P.map length $ xs
 702         STRICT2(go)
 703         go []            _   = return ()
 704         go (PS p s l:ps) ptr = do
 705                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 706                 go ps (ptr `plusPtr` l)
 707
 708 -- | Map a function over a 'ByteString' and concatenate the results
 709 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 710 concatMap f = foldr (append . f) empty
 711
 712 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 713 -- any element of the 'ByteString' satisfies the predicate.
 714 any :: (Word8 -> Bool) -> ByteString -> Bool
 715 any _ (PS _ _ 0) = False
 716 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 717         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 718     where
 719         STRICT2(go)
 720         go p q | p == q    = return False
 721                | otherwise = do c <- peek p
 722                                 if f c then return True
 723                                        else go (p `plusPtr` 1) q
 724
 725 -- todo fuse
 726
 727 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 728 -- if all elements of the 'ByteString' satisfy the predicate.
 729 all :: (Word8 -> Bool) -> ByteString -> Bool
 730 all _ (PS _ _ 0) = True
 731 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 732         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 733     where
 734         STRICT2(go)
 735         go p q | p == q     = return True  -- end of list
 736                | otherwise  = do c <- peek p
 737                                  if f c
 738                                     then go (p `plusPtr` 1) q
 739                                     else return False
 740 -- todo fuse
 741
 742 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 743 maximum :: ByteString -> Word8
 744 maximum xs@(PS x s l)
 745     | null xs   = errorEmptyList "maximum"
 746     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 747                     return $ c_maximum (p `plusPtr` s) (fromIntegral l)
 748 {-# INLINE maximum #-}
 749
 750 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 751 minimum :: ByteString -> Word8
 752 minimum xs@(PS x s l)
 753     | null xs   = errorEmptyList "minimum"
 754     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 755                     return $ c_minimum (p `plusPtr` s) (fromIntegral l)
 756 {-# INLINE minimum #-}
 757
 758 -- fusion is too slow here (10x)
 759
 760 {-
 761 maximum xs@(PS x s l)
 762     | null xs   = errorEmptyList "maximum"
 763     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 764                         w <- peek p
 765                         maximum_ (p `plusPtr` s) 0 l w
 766
 767 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 768 STRICT4(maximum_)
 769 maximum_ ptr n m c
 770     | n >= m    = return c
 771     | otherwise = do w <- peekByteOff ptr n
 772                      maximum_ ptr (n+1) m (if w > c then w else c)
 773
 774 minimum xs@(PS x s l)
 775     | null xs   = errorEmptyList "minimum"
 776     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 777                         w <- peek p
 778                         minimum_ (p `plusPtr` s) 0 l w
 779
 780 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 781 STRICT4(minimum_)
 782 minimum_ ptr n m c
 783     | n >= m    = return c
 784     | otherwise = do w <- peekByteOff ptr n
 785                      minimum_ ptr (n+1) m (if w < c then w else c)
 786 -}
 787
 788 -- | /O(n)/ map Word8 functions, provided with the index at each position
 789 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 790 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 791     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 792   where
 793     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 794     STRICT4(go)
 795     go n f t p | f == p    = return ()
 796                | otherwise = do w <- peek f
 797                                 ((poke t) . k n) w
 798                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 799
 800 -- ---------------------------------------------------------------------
 801 -- Unfolds and replicates
 802
 803 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 804 -- the value of every element. The following holds:
 805 --
 806 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 807 --
 808 -- This implemenation uses @memset(3)@
 809 replicate :: Int -> Word8 -> ByteString
 810 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 811
 812 {-
 813 -- About 5x slower
 814 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 815     where
 816         STRICT2(go)
 817         go _   0 = return w
 818         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 819 -}
 820
 821 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 822 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 823 -- the element and returns 'Nothing' if it is done producing the
 824 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 825 -- prepending to the ByteString and @b@ is used as the next element in a
 826 -- recursive call.
 827 --
 828 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 829 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 830 -- final size of the ByteString as an argument. 'cons' can then be
 831 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 832 -- linear complexity. The depth of the recursion is limited to this
 833 -- size, but may be less. For lazy, infinite unfoldr, use
 834 -- 'Data.List.unfoldr' (from 'Data.List').
 835 --
 836 -- Examples:
 837 --
 838 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 839 --
 840 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 841 --
 842 -- > unfoldrN n == take n $ List.unfoldr
 843 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 844 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 845     where
 846         STRICT3(go)
 847         go q c n | n == i    = return n      -- stop if we reach `i'
 848                  | otherwise = case f c of
 849                                    Nothing        -> return n
 850                                    Just (a,new_c) -> do
 851                                         poke q a
 852                                         go (q `plusPtr` 1) new_c (n+1)
 853
 854 -- ---------------------------------------------------------------------
 855 -- Substrings
 856
 857 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 858 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 859 take :: Int -> ByteString -> ByteString
 860 take n ps@(PS x s l)
 861     | n < 0     = empty
 862     | n >= l    = ps
 863     | otherwise = PS x s n
 864 {-# INLINE take #-}
 865
 866 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 867 -- elements, or @[]@ if @n > 'length' xs@.
 868 drop  :: Int -> ByteString -> ByteString
 869 drop n ps@(PS x s l)
 870     | n <= 0    = ps
 871     | n >  l    = empty
 872     | otherwise = PS x (s+n) (l-n)
 873 {-# INLINE drop #-}
 874
 875 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 876 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 877 splitAt  n ps  = (take n ps, drop n ps)
 878 {-# INLINE splitAt #-}
 879
 880 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 881 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 882 -- satisfy @p@.
 883 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 884 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 885 {-# INLINE takeWhile #-}
 886
 887 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 888 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 889 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 890 {-# INLINE dropWhile #-}
 891
 892 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 893 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 894 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 895 {-# INLINE break #-}
 896
 897 -- | 'breakByte' breaks its ByteString argument at the first occurence
 898 -- of the specified byte. It is more efficient than 'break' as it is
 899 -- implemented with @memchr(3)@. I.e.
 900 --
 901 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 902 --
 903 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 904 breakByte c p = case elemIndex c p of
 905     Nothing -> (p,empty)
 906     Just n  -> (take n p, drop n p)
 907 {-# INLINE breakByte #-}
 908
 909 -- | 'spanByte' breaks its ByteString argument at the first
 910 -- occurence of a byte other than its argument. It is more efficient
 911 -- than 'span (==)'
 912 --
 913 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 914 --
 915 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 916 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 917     go (p `plusPtr` s) 0
 918   where
 919     STRICT2(go)
 920     go p i | i >= l    = return (ps, empty)
 921            | otherwise = do c' <- peekByteOff p i
 922                             if c /= c'
 923                                 then return (take i ps, drop i ps)
 924                                 else go p (i+1)
 925 {-# INLINE spanByte #-}
 926
 927 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 928 -- occurence of @w@. It behaves like 'break', except the delimiter is
 929 -- not returned, and @Nothing@ is returned if the delimiter is not in
 930 -- the ByteString. I.e.
 931 --
 932 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 933 --
 934 -- > breakFirst c xs ==
 935 -- > let (x,y) = break (== c) xs
 936 -- > in if null y then Nothing else Just (x, drop 1 y))
 937 --
 938 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 939 breakFirst c p = case elemIndex c p of
 940    Nothing -> Nothing
 941    Just n -> Just (take n p, drop (n+1) p)
 942 {-# INLINE breakFirst #-}
 943
 944 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 945 -- ByteString.
 946 --
 947 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 948 --
 949 -- and the following are equivalent:
 950 --
 951 -- > breakLast 'c' "abcdef"
 952 -- > let (x,y) = break (=='c') (reverse "abcdef")
 953 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 954 --
 955 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 956 breakLast c p = case elemIndexLast c p of
 957     Nothing -> Nothing
 958     Just n -> Just (take n p, drop (n+1) p)
 959 {-# INLINE breakLast #-}
 960
 961 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 962 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 963 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 964 span p ps = break (not . p) ps
 965 {-# INLINE span #-}
 966
 967 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 968 -- We have
 969 --
 970 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 971 --
 972 -- and
 973 --
 974 -- > spanEnd (not . isSpace) ps
 975 -- >    ==
 976 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 977 --
 978 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 979 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 980
 981 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 982 -- separators, where the predicate returns True for a separator element.
 983 -- The resulting components do not contain the separators.  Two adjacent
 984 -- separators result in an empty component in the output.  eg.
 985 --
 986 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 987 -- > splitWith (=='a') []        == []
 988 --
 989 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 990
 991 #if defined(__GLASGOW_HASKELL__)
 992 splitWith _pred (PS _  _   0) = []
 993 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
 994   where pred# c# = pred_ (W8# c#)
 995
 996         STRICT4(splitWith0)
 997         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
 998             splitLoop pred' p 0 off' len' fp'
 999
1000         splitLoop :: (Word# -> Bool)
1001                   -> Ptr Word8
1002                   -> Int -> Int -> Int
1003                   -> ForeignPtr Word8
1004                   -> IO [ByteString]
1005
1006         splitLoop pred' p idx' off' len' fp'
1007             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1008             | idx' >= len'  = return [PS fp' off' idx']
1009             | otherwise = do
1010                 w <- peekElemOff p (off'+idx')
1011                 if pred' (case w of W8# w# -> w#)
1012                    then return (PS fp' off' idx' :
1013                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1014                    else splitLoop pred' p (idx'+1) off' len' fp'
1015 {-# INLINE splitWith #-}
1016
1017 #else
1018 splitWith _ (PS _ _ 0) = []
1019 splitWith p ps = loop p ps
1020     where
1021         STRICT2(loop)
1022         loop q qs = if null rest then [chunk]
1023                                  else chunk : loop q (unsafeTail rest)
1024             where (chunk,rest) = break q qs
1025 #endif
1026
1027 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1028 -- argument, consuming the delimiter. I.e.
1029 --
1030 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1031 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1032 -- > split 'x'  "x"          == ["",""]
1033 --
1034 -- and
1035 --
1036 -- > join [c] . split c == id
1037 -- > split == splitWith . (==)
1038 --
1039 -- As for all splitting functions in this library, this function does
1040 -- not copy the substrings, it just constructs new 'ByteStrings' that
1041 -- are slices of the original.
1042 --
1043 split :: Word8 -> ByteString -> [ByteString]
1044 split _ (PS _ _ 0) = []
1045 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1046     let ptr = p `plusPtr` s
1047
1048         STRICT1(loop)
1049         loop n = do
1050             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1051             if q == nullPtr
1052                 then [PS x (s+n) (l-n)]
1053                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1054
1055     return (loop 0)
1056 {-# INLINE split #-}
1057
1058 {-
1059 -- slower. but stays inside Haskell.
1060 split _ (PS _  _   0) = []
1061 split (W8# w#) (PS fp off len) = splitWith' off len fp
1062     where
1063         splitWith' off' len' fp' = withPtr fp $ \p ->
1064             splitLoop p 0 off' len' fp'
1065
1066         splitLoop :: Ptr Word8
1067                   -> Int -> Int -> Int
1068                   -> ForeignPtr Word8
1069                   -> IO [ByteString]
1070
1071         STRICT5(splitLoop)
1072         splitLoop p idx' off' len' fp'
1073             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1074             | idx' >= len'  = return [PS fp' off' idx']
1075             | otherwise = do
1076                 (W8# x#) <- peekElemOff p (off'+idx')
1077                 if word2Int# w# ==# word2Int# x#
1078                    then return (PS fp' off' idx' :
1079                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1080                    else splitLoop p (idx'+1) off' len' fp'
1081 -}
1082
1083 -- | Like 'splitWith', except that sequences of adjacent separators are
1084 -- treated as a single separator. eg.
1085 --
1086 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1087 --
1088 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1089 tokens f = P.filter (not.null) . splitWith f
1090 {-# INLINE tokens #-}
1091
1092 -- | The 'group' function takes a ByteString and returns a list of
1093 -- ByteStrings such that the concatenation of the result is equal to the
1094 -- argument.  Moreover, each sublist in the result contains only equal
1095 -- elements.  For example,
1096 --
1097 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1098 --
1099 -- It is a special case of 'groupBy', which allows the programmer to
1100 -- supply their own equality test. It is about 40% faster than
1101 -- /groupBy (==)/
1102 group :: ByteString -> [ByteString]
1103 group xs
1104     | null xs   = []
1105     | otherwise = ys : group zs
1106     where
1107         (ys, zs) = spanByte (unsafeHead xs) xs
1108
1109 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1110 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1111 groupBy k xs
1112     | null xs   = []
1113     | otherwise = take n xs : groupBy k (drop n xs)
1114     where
1115         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1116
1117 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1118 -- 'ByteString's and concatenates the list after interspersing the first
1119 -- argument between each element of the list.
1120 join :: ByteString -> [ByteString] -> ByteString
1121 join filler pss = concat (splice pss)
1122     where
1123         splice []  = []
1124         splice [x] = [x]
1125         splice (x:y:xs) = x:filler:splice (y:xs)
1126 {-# INLINE join #-}
1127
1128 --
1129 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1130 -- with a char. Around 4 times faster than the generalised join.
1131 --
1132 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1133 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1134     withForeignPtr ffp $ \fp ->
1135     withForeignPtr fgp $ \gp -> do
1136         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1137         poke (ptr `plusPtr` l) c
1138         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1139     where
1140       len = length f + length g + 1
1141 {-# INLINE joinWithByte #-}
1142
1143 -- ---------------------------------------------------------------------
1144 -- Indexing ByteStrings
1145
1146 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1147 index :: ByteString -> Int -> Word8
1148 index ps n
1149     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1150     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1151                                 ++ ", length = " ++ show (length ps)
1152     | otherwise      = ps `unsafeIndex` n
1153 {-# INLINE index #-}
1154
1155 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1156 -- element in the given 'ByteString' which is equal to the query
1157 -- element, or 'Nothing' if there is no such element.
1158 -- This implementation uses memchr(3).
1159 elemIndex :: Word8 -> ByteString -> Maybe Int
1160 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1161     let p' = p `plusPtr` s
1162         q  = memchr p' c (fromIntegral l)
1163     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1164 {-# INLINE elemIndex #-}
1165
1166 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1167 -- element in the given 'ByteString' which is equal to the query
1168 -- element, or 'Nothing' if there is no such element. The following
1169 -- holds:
1170 --
1171 -- > elemIndexLast c xs ==
1172 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1173 --
1174 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1175 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1176     go (p `plusPtr` s) (l-1)
1177   where
1178     STRICT2(go)
1179     go p i | i < 0     = return Nothing
1180            | otherwise = do ch' <- peekByteOff p i
1181                             if ch == ch'
1182                                 then return $ Just i
1183                                 else go p (i-1)
1184 {-# INLINE elemIndexLast #-}
1185
1186 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1187 -- the indices of all elements equal to the query element, in ascending order.
1188 -- This implementation uses memchr(3).
1189 elemIndices :: Word8 -> ByteString -> [Int]
1190 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1191     let ptr = p `plusPtr` s
1192
1193         STRICT1(loop)
1194         loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1195                  in if q == nullPtr
1196                         then []
1197                         else let i = q `minusPtr` ptr
1198                              in i : loop (i+1)
1199     return (loop 0)
1200 {-# INLINE elemIndices #-}
1201
1202 {-
1203 -- much slower
1204 elemIndices :: Word8 -> ByteString -> [Int]
1205 elemIndices c ps = loop 0 ps
1206    where STRICT2(loop)
1207          loop _ ps' | null ps'            = []
1208          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1209                     | otherwise           = loop (n+1) (unsafeTail ps')
1210 -}
1211
1212 -- | count returns the number of times its argument appears in the ByteString
1213 --
1214 -- > count = length . elemIndices
1215 --
1216 -- But more efficiently than using length on the intermediate list.
1217 count :: Word8 -> ByteString -> Int
1218 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1219     return $ c_count (p `plusPtr` s) (fromIntegral m) w
1220 {-# INLINE count #-}
1221
1222 {-
1223 --
1224 -- around 30% slower
1225 --
1226 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1227      go (p `plusPtr` s) (fromIntegral m) 0
1228     where
1229         go :: Ptr Word8 -> CSize -> Int -> IO Int
1230         STRICT3(go)
1231         go p l i = do
1232             let q = memchr p w l
1233             if q == nullPtr
1234                 then return i
1235                 else do let k = fromIntegral $ q `minusPtr` p
1236                         go (q `plusPtr` 1) (l-k-1) (i+1)
1237 -}
1238
1239 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1240 -- returns the index of the first element in the ByteString
1241 -- satisfying the predicate.
1242 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1243 findIndex k ps@(PS x s l)
1244     | null ps   = Nothing
1245     | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1246   where
1247     STRICT2(go)
1248     go ptr n | n >= l    = return Nothing
1249              | otherwise = do w <- peek ptr
1250                               if k w
1251                                 then return (Just n)
1252                                 else go (ptr `plusPtr` 1) (n+1)
1253 {-# INLINE findIndex #-}
1254
1255 -- | The 'findIndices' function extends 'findIndex', by returning the
1256 -- indices of all elements satisfying the predicate, in ascending order.
1257 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1258 findIndices p ps = loop 0 ps
1259    where
1260      STRICT2(loop)
1261      loop n qs | null qs           = []
1262                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1263                | otherwise         =     loop (n+1) (unsafeTail qs)
1264
1265 -- ---------------------------------------------------------------------
1266 -- Searching ByteStrings
1267
1268 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1269 elem :: Word8 -> ByteString -> Bool
1270 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1271 {-# INLINE elem #-}
1272
1273 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1274 notElem :: Word8 -> ByteString -> Bool
1275 notElem c ps = not (elem c ps)
1276 {-# INLINE notElem #-}
1277
1278 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1279 -- returns a ByteString containing those characters that satisfy the
1280 -- predicate. This function is subject to array fusion.
1281 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1282 filter p  = loopArr . loopU (filterEFL p) noAL
1283 {-# INLINE filter #-}
1284
1285 -- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
1286 -- around 2x faster for some one-shot applications.
1287 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1288 filterF k ps@(PS x s l)
1289     | null ps   = ps
1290     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1291         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1292         return (t `minusPtr` p) -- actual length
1293     where
1294         STRICT3(go)
1295         go f t end | f == end  = return t
1296                    | otherwise = do
1297                         w <- peek f
1298                         if k w
1299                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1300                             else             go (f `plusPtr` 1) t               end
1301 {-# INLINE filterF #-}
1302
1303 --
1304 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1305 -- case of filtering a single byte. It is more efficient to use
1306 -- /filterByte/ in this case.
1307 --
1308 -- > filterByte == filter . (==)
1309 --
1310 -- filterByte is around 10x faster, and uses much less space, than its
1311 -- filter equivalent
1312 filterByte :: Word8 -> ByteString -> ByteString
1313 filterByte w ps = replicate (count w ps) w
1314 {-# INLINE filterByte #-}
1315
1316 --
1317 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1318 -- case of filtering a single byte out of a list. It is more efficient
1319 -- to use /filterNotByte/ in this case.
1320 --
1321 -- > filterNotByte == filter . (/=)
1322 --
1323 -- filterNotByte is around 2x faster than its filter equivalent.
1324 filterNotByte :: Word8 -> ByteString -> ByteString
1325 filterNotByte w = filterF (/= w)
1326 {-# INLINE filterNotByte #-}
1327
1328 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1329 -- and returns the first element in matching the predicate, or 'Nothing'
1330 -- if there is no such element.
1331 --
1332 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1333 --
1334 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1335 find f p = case findIndex f p of
1336                     Just n -> Just (p `unsafeIndex` n)
1337                     _      -> Nothing
1338 {-# INLINE find #-}
1339
1340 {-
1341 --
1342 -- fuseable, but we don't want to walk the whole array.
1343 --
1344 find k = foldl findEFL Nothing
1345     where findEFL a@(Just _) _ = a
1346           findEFL _          c | k c       = Just c
1347                                | otherwise = Nothing
1348 -}
1349
1350 -- ---------------------------------------------------------------------
1351 -- Searching for substrings
1352
1353 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1354 -- iff the first is a prefix of the second.
1355 isPrefixOf :: ByteString -> ByteString -> Bool
1356 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1357     | l1 == 0   = True
1358     | l2 < l1   = False
1359     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1360         withForeignPtr x2 $ \p2 -> do
1361             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1362             return (i == 0)
1363
1364 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1365 -- iff the first is a suffix of the second.
1366 --
1367 -- The following holds:
1368 --
1369 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1370 --
1371 -- However, the real implemenation uses memcmp to compare the end of the
1372 -- string only, with no reverse required..
1373 isSuffixOf :: ByteString -> ByteString -> Bool
1374 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1375     | l1 == 0   = True
1376     | l2 < l1   = False
1377     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1378         withForeignPtr x2 $ \p2 -> do
1379             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1380             return (i == 0)
1381
1382 -- | Check whether one string is a substring of another. @isSubstringOf
1383 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1384 isSubstringOf :: ByteString -- ^ String to search for.
1385               -> ByteString -- ^ String to search in.
1386               -> Bool
1387 isSubstringOf p s = not $ P.null $ findSubstrings p s
1388
1389 -- | Get the first index of a substring in another string,
1390 --   or 'Nothing' if the string is not found.
1391 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1392 findSubstring :: ByteString -- ^ String to search for.
1393               -> ByteString -- ^ String to seach in.
1394               -> Maybe Int
1395 findSubstring = (listToMaybe .) . findSubstrings
1396
1397 -- | Find the indexes of all (possibly overlapping) occurances of a
1398 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1399 -- string matching algorithm.
1400 findSubstrings :: ByteString -- ^ String to search for.
1401                -> ByteString -- ^ String to seach in.
1402                -> [Int]
1403
1404 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1405   where
1406       patc x = pat `unsafeIndex` x
1407       strc x = str `unsafeIndex` x
1408
1409       -- maybe we should make kmpNext a UArray before using it in search?
1410       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1411       kmpNextL p _ | null p = []
1412       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1413                          ps = unsafeTail p
1414                          x = if not (null ps) && unsafeHead ps == patc j'
1415                                 then kmpNext Array.! j' else j'
1416                         in x:kmpNextL ps j'
1417       search i j = match ++ rest -- i: position in string, j: position in pattern
1418         where match = if j == m then [(i - j)] else []
1419               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1420       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1421                | otherwise = j
1422
1423 -- ---------------------------------------------------------------------
1424 -- Zipping
1425
1426 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1427 -- corresponding pairs of bytes. If one input ByteString is short,
1428 -- excess elements of the longer ByteString are discarded. This is
1429 -- equivalent to a pair of 'unpack' operations.
1430 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1431 zip ps qs
1432     | null ps || null qs = []
1433     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1434
1435 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1436 -- the first argument, instead of a tupling function.  For example,
1437 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1438 -- corresponding sums.
1439 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1440 zipWith f ps qs
1441     | null ps || null qs = []
1442     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1443
1444 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1445 -- ByteStrings. Note that this performs two 'pack' operations.
1446 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1447 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1448 {-# INLINE unzip #-}
1449
1450 -- ---------------------------------------------------------------------
1451 -- Special lists
1452
1453 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1454 inits :: ByteString -> [ByteString]
1455 inits (PS x s l) = [PS x s n | n <- [0..l]]
1456
1457 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1458 tails :: ByteString -> [ByteString]
1459 tails p | null p    = [empty]
1460         | otherwise = p : tails (unsafeTail p)
1461
1462 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1463
1464 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1465 elems :: ByteString -> [ByteString]
1466 elems (PS _ _ 0) = []
1467 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1468 {-# INLINE elems #-}
1469
1470 -- ---------------------------------------------------------------------
1471 -- ** Ordered 'ByteString's
1472
1473 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1474 sort :: ByteString -> ByteString
1475 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1476
1477     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1478     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1479
1480     let STRICT2(go)
1481         go 256 _   = return ()
1482         go i   ptr = do n <- peekElemOff arr i
1483                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1484                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1485     go 0 p
1486
1487 -- "countEach counts str l" counts the number of occurences of each Word8 in
1488 -- str, and stores the result in counts.
1489 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1490 STRICT3(countEach)
1491 countEach counts str l = go 0
1492  where
1493     STRICT1(go)
1494     go i | i == l    = return ()
1495          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1496                           x <- peekElemOff counts k
1497                           pokeElemOff counts k (x + 1)
1498                           go (i + 1)
1499
1500 {-
1501 sort :: ByteString -> ByteString
1502 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1503         memcpy p (f `plusPtr` s) l
1504         c_qsort p l -- inplace
1505 -}
1506
1507 {-
1508 sort = pack . List.sort . unpack
1509 -}
1510
1511 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1512 --
1513 -- Try some linear sorts: radix, counting
1514 -- Or mergesort.
1515 --
1516 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1517 -- sortBy f ps = undefined
1518
1519 -- ---------------------------------------------------------------------
1520 --
1521 -- Extensions to the basic interface
1522 --
1523
1524 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1525 -- check for the empty case, so there is an obligation on the programmer
1526 -- to provide a proof that the ByteString is non-empty.
1527 unsafeHead :: ByteString -> Word8
1528 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1529 {-# INLINE unsafeHead #-}
1530
1531 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1532 -- check for the empty case. As with 'unsafeHead', the programmer must
1533 -- provide a separate proof that the ByteString is non-empty.
1534 unsafeTail :: ByteString -> ByteString
1535 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1536 {-# INLINE unsafeTail #-}
1537
1538 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1539 -- This omits the bounds check, which means there is an accompanying
1540 -- obligation on the programmer to ensure the bounds are checked in some
1541 -- other way.
1542 unsafeIndex :: ByteString -> Int -> Word8
1543 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1544 {-# INLINE unsafeIndex #-}
1545
1546 -- ---------------------------------------------------------------------
1547 -- Low level constructors
1548
1549 #if defined(__GLASGOW_HASKELL__)
1550 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1551 -- Addr\# (an arbitrary machine address assumed to point outside the
1552 -- garbage-collected heap) into a @ByteString@. A much faster way to
1553 -- create an Addr\# is with an unboxed string literal, than to pack a
1554 -- boxed string. A unboxed string literal is compiled to a static @char
1555 -- []@ by GHC. Establishing the length of the string requires a call to
1556 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1557 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1558 -- if you know the length of the string statically.
1559 --
1560 -- An example:
1561 --
1562 -- > literalFS = packAddress "literal"#
1563 --
1564 packAddress :: Addr# -> ByteString
1565 packAddress addr# = inlinePerformIO $ do
1566     p <- newForeignPtr_ cstr
1567     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1568   where
1569     cstr = Ptr addr#
1570 {-# INLINE packAddress #-}
1571
1572 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1573 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1574 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1575 -- 'Addr\#' to the string, and the length of the string. Make sure the
1576 -- length is correct, otherwise use the safer 'packAddress' (where the
1577 -- length will be calculated once at runtime).
1578 unsafePackAddress :: Int -> Addr# -> ByteString
1579 unsafePackAddress len addr# = inlinePerformIO $ do
1580     p <- newForeignPtr_ cstr
1581     return $ PS p 0 len
1582     where cstr = Ptr addr#
1583
1584 #endif
1585
1586 -- | /O(1)/ Build a ByteString from a ForeignPtr
1587 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1588 fromForeignPtr fp l = PS fp 0 l
1589
1590 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1591 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1592 toForeignPtr (PS ps s l) = (ps, s, l)
1593
1594 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1595 -- current 'ByteString' from any larger string it was created from, as
1596 -- an 'Int'.
1597 skipIndex :: ByteString -> Int
1598 skipIndex (PS _ s _) = s
1599 {-# INLINE skipIndex #-}
1600
1601 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1602 -- finalizer associated to it. The ByteString length is calculated using
1603 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1604 packCString :: CString -> ByteString
1605 packCString cstr = inlinePerformIO $ do
1606     fp <- newForeignPtr_ (castPtr cstr)
1607     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1608
1609 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1610 -- have /no/ finalizer associated with it. This operation has /O(1)/
1611 -- complexity as we already know the final size, so no /strlen(3)/ is
1612 -- required.
1613 packCStringLen :: CStringLen -> ByteString
1614 packCStringLen (ptr,len) = inlinePerformIO $ do
1615     fp <- newForeignPtr_ (castPtr ptr)
1616     return $ PS fp 0 (fromIntegral len)
1617
1618 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1619 -- have a @free(3)@ finalizer associated to it.
1620 packMallocCString :: CString -> ByteString
1621 packMallocCString cstr = inlinePerformIO $ do
1622     fp <- newForeignFreePtr (castPtr cstr)
1623     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1624
1625 #if defined(__GLASGOW_HASKELL__)
1626 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1627 -- length, and an IO action representing a finalizer. This function is
1628 -- not available on Hugs.
1629 --
1630 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1631 packCStringFinalizer p l f = do
1632     fp <- FC.newForeignPtr p f
1633     return $ PS fp 0 l
1634
1635 -- | Explicitly run the finaliser associated with a 'ByteString'.
1636 -- Further references to this value may generate invalid memory
1637 -- references. This operation is unsafe, as there may be other
1638 -- 'ByteStrings' referring to the same underlying pages. If you use
1639 -- this, you need to have a proof of some kind that all 'ByteString's
1640 -- ever generated from the underlying byte array are no longer live.
1641 unsafeFinalize :: ByteString -> IO ()
1642 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1643
1644 #endif
1645
1646 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1647 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1648 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1649 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1650     where
1651       alloc = withForeignPtr ps $ \p -> do
1652                 buf <- c_malloc (fromIntegral l+1)
1653                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1654                 poke (buf `plusPtr` l) (0::Word8)
1655                 return $ castPtr buf
1656
1657 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1658 -- Warning: modifying the @CString@ will affect the @ByteString@.
1659 -- Why is this function unsafe? It relies on the null byte at the end of
1660 -- the ByteString to be there. This is /not/ the case if your ByteString
1661 -- has been spliced from a larger string (i.e. with take or drop).
1662 -- Unless you can guarantee the null byte, you should use the safe
1663 -- version, which will copy the string first.
1664 --
1665 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1666 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1667
1668 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1669 --   This is mainly useful to allow the rest of the data pointed
1670 --   to by the 'ByteString' to be garbage collected, for example
1671 --   if a large string has been read in, and only a small part of it
1672 --   is needed in the rest of the program.
1673 copy :: ByteString -> ByteString
1674 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1675     memcpy p (f `plusPtr` s) (fromIntegral l)
1676
1677 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1678 -- CString is going to be deallocated from C land.
1679 copyCString :: CString -> ByteString
1680 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1681
1682 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1683 copyCStringLen :: CStringLen -> ByteString
1684 copyCStringLen (cstr, len) = inlinePerformIO $ do
1685     fp <- mallocForeignPtrArray (len+1)
1686     withForeignPtr fp $ \p -> do
1687         memcpy p (castPtr cstr) (fromIntegral len)
1688         poke (p `plusPtr` len) (0 :: Word8)
1689     return $! PS fp 0 len
1690
1691 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1692 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1693 -- This is analogous to unsafeUseAsCString, and comes with the same
1694 -- safety requirements.
1695 --
1696 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1697 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1698
1699 -- | Given the maximum size needed and a function to make the contents
1700 -- of a ByteString, generate makes the 'ByteString'. The generating
1701 -- function is required to return the actual final size (<= the maximum
1702 -- size), and the resulting byte array is realloced to this size.  The
1703 -- string is padded at the end with a null byte.
1704 --
1705 -- generate is the main mechanism for creating custom, efficient
1706 -- ByteString functions, using Haskell or C functions to fill the space.
1707 --
1708 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1709 generate i f = do
1710     p <- mallocArray (i+1)
1711     i' <- f p
1712     p' <- reallocArray p (i'+1)
1713     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1714     fp <- newForeignFreePtr p'
1715     return $ PS fp 0 i'
1716
1717 -- ---------------------------------------------------------------------
1718 -- line IO
1719
1720 #if defined(__GLASGOW_HASKELL__)
1721
1722 -- | getLine, read a line from stdin.
1723 getLine :: IO ByteString
1724 getLine = hGetLine stdin
1725
1726 -- | hGetLine. read a ByteString from a handle
1727 hGetLine :: Handle -> IO ByteString
1728 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1729     case haBufferMode handle_ of
1730        NoBuffering -> error "no buffering"
1731        _other      -> hGetLineBuffered handle_
1732
1733  where
1734     hGetLineBuffered handle_ = do
1735         let ref = haBuffer handle_
1736         buf <- readIORef ref
1737         hGetLineBufferedLoop handle_ ref buf 0 []
1738
1739     hGetLineBufferedLoop handle_ ref
1740             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1741         len `seq` do
1742         off <- findEOL r w raw
1743         let new_len = len + off - r
1744         xs <- mkPS raw r off
1745
1746       -- if eol == True, then off is the offset of the '\n'
1747       -- otherwise off == w and the buffer is now empty.
1748         if off /= w
1749             then do if (w == off + 1)
1750                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1751                             else writeIORef ref buf{ bufRPtr = off + 1 }
1752                     mkBigPS new_len (xs:xss)
1753             else do
1754                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1755                                     buf{ bufWPtr=0, bufRPtr=0 }
1756                  case maybe_buf of
1757                     -- Nothing indicates we caught an EOF, and we may have a
1758                     -- partial line to return.
1759                     Nothing -> do
1760                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1761                          if new_len > 0
1762                             then mkBigPS new_len (xs:xss)
1763                             else ioe_EOF
1764                     Just new_buf ->
1765                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1766
1767     -- find the end-of-line character, if there is one
1768     findEOL r w raw
1769         | r == w = return w
1770         | otherwise =  do
1771             (c,r') <- readCharFromBuffer raw r
1772             if c == '\n'
1773                 then return r -- NB. not r': don't include the '\n'
1774                 else findEOL r' w raw
1775
1776     maybeFillReadBuffer fd is_line is_stream buf = catch
1777         (do buf' <- fillReadBuffer fd is_line is_stream buf
1778             return (Just buf'))
1779         (\e -> if isEOFError e then return Nothing else ioError e)
1780
1781 -- TODO, rewrite to use normal memcpy
1782 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1783 mkPS buf start end = do
1784     let len = end - start
1785     fp <- mallocByteString len
1786     withForeignPtr fp $ \p -> do
1787         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1788         return (PS fp 0 len)
1789
1790 mkBigPS :: Int -> [ByteString] -> IO ByteString
1791 mkBigPS _ [ps] = return ps
1792 mkBigPS _ pss = return $! concat (P.reverse pss)
1793
1794 #endif
1795
1796 -- ---------------------------------------------------------------------
1797 -- Block IO
1798
1799 -- | Outputs a 'ByteString' to the specified 'Handle'.
1800 hPut :: Handle -> ByteString -> IO ()
1801 hPut _ (PS _ _ 0)  = return ()
1802 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1803 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1804
1805 -- | Write a ByteString to stdout
1806 putStr :: ByteString -> IO ()
1807 putStr = hPut stdout
1808
1809 -- | Write a ByteString to stdout, appending a newline byte
1810 putStrLn :: ByteString -> IO ()
1811 putStrLn ps = hPut stdout ps >> hPut stdout nl
1812     where nl = packByte 0x0a
1813
1814 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1815 -- is far more efficient than reading the characters into a 'String'
1816 -- and then using 'pack'.
1817 hGet :: Handle -> Int -> IO ByteString
1818 hGet _ 0 = return empty
1819 hGet h i = do fp <- mallocByteString i
1820               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1821               return $ PS fp 0 l
1822
1823 #if defined(__GLASGOW_HASKELL__)
1824 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1825 -- waiting for data to become available, instead it returns only whatever data
1826 -- is available.
1827 hGetNonBlocking :: Handle -> Int -> IO ByteString
1828 hGetNonBlocking _ 0 = return empty
1829 hGetNonBlocking h i = do
1830     fp <- mallocByteString i
1831     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1832     return $ PS fp 0 l
1833 #endif
1834
1835 -- | Read entire handle contents into a 'ByteString'.
1836 --
1837 -- As with 'hGet', the string representation in the file is assumed to
1838 -- be ISO-8859-1.
1839 --
1840 hGetContents :: Handle -> IO ByteString
1841 hGetContents h = do
1842     let start_size = 1024
1843     p <- mallocArray start_size
1844     i <- hGetBuf h p start_size
1845     if i < start_size
1846         then do p' <- reallocArray p i
1847                 fp <- newForeignFreePtr p'
1848                 return $ PS fp 0 i
1849         else f p start_size
1850     where
1851         f p s = do
1852             let s' = 2 * s
1853             p' <- reallocArray p s'
1854             i  <- hGetBuf h (p' `plusPtr` s) s
1855             if i < s
1856                 then do let i' = s + i
1857                         p'' <- reallocArray p' i'
1858                         fp  <- newForeignFreePtr p''
1859                         return $ PS fp 0 i'
1860                 else f p' s'
1861
1862 -- | getContents. Equivalent to hGetContents stdin
1863 getContents :: IO ByteString
1864 getContents = hGetContents stdin
1865
1866 -- | Read an entire file directly into a 'ByteString'.  This is far more
1867 -- efficient than reading the characters into a 'String' and then using
1868 -- 'pack'.  It also may be more efficient than opening the file and
1869 -- reading it using hGet.
1870 readFile :: FilePath -> IO ByteString
1871 readFile f = do
1872     h <- openBinaryFile f ReadMode
1873     l <- hFileSize h
1874     s <- hGet h $ fromIntegral l
1875     hClose h
1876     return s
1877
1878 -- | Write a 'ByteString' to a file.
1879 writeFile :: FilePath -> ByteString -> IO ()
1880 writeFile f ps = do
1881     h <- openBinaryFile f WriteMode
1882     hPut h ps
1883     hClose h
1884
1885 {-
1886 --
1887 -- Disable until we can move it into a portable .hsc file
1888 --
1889
1890 -- | Like readFile, this reads an entire file directly into a
1891 -- 'ByteString', but it is even more efficient.  It involves directly
1892 -- mapping the file to memory.  This has the advantage that the contents
1893 -- of the file never need to be copied.  Also, under memory pressure the
1894 -- page may simply be discarded, while in the case of readFile it would
1895 -- need to be written to swap.  If you read many small files, mmapFile
1896 -- will be less memory-efficient than readFile, since each mmapFile
1897 -- takes up a separate page of memory.  Also, you can run into bus
1898 -- errors if the file is modified.  As with 'readFile', the string
1899 -- representation in the file is assumed to be ISO-8859-1.
1900 --
1901 -- On systems without mmap, this is the same as a readFile.
1902 --
1903 mmapFile :: FilePath -> IO ByteString
1904 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1905
1906 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1907 mmap f = do
1908     h <- openBinaryFile f ReadMode
1909     l <- fromIntegral `fmap` hFileSize h
1910     -- Don't bother mmaping small files because each mmapped file takes up
1911     -- at least one full VM block.
1912     if l < mmap_limit
1913        then do thefp <- mallocByteString l
1914                withForeignPtr thefp $ \p-> hGetBuf h p l
1915                hClose h
1916                return (thefp, l)
1917        else do
1918                -- unix only :(
1919                fd <- fromIntegral `fmap` handleToFd h
1920                p  <- my_mmap l fd
1921                fp <- if p == nullPtr
1922                      then do thefp <- mallocByteString l
1923                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1924                              return thefp
1925                      else do
1926                           -- The munmap leads to crashes on OpenBSD.
1927                           -- maybe there's a use after unmap in there somewhere?
1928 #if !defined(__OpenBSD__)
1929                              let unmap = c_munmap p l >> return ()
1930 #else
1931                              let unmap = return ()
1932 #endif
1933                              fp <- FC.newForeignPtr p unmap
1934                              return fp
1935                c_close fd
1936                hClose h
1937                return (fp, l)
1938     where mmap_limit = 16*1024
1939 -}
1940
1941 #if defined(__GLASGOW_HASKELL__)
1942 --
1943 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1944 --
1945 getArgs :: IO [ByteString]
1946 getArgs =
1947   alloca $ \ p_argc ->
1948   alloca $ \ p_argv -> do
1949     getProgArgv p_argc p_argv
1950     p    <- fromIntegral `fmap` peek p_argc
1951     argv <- peek p_argv
1952     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1953 #endif
1954
1955 -- ---------------------------------------------------------------------
1956 -- Internal utilities
1957
1958 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1959 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1960 -- convenience for ByteString construction.
1961 w2c :: Word8 -> Char
1962 #if !defined(__GLASGOW_HASKELL__)
1963 w2c = chr . fromIntegral
1964 #else
1965 w2c = unsafeChr . fromIntegral
1966 #endif
1967 {-# INLINE w2c #-}
1968
1969 c2w :: Char -> Word8
1970 c2w = fromIntegral . ord
1971 {-# INLINE c2w #-}
1972
1973 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1974 -- is padded with a null byte.
1975 mallocByteString :: Int -> IO (ForeignPtr Word8)
1976 mallocByteString l = do
1977     fp <- mallocForeignPtrArray (l+1)
1978     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1979     return fp
1980
1981 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1982 -- argument gives the final size of the ByteString. Unlike 'generate'
1983 -- the ByteString is not reallocated if the final size is less than the
1984 -- estimated size. Also, unlike 'generate' ByteString's created this way
1985 -- are managed on the Haskell heap.
1986 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1987 create l write_ptr = inlinePerformIO $ do
1988     fp <- mallocByteString (l+1)
1989     withForeignPtr fp $ \p -> write_ptr p
1990     return $ PS fp 0 l
1991 {-# INLINE create #-}
1992
1993 -- | Perform an operation with a temporary ByteString
1994 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1995 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1996 {-# INLINE withPtr #-}
1997
1998 -- Common up near identical calls to `error' to reduce the number
1999 -- constant strings created when compiled:
2000 errorEmptyList :: String -> a
2001 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
2002 {-# INLINE errorEmptyList #-}
2003
2004 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2005 -- of the string if no element is found, rather than Nothing.
2006 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2007 STRICT2(findIndexOrEnd)
2008 findIndexOrEnd f ps
2009     | null ps           = 0
2010     | f (unsafeHead ps) = 0
2011     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
2012 {-# INLINE findIndexOrEnd #-}
2013
2014 -- Find from the end of the string using predicate
2015 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2016 STRICT2(findFromEndUntil)
2017 findFromEndUntil f ps@(PS x s l) =
2018     if null ps then 0
2019     else if f (last ps) then l
2020          else findFromEndUntil f (PS x s (l-1))
2021
2022 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2023 -- it exposes lots of things to further inlining
2024 --
2025 {-# INLINE inlinePerformIO #-}
2026 inlinePerformIO :: IO a -> a
2027 #if defined(__GLASGOW_HASKELL__)
2028 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2029 #else
2030 inlinePerformIO = unsafePerformIO
2031 #endif
2032
2033 {-# INLINE newForeignFreePtr #-}
2034 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2035 #if defined(__GLASGOW_HASKELL__)
2036 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2037 #else
2038 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2039 #endif
2040
2041 -- ---------------------------------------------------------------------
2042 --
2043 -- Standard C functions
2044 --
2045
2046 foreign import ccall unsafe "string.h strlen" c_strlen
2047     :: CString -> CInt
2048
2049 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2050     :: CInt -> IO (Ptr Word8)
2051
2052 foreign import ccall unsafe "static stdlib.h free" c_free
2053     :: Ptr Word8 -> IO ()
2054
2055 #if !defined(__GLASGOW_HASKELL__)
2056 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2057     :: FunPtr (Ptr Word8 -> IO ())
2058 #endif
2059
2060 foreign import ccall unsafe "string.h memset" memset
2061     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2062
2063 foreign import ccall unsafe "string.h memchr" memchr
2064     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2065
2066 foreign import ccall unsafe "string.h memcmp" memcmp
2067     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO Int
2068
2069 foreign import ccall unsafe "string.h memcpy" memcpy
2070     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2071
2072 -- ---------------------------------------------------------------------
2073 --
2074 -- Uses our C code
2075 --
2076
2077 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2078     :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2079
2080 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2081     :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2082
2083 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2084     :: Ptr Word8 -> CInt -> Word8
2085
2086 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2087     :: Ptr Word8 -> CInt -> Word8
2088
2089 foreign import ccall unsafe "static fpstring.h count" c_count
2090     :: Ptr Word8 -> CInt -> Word8 -> Int
2091
2092 -- ---------------------------------------------------------------------
2093 -- MMap
2094
2095 {-
2096 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2097     :: Int -> Int -> IO (Ptr Word8)
2098
2099 foreign import ccall unsafe "static unistd.h close" c_close
2100     :: Int -> IO Int
2101
2102 #  if !defined(__OpenBSD__)
2103 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2104     :: Ptr Word8 -> Int -> IO Int
2105 #  endif
2106 -}
2107
2108 -- ---------------------------------------------------------------------
2109 -- Internal GHC Haskell magic
2110
2111 #if defined(__GLASGOW_HASKELL__)
2112 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2113     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2114
2115 foreign import ccall unsafe "__hscore_memcpy_src_off"
2116    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2117 #endif
2118
2119 -- ---------------------------------------------------------------------
2120 --
2121 -- Functional array fusion for ByteStrings.
2122 --
2123 -- From the Data Parallel Haskell project,
2124 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2125 --
2126
2127 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2128 -- the fact that no bottoms of this type are ever constructed; hence, we can
2129 -- assume @(_ :: NoAL) `seq` x = x@.
2130 --
2131 data NoAL = NoAL
2132
2133 -- | Special forms of loop arguments
2134 --
2135 -- * These are common special cases for the three function arguments of gen
2136 --   and loop; we give them special names to make it easier to trigger RULES
2137 --   applying in the special cases represented by these arguments.  The
2138 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2139 --   two simplifier phases.
2140 --
2141 -- * In the case where the accumulator is not needed, it is better to always
2142 --   explicitly return a value `()', rather than just copy the input to the
2143 --   output, as the former gives GHC better local information.
2144 --
2145
2146 -- | Element function expressing a mapping only
2147 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2148 mapEFL f = \_ e -> (noAL, (Just $ f e))
2149 #if defined(__GLASGOW_HASKELL__)
2150 {-# INLINE [1] mapEFL #-}
2151 #endif
2152
2153 -- | Element function implementing a filter function only
2154 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2155 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2156 #if defined(__GLASGOW_HASKELL__)
2157 {-# INLINE [1] filterEFL #-}
2158 #endif
2159
2160 -- |Element function expressing a reduction only
2161 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2162 foldEFL f = \a e -> (f a e, Nothing)
2163 #if defined(__GLASGOW_HASKELL__)
2164 {-# INLINE [1] foldEFL #-}
2165 #endif
2166
2167 -- | No accumulator
2168 noAL :: NoAL
2169 noAL = NoAL
2170 #if defined(__GLASGOW_HASKELL__)
2171 {-# INLINE [1] noAL #-}
2172 #endif
2173
2174 -- | Projection functions that are fusion friendly (as in, we determine when
2175 -- they are inlined)
2176 loopArr :: (ByteString, acc) -> ByteString
2177 loopArr (arr, _) = arr
2178 #if defined(__GLASGOW_HASKELL__)
2179 {-# INLINE [1] loopArr #-}
2180 #endif
2181
2182 loopAcc :: (ByteString, acc) -> acc
2183 loopAcc (_, acc) = acc
2184 #if defined(__GLASGOW_HASKELL__)
2185 {-# INLINE [1] loopAcc #-}
2186 #endif
2187
2188 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2189 loopSndAcc (arr, (_, acc)) = (arr, acc)
2190 #if defined(__GLASGOW_HASKELL__)
2191 {-# INLINE [1] loopSndAcc #-}
2192 #endif
2193
2194 ------------------------------------------------------------------------
2195
2196 -- | Iteration over over ByteStrings
2197 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2198       -> acc                                   -- ^ initial acc value
2199       -> ByteString                            -- ^ input ByteString
2200       -> (ByteString, acc)
2201
2202 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2203     fp          <- mallocByteString i
2204     (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2205         (acc, i') <- go (a `plusPtr` s) p start
2206         if i' == i
2207             then return (fp,i,acc)                      -- no realloc for map
2208             else do fp_ <- mallocByteString (i'+1)      -- realloc
2209                     withForeignPtr fp_ $ \p' -> do
2210                         memcpy p' p (fromIntegral i')   -- can't avoid this,  right?
2211                         poke (p' `plusPtr` i') (0::Word8)
2212                     return (fp_,i',acc)
2213
2214     return (PS ptr 0 n, acc)
2215   where
2216     go p ma = trans 0 0
2217         where
2218             STRICT3(trans)
2219             trans a_off ma_off acc
2220                 | a_off >= i = return (acc, ma_off)
2221                 | otherwise  = do
2222                     x <- peekByteOff p a_off
2223                     let (acc', oe) = f acc x
2224                     ma_off' <- case oe of
2225                         Nothing  -> return ma_off
2226                         Just e   -> do pokeByteOff ma ma_off e
2227                                        return $ ma_off + 1
2228                     trans (a_off+1) ma_off' acc'
2229
2230 #if defined(__GLASGOW_HASKELL__)
2231 {-# INLINE [1] loopU #-}
2232 #endif
2233
2234 infixr 9 `fuseEFL`
2235
2236 -- |Fuse to flat loop functions
2237 fuseEFL :: (a1 -> Word8  -> (a1, Maybe Word8))
2238         -> (a2 -> Word8  -> (a2, Maybe Word8))
2239         -> (a1, a2)
2240         -> Word8
2241         -> ((a1, a2), Maybe Word8)
2242 fuseEFL f g (acc1, acc2) e1 =
2243     case f acc1 e1 of
2244         (acc1', Nothing) -> ((acc1', acc2), Nothing)
2245         (acc1', Just e2) ->
2246             case g acc2 e2 of
2247                 (acc2', res) -> ((acc1', acc2'), res)
2248
2249 {-# RULES
2250
2251 "Array fusion!" forall em1 em2 start1 start2 arr.
2252   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2253     loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2254
2255 "loopArr/loopSndAcc" forall x.
2256   loopArr (loopSndAcc x) = loopArr x
2257
2258 "seq/NoAL" forall (u::NoAL) e.
2259   u `seq` e = e
2260
2261   #-}
2262