Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --                       (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with Prelude functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         packByte,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: Word8 -> ByteString -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57         head,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         last,                   -- :: ByteString -> Word8
  60         init,                   -- :: ByteString -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62
  63         -- * Special ByteStrings
  64         inits,                  -- :: ByteString -> [ByteString]
  65         tails,                  -- :: ByteString -> [ByteString]
  66         elems,                  -- :: ByteString -> [ByteString]
  67
  68         -- * Transformating ByteStrings
  69         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  70         reverse,                -- :: ByteString -> ByteString
  71         intersperse,            -- :: Word8 -> ByteString -> ByteString
  72         transpose,              -- :: [ByteString] -> [ByteString]
  73
  74         -- * Reducing 'ByteString's
  75         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  76         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  77         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  78         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79
  80         -- ** Special folds
  81         concat,                 -- :: [ByteString] -> ByteString
  82         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  83         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  84         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  85         maximum,                -- :: ByteString -> Word8
  86         minimum,                -- :: ByteString -> Word8
  87         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  88
  89         -- * Generating and unfolding ByteStrings
  90         replicate,              -- :: Int -> Word8 -> ByteString
  91         unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
  92
  93         -- * Substrings
  94
  95         -- ** Breaking strings
  96         take,                   -- :: Int -> ByteString -> ByteString
  97         drop,                   -- :: Int -> ByteString -> ByteString
  98         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  99         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 100         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 101         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 102         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 103         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 104
 105         -- ** Breaking and dropping on specific bytes
 106         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 107         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 108         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 109         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 110
 111         -- ** Breaking into many substrings
 112         split,                  -- :: Word8 -> ByteString -> [ByteString]
 113         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 114         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 115         group,                  -- :: ByteString -> [ByteString]
 116         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 117
 118         -- ** Joining strings
 119         join,                   -- :: ByteString -> [ByteString] -> ByteString
 120         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 121
 122         -- * Indexing ByteStrings
 123         index,                  -- :: ByteString -> Int -> Word8
 124         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 125         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 126         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 127         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 128         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 129         count,                  -- :: Word8 -> ByteString -> Int
 130
 131         -- * Ordered ByteStrings
 132         sort,                   -- :: ByteString -> ByteString
 133
 134         -- * Searching ByteStrings
 135
 136         -- ** Searching by equality
 137         -- | These functions use memchr(3) to efficiently search the ByteString
 138
 139         elem,                   -- :: Word8 -> ByteString -> Bool
 140         notElem,                -- :: Word8 -> ByteString -> Bool
 141         filterByte,             -- :: Word8 -> ByteString -> ByteString
 142         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 143
 144         -- ** Searching with a predicate
 145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 146         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 147
 148         -- ** Prefixes and suffixes
 149         -- | These functions use memcmp(3) to efficiently compare substrings
 150         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 151         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 152
 153         -- ** Search for arbitrary substrings
 154         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 155         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 156         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 157
 158         -- * Zipping and unzipping ByteStrings
 159         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 160         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 161         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 162
 163         -- * Unchecked access
 164         unsafeHead,             -- :: ByteString -> Word8
 165         unsafeTail,             -- :: ByteString -> ByteString
 166         unsafeIndex,            -- :: ByteString -> Int -> Word8
 167
 168         -- * Low level introduction and elimination
 169         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 170         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 171         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 172         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 173         skipIndex,              -- :: ByteString -> Int
 174
 175         -- ** Packing CStrings and pointers
 176         packCString,            -- :: CString -> ByteString
 177         packCStringLen,         -- :: CString -> ByteString
 178         packMallocCString,      -- :: CString -> ByteString
 179
 180 #if defined(__GLASGOW_HASKELL__)
 181         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 182         packAddress,            -- :: Addr# -> ByteString
 183         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 184         unsafeFinalize,         -- :: ByteString -> IO ()
 185 #endif
 186
 187         -- ** Using ByteStrings as CStrings
 188         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 189         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 190         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 191
 192         -- ** Copying ByteStrings
 193         -- | These functions perform memcpy(3) operations
 194         copy,                   -- :: ByteString -> ByteString
 195         copyCString,            -- :: CString -> ByteString
 196         copyCStringLen,         -- :: CStringLen -> ByteString
 197
 198         -- * I\/O with @ByteString@s
 199
 200         -- ** Standard input and output
 201
 202 #if defined(__GLASGOW_HASKELL__)
 203         getLine,                -- :: IO ByteString
 204 #endif
 205         getContents,            -- :: IO ByteString
 206         putStr,                 -- :: ByteString -> IO ()
 207         putStrLn,               -- :: ByteString -> IO ()
 208
 209         -- ** Files
 210         readFile,               -- :: FilePath -> IO ByteString
 211         writeFile,              -- :: FilePath -> ByteString -> IO ()
 212 --      mmapFile,               -- :: FilePath -> IO ByteString
 213
 214         -- ** I\/O with Handles
 215 #if defined(__GLASGOW_HASKELL__)
 216         getArgs,                -- :: IO [ByteString]
 217         hGetLine,               -- :: Handle -> IO ByteString
 218         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 219 #endif
 220         hGetContents,           -- :: Handle -> IO ByteString
 221         hGet,                   -- :: Handle -> Int -> IO ByteString
 222         hPut,                   -- :: Handle -> ByteString -> IO ()
 223
 224         -- * Fusion utilities
 225 #if defined(__GLASGOW_HASKELL__)
 226         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 227 #endif
 228
 229         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 230         loopU, mapEFL, filterEFL, foldEFL,
 231         filterF, mapF
 232
 233   ) where
 234
 235 import qualified Prelude as P
 236 import Prelude hiding           (reverse,head,tail,last,init,null
 237                                 ,length,map,lines,foldl,foldr,unlines
 238                                 ,concat,any,take,drop,splitAt,takeWhile
 239                                 ,dropWhile,span,break,elem,filter,maximum
 240                                 ,minimum,all,concatMap,foldl1,foldr1
 241                                 ,readFile,writeFile,replicate
 242                                 ,getContents,getLine,putStr,putStrLn
 243                                 ,zip,zipWith,unzip,notElem)
 244
 245 import qualified Data.List as List
 246
 247 import Data.Char
 248 import Data.Word                (Word8)
 249 import Data.Maybe               (listToMaybe)
 250 import Data.Array               (listArray)
 251 import qualified Data.Array as Array ((!))
 252
 253 -- Control.Exception.bracket not available in yhc or nhc
 254 import Control.Exception        (bracket)
 255 import Control.Monad            (when)
 256
 257 import Foreign.C.String         (CString, CStringLen)
 258 import Foreign.C.Types          (CSize, CInt)
 259 import Foreign.ForeignPtr
 260 import Foreign.Marshal.Array
 261 import Foreign.Ptr
 262 import Foreign.Storable         (Storable(..))
 263
 264 -- hGetBuf and hPutBuf not available in yhc or nhc
 265 import System.IO                (stdin,stdout,hClose,hFileSize
 266                                 ,hGetBuf,hPutBuf,openBinaryFile
 267                                 ,Handle,IOMode(..))
 268
 269 #if !defined(__GLASGOW_HASKELL__)
 270 import System.IO.Unsafe
 271 #endif
 272
 273 #if defined(__GLASGOW_HASKELL__)
 274
 275 import Data.Generics            (Data(..), Typeable(..))
 276
 277 import System.IO                (hGetBufNonBlocking)
 278 import System.IO.Error          (isEOFError)
 279
 280 import Foreign.Marshal          (alloca)
 281 import qualified Foreign.Concurrent as FC (newForeignPtr)
 282
 283 import GHC.Handle
 284 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 285 import GHC.Base                 (build, unsafeChr)
 286 import GHC.Word hiding (Word8)
 287 import GHC.Ptr                  (Ptr(..))
 288 import GHC.ST                   (ST(..))
 289 import GHC.IOBase
 290
 291 #endif
 292
 293 -- CFILES stuff is Hugs only
 294 {-# CFILES cbits/fpstring.c #-}
 295
 296 -- -----------------------------------------------------------------------------
 297 --
 298 -- Useful macros, until we have bang patterns
 299 --
 300
 301 #define STRICT1(f) f a | a `seq` False = undefined
 302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 306
 307 -- -----------------------------------------------------------------------------
 308
 309 -- | A space-efficient representation of a Word8 vector, supporting many
 310 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 311 --
 312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 313 --
 314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 315                      {-# UNPACK #-} !Int
 316                      {-# UNPACK #-} !Int
 317
 318 #if defined(__GLASGOW_HASKELL__)
 319     deriving (Data, Typeable)
 320 #endif
 321
 322 instance Eq  ByteString
 323     where (==)    = eq
 324
 325 instance Ord ByteString
 326     where compare = compareBytes
 327
 328 instance Show ByteString where
 329     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 330
 331 instance Read ByteString where
 332     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 333
 334 {-
 335 instance Arbitrary PackedString where
 336     arbitrary = P.pack `fmap` arbitrary
 337     coarbitrary s = coarbitrary (P.unpack s)
 338 -}
 339
 340 -- | /O(n)/ Equality on the 'ByteString' type.
 341 eq :: ByteString -> ByteString -> Bool
 342 eq a@(PS p s l) b@(PS p' s' l')
 343     | l /= l'            = False    -- short cut on length
 344     | p == p' && s == s' = True     -- short cut for the same string
 345     | otherwise          = compareBytes a b == EQ
 346 {-# INLINE eq #-}
 347
 348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 349 compareBytes :: ByteString -> ByteString -> Ordering
 350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 351     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 352     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 353     | otherwise                         = inlinePerformIO $
 354         withForeignPtr x1 $ \p1 ->
 355         withForeignPtr x2 $ \p2 -> do
 356             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
 357             return $ case i `compare` 0 of
 358                         EQ  -> l1 `compare` l2
 359                         x   -> x
 360 {-# INLINE compareBytes #-}
 361
 362 {-
 363 --
 364 -- About 4x slower over 32M
 365 --
 366 compareBytes :: ByteString -> ByteString -> Ordering
 367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 368     withForeignPtr fp1 $ \p1 ->
 369         withForeignPtr fp2 $ \p2 ->
 370             cmp (p1 `plusPtr` off1)
 371                 (p2 `plusPtr` off2) 0 len1 len2
 372
 373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 374 STRICT5(cmp)
 375 cmp p1 p2 n len1 len2
 376       | n == len1 = if n == len2 then return EQ else return LT
 377       | n == len2 = return GT
 378       | otherwise = do
 379           (a :: Word8) <- peekByteOff p1 n
 380           (b :: Word8) <- peekByteOff p2 n
 381           case a `compare` b of
 382                 EQ -> cmp p1 p2 (n+1) len1 len2
 383                 LT -> return LT
 384                 GT -> return GT
 385 {-# INLINE compareBytes #-}
 386 -}
 387
 388 -- -----------------------------------------------------------------------------
 389 -- Introducing and eliminating 'ByteString's
 390
 391 -- | /O(1)/ The empty 'ByteString'
 392 empty :: ByteString
 393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 394 {-# NOINLINE empty #-}
 395
 396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 397 packByte :: Word8 -> ByteString
 398 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
 399     withForeignPtr fp $ \p -> poke p c
 400     return $ PS fp 0 1
 401 {-# INLINE packByte #-}
 402
 403 --
 404 -- XXX must use unsafePerformIO, not inlinePerformIO here, otherwise ghc
 405 -- 6.5 compiles:
 406 --
 407 --  packByte 255 `compare` packByte 127
 408 --
 409 -- into
 410 --
 411 --  case mallocByteString 2 of
 412 --      ForeignPtr f internals ->
 413 --           case writeWord8OffAddr# f 0 255 of _ ->
 414 --           case writeWord8OffAddr# f 0 127 of _ ->
 415 --           case eqAddr# f f of
 416 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 417 --                                        (GHC.Prim.plusAddr# f 0)
 418 --
 419 --
 420
 421 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 422 --
 423 -- For applications with large numbers of string literals, pack can be a
 424 -- bottleneck. In such cases, consider using packAddress (GHC only).
 425 pack :: [Word8] -> ByteString
 426
 427 #if !defined(__GLASGOW_HASKELL__)
 428
 429 pack str = create (P.length str) $ \p -> go p str
 430     where
 431         go _ []     = return ()
 432         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 433
 434 #else /* hack away */
 435
 436 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 437     where
 438         go _ _ []        = return ()
 439         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 440
 441         writeByte p i c = ST $ \s# ->
 442             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 443
 444 #endif
 445
 446 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 447 unpack :: ByteString -> [Word8]
 448
 449 #if !defined(__GLASGOW_HASKELL__)
 450
 451 unpack (PS _  _ 0) = []
 452 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 453         go (p `plusPtr` s) (l - 1) []
 454     where
 455         STRICT3(go)
 456         go p 0 acc = peek p          >>= \e -> return (e : acc)
 457         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 458 {-# INLINE unpack #-}
 459
 460 #else
 461
 462 unpack ps = build (unpackFoldr ps)
 463 {-# INLINE unpack #-}
 464
 465 unpackList :: ByteString -> [Word8]
 466 unpackList (PS fp off len) = withPtr fp $ \p -> do
 467     let STRICT3(loop)
 468         loop _ (-1) acc = return acc
 469         loop q n acc = do
 470            a <- peekByteOff q n
 471            loop q (n-1) (a : acc)
 472     loop (p `plusPtr` off) (len-1) []
 473
 474 {-# RULES
 475 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 476  #-}
 477
 478 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 479 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 480     let STRICT3(loop)
 481         loop _ (-1) acc = return acc
 482         loop q n    acc = do
 483            a <- peekByteOff q n
 484            loop q (n-1) (a `f` acc)
 485     loop (p `plusPtr` off) (len-1) ch
 486 {-# INLINE [0] unpackFoldr #-}
 487
 488 #endif
 489
 490 ------------------------------------------------------------------------
 491
 492 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 493 -- conversion function
 494 packWith :: (a -> Word8) -> [a] -> ByteString
 495 packWith k str = create (P.length str) $ \p -> go p str
 496     where
 497         STRICT2(go)
 498         go _ []     = return ()
 499         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 500 {-# INLINE packWith #-}
 501 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 502
 503 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 504 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 505 unpackWith _ (PS _  _ 0) = []
 506 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 507         go (p `plusPtr` s) (l - 1) []
 508     where
 509         STRICT3(go)
 510         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 511         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 512 {-# INLINE unpackWith #-}
 513 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 514
 515 -- ---------------------------------------------------------------------
 516 -- Basic interface
 517
 518 -- | /O(1)/ Test whether a ByteString is empty.
 519 null :: ByteString -> Bool
 520 null (PS _ _ l) = l == 0
 521 {-# INLINE null #-}
 522
 523 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 524 length :: ByteString -> Int
 525 length (PS _ _ l) = l
 526 {-# INLINE length #-}
 527
 528 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 529 -- complexity, as it requires a memcpy.
 530 cons :: Word8 -> ByteString -> ByteString
 531 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 532         memcpy (p `plusPtr` 1) (f `plusPtr` s) l
 533         poke p c
 534 {-# INLINE cons #-}
 535
 536 -- todo fuse
 537
 538 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 539 snoc :: ByteString -> Word8 -> ByteString
 540 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 541         memcpy p (f `plusPtr` s) l
 542         poke (p `plusPtr` l) c
 543 {-# INLINE snoc #-}
 544
 545 -- todo fuse
 546
 547 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 548 head :: ByteString -> Word8
 549 head ps@(PS x s _)
 550     | null ps   = errorEmptyList "head"
 551     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 552 {-# INLINE head #-}
 553
 554 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 555 tail :: ByteString -> ByteString
 556 tail (PS p s l)
 557     | l <= 0    = errorEmptyList "tail"
 558     | otherwise = PS p (s+1) (l-1)
 559 {-# INLINE tail #-}
 560
 561 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 562 last :: ByteString -> Word8
 563 last ps@(PS x s l)
 564     | null ps   = errorEmptyList "last"
 565     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 566 {-# INLINE last #-}
 567
 568 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 569 init :: ByteString -> ByteString
 570 init (PS p s l)
 571     | l <= 0    = errorEmptyList "init"
 572     | otherwise = PS p s (l-1)
 573 {-# INLINE init #-}
 574
 575 -- | /O(n)/ Append two ByteStrings
 576 append :: ByteString -> ByteString -> ByteString
 577 append xs ys | null xs   = ys
 578              | null ys   = xs
 579              | otherwise = concat [xs,ys]
 580 {-# INLINE append #-}
 581
 582 -- ---------------------------------------------------------------------
 583 -- Transformations
 584
 585 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 586 -- element of @xs@. This function is subject to array fusion.
 587 map :: (Word8 -> Word8) -> ByteString -> ByteString
 588 map f = loopArr . loopU (mapEFL f) noAL
 589 {-# INLINE map #-}
 590
 591 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 592 -- slightly faster for one-shot cases.
 593 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
 594 STRICT2(mapF)
 595 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 596     np <- mallocByteString (len+1)
 597     withForeignPtr np $ \p -> do
 598         map_ 0 (a `plusPtr` s) p
 599         return (PS np 0 len)
 600   where
 601     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 602     STRICT3(map_)
 603     map_ n p1 p2
 604        | n >= len = return ()
 605        | otherwise = do
 606             x <- peekByteOff p1 n
 607             pokeByteOff p2 n (f x)
 608             map_ (n+1) p1 p2
 609 {-# INLINE mapF #-}
 610
 611 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 612 reverse :: ByteString -> ByteString
 613 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 614         c_reverse p (f `plusPtr` s) l
 615
 616 {-
 617 reverse = pack . P.reverse . unpack
 618 -}
 619
 620 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 621 -- 'ByteString' and \`intersperses\' that byte between the elements of
 622 -- the 'ByteString'.  It is analogous to the intersperse function on
 623 -- Lists.
 624 intersperse :: Word8 -> ByteString -> ByteString
 625 intersperse c ps@(PS x s l)
 626     | length ps < 2  = ps
 627     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 628         c_intersperse p (f `plusPtr` s) l c
 629
 630 {-
 631 intersperse c = pack . List.intersperse c . unpack
 632 -}
 633
 634 -- | The 'transpose' function transposes the rows and columns of its
 635 -- 'ByteString' argument.
 636 transpose :: [ByteString] -> [ByteString]
 637 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 638
 639 -- ---------------------------------------------------------------------
 640 -- Reducing 'ByteString's
 641
 642 -- | 'foldl', applied to a binary operator, a starting value (typically
 643 -- the left-identity of the operator), and a ByteString, reduces the
 644 -- ByteString using the binary operator, from left to right.
 645 -- This function is subject to array fusion.
 646 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 647 foldl f z = loopAcc . loopU (foldEFL f) z
 648 {-# INLINE foldl #-}
 649
 650 {-
 651 --
 652 -- About twice as fast with 6.4.1, but not fuseable
 653 -- A simple fold . map is enough to make it worth while.
 654 --
 655 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 656         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 657     where
 658         STRICT3(lgo)
 659         lgo z p q | p == q    = return z
 660                   | otherwise = do c <- peek p
 661                                    lgo (f z c) (p `plusPtr` 1) q
 662 -}
 663
 664 -- | 'foldr', applied to a binary operator, a starting value
 665 -- (typically the right-identity of the operator), and a ByteString,
 666 -- reduces the ByteString using the binary operator, from right to left.
 667 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 668 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 669         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 670     where
 671         STRICT2(go)
 672         go p q | p == q    = return z
 673                | otherwise = do c  <- peek p
 674                                 ws <- go (p `plusPtr` 1) q
 675                                 return $ c `k` ws
 676
 677 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 678 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 679 -- This function is subject to array fusion.
 680 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 681 foldl1 f ps
 682     | null ps   = errorEmptyList "foldl1"
 683     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 684
 685 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 686 -- and thus must be applied to non-empty 'ByteString's
 687 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 688 foldr1 f ps
 689     | null ps        = errorEmptyList "foldr1"
 690     | otherwise      = foldr f (last ps) (init ps)
 691
 692 -- ---------------------------------------------------------------------
 693 -- Special folds
 694
 695 -- | /O(n)/ Concatenate a list of ByteStrings.
 696 concat :: [ByteString] -> ByteString
 697 concat []     = empty
 698 concat [ps]   = ps
 699 concat xs     = create len $ \ptr -> go xs ptr
 700   where len = P.sum . P.map length $ xs
 701         STRICT2(go)
 702         go []            _   = return ()
 703         go (PS p s l:ps) ptr = do
 704                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) l
 705                 go ps (ptr `plusPtr` l)
 706
 707 -- | Map a function over a 'ByteString' and concatenate the results
 708 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 709 concatMap f = foldr (append . f) empty
 710
 711 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 712 -- any element of the 'ByteString' satisfies the predicate.
 713 any :: (Word8 -> Bool) -> ByteString -> Bool
 714 any _ (PS _ _ 0) = False
 715 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 716         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 717     where
 718         STRICT2(go)
 719         go p q | p == q    = return False
 720                | otherwise = do c <- peek p
 721                                 if f c then return True
 722                                        else go (p `plusPtr` 1) q
 723
 724 -- todo fuse
 725
 726 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 727 -- if all elements of the 'ByteString' satisfy the predicate.
 728 all :: (Word8 -> Bool) -> ByteString -> Bool
 729 all _ (PS _ _ 0) = True
 730 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 731         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 732     where
 733         STRICT2(go)
 734         go p q | p == q     = return True  -- end of list
 735                | otherwise  = do c <- peek p
 736                                  if f c
 737                                     then go (p `plusPtr` 1) q
 738                                     else return False
 739 -- todo fuse
 740
 741 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 742 maximum :: ByteString -> Word8
 743 maximum xs@(PS x s l)
 744     | null xs   = errorEmptyList "maximum"
 745     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 746                     return $ c_maximum (p `plusPtr` s) l
 747 {-# INLINE maximum #-}
 748
 749 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 750 minimum :: ByteString -> Word8
 751 minimum xs@(PS x s l)
 752     | null xs   = errorEmptyList "minimum"
 753     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 754                     return $ c_minimum (p `plusPtr` s) l
 755 {-# INLINE minimum #-}
 756
 757 -- fusion is too slow here (10x)
 758
 759 {-
 760 maximum xs@(PS x s l)
 761     | null xs   = errorEmptyList "maximum"
 762     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 763                         w <- peek p
 764                         maximum_ (p `plusPtr` s) 0 l w
 765
 766 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 767 STRICT4(maximum_)
 768 maximum_ ptr n m c
 769     | n >= m    = return c
 770     | otherwise = do w <- peekByteOff ptr n
 771                      maximum_ ptr (n+1) m (if w > c then w else c)
 772
 773 minimum xs@(PS x s l)
 774     | null xs   = errorEmptyList "minimum"
 775     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 776                         w <- peek p
 777                         minimum_ (p `plusPtr` s) 0 l w
 778
 779 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 780 STRICT4(minimum_)
 781 minimum_ ptr n m c
 782     | n >= m    = return c
 783     | otherwise = do w <- peekByteOff ptr n
 784                      minimum_ ptr (n+1) m (if w < c then w else c)
 785 -}
 786
 787 -- | /O(n)/ map Word8 functions, provided with the index at each position
 788 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 789 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 790     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 791   where
 792     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 793     STRICT4(go)
 794     go n f t p | f == p    = return ()
 795                | otherwise = do w <- peek f
 796                                 ((poke t) . k n) w
 797                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 798
 799 -- ---------------------------------------------------------------------
 800 -- Unfolds and replicates
 801
 802 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 803 -- the value of every element. The following holds:
 804 --
 805 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 806 --
 807 -- This implemenation uses @memset(3)@
 808 replicate :: Int -> Word8 -> ByteString
 809 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 810
 811 {-
 812 -- About 5x slower
 813 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 814     where
 815         STRICT2(go)
 816         go _   0 = return w
 817         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 818 -}
 819
 820 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 821 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 822 -- the element and returns 'Nothing' if it is done producing the
 823 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 824 -- prepending to the ByteString and @b@ is used as the next element in a
 825 -- recursive call.
 826 --
 827 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 828 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 829 -- final size of the ByteString as an argument. 'cons' can then be
 830 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 831 -- linear complexity. The depth of the recursion is limited to this
 832 -- size, but may be less. For lazy, infinite unfoldr, use
 833 -- 'Data.List.unfoldr' (from 'Data.List').
 834 --
 835 -- Examples:
 836 --
 837 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 838 --
 839 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 840 --
 841 -- > unfoldrN n == take n $ List.unfoldr
 842 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
 843 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 844     where
 845         STRICT3(go)
 846         go q c n | n == i    = return n      -- stop if we reach `i'
 847                  | otherwise = case f c of
 848                                    Nothing        -> return n
 849                                    Just (a,new_c) -> do
 850                                         poke q a
 851                                         go (q `plusPtr` 1) new_c (n+1)
 852
 853 -- ---------------------------------------------------------------------
 854 -- Substrings
 855
 856 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 857 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 858 take :: Int -> ByteString -> ByteString
 859 take n ps@(PS x s l)
 860     | n < 0     = empty
 861     | n >= l    = ps
 862     | otherwise = PS x s n
 863 {-# INLINE take #-}
 864
 865 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 866 -- elements, or @[]@ if @n > 'length' xs@.
 867 drop  :: Int -> ByteString -> ByteString
 868 drop n ps@(PS x s l)
 869     | n <= 0    = ps
 870     | n >  l    = empty
 871     | otherwise = PS x (s+n) (l-n)
 872 {-# INLINE drop #-}
 873
 874 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 875 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 876 splitAt  n ps  = (take n ps, drop n ps)
 877 {-# INLINE splitAt #-}
 878
 879 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 880 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 881 -- satisfy @p@.
 882 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 883 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 884 {-# INLINE takeWhile #-}
 885
 886 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 887 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 888 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 889 {-# INLINE dropWhile #-}
 890
 891 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 892 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 893 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 894 {-# INLINE break #-}
 895
 896 -- | 'breakByte' breaks its ByteString argument at the first occurence
 897 -- of the specified byte. It is more efficient than 'break' as it is
 898 -- implemented with @memchr(3)@. I.e.
 899 --
 900 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 901 --
 902 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 903 breakByte c p = case elemIndex c p of
 904     Nothing -> (p,empty)
 905     Just n  -> (take n p, drop n p)
 906 {-# INLINE breakByte #-}
 907
 908 -- | 'spanByte' breaks its ByteString argument at the first
 909 -- occurence of a byte other than its argument. It is more efficient
 910 -- than 'span (==)'
 911 --
 912 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 913 --
 914 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 915 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 916     go (p `plusPtr` s) 0
 917   where
 918     STRICT2(go)
 919     go p i | i >= l    = return (ps, empty)
 920            | otherwise = do c' <- peekByteOff p i
 921                             if c /= c'
 922                                 then return (take i ps, drop i ps)
 923                                 else go p (i+1)
 924 {-# INLINE spanByte #-}
 925
 926 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 927 -- occurence of @w@. It behaves like 'break', except the delimiter is
 928 -- not returned, and @Nothing@ is returned if the delimiter is not in
 929 -- the ByteString. I.e.
 930 --
 931 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 932 --
 933 -- > breakFirst c xs ==
 934 -- > let (x,y) = break (== c) xs
 935 -- > in if null y then Nothing else Just (x, drop 1 y))
 936 --
 937 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 938 breakFirst c p = case elemIndex c p of
 939    Nothing -> Nothing
 940    Just n -> Just (take n p, drop (n+1) p)
 941 {-# INLINE breakFirst #-}
 942
 943 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 944 -- ByteString.
 945 --
 946 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 947 --
 948 -- and the following are equivalent:
 949 --
 950 -- > breakLast 'c' "abcdef"
 951 -- > let (x,y) = break (=='c') (reverse "abcdef")
 952 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 953 --
 954 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 955 breakLast c p = case elemIndexLast c p of
 956     Nothing -> Nothing
 957     Just n -> Just (take n p, drop (n+1) p)
 958 {-# INLINE breakLast #-}
 959
 960 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 961 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 962 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 963 span p ps = break (not . p) ps
 964 {-# INLINE span #-}
 965
 966 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 967 -- We have
 968 --
 969 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 970 --
 971 -- and
 972 --
 973 -- > spanEnd (not . isSpace) ps
 974 -- >    ==
 975 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 976 --
 977 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 978 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 979
 980 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 981 -- separators, where the predicate returns True for a separator element.
 982 -- The resulting components do not contain the separators.  Two adjacent
 983 -- separators result in an empty component in the output.  eg.
 984 --
 985 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 986 -- > splitWith (=='a') []        == []
 987 --
 988 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 989
 990 #if defined(__GLASGOW_HASKELL__)
 991 splitWith _pred (PS _  _   0) = []
 992 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
 993   where pred# c# = pred_ (W8# c#)
 994
 995         splitWith' pred' off' len' fp' = withPtr fp $ \p ->
 996             splitLoop pred' p 0 off' len' fp'
 997
 998         splitLoop :: (Word# -> Bool)
 999                   -> Ptr Word8
1000                   -> Int -> Int -> Int
1001                   -> ForeignPtr Word8
1002                   -> IO [ByteString]
1003
1004         splitLoop pred' p idx' off' len' fp'
1005             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1006             | idx' >= len'  = return [PS fp' off' idx']
1007             | otherwise = do
1008                 w <- peekElemOff p (off'+idx')
1009                 if pred' (case w of W8# w# -> w#)
1010                    then return (PS fp' off' idx' :
1011                               splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
1012                    else splitLoop pred' p (idx'+1) off' len' fp'
1013 {-# INLINE splitWith #-}
1014
1015 #else
1016 splitWith _ (PS _ _ 0) = []
1017 splitWith p ps = splitWith' p ps
1018     where
1019         STRICT2(splitWith')
1020         splitWith' q qs = if null rest then [chunk]
1021                                        else chunk : splitWith' q (unsafeTail rest)
1022             where (chunk,rest) = break q qs
1023 #endif
1024
1025 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1026 -- argument, consuming the delimiter. I.e.
1027 --
1028 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1029 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1030 -- > split 'x'  "x"          == ["",""]
1031 --
1032 -- and
1033 --
1034 -- > join [c] . split c == id
1035 -- > split == splitWith . (==)
1036 --
1037 -- As for all splitting functions in this library, this function does
1038 -- not copy the substrings, it just constructs new 'ByteStrings' that
1039 -- are slices of the original.
1040 --
1041 split :: Word8 -> ByteString -> [ByteString]
1042 split _ (PS _ _ 0) = []
1043 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1044     let ptr = p `plusPtr` s
1045
1046         STRICT1(loop)
1047         loop n = do
1048             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1049             if q == nullPtr
1050                 then return [PS x (s+n) (l-n)]
1051                 else do let i = q `minusPtr` ptr
1052                         ls <- loop (i+1)
1053                         return $! PS x (s+n) (i-n) : ls
1054     loop 0
1055 {-# INLINE split #-}
1056
1057 {-
1058 -- slower. but stays inside Haskell.
1059 split _ (PS _  _   0) = []
1060 split (W8# w#) (PS fp off len) = splitWith' off len fp
1061     where
1062         splitWith' off' len' fp' = withPtr fp $ \p ->
1063             splitLoop p 0 off' len' fp'
1064
1065         splitLoop :: Ptr Word8
1066                   -> Int -> Int -> Int
1067                   -> ForeignPtr Word8
1068                   -> IO [ByteString]
1069
1070         STRICT5(splitLoop)
1071         splitLoop p idx' off' len' fp'
1072             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1073             | idx' >= len'  = return [PS fp' off' idx']
1074             | otherwise = do
1075                 (W8# x#) <- peekElemOff p (off'+idx')
1076                 if word2Int# w# ==# word2Int# x#
1077                    then return (PS fp' off' idx' :
1078                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1079                    else splitLoop p (idx'+1) off' len' fp'
1080 -}
1081
1082 -- | Like 'splitWith', except that sequences of adjacent separators are
1083 -- treated as a single separator. eg.
1084 --
1085 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1086 --
1087 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1088 tokens f = P.filter (not.null) . splitWith f
1089
1090 -- | The 'group' function takes a ByteString and returns a list of
1091 -- ByteStrings such that the concatenation of the result is equal to the
1092 -- argument.  Moreover, each sublist in the result contains only equal
1093 -- elements.  For example,
1094 --
1095 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1096 --
1097 -- It is a special case of 'groupBy', which allows the programmer to
1098 -- supply their own equality test. It is about 40% faster than
1099 -- /groupBy (==)/
1100 group :: ByteString -> [ByteString]
1101 group xs
1102     | null xs   = []
1103     | otherwise = ys : group zs
1104     where
1105         (ys, zs) = spanByte (unsafeHead xs) xs
1106
1107 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1108 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1109 groupBy k xs
1110     | null xs   = []
1111     | otherwise = take n xs : groupBy k (drop n xs)
1112     where
1113         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1114
1115 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1116 -- 'ByteString's and concatenates the list after interspersing the first
1117 -- argument between each element of the list.
1118 join :: ByteString -> [ByteString] -> ByteString
1119 join filler pss = concat (splice pss)
1120     where
1121         splice []  = []
1122         splice [x] = [x]
1123         splice (x:y:xs) = x:filler:splice (y:xs)
1124
1125 --
1126 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1127 -- with a char. Around 4 times faster than the generalised join.
1128 --
1129 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1130 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1131     withForeignPtr ffp $ \fp ->
1132     withForeignPtr fgp $ \gp -> do
1133         memcpy ptr (fp `plusPtr` s) l
1134         poke (ptr `plusPtr` l) c
1135         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1136     where
1137       len = length f + length g + 1
1138 {-# INLINE joinWithByte #-}
1139
1140 -- ---------------------------------------------------------------------
1141 -- Indexing ByteStrings
1142
1143 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1144 index :: ByteString -> Int -> Word8
1145 index ps n
1146     | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
1147     | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1148                                 ++ ", length = " ++ show (length ps)
1149     | otherwise      = ps `unsafeIndex` n
1150 {-# INLINE index #-}
1151
1152 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1153 -- element in the given 'ByteString' which is equal to the query
1154 -- element, or 'Nothing' if there is no such element.
1155 -- This implementation uses memchr(3).
1156 elemIndex :: Word8 -> ByteString -> Maybe Int
1157 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1158     let p' = p `plusPtr` s
1159         q  = memchr p' c (fromIntegral l)
1160     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1161 {-# INLINE elemIndex #-}
1162
1163 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1164 -- element in the given 'ByteString' which is equal to the query
1165 -- element, or 'Nothing' if there is no such element. The following
1166 -- holds:
1167 --
1168 -- > elemIndexLast c xs ==
1169 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1170 --
1171 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1172 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1173     go (p `plusPtr` s) (l-1)
1174   where
1175     STRICT2(go)
1176     go p i | i < 0     = return Nothing
1177            | otherwise = do ch' <- peekByteOff p i
1178                             if ch == ch'
1179                                 then return $ Just i
1180                                 else go p (i-1)
1181 {-# INLINE elemIndexLast #-}
1182
1183 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1184 -- the indices of all elements equal to the query element, in ascending order.
1185 -- This implementation uses memchr(3).
1186 elemIndices :: Word8 -> ByteString -> [Int]
1187 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1188     let ptr = p `plusPtr` s
1189
1190         STRICT1(loop)
1191         loop n = do
1192                 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1193                 if q == nullPtr
1194                     then return []
1195                     else do let i = q `minusPtr` ptr
1196                             ls <- loop (i+1)
1197                             return $! i:ls
1198     loop 0
1199
1200 {-
1201 -- much slower
1202 elemIndices :: Word8 -> ByteString -> [Int]
1203 elemIndices c ps = loop 0 ps
1204    where STRICT2(loop)
1205          loop _ ps' | null ps'            = []
1206          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1207                     | otherwise           = loop (n+1) (unsafeTail ps')
1208 -}
1209
1210 -- | count returns the number of times its argument appears in the ByteString
1211 --
1212 -- > count = length . elemIndices
1213 --
1214 -- But more efficiently than using length on the intermediate list.
1215 count :: Word8 -> ByteString -> Int
1216 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1217     return $ c_count (p `plusPtr` s) (fromIntegral m) w
1218 {-# INLINE count #-}
1219
1220 {-
1221 --
1222 -- around 30% slower
1223 --
1224 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1225      go (p `plusPtr` s) (fromIntegral m) 0
1226     where
1227         go :: Ptr Word8 -> CSize -> Int -> IO Int
1228         STRICT3(go)
1229         go p l i = do
1230             let q = memchr p w l
1231             if q == nullPtr
1232                 then return i
1233                 else do let k = fromIntegral $ q `minusPtr` p
1234                         go (q `plusPtr` 1) (l-k-1) (i+1)
1235 -}
1236
1237 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1238 -- returns the index of the first element in the ByteString
1239 -- satisfying the predicate.
1240 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1241 findIndex = (listToMaybe .) . findIndices
1242
1243 -- | The 'findIndices' function extends 'findIndex', by returning the
1244 -- indices of all elements satisfying the predicate, in ascending order.
1245 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1246 findIndices p ps = loop 0 ps
1247    where
1248      STRICT2(loop)
1249      loop _ qs | null qs           = []
1250      loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1251                | otherwise         =     loop (n+1) (unsafeTail qs)
1252
1253 -- ---------------------------------------------------------------------
1254 -- Searching ByteStrings
1255
1256 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1257 elem :: Word8 -> ByteString -> Bool
1258 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1259 {-# INLINE elem #-}
1260
1261 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1262 notElem :: Word8 -> ByteString -> Bool
1263 notElem c ps = not (elem c ps)
1264 {-# INLINE notElem #-}
1265
1266 --
1267 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1268 -- case of filtering a single byte. It is more efficient to use
1269 -- /filterByte/ in this case.
1270 --
1271 -- > filterByte == filter . (==)
1272 --
1273 -- filterByte is around 10x faster, and uses much less space, than its
1274 -- filter equivalent
1275 filterByte :: Word8 -> ByteString -> ByteString
1276 filterByte w ps = replicate (count w ps) w
1277
1278 --
1279 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1280 -- case of filtering a single byte out of a list. It is more efficient
1281 -- to use /filterNotByte/ in this case.
1282 --
1283 -- > filterNotByte == filter . (/=)
1284 --
1285 -- filterNotByte is around 3x faster, and uses much less space, than its
1286 -- filter equivalent
1287 filterNotByte :: Word8 -> ByteString -> ByteString
1288 filterNotByte ch ps@(PS x s l)
1289     | null ps   = ps
1290     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1291         t <- go (f `plusPtr` s) p l
1292         return (t `minusPtr` p) -- actual length
1293     where
1294         STRICT3(go)
1295         go _ t 0 = return t
1296         go f t e = do w <- peek f
1297                       if w /= ch
1298                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1299                         else             go (f `plusPtr` 1) t               (e-1)
1300
1301 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1302 -- returns a ByteString containing those characters that satisfy the
1303 -- predicate. This function is subject to array fusion.
1304 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1305 filter p  = loopArr . loopU (filterEFL p) noAL
1306 {-# INLINE filter #-}
1307
1308 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1309 filterF k ps@(PS x s l)
1310     | null ps   = ps
1311     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1312         t <- go (f `plusPtr` s) p l
1313         return (t `minusPtr` p) -- actual length
1314     where
1315         STRICT3(go)
1316         go _ t 0 = return t
1317         go f t e = do w <- peek f
1318                       if k w
1319                         then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1320                         else             go (f `plusPtr` 1) t               (e - 1)
1321 {-# INLINE filterF #-}
1322
1323 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1324
1325 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1326 -- and returns the first element in matching the predicate, or 'Nothing'
1327 -- if there is no such element.
1328 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1329 find p ps = case filter p ps of
1330     q | null q -> Nothing
1331       | otherwise -> Just (unsafeHead q)
1332
1333 -- ---------------------------------------------------------------------
1334 -- Searching for substrings
1335
1336 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1337 -- iff the first is a prefix of the second.
1338 isPrefixOf :: ByteString -> ByteString -> Bool
1339 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1340     | l1 == 0   = True
1341     | l2 < l1   = False
1342     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1343         withForeignPtr x2 $ \p2 -> do
1344             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1345             return (i == 0)
1346
1347 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1348 -- iff the first is a suffix of the second.
1349 --
1350 -- The following holds:
1351 --
1352 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1353 --
1354 -- However, the real implemenation uses memcmp to compare the end of the
1355 -- string only, with no reverse required..
1356 isSuffixOf :: ByteString -> ByteString -> Bool
1357 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1358     | l1 == 0   = True
1359     | l2 < l1   = False
1360     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1361         withForeignPtr x2 $ \p2 -> do
1362             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1363             return (i == 0)
1364
1365 -- | Check whether one string is a substring of another. @isSubstringOf
1366 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1367 isSubstringOf :: ByteString -- ^ String to search for.
1368               -> ByteString -- ^ String to search in.
1369               -> Bool
1370 isSubstringOf p s = not $ P.null $ findSubstrings p s
1371
1372 -- | Get the first index of a substring in another string,
1373 --   or 'Nothing' if the string is not found.
1374 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1375 findSubstring :: ByteString -- ^ String to search for.
1376               -> ByteString -- ^ String to seach in.
1377               -> Maybe Int
1378 findSubstring = (listToMaybe .) . findSubstrings
1379
1380 -- | Find the indexes of all (possibly overlapping) occurances of a
1381 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1382 -- string matching algorithm.
1383 findSubstrings :: ByteString -- ^ String to search for.
1384                -> ByteString -- ^ String to seach in.
1385                -> [Int]
1386
1387 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1388   where
1389       patc x = pat `unsafeIndex` x
1390       strc x = str `unsafeIndex` x
1391
1392       -- maybe we should make kmpNext a UArray before using it in search?
1393       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1394       kmpNextL p _ | null p = []
1395       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1396                          ps = unsafeTail p
1397                          x = if not (null ps) && unsafeHead ps == patc j'
1398                                 then kmpNext Array.! j' else j'
1399                         in x:kmpNextL ps j'
1400       search i j = match ++ rest -- i: position in string, j: position in pattern
1401         where match = if j == m then [(i - j)] else []
1402               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1403       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1404                | otherwise = j
1405
1406 -- ---------------------------------------------------------------------
1407 -- Zipping
1408
1409 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1410 -- corresponding pairs of bytes. If one input ByteString is short,
1411 -- excess elements of the longer ByteString are discarded. This is
1412 -- equivalent to a pair of 'unpack' operations.
1413 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1414 zip ps qs
1415     | null ps || null qs = []
1416     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1417
1418 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1419 -- the first argument, instead of a tupling function.  For example,
1420 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1421 -- corresponding sums.
1422 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1423 zipWith f ps qs
1424     | null ps || null qs = []
1425     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1426
1427 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1428 -- ByteStrings. Note that this performs two 'pack' operations.
1429 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1430 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1431 {-# INLINE unzip #-}
1432
1433 -- ---------------------------------------------------------------------
1434 -- Special lists
1435
1436 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1437 inits :: ByteString -> [ByteString]
1438 inits (PS x s l) = [PS x s n | n <- [0..l]]
1439
1440 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1441 tails :: ByteString -> [ByteString]
1442 tails p | null p    = [empty]
1443         | otherwise = p : tails (unsafeTail p)
1444
1445 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1446
1447 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1448 elems :: ByteString -> [ByteString]
1449 elems (PS _ _ 0) = []
1450 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1451 {-# INLINE elems #-}
1452
1453 -- ---------------------------------------------------------------------
1454 -- ** Ordered 'ByteString's
1455
1456 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1457 sort :: ByteString -> ByteString
1458 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1459
1460     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1461     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1462
1463     let STRICT2(go)
1464         go 256 _   = return ()
1465         go i   ptr = do n <- peekElemOff arr i
1466                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1467                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1468     go 0 p
1469
1470 -- "countEach counts str l" counts the number of occurences of each Word8 in
1471 -- str, and stores the result in counts.
1472 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1473 STRICT3(countEach)
1474 countEach counts str l = go 0
1475  where
1476     STRICT1(go)
1477     go i | i == l    = return ()
1478          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1479                           x <- peekElemOff counts k
1480                           pokeElemOff counts k (x + 1)
1481                           go (i + 1)
1482
1483 {-
1484 sort :: ByteString -> ByteString
1485 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1486         memcpy p (f `plusPtr` s) l
1487         c_qsort p l -- inplace
1488 -}
1489
1490 {-
1491 sort = pack . List.sort . unpack
1492 -}
1493
1494 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1495 --
1496 -- Try some linear sorts: radix, counting
1497 -- Or mergesort.
1498 --
1499 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1500 -- sortBy f ps = undefined
1501
1502 -- ---------------------------------------------------------------------
1503 --
1504 -- Extensions to the basic interface
1505 --
1506
1507 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1508 -- check for the empty case, so there is an obligation on the programmer
1509 -- to provide a proof that the ByteString is non-empty.
1510 unsafeHead :: ByteString -> Word8
1511 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1512 {-# INLINE unsafeHead #-}
1513
1514 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1515 -- check for the empty case. As with 'unsafeHead', the programmer must
1516 -- provide a separate proof that the ByteString is non-empty.
1517 unsafeTail :: ByteString -> ByteString
1518 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1519 {-# INLINE unsafeTail #-}
1520
1521 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1522 -- This omits the bounds check, which means there is an accompanying
1523 -- obligation on the programmer to ensure the bounds are checked in some
1524 -- other way.
1525 unsafeIndex :: ByteString -> Int -> Word8
1526 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1527 {-# INLINE unsafeIndex #-}
1528
1529 -- ---------------------------------------------------------------------
1530 -- Low level constructors
1531
1532 #if defined(__GLASGOW_HASKELL__)
1533 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1534 -- Addr\# (an arbitrary machine address assumed to point outside the
1535 -- garbage-collected heap) into a @ByteString@. A much faster way to
1536 -- create an Addr\# is with an unboxed string literal, than to pack a
1537 -- boxed string. A unboxed string literal is compiled to a static @char
1538 -- []@ by GHC. Establishing the length of the string requires a call to
1539 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1540 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1541 -- if you know the length of the string statically.
1542 --
1543 -- An example:
1544 --
1545 -- > literalFS = packAddress "literal"#
1546 --
1547 packAddress :: Addr# -> ByteString
1548 packAddress addr# = inlinePerformIO $ do
1549     p <- newForeignPtr_ cstr
1550     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1551   where
1552     cstr = Ptr addr#
1553 {-# INLINE packAddress #-}
1554
1555 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1556 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1557 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1558 -- 'Addr\#' to the string, and the length of the string. Make sure the
1559 -- length is correct, otherwise use the safer 'packAddress' (where the
1560 -- length will be calculated once at runtime).
1561 unsafePackAddress :: Int -> Addr# -> ByteString
1562 unsafePackAddress len addr# = inlinePerformIO $ do
1563     p <- newForeignPtr_ cstr
1564     return $ PS p 0 len
1565     where cstr = Ptr addr#
1566
1567 #endif
1568
1569 -- | /O(1)/ Build a ByteString from a ForeignPtr
1570 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1571 fromForeignPtr fp l = PS fp 0 l
1572
1573 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1574 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1575 toForeignPtr (PS ps s l) = (ps, s, l)
1576
1577 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1578 -- current 'ByteString' from any larger string it was created from, as
1579 -- an 'Int'.
1580 skipIndex :: ByteString -> Int
1581 skipIndex (PS _ s _) = s
1582 {-# INLINE skipIndex #-}
1583
1584 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1585 -- finalizer associated to it. The ByteString length is calculated using
1586 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1587 packCString :: CString -> ByteString
1588 packCString cstr = inlinePerformIO $ do
1589     fp <- newForeignPtr_ (castPtr cstr)
1590     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1591
1592 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1593 -- have /no/ finalizer associated with it. This operation has /O(1)/
1594 -- complexity as we already know the final size, so no /strlen(3)/ is
1595 -- required.
1596 packCStringLen :: CStringLen -> ByteString
1597 packCStringLen (ptr,len) = inlinePerformIO $ do
1598     fp <- newForeignPtr_ (castPtr ptr)
1599     return $ PS fp 0 (fromIntegral len)
1600
1601 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1602 -- have a @free(3)@ finalizer associated to it.
1603 packMallocCString :: CString -> ByteString
1604 packMallocCString cstr = inlinePerformIO $ do
1605     fp <- newForeignFreePtr (castPtr cstr)
1606     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1607
1608 #if defined(__GLASGOW_HASKELL__)
1609 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1610 -- length, and an IO action representing a finalizer. This function is
1611 -- not available on Hugs.
1612 --
1613 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1614 packCStringFinalizer p l f = do
1615     fp <- FC.newForeignPtr p f
1616     return $ PS fp 0 l
1617
1618 -- | Explicitly run the finaliser associated with a 'ByteString'.
1619 -- Further references to this value may generate invalid memory
1620 -- references. This operation is unsafe, as there may be other
1621 -- 'ByteStrings' referring to the same underlying pages. If you use
1622 -- this, you need to have a proof of some kind that all 'ByteString's
1623 -- ever generated from the underlying byte array are no longer live.
1624 unsafeFinalize :: ByteString -> IO ()
1625 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1626
1627 #endif
1628
1629 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1630 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1631 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1632 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1633     where
1634       alloc = withForeignPtr ps $ \p -> do
1635                 buf <- c_malloc (fromIntegral l+1)
1636                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1637                 poke (buf `plusPtr` l) (0::Word8)
1638                 return $ castPtr buf
1639
1640 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1641 -- Warning: modifying the @CString@ will affect the @ByteString@.
1642 -- Why is this function unsafe? It relies on the null byte at the end of
1643 -- the ByteString to be there. This is /not/ the case if your ByteString
1644 -- has been spliced from a larger string (i.e. with take or drop).
1645 -- Unless you can guarantee the null byte, you should use the safe
1646 -- version, which will copy the string first.
1647 --
1648 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1649 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1650
1651 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1652 --   This is mainly useful to allow the rest of the data pointed
1653 --   to by the 'ByteString' to be garbage collected, for example
1654 --   if a large string has been read in, and only a small part of it
1655 --   is needed in the rest of the program.
1656 copy :: ByteString -> ByteString
1657 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1658
1659 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1660 -- CString is going to be deallocated from C land.
1661 copyCString :: CString -> ByteString
1662 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1663
1664 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1665 copyCStringLen :: CStringLen -> ByteString
1666 copyCStringLen (cstr, len) = inlinePerformIO $ do
1667     fp <- mallocForeignPtrArray (len+1)
1668     withForeignPtr fp $ \p -> do
1669         memcpy p (castPtr cstr) len
1670         poke (p `plusPtr` len) (0 :: Word8)
1671     return $! PS fp 0 len
1672
1673 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1674 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1675 -- This is analogous to unsafeUseAsCString, and comes with the same
1676 -- safety requirements.
1677 --
1678 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1679 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1680
1681 -- | Given the maximum size needed and a function to make the contents
1682 -- of a ByteString, generate makes the 'ByteString'. The generating
1683 -- function is required to return the actual final size (<= the maximum
1684 -- size), and the resulting byte array is realloced to this size.  The
1685 -- string is padded at the end with a null byte.
1686 --
1687 -- generate is the main mechanism for creating custom, efficient
1688 -- ByteString functions, using Haskell or C functions to fill the space.
1689 --
1690 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1691 generate i f = do
1692     p <- mallocArray (i+1)
1693     i' <- f p
1694     p' <- reallocArray p (i'+1)
1695     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1696     fp <- newForeignFreePtr p'
1697     return $ PS fp 0 i'
1698
1699 -- ---------------------------------------------------------------------
1700 -- line IO
1701
1702 #if defined(__GLASGOW_HASKELL__)
1703
1704 -- | getLine, read a line from stdin.
1705 getLine :: IO ByteString
1706 getLine = hGetLine stdin
1707
1708 -- | hGetLine. read a ByteString from a handle
1709 hGetLine :: Handle -> IO ByteString
1710 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1711     case haBufferMode handle_ of
1712        NoBuffering -> error "no buffering"
1713        _other      -> hGetLineBuffered handle_
1714
1715  where
1716     hGetLineBuffered handle_ = do
1717         let ref = haBuffer handle_
1718         buf <- readIORef ref
1719         hGetLineBufferedLoop handle_ ref buf 0 []
1720
1721     hGetLineBufferedLoop handle_ ref
1722             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1723         len `seq` do
1724         off <- findEOL r w raw
1725         let new_len = len + off - r
1726         xs <- mkPS raw r off
1727
1728       -- if eol == True, then off is the offset of the '\n'
1729       -- otherwise off == w and the buffer is now empty.
1730         if off /= w
1731             then do if (w == off + 1)
1732                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1733                             else writeIORef ref buf{ bufRPtr = off + 1 }
1734                     mkBigPS new_len (xs:xss)
1735             else do
1736                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1737                                     buf{ bufWPtr=0, bufRPtr=0 }
1738                  case maybe_buf of
1739                     -- Nothing indicates we caught an EOF, and we may have a
1740                     -- partial line to return.
1741                     Nothing -> do
1742                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1743                          if new_len > 0
1744                             then mkBigPS new_len (xs:xss)
1745                             else ioe_EOF
1746                     Just new_buf ->
1747                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1748
1749     -- find the end-of-line character, if there is one
1750     findEOL r w raw
1751         | r == w = return w
1752         | otherwise =  do
1753             (c,r') <- readCharFromBuffer raw r
1754             if c == '\n'
1755                 then return r -- NB. not r': don't include the '\n'
1756                 else findEOL r' w raw
1757
1758     maybeFillReadBuffer fd is_line is_stream buf = catch
1759         (do buf' <- fillReadBuffer fd is_line is_stream buf
1760             return (Just buf'))
1761         (\e -> if isEOFError e then return Nothing else ioError e)
1762
1763 -- TODO, rewrite to use normal memcpy
1764 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1765 mkPS buf start end = do
1766     let len = end - start
1767     fp <- mallocByteString len
1768     withForeignPtr fp $ \p -> do
1769         memcpy_ptr_baoff p buf start (fromIntegral len)
1770         return (PS fp 0 len)
1771
1772 mkBigPS :: Int -> [ByteString] -> IO ByteString
1773 mkBigPS _ [ps] = return ps
1774 mkBigPS _ pss = return $! concat (P.reverse pss)
1775
1776 #endif
1777
1778 -- ---------------------------------------------------------------------
1779 -- Block IO
1780
1781 -- | Outputs a 'ByteString' to the specified 'Handle'.
1782 hPut :: Handle -> ByteString -> IO ()
1783 hPut _ (PS _ _ 0)  = return ()
1784 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1785 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1786
1787 -- | Write a ByteString to stdout
1788 putStr :: ByteString -> IO ()
1789 putStr = hPut stdout
1790
1791 -- | Write a ByteString to stdout, appending a newline byte
1792 putStrLn :: ByteString -> IO ()
1793 putStrLn ps = hPut stdout ps >> hPut stdout nl
1794     where nl = packByte 0x0a
1795
1796 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1797 -- is far more efficient than reading the characters into a 'String'
1798 -- and then using 'pack'.
1799 hGet :: Handle -> Int -> IO ByteString
1800 hGet _ 0 = return empty
1801 hGet h i = do fp <- mallocByteString i
1802               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1803               return $ PS fp 0 l
1804
1805 #if defined(__GLASGOW_HASKELL__)
1806 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1807 -- waiting for data to become available, instead it returns only whatever data
1808 -- is available.
1809 hGetNonBlocking :: Handle -> Int -> IO ByteString
1810 hGetNonBlocking _ 0 = return empty
1811 hGetNonBlocking h i = do
1812     fp <- mallocByteString i
1813     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1814     return $ PS fp 0 l
1815 #endif
1816
1817 -- | Read entire handle contents into a 'ByteString'.
1818 --
1819 -- As with 'hGet', the string representation in the file is assumed to
1820 -- be ISO-8859-1.
1821 --
1822 hGetContents :: Handle -> IO ByteString
1823 hGetContents h = do
1824     let start_size = 1024
1825     p <- mallocArray start_size
1826     i <- hGetBuf h p start_size
1827     if i < start_size
1828         then do p' <- reallocArray p i
1829                 fp <- newForeignFreePtr p'
1830                 return $ PS fp 0 i
1831         else f p start_size
1832     where
1833         f p s = do
1834             let s' = 2 * s
1835             p' <- reallocArray p s'
1836             i  <- hGetBuf h (p' `plusPtr` s) s
1837             if i < s
1838                 then do let i' = s + i
1839                         p'' <- reallocArray p' i'
1840                         fp  <- newForeignFreePtr p''
1841                         return $ PS fp 0 i'
1842                 else f p' s'
1843
1844 -- | getContents. Equivalent to hGetContents stdin
1845 getContents :: IO ByteString
1846 getContents = hGetContents stdin
1847
1848 -- | Read an entire file directly into a 'ByteString'.  This is far more
1849 -- efficient than reading the characters into a 'String' and then using
1850 -- 'pack'.  It also may be more efficient than opening the file and
1851 -- reading it using hGet.
1852 readFile :: FilePath -> IO ByteString
1853 readFile f = do
1854     h <- openBinaryFile f ReadMode
1855     l <- hFileSize h
1856     s <- hGet h $ fromIntegral l
1857     hClose h
1858     return s
1859
1860 -- | Write a 'ByteString' to a file.
1861 writeFile :: FilePath -> ByteString -> IO ()
1862 writeFile f ps = do
1863     h <- openBinaryFile f WriteMode
1864     hPut h ps
1865     hClose h
1866
1867 {-
1868 --
1869 -- Disable until we can move it into a portable .hsc file
1870 --
1871
1872 -- | Like readFile, this reads an entire file directly into a
1873 -- 'ByteString', but it is even more efficient.  It involves directly
1874 -- mapping the file to memory.  This has the advantage that the contents
1875 -- of the file never need to be copied.  Also, under memory pressure the
1876 -- page may simply be discarded, while in the case of readFile it would
1877 -- need to be written to swap.  If you read many small files, mmapFile
1878 -- will be less memory-efficient than readFile, since each mmapFile
1879 -- takes up a separate page of memory.  Also, you can run into bus
1880 -- errors if the file is modified.  As with 'readFile', the string
1881 -- representation in the file is assumed to be ISO-8859-1.
1882 --
1883 -- On systems without mmap, this is the same as a readFile.
1884 --
1885 mmapFile :: FilePath -> IO ByteString
1886 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1887
1888 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1889 mmap f = do
1890     h <- openBinaryFile f ReadMode
1891     l <- fromIntegral `fmap` hFileSize h
1892     -- Don't bother mmaping small files because each mmapped file takes up
1893     -- at least one full VM block.
1894     if l < mmap_limit
1895        then do thefp <- mallocByteString l
1896                withForeignPtr thefp $ \p-> hGetBuf h p l
1897                hClose h
1898                return (thefp, l)
1899        else do
1900                -- unix only :(
1901                fd <- fromIntegral `fmap` handleToFd h
1902                p  <- my_mmap l fd
1903                fp <- if p == nullPtr
1904                      then do thefp <- mallocByteString l
1905                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1906                              return thefp
1907                      else do
1908                           -- The munmap leads to crashes on OpenBSD.
1909                           -- maybe there's a use after unmap in there somewhere?
1910 #if !defined(__OpenBSD__)
1911                              let unmap = c_munmap p l >> return ()
1912 #else
1913                              let unmap = return ()
1914 #endif
1915                              fp <- FC.newForeignPtr p unmap
1916                              return fp
1917                c_close fd
1918                hClose h
1919                return (fp, l)
1920     where mmap_limit = 16*1024
1921 -}
1922
1923 #if defined(__GLASGOW_HASKELL__)
1924 --
1925 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1926 --
1927 getArgs :: IO [ByteString]
1928 getArgs =
1929   alloca $ \ p_argc ->
1930   alloca $ \ p_argv -> do
1931     getProgArgv p_argc p_argv
1932     p    <- fromIntegral `fmap` peek p_argc
1933     argv <- peek p_argv
1934     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1935 #endif
1936
1937 -- ---------------------------------------------------------------------
1938 -- Internal utilities
1939
1940 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1941 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1942 -- convenience for ByteString construction.
1943 w2c :: Word8 -> Char
1944 #if !defined(__GLASGOW_HASKELL__)
1945 w2c = chr . fromIntegral
1946 #else
1947 w2c = unsafeChr . fromIntegral
1948 #endif
1949 {-# INLINE w2c #-}
1950
1951 c2w :: Char -> Word8
1952 c2w = fromIntegral . ord
1953 {-# INLINE c2w #-}
1954
1955 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1956 -- is padded with a null byte.
1957 mallocByteString :: Int -> IO (ForeignPtr Word8)
1958 mallocByteString l = do
1959     fp <- mallocForeignPtrArray (l+1)
1960     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1961     return fp
1962
1963 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1964 -- argument gives the final size of the ByteString. Unlike 'generate'
1965 -- the ByteString is not reallocated if the final size is less than the
1966 -- estimated size. Also, unlike 'generate' ByteString's created this way
1967 -- are managed on the Haskell heap.
1968 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1969 create l write_ptr = inlinePerformIO $ do
1970     fp <- mallocByteString (l+1)
1971     withForeignPtr fp $ \p -> write_ptr p
1972     return $ PS fp 0 l
1973 {-# INLINE create #-}
1974
1975 -- | Perform an operation with a temporary ByteString
1976 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1977 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1978 {-# INLINE withPtr #-}
1979
1980 -- Common up near identical calls to `error' to reduce the number
1981 -- constant strings created when compiled:
1982 errorEmptyList :: String -> a
1983 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1984 {-# INLINE errorEmptyList #-}
1985
1986 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1987 -- of the string if no element is found, rather than Nothing.
1988 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1989 STRICT2(findIndexOrEnd)
1990 findIndexOrEnd f ps
1991     | null ps           = 0
1992     | f (unsafeHead ps) = 0
1993     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
1994 {-# INLINE findIndexOrEnd #-}
1995
1996 -- Find from the end of the string using predicate
1997 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1998 STRICT2(findFromEndUntil)
1999 findFromEndUntil f ps@(PS x s l) =
2000     if null ps then 0
2001     else if f (last ps) then l
2002          else findFromEndUntil f (PS x s (l-1))
2003
2004 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2005 -- it exposes lots of things to further inlining
2006 --
2007 {-# INLINE inlinePerformIO #-}
2008 inlinePerformIO :: IO a -> a
2009 #if defined(__GLASGOW_HASKELL__)
2010 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2011 #else
2012 inlinePerformIO = unsafePerformIO
2013 #endif
2014
2015 {-# INLINE newForeignFreePtr #-}
2016 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2017 #if defined(__GLASGOW_HASKELL__)
2018 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2019 #else
2020 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2021 #endif
2022
2023 -- ---------------------------------------------------------------------
2024 --
2025 -- Standard C functions
2026 --
2027
2028 foreign import ccall unsafe "string.h strlen" c_strlen
2029     :: CString -> CInt
2030
2031 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2032     :: CInt -> IO (Ptr Word8)
2033
2034 foreign import ccall unsafe "static stdlib.h free" c_free
2035     :: Ptr Word8 -> IO ()
2036
2037 #if !defined(__GLASGOW_HASKELL__)
2038 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2039     :: FunPtr (Ptr Word8 -> IO ())
2040 #endif
2041
2042 foreign import ccall unsafe "string.h memset" memset
2043     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2044
2045 foreign import ccall unsafe "string.h memchr" memchr
2046     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2047
2048 foreign import ccall unsafe "string.h memcmp" memcmp
2049     :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2050
2051 foreign import ccall unsafe "string.h memcpy" memcpy
2052     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2053
2054 -- ---------------------------------------------------------------------
2055 --
2056 -- Uses our C code
2057 --
2058
2059 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2060     :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2061
2062 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2063     :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2064
2065 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2066     :: Ptr Word8 -> Int -> Word8
2067
2068 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2069     :: Ptr Word8 -> Int -> Word8
2070
2071 foreign import ccall unsafe "static fpstring.h count" c_count
2072     :: Ptr Word8 -> Int -> Word8 -> Int
2073
2074 -- ---------------------------------------------------------------------
2075 -- MMap
2076
2077 {-
2078 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2079     :: Int -> Int -> IO (Ptr Word8)
2080
2081 foreign import ccall unsafe "static unistd.h close" c_close
2082     :: Int -> IO Int
2083
2084 #  if !defined(__OpenBSD__)
2085 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2086     :: Ptr Word8 -> Int -> IO Int
2087 #  endif
2088 -}
2089
2090 -- ---------------------------------------------------------------------
2091 -- Internal GHC Haskell magic
2092
2093 #if defined(__GLASGOW_HASKELL__)
2094 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2095     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2096
2097 foreign import ccall unsafe "__hscore_memcpy_src_off"
2098    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2099 #endif
2100
2101 -- ---------------------------------------------------------------------
2102 --
2103 -- Functional array fusion for ByteStrings.
2104 --
2105 -- From the Data Parallel Haskell project,
2106 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2107 --
2108
2109 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2110 -- the fact that no bottoms of this type are ever constructed; hence, we can
2111 -- assume @(_ :: NoAL) `seq` x = x@.
2112 --
2113 data NoAL = NoAL
2114
2115 -- | Special forms of loop arguments
2116 --
2117 -- * These are common special cases for the three function arguments of gen
2118 --   and loop; we give them special names to make it easier to trigger RULES
2119 --   applying in the special cases represented by these arguments.  The
2120 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2121 --   two simplifier phases.
2122 --
2123 -- * In the case where the accumulator is not needed, it is better to always
2124 --   explicitly return a value `()', rather than just copy the input to the
2125 --   output, as the former gives GHC better local information.
2126 --
2127
2128 -- | Element function expressing a mapping only
2129 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2130 mapEFL f = \_ e -> (noAL, (Just $ f e))
2131 {-# INLINE [1] mapEFL #-}
2132
2133 -- | Element function implementing a filter function only
2134 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2135 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2136 {-# INLINE [1] filterEFL #-}
2137
2138 -- |Element function expressing a reduction only
2139 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2140 foldEFL f = \a e -> (f a e, Nothing)
2141 {-# INLINE [1] foldEFL #-}
2142
2143 -- | No accumulator
2144 noAL :: NoAL
2145 noAL = NoAL
2146 {-# INLINE [1] noAL #-}
2147
2148 -- | Projection functions that are fusion friendly (as in, we determine when
2149 -- they are inlined)
2150 loopArr :: (ByteString, acc) -> ByteString
2151 loopArr (arr, _) = arr
2152 {-# INLINE [1] loopArr #-}
2153
2154 loopAcc :: (ByteString, acc) -> acc
2155 loopAcc (_, acc) = acc
2156 {-# INLINE [1] loopAcc #-}
2157
2158 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2159 loopSndAcc (arr, (_, acc)) = (arr, acc)
2160 {-# INLINE [1] loopSndAcc #-}
2161
2162 ------------------------------------------------------------------------
2163
2164 -- | Iteration over over ByteStrings
2165 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2166       -> acc                                   -- ^ initial acc value
2167       -> ByteString                            -- ^ input ByteString
2168       -> (ByteString, acc)
2169
2170 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2171     fp          <- mallocByteString i
2172     (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2173         (acc, i') <- go (a `plusPtr` s) p start
2174         if i' == i
2175             then return (fp,i,acc)                      -- no realloc for map
2176             else do fp_ <- mallocByteString (i'+1)      -- realloc
2177                     withForeignPtr fp_ $ \p' -> do
2178                         memcpy p' p i'
2179                         poke (p' `plusPtr` i') (0::Word8)
2180                     return (fp_,i',acc)
2181
2182     return (PS ptr 0 n, acc)
2183   where
2184     go p ma = trans 0 0
2185         where
2186             STRICT3(trans)
2187             trans a_off ma_off acc
2188                 | a_off >= i = return (acc, ma_off)
2189                 | otherwise  = do
2190                     x <- peekByteOff p a_off
2191                     let (acc', oe) = f acc x
2192                     ma_off' <- case oe of
2193                         Nothing  -> return ma_off
2194                         Just e   -> do pokeByteOff ma ma_off e
2195                                        return $ ma_off + 1
2196                     trans (a_off+1) ma_off' acc'
2197
2198 {-# INLINE [1] loopU #-}
2199
2200 {-# RULES
2201
2202 "array fusion!" forall em1 em2 start1 start2 arr.
2203   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2204     let em (acc1, acc2) e =
2205             case em1 acc1 e of
2206                 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2207                 (acc1', Just e') ->
2208                     case em2 acc2 e' of
2209                         (acc2', res) -> ((acc1', acc2'), res)
2210     in loopSndAcc (loopU em (start1, start2) arr)
2211
2212 "loopArr/loopSndAcc" forall x.
2213   loopArr (loopSndAcc x) = loopArr x
2214
2215 "seq/NoAL" forall (u::NoAL) e.
2216   u `seq` e = e
2217
2218  #-}
2219