Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with Prelude functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         packByte,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: Word8 -> ByteString -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57         head,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         last,                   -- :: ByteString -> Word8
  60         init,                   -- :: ByteString -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62
  63         -- * Special ByteStrings
  64         inits,                  -- :: ByteString -> [ByteString]
  65         tails,                  -- :: ByteString -> [ByteString]
  66         elems,                  -- :: ByteString -> [ByteString]
  67
  68         -- * Transformating ByteStrings
  69         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  70         reverse,                -- :: ByteString -> ByteString
  71         intersperse,            -- :: Word8 -> ByteString -> ByteString
  72         transpose,              -- :: [ByteString] -> [ByteString]
  73
  74         -- * Reducing 'ByteString's
  75         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  76         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  77         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  78         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  80
  81         -- ** Special folds
  82         concat,                 -- :: [ByteString] -> ByteString
  83         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  84         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  85         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  86         maximum,                -- :: ByteString -> Word8
  87         minimum,                -- :: ByteString -> Word8
  88         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  89
  90         -- * Generating and unfolding ByteStrings
  91         replicate,              -- :: Int -> Word8 -> ByteString
  92         unfoldrN,               -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  93
  94         -- * Substrings
  95
  96         -- ** Breaking strings
  97         take,                   -- :: Int -> ByteString -> ByteString
  98         drop,                   -- :: Int -> ByteString -> ByteString
  99         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 100         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 101         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 102         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 103         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 104         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 105
 106         -- ** Breaking and dropping on specific bytes
 107         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 108         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 109         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 110         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 111
 112         -- ** Breaking into many substrings
 113         split,                  -- :: Word8 -> ByteString -> [ByteString]
 114         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 115         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 116         group,                  -- :: ByteString -> [ByteString]
 117         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 118
 119         -- ** Joining strings
 120         join,                   -- :: ByteString -> [ByteString] -> ByteString
 121         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 122
 123         -- * Indexing ByteStrings
 124         index,                  -- :: ByteString -> Int -> Word8
 125         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 126         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 127         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 128         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 129         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 130         count,                  -- :: Word8 -> ByteString -> Int
 131
 132         -- * Ordered ByteStrings
 133         sort,                   -- :: ByteString -> ByteString
 134
 135         -- * Searching ByteStrings
 136
 137         -- ** Searching by equality
 138         -- | These functions use memchr(3) to efficiently search the ByteString
 139
 140         elem,                   -- :: Word8 -> ByteString -> Bool
 141         notElem,                -- :: Word8 -> ByteString -> Bool
 142         filterByte,             -- :: Word8 -> ByteString -> ByteString
 143         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 144
 145         -- ** Searching with a predicate
 146         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 147         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 148
 149         -- ** Prefixes and suffixes
 150         -- | These functions use memcmp(3) to efficiently compare substrings
 151         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 152         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 153
 154         -- ** Search for arbitrary substrings
 155         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 156         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 157         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 158
 159         -- * Zipping and unzipping ByteStrings
 160         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 161         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 162         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 163
 164         -- * Unchecked access
 165         unsafeHead,             -- :: ByteString -> Word8
 166         unsafeTail,             -- :: ByteString -> ByteString
 167         unsafeIndex,            -- :: ByteString -> Int -> Word8
 168
 169         -- * Low level introduction and elimination
 170         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 171         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 172         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 173         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 174         skipIndex,              -- :: ByteString -> Int
 175
 176         -- ** Packing CStrings and pointers
 177         packCString,            -- :: CString -> ByteString
 178         packCStringLen,         -- :: CString -> ByteString
 179         packMallocCString,      -- :: CString -> ByteString
 180
 181 #if defined(__GLASGOW_HASKELL__)
 182         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 183         packAddress,            -- :: Addr# -> ByteString
 184         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 185         unsafeFinalize,         -- :: ByteString -> IO ()
 186 #endif
 187
 188         -- ** Using ByteStrings as CStrings
 189         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 190         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 191         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 192
 193         -- ** Copying ByteStrings
 194         -- | These functions perform memcpy(3) operations
 195         copy,                   -- :: ByteString -> ByteString
 196         copyCString,            -- :: CString -> ByteString
 197         copyCStringLen,         -- :: CStringLen -> ByteString
 198
 199         -- * I\/O with @ByteString@s
 200
 201         -- ** Standard input and output
 202
 203 #if defined(__GLASGOW_HASKELL__)
 204         getLine,                -- :: IO ByteString
 205 #endif
 206         getContents,            -- :: IO ByteString
 207         putStr,                 -- :: ByteString -> IO ()
 208         putStrLn,               -- :: ByteString -> IO ()
 209
 210         -- ** Files
 211         readFile,               -- :: FilePath -> IO ByteString
 212         writeFile,              -- :: FilePath -> ByteString -> IO ()
 213 --      mmapFile,               -- :: FilePath -> IO ByteString
 214
 215         -- ** I\/O with Handles
 216 #if defined(__GLASGOW_HASKELL__)
 217         getArgs,                -- :: IO [ByteString]
 218         hGetLine,               -- :: Handle -> IO ByteString
 219         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 220 #endif
 221         hGetContents,           -- :: Handle -> IO ByteString
 222         hGet,                   -- :: Handle -> Int -> IO ByteString
 223         hPut,                   -- :: Handle -> ByteString -> IO ()
 224
 225         -- * Fusion utilities
 226 #if defined(__GLASGOW_HASKELL__)
 227         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 228 #endif
 229
 230         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 231         loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL,
 232         filterF, mapF
 233
 234   ) where
 235
 236 import qualified Prelude as P
 237 import Prelude hiding           (reverse,head,tail,last,init,null
 238                                 ,length,map,lines,foldl,foldr,unlines
 239                                 ,concat,any,take,drop,splitAt,takeWhile
 240                                 ,dropWhile,span,break,elem,filter,maximum
 241                                 ,minimum,all,concatMap,foldl1,foldr1
 242                                 ,readFile,writeFile,replicate
 243                                 ,getContents,getLine,putStr,putStrLn
 244                                 ,zip,zipWith,unzip,notElem)
 245
 246 import qualified Data.List as List
 247
 248 import Data.Char
 249 import Data.Word                (Word8)
 250 import Data.Maybe               (listToMaybe)
 251 import Data.Array               (listArray)
 252 import qualified Data.Array as Array ((!))
 253
 254 -- Control.Exception.bracket not available in yhc or nhc
 255 import Control.Exception        (bracket)
 256 import Control.Monad            (when)
 257
 258 import Foreign.C.String         (CString, CStringLen)
 259 import Foreign.C.Types          (CSize, CInt)
 260 import Foreign.ForeignPtr
 261 import Foreign.Marshal.Array
 262 import Foreign.Ptr
 263 import Foreign.Storable         (Storable(..))
 264
 265 -- hGetBuf and hPutBuf not available in yhc or nhc
 266 import System.IO                (stdin,stdout,hClose,hFileSize
 267                                 ,hGetBuf,hPutBuf,openBinaryFile
 268                                 ,Handle,IOMode(..))
 269
 270 #if !defined(__GLASGOW_HASKELL__)
 271 import System.IO.Unsafe
 272 #endif
 273
 274 #if defined(__GLASGOW_HASKELL__)
 275
 276 import Data.Generics            (Data(..), Typeable(..))
 277
 278 import System.IO                (hGetBufNonBlocking)
 279 import System.IO.Error          (isEOFError)
 280
 281 import Foreign.Marshal          (alloca)
 282 import qualified Foreign.Concurrent as FC (newForeignPtr)
 283
 284 import GHC.Handle
 285 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 286 import GHC.Base                 (build, unsafeChr)
 287 import GHC.Word hiding (Word8)
 288 import GHC.Ptr                  (Ptr(..))
 289 import GHC.ST                   (ST(..))
 290 import GHC.IOBase
 291
 292 #endif
 293
 294 -- CFILES stuff is Hugs only
 295 {-# CFILES cbits/fpstring.c #-}
 296
 297 -- -----------------------------------------------------------------------------
 298 --
 299 -- Useful macros, until we have bang patterns
 300 --
 301
 302 #define STRICT1(f) f a | a `seq` False = undefined
 303 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 304 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 305 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 306 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 307
 308 -- -----------------------------------------------------------------------------
 309
 310 -- | A space-efficient representation of a Word8 vector, supporting many
 311 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 312 --
 313 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 314 --
 315 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 316                      {-# UNPACK #-} !Int
 317                      {-# UNPACK #-} !Int
 318
 319 #if defined(__GLASGOW_HASKELL__)
 320     deriving (Data, Typeable)
 321 #endif
 322
 323 instance Eq  ByteString
 324     where (==)    = eq
 325
 326 instance Ord ByteString
 327     where compare = compareBytes
 328
 329 instance Show ByteString where
 330     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 331
 332 instance Read ByteString where
 333     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 334
 335 {-
 336 instance Arbitrary PackedString where
 337     arbitrary = P.pack `fmap` arbitrary
 338     coarbitrary s = coarbitrary (P.unpack s)
 339 -}
 340
 341 -- | /O(n)/ Equality on the 'ByteString' type.
 342 eq :: ByteString -> ByteString -> Bool
 343 eq a@(PS p s l) b@(PS p' s' l')
 344     | l /= l'            = False    -- short cut on length
 345     | p == p' && s == s' = True     -- short cut for the same string
 346     | otherwise          = compareBytes a b == EQ
 347 {-# INLINE eq #-}
 348
 349 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 350 compareBytes :: ByteString -> ByteString -> Ordering
 351 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 352     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 353     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 354     | otherwise                         = inlinePerformIO $
 355         withForeignPtr x1 $ \p1 ->
 356         withForeignPtr x2 $ \p2 -> do
 357             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 358             return $ case i `compare` 0 of
 359                         EQ  -> l1 `compare` l2
 360                         x   -> x
 361 {-# INLINE compareBytes #-}
 362
 363 {-
 364 --
 365 -- About 4x slower over 32M
 366 --
 367 compareBytes :: ByteString -> ByteString -> Ordering
 368 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 369     withForeignPtr fp1 $ \p1 ->
 370         withForeignPtr fp2 $ \p2 ->
 371             cmp (p1 `plusPtr` off1)
 372                 (p2 `plusPtr` off2) 0 len1 len2
 373
 374 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 375 STRICT5(cmp)
 376 cmp p1 p2 n len1 len2
 377       | n == len1 = if n == len2 then return EQ else return LT
 378       | n == len2 = return GT
 379       | otherwise = do
 380           (a :: Word8) <- peekByteOff p1 n
 381           (b :: Word8) <- peekByteOff p2 n
 382           case a `compare` b of
 383                 EQ -> cmp p1 p2 (n+1) len1 len2
 384                 LT -> return LT
 385                 GT -> return GT
 386 {-# INLINE compareBytes #-}
 387 -}
 388
 389 -- -----------------------------------------------------------------------------
 390 -- Introducing and eliminating 'ByteString's
 391
 392 -- | /O(1)/ The empty 'ByteString'
 393 empty :: ByteString
 394 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 395 {-# NOINLINE empty #-}
 396
 397 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 398 packByte :: Word8 -> ByteString
 399 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
 400     withForeignPtr fp $ \p -> poke p c
 401     return $ PS fp 0 1
 402 {-# INLINE packByte #-}
 403
 404 --
 405 -- XXX The unsafePerformIO is critical!
 406 --
 407 -- Otherwise:
 408 --
 409 --  packByte 255 `compare` packByte 127
 410 --
 411 -- is compiled to:
 412 --
 413 --  case mallocByteString 2 of
 414 --      ForeignPtr f internals ->
 415 --           case writeWord8OffAddr# f 0 255 of _ ->
 416 --           case writeWord8OffAddr# f 0 127 of _ ->
 417 --           case eqAddr# f f of
 418 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 419 --                                        (GHC.Prim.plusAddr# f 0)
 420 --
 421 --
 422
 423 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 424 --
 425 -- For applications with large numbers of string literals, pack can be a
 426 -- bottleneck. In such cases, consider using packAddress (GHC only).
 427 pack :: [Word8] -> ByteString
 428
 429 #if !defined(__GLASGOW_HASKELL__)
 430
 431 pack str = create (P.length str) $ \p -> go p str
 432     where
 433         go _ []     = return ()
 434         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 435
 436 #else /* hack away */
 437
 438 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 439     where
 440         go _ _ []        = return ()
 441         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 442
 443         writeByte p i c = ST $ \s# ->
 444             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 445
 446 #endif
 447
 448 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 449 unpack :: ByteString -> [Word8]
 450
 451 #if !defined(__GLASGOW_HASKELL__)
 452
 453 unpack (PS _  _ 0) = []
 454 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 455         go (p `plusPtr` s) (l - 1) []
 456     where
 457         STRICT3(go)
 458         go p 0 acc = peek p          >>= \e -> return (e : acc)
 459         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 460 {-# INLINE unpack #-}
 461
 462 #else
 463
 464 unpack ps = build (unpackFoldr ps)
 465 {-# INLINE unpack #-}
 466
 467 unpackList :: ByteString -> [Word8]
 468 unpackList (PS fp off len) = withPtr fp $ \p -> do
 469     let STRICT3(loop)
 470         loop _ (-1) acc = return acc
 471         loop q n acc = do
 472            a <- peekByteOff q n
 473            loop q (n-1) (a : acc)
 474     loop (p `plusPtr` off) (len-1) []
 475
 476 {-# RULES
 477 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 478  #-}
 479
 480 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 481 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 482     let STRICT3(loop)
 483         loop _ (-1) acc = return acc
 484         loop q n    acc = do
 485            a <- peekByteOff q n
 486            loop q (n-1) (a `f` acc)
 487     loop (p `plusPtr` off) (len-1) ch
 488 {-# INLINE [0] unpackFoldr #-}
 489
 490 #endif
 491
 492 ------------------------------------------------------------------------
 493
 494 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 495 -- conversion function
 496 packWith :: (a -> Word8) -> [a] -> ByteString
 497 packWith k str = create (P.length str) $ \p -> go p str
 498     where
 499         STRICT2(go)
 500         go _ []     = return ()
 501         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 502 {-# INLINE packWith #-}
 503 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 504
 505 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 506 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 507 unpackWith _ (PS _  _ 0) = []
 508 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 509         go (p `plusPtr` s) (l - 1) []
 510     where
 511         STRICT3(go)
 512         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 513         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 514 {-# INLINE unpackWith #-}
 515 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 516
 517 -- ---------------------------------------------------------------------
 518 -- Basic interface
 519
 520 -- | /O(1)/ Test whether a ByteString is empty.
 521 null :: ByteString -> Bool
 522 null (PS _ _ l) = l == 0
 523 {-# INLINE null #-}
 524
 525 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 526 length :: ByteString -> Int
 527 length (PS _ _ l) = l
 528
 529 #if defined(__GLASGOW_HASKELL__)
 530 {-# INLINE [1] length #-}
 531 #endif
 532
 533 {-#
 534
 535 -- Translate length into a loop.
 536 -- Performace ok, but allocates too much, so disable for now.
 537
 538   "length/loop" forall f acc s .
 539   length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
 540
 541   #-}
 542
 543 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 544 -- complexity, as it requires a memcpy.
 545 cons :: Word8 -> ByteString -> ByteString
 546 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 547         poke p c
 548         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 549 {-# INLINE cons #-}
 550
 551 -- todo fuse
 552
 553 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 554 snoc :: ByteString -> Word8 -> ByteString
 555 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 556         memcpy p (f `plusPtr` s) (fromIntegral l)
 557         poke (p `plusPtr` l) c
 558 {-# INLINE snoc #-}
 559
 560 -- todo fuse
 561
 562 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 563 head :: ByteString -> Word8
 564 head ps@(PS x s _)
 565     | null ps   = errorEmptyList "head"
 566     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 567 {-# INLINE head #-}
 568
 569 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 570 tail :: ByteString -> ByteString
 571 tail (PS p s l)
 572     | l <= 0    = errorEmptyList "tail"
 573     | otherwise = PS p (s+1) (l-1)
 574 {-# INLINE tail #-}
 575
 576 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 577 last :: ByteString -> Word8
 578 last ps@(PS x s l)
 579     | null ps   = errorEmptyList "last"
 580     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 581 {-# INLINE last #-}
 582
 583 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 584 init :: ByteString -> ByteString
 585 init (PS p s l)
 586     | l <= 0    = errorEmptyList "init"
 587     | otherwise = PS p s (l-1)
 588 {-# INLINE init #-}
 589
 590 -- | /O(n)/ Append two ByteStrings
 591 append :: ByteString -> ByteString -> ByteString
 592 append xs ys | null xs   = ys
 593              | null ys   = xs
 594              | otherwise = concat [xs,ys]
 595 {-# INLINE append #-}
 596
 597 -- ---------------------------------------------------------------------
 598 -- Transformations
 599
 600 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 601 -- element of @xs@. This function is subject to array fusion.
 602 map :: (Word8 -> Word8) -> ByteString -> ByteString
 603 map f = loopArr . loopU (mapEFL f) noAL
 604 {-# INLINE map #-}
 605
 606 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 607 -- slightly faster for one-shot cases.
 608 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
 609 STRICT2(mapF)
 610 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 611     np <- mallocByteString (len+1)
 612     withForeignPtr np $ \p -> do
 613         map_ 0 (a `plusPtr` s) p
 614         return (PS np 0 len)
 615   where
 616     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 617     STRICT3(map_)
 618     map_ n p1 p2
 619        | n >= len = return ()
 620        | otherwise = do
 621             x <- peekByteOff p1 n
 622             pokeByteOff p2 n (f x)
 623             map_ (n+1) p1 p2
 624 {-# INLINE mapF #-}
 625
 626 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 627 reverse :: ByteString -> ByteString
 628 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 629         c_reverse p (f `plusPtr` s) (fromIntegral l)
 630
 631 {-
 632 reverse = pack . P.reverse . unpack
 633 -}
 634
 635 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 636 -- 'ByteString' and \`intersperses\' that byte between the elements of
 637 -- the 'ByteString'.  It is analogous to the intersperse function on
 638 -- Lists.
 639 intersperse :: Word8 -> ByteString -> ByteString
 640 intersperse c ps@(PS x s l)
 641     | length ps < 2  = ps
 642     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 643         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 644
 645 {-
 646 intersperse c = pack . List.intersperse c . unpack
 647 -}
 648
 649 -- | The 'transpose' function transposes the rows and columns of its
 650 -- 'ByteString' argument.
 651 transpose :: [ByteString] -> [ByteString]
 652 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 653
 654 -- ---------------------------------------------------------------------
 655 -- Reducing 'ByteString's
 656
 657 -- | 'foldl', applied to a binary operator, a starting value (typically
 658 -- the left-identity of the operator), and a ByteString, reduces the
 659 -- ByteString using the binary operator, from left to right.
 660 -- This function is subject to array fusion.
 661 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 662 foldl f z = loopAcc . loopU (foldEFL f) z
 663 {-# INLINE foldl #-}
 664
 665 {-
 666 --
 667 -- About twice as fast with 6.4.1, but not fuseable
 668 -- A simple fold . map is enough to make it worth while.
 669 --
 670 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 671         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 672     where
 673         STRICT3(lgo)
 674         lgo z p q | p == q    = return z
 675                   | otherwise = do c <- peek p
 676                                    lgo (f z c) (p `plusPtr` 1) q
 677 -}
 678
 679 -- | 'foldl\'' is like foldl, but strict in the accumulator.
 680 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 681 foldl' f z = loopAcc . loopU (foldEFL' f) z
 682 {-# INLINE foldl' #-}
 683
 684 -- | 'foldr', applied to a binary operator, a starting value
 685 -- (typically the right-identity of the operator), and a ByteString,
 686 -- reduces the ByteString using the binary operator, from right to left.
 687 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 688 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 689         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 690     where
 691         STRICT2(go)
 692         go p q | p == q    = return z
 693                | otherwise = do c  <- peek p
 694                                 ws <- go (p `plusPtr` 1) q
 695                                 return $ c `k` ws
 696
 697 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 698 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 699 -- This function is subject to array fusion.
 700 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 701 foldl1 f ps
 702     | null ps   = errorEmptyList "foldl1"
 703     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 704
 705 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 706 -- and thus must be applied to non-empty 'ByteString's
 707 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 708 foldr1 f ps
 709     | null ps        = errorEmptyList "foldr1"
 710     | otherwise      = foldr f (last ps) (init ps)
 711
 712 -- ---------------------------------------------------------------------
 713 -- Special folds
 714
 715 -- | /O(n)/ Concatenate a list of ByteStrings.
 716 concat :: [ByteString] -> ByteString
 717 concat []     = empty
 718 concat [ps]   = ps
 719 concat xs     = create len $ \ptr -> go xs ptr
 720   where len = P.sum . P.map length $ xs
 721         STRICT2(go)
 722         go []            _   = return ()
 723         go (PS p s l:ps) ptr = do
 724                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 725                 go ps (ptr `plusPtr` l)
 726
 727 -- | Map a function over a 'ByteString' and concatenate the results
 728 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 729 concatMap f = foldr (append . f) empty
 730
 731 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 732 -- any element of the 'ByteString' satisfies the predicate.
 733 any :: (Word8 -> Bool) -> ByteString -> Bool
 734 any _ (PS _ _ 0) = False
 735 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 736         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 737     where
 738         STRICT2(go)
 739         go p q | p == q    = return False
 740                | otherwise = do c <- peek p
 741                                 if f c then return True
 742                                        else go (p `plusPtr` 1) q
 743
 744 -- todo fuse
 745
 746 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 747 -- if all elements of the 'ByteString' satisfy the predicate.
 748 all :: (Word8 -> Bool) -> ByteString -> Bool
 749 all _ (PS _ _ 0) = True
 750 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 751         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 752     where
 753         STRICT2(go)
 754         go p q | p == q     = return True  -- end of list
 755                | otherwise  = do c <- peek p
 756                                  if f c
 757                                     then go (p `plusPtr` 1) q
 758                                     else return False
 759 -- todo fuse
 760
 761 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 762 maximum :: ByteString -> Word8
 763 maximum xs@(PS x s l)
 764     | null xs   = errorEmptyList "maximum"
 765     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 766                     return $ c_maximum (p `plusPtr` s) (fromIntegral l)
 767 {-# INLINE maximum #-}
 768
 769 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 770 minimum :: ByteString -> Word8
 771 minimum xs@(PS x s l)
 772     | null xs   = errorEmptyList "minimum"
 773     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 774                     return $ c_minimum (p `plusPtr` s) (fromIntegral l)
 775 {-# INLINE minimum #-}
 776
 777 -- fusion is too slow here (10x)
 778
 779 {-
 780 maximum xs@(PS x s l)
 781     | null xs   = errorEmptyList "maximum"
 782     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 783                         w <- peek p
 784                         maximum_ (p `plusPtr` s) 0 l w
 785
 786 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 787 STRICT4(maximum_)
 788 maximum_ ptr n m c
 789     | n >= m    = return c
 790     | otherwise = do w <- peekByteOff ptr n
 791                      maximum_ ptr (n+1) m (if w > c then w else c)
 792
 793 minimum xs@(PS x s l)
 794     | null xs   = errorEmptyList "minimum"
 795     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 796                         w <- peek p
 797                         minimum_ (p `plusPtr` s) 0 l w
 798
 799 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 800 STRICT4(minimum_)
 801 minimum_ ptr n m c
 802     | n >= m    = return c
 803     | otherwise = do w <- peekByteOff ptr n
 804                      minimum_ ptr (n+1) m (if w < c then w else c)
 805 -}
 806
 807 -- | /O(n)/ map Word8 functions, provided with the index at each position
 808 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 809 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 810     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 811   where
 812     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 813     STRICT4(go)
 814     go n f t p | f == p    = return ()
 815                | otherwise = do w <- peek f
 816                                 ((poke t) . k n) w
 817                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 818
 819 -- ---------------------------------------------------------------------
 820 -- Unfolds and replicates
 821
 822 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 823 -- the value of every element. The following holds:
 824 --
 825 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 826 --
 827 -- This implemenation uses @memset(3)@
 828 replicate :: Int -> Word8 -> ByteString
 829 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 830
 831 {-
 832 -- About 5x slower
 833 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 834     where
 835         STRICT2(go)
 836         go _   0 = return w
 837         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 838 -}
 839
 840 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 841 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 842 -- the element and returns 'Nothing' if it is done producing the
 843 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 844 -- prepending to the ByteString and @b@ is used as the next element in a
 845 -- recursive call.
 846 --
 847 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 848 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 849 -- final size of the ByteString as an argument. 'cons' can then be
 850 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 851 -- linear complexity. The depth of the recursion is limited to this
 852 -- size, but may be less. For lazy, infinite unfoldr, use
 853 -- 'Data.List.unfoldr' (from 'Data.List').
 854 --
 855 -- Examples:
 856 --
 857 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 858 --
 859 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 860 --
 861 -- > unfoldrN n == take n $ List.unfoldr
 862 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
 863 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
 864     where
 865         STRICT3(go)
 866         go q c n | n == i    = return n      -- stop if we reach `i'
 867                  | otherwise = case f c of
 868                                    Nothing        -> return n
 869                                    Just (a,new_c) -> do
 870                                         poke q a
 871                                         go (q `plusPtr` 1) new_c (n+1)
 872
 873 -- ---------------------------------------------------------------------
 874 -- Substrings
 875
 876 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 877 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 878 take :: Int -> ByteString -> ByteString
 879 take n ps@(PS x s l)
 880     | n < 0     = empty
 881     | n >= l    = ps
 882     | otherwise = PS x s n
 883 {-# INLINE take #-}
 884
 885 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 886 -- elements, or @[]@ if @n > 'length' xs@.
 887 drop  :: Int -> ByteString -> ByteString
 888 drop n ps@(PS x s l)
 889     | n <= 0    = ps
 890     | n >  l    = empty
 891     | otherwise = PS x (s+n) (l-n)
 892 {-# INLINE drop #-}
 893
 894 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 895 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 896 splitAt  n ps  = (take n ps, drop n ps)
 897 {-# INLINE splitAt #-}
 898
 899 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 900 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 901 -- satisfy @p@.
 902 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 903 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
 904 {-# INLINE takeWhile #-}
 905
 906 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 907 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 908 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
 909 {-# INLINE dropWhile #-}
 910
 911 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 912 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 913 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
 914 {-# INLINE break #-}
 915
 916 -- | 'breakByte' breaks its ByteString argument at the first occurence
 917 -- of the specified byte. It is more efficient than 'break' as it is
 918 -- implemented with @memchr(3)@. I.e.
 919 --
 920 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 921 --
 922 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 923 breakByte c p = case elemIndex c p of
 924     Nothing -> (p,empty)
 925     Just n  -> (take n p, drop n p)
 926 {-# INLINE breakByte #-}
 927
 928 -- | 'spanByte' breaks its ByteString argument at the first
 929 -- occurence of a byte other than its argument. It is more efficient
 930 -- than 'span (==)'
 931 --
 932 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 933 --
 934 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 935 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 936     go (p `plusPtr` s) 0
 937   where
 938     STRICT2(go)
 939     go p i | i >= l    = return (ps, empty)
 940            | otherwise = do c' <- peekByteOff p i
 941                             if c /= c'
 942                                 then return (take i ps, drop i ps)
 943                                 else go p (i+1)
 944 {-# INLINE spanByte #-}
 945
 946 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 947 -- occurence of @w@. It behaves like 'break', except the delimiter is
 948 -- not returned, and @Nothing@ is returned if the delimiter is not in
 949 -- the ByteString. I.e.
 950 --
 951 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
 952 --
 953 -- > breakFirst c xs ==
 954 -- > let (x,y) = break (== c) xs
 955 -- > in if null y then Nothing else Just (x, drop 1 y))
 956 --
 957 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 958 breakFirst c p = case elemIndex c p of
 959    Nothing -> Nothing
 960    Just n -> Just (take n p, drop (n+1) p)
 961 {-# INLINE breakFirst #-}
 962
 963 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
 964 -- ByteString.
 965 --
 966 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
 967 --
 968 -- and the following are equivalent:
 969 --
 970 -- > breakLast 'c' "abcdef"
 971 -- > let (x,y) = break (=='c') (reverse "abcdef")
 972 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
 973 --
 974 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 975 breakLast c p = case elemIndexLast c p of
 976     Nothing -> Nothing
 977     Just n -> Just (take n p, drop (n+1) p)
 978 {-# INLINE breakLast #-}
 979
 980 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 981 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 982 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 983 span p ps = break (not . p) ps
 984 {-# INLINE span #-}
 985
 986 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 987 -- We have
 988 --
 989 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 990 --
 991 -- and
 992 --
 993 -- > spanEnd (not . isSpace) ps
 994 -- >    ==
 995 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
 996 --
 997 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 998 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 999
1000 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1001 -- separators, where the predicate returns True for a separator element.
1002 -- The resulting components do not contain the separators.  Two adjacent
1003 -- separators result in an empty component in the output.  eg.
1004 --
1005 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1006 -- > splitWith (=='a') []        == []
1007 --
1008 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1009
1010 #if defined(__GLASGOW_HASKELL__)
1011 splitWith _pred (PS _  _   0) = []
1012 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1013   where pred# c# = pred_ (W8# c#)
1014
1015         STRICT4(splitWith0)
1016         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1017             splitLoop pred' p 0 off' len' fp'
1018
1019         splitLoop :: (Word# -> Bool)
1020                   -> Ptr Word8
1021                   -> Int -> Int -> Int
1022                   -> ForeignPtr Word8
1023                   -> IO [ByteString]
1024
1025         splitLoop pred' p idx' off' len' fp'
1026             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1027             | idx' >= len'  = return [PS fp' off' idx']
1028             | otherwise = do
1029                 w <- peekElemOff p (off'+idx')
1030                 if pred' (case w of W8# w# -> w#)
1031                    then return (PS fp' off' idx' :
1032                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1033                    else splitLoop pred' p (idx'+1) off' len' fp'
1034 {-# INLINE splitWith #-}
1035
1036 #else
1037 splitWith _ (PS _ _ 0) = []
1038 splitWith p ps = loop p ps
1039     where
1040         STRICT2(loop)
1041         loop q qs = if null rest then [chunk]
1042                                  else chunk : loop q (unsafeTail rest)
1043             where (chunk,rest) = break q qs
1044 #endif
1045
1046 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1047 -- argument, consuming the delimiter. I.e.
1048 --
1049 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1050 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1051 -- > split 'x'  "x"          == ["",""]
1052 --
1053 -- and
1054 --
1055 -- > join [c] . split c == id
1056 -- > split == splitWith . (==)
1057 --
1058 -- As for all splitting functions in this library, this function does
1059 -- not copy the substrings, it just constructs new 'ByteStrings' that
1060 -- are slices of the original.
1061 --
1062 split :: Word8 -> ByteString -> [ByteString]
1063 split _ (PS _ _ 0) = []
1064 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1065     let ptr = p `plusPtr` s
1066
1067         STRICT1(loop)
1068         loop n = do
1069             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1070             if q == nullPtr
1071                 then [PS x (s+n) (l-n)]
1072                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1073
1074     return (loop 0)
1075 {-# INLINE split #-}
1076
1077 {-
1078 -- slower. but stays inside Haskell.
1079 split _ (PS _  _   0) = []
1080 split (W8# w#) (PS fp off len) = splitWith' off len fp
1081     where
1082         splitWith' off' len' fp' = withPtr fp $ \p ->
1083             splitLoop p 0 off' len' fp'
1084
1085         splitLoop :: Ptr Word8
1086                   -> Int -> Int -> Int
1087                   -> ForeignPtr Word8
1088                   -> IO [ByteString]
1089
1090         STRICT5(splitLoop)
1091         splitLoop p idx' off' len' fp'
1092             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1093             | idx' >= len'  = return [PS fp' off' idx']
1094             | otherwise = do
1095                 (W8# x#) <- peekElemOff p (off'+idx')
1096                 if word2Int# w# ==# word2Int# x#
1097                    then return (PS fp' off' idx' :
1098                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1099                    else splitLoop p (idx'+1) off' len' fp'
1100 -}
1101
1102 -- | Like 'splitWith', except that sequences of adjacent separators are
1103 -- treated as a single separator. eg.
1104 --
1105 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1106 --
1107 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1108 tokens f = P.filter (not.null) . splitWith f
1109 {-# INLINE tokens #-}
1110
1111 -- | The 'group' function takes a ByteString and returns a list of
1112 -- ByteStrings such that the concatenation of the result is equal to the
1113 -- argument.  Moreover, each sublist in the result contains only equal
1114 -- elements.  For example,
1115 --
1116 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1117 --
1118 -- It is a special case of 'groupBy', which allows the programmer to
1119 -- supply their own equality test. It is about 40% faster than
1120 -- /groupBy (==)/
1121 group :: ByteString -> [ByteString]
1122 group xs
1123     | null xs   = []
1124     | otherwise = ys : group zs
1125     where
1126         (ys, zs) = spanByte (unsafeHead xs) xs
1127
1128 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1129 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1130 groupBy k xs
1131     | null xs   = []
1132     | otherwise = take n xs : groupBy k (drop n xs)
1133     where
1134         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1135
1136 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1137 -- 'ByteString's and concatenates the list after interspersing the first
1138 -- argument between each element of the list.
1139 join :: ByteString -> [ByteString] -> ByteString
1140 join filler pss = concat (splice pss)
1141     where
1142         splice []  = []
1143         splice [x] = [x]
1144         splice (x:y:xs) = x:filler:splice (y:xs)
1145 {-# INLINE join #-}
1146
1147 --
1148 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1149 -- with a char. Around 4 times faster than the generalised join.
1150 --
1151 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1152 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1153     withForeignPtr ffp $ \fp ->
1154     withForeignPtr fgp $ \gp -> do
1155         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1156         poke (ptr `plusPtr` l) c
1157         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1158     where
1159       len = length f + length g + 1
1160 {-# INLINE joinWithByte #-}
1161
1162 -- ---------------------------------------------------------------------
1163 -- Indexing ByteStrings
1164
1165 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1166 index :: ByteString -> Int -> Word8
1167 index ps n
1168     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1169     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1170                                          ++ ", length = " ++ show (length ps))
1171     | otherwise      = ps `unsafeIndex` n
1172 {-# INLINE index #-}
1173
1174 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1175 -- element in the given 'ByteString' which is equal to the query
1176 -- element, or 'Nothing' if there is no such element.
1177 -- This implementation uses memchr(3).
1178 elemIndex :: Word8 -> ByteString -> Maybe Int
1179 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1180     let p' = p `plusPtr` s
1181         q  = memchr p' c (fromIntegral l)
1182     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1183 {-# INLINE elemIndex #-}
1184
1185 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1186 -- element in the given 'ByteString' which is equal to the query
1187 -- element, or 'Nothing' if there is no such element. The following
1188 -- holds:
1189 --
1190 -- > elemIndexLast c xs ==
1191 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1192 --
1193 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1194 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1195     go (p `plusPtr` s) (l-1)
1196   where
1197     STRICT2(go)
1198     go p i | i < 0     = return Nothing
1199            | otherwise = do ch' <- peekByteOff p i
1200                             if ch == ch'
1201                                 then return $ Just i
1202                                 else go p (i-1)
1203 {-# INLINE elemIndexLast #-}
1204
1205 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1206 -- the indices of all elements equal to the query element, in ascending order.
1207 -- This implementation uses memchr(3).
1208 elemIndices :: Word8 -> ByteString -> [Int]
1209 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1210     let ptr = p `plusPtr` s
1211
1212         STRICT1(loop)
1213         loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1214                  in if q == nullPtr
1215                         then []
1216                         else let i = q `minusPtr` ptr
1217                              in i : loop (i+1)
1218     return (loop 0)
1219 {-# INLINE elemIndices #-}
1220
1221 {-
1222 -- much slower
1223 elemIndices :: Word8 -> ByteString -> [Int]
1224 elemIndices c ps = loop 0 ps
1225    where STRICT2(loop)
1226          loop _ ps' | null ps'            = []
1227          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1228                     | otherwise           = loop (n+1) (unsafeTail ps')
1229 -}
1230
1231 -- | count returns the number of times its argument appears in the ByteString
1232 --
1233 -- > count = length . elemIndices
1234 --
1235 -- But more efficiently than using length on the intermediate list.
1236 count :: Word8 -> ByteString -> Int
1237 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1238     return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1239 {-# INLINE count #-}
1240
1241 {-
1242 --
1243 -- around 30% slower
1244 --
1245 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1246      go (p `plusPtr` s) (fromIntegral m) 0
1247     where
1248         go :: Ptr Word8 -> CSize -> Int -> IO Int
1249         STRICT3(go)
1250         go p l i = do
1251             let q = memchr p w l
1252             if q == nullPtr
1253                 then return i
1254                 else do let k = fromIntegral $ q `minusPtr` p
1255                         go (q `plusPtr` 1) (l-k-1) (i+1)
1256 -}
1257
1258 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1259 -- returns the index of the first element in the ByteString
1260 -- satisfying the predicate.
1261 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1262 findIndex k ps@(PS x s l)
1263     | null ps   = Nothing
1264     | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1265   where
1266     STRICT2(go)
1267     go ptr n | n >= l    = return Nothing
1268              | otherwise = do w <- peek ptr
1269                               if k w
1270                                 then return (Just n)
1271                                 else go (ptr `plusPtr` 1) (n+1)
1272 {-# INLINE findIndex #-}
1273
1274 -- | The 'findIndices' function extends 'findIndex', by returning the
1275 -- indices of all elements satisfying the predicate, in ascending order.
1276 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1277 findIndices p ps = loop 0 ps
1278    where
1279      STRICT2(loop)
1280      loop n qs | null qs           = []
1281                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1282                | otherwise         =     loop (n+1) (unsafeTail qs)
1283
1284 -- ---------------------------------------------------------------------
1285 -- Searching ByteStrings
1286
1287 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1288 elem :: Word8 -> ByteString -> Bool
1289 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1290 {-# INLINE elem #-}
1291
1292 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1293 notElem :: Word8 -> ByteString -> Bool
1294 notElem c ps = not (elem c ps)
1295 {-# INLINE notElem #-}
1296
1297 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1298 -- returns a ByteString containing those characters that satisfy the
1299 -- predicate. This function is subject to array fusion.
1300 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1301 filter p  = loopArr . loopU (filterEFL p) noAL
1302 {-# INLINE filter #-}
1303
1304 -- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
1305 -- around 2x faster for some one-shot applications.
1306 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1307 filterF k ps@(PS x s l)
1308     | null ps   = ps
1309     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1310         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1311         return (t `minusPtr` p) -- actual length
1312     where
1313         STRICT3(go)
1314         go f t end | f == end  = return t
1315                    | otherwise = do
1316                         w <- peek f
1317                         if k w
1318                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1319                             else             go (f `plusPtr` 1) t               end
1320 {-# INLINE filterF #-}
1321
1322 --
1323 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1324 -- case of filtering a single byte. It is more efficient to use
1325 -- /filterByte/ in this case.
1326 --
1327 -- > filterByte == filter . (==)
1328 --
1329 -- filterByte is around 10x faster, and uses much less space, than its
1330 -- filter equivalent
1331 filterByte :: Word8 -> ByteString -> ByteString
1332 filterByte w ps = replicate (count w ps) w
1333 {-# INLINE filterByte #-}
1334
1335 --
1336 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1337 -- case of filtering a single byte out of a list. It is more efficient
1338 -- to use /filterNotByte/ in this case.
1339 --
1340 -- > filterNotByte == filter . (/=)
1341 --
1342 -- filterNotByte is around 2x faster than its filter equivalent.
1343 filterNotByte :: Word8 -> ByteString -> ByteString
1344 filterNotByte w = filterF (/= w)
1345 {-# INLINE filterNotByte #-}
1346
1347 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1348 -- and returns the first element in matching the predicate, or 'Nothing'
1349 -- if there is no such element.
1350 --
1351 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1352 --
1353 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1354 find f p = case findIndex f p of
1355                     Just n -> Just (p `unsafeIndex` n)
1356                     _      -> Nothing
1357 {-# INLINE find #-}
1358
1359 {-
1360 --
1361 -- fuseable, but we don't want to walk the whole array.
1362 --
1363 find k = foldl findEFL Nothing
1364     where findEFL a@(Just _) _ = a
1365           findEFL _          c | k c       = Just c
1366                                | otherwise = Nothing
1367 -}
1368
1369 -- ---------------------------------------------------------------------
1370 -- Searching for substrings
1371
1372 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1373 -- iff the first is a prefix of the second.
1374 isPrefixOf :: ByteString -> ByteString -> Bool
1375 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1376     | l1 == 0   = True
1377     | l2 < l1   = False
1378     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1379         withForeignPtr x2 $ \p2 -> do
1380             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1381             return (i == 0)
1382
1383 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1384 -- iff the first is a suffix of the second.
1385 --
1386 -- The following holds:
1387 --
1388 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1389 --
1390 -- However, the real implemenation uses memcmp to compare the end of the
1391 -- string only, with no reverse required..
1392 isSuffixOf :: ByteString -> ByteString -> Bool
1393 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1394     | l1 == 0   = True
1395     | l2 < l1   = False
1396     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1397         withForeignPtr x2 $ \p2 -> do
1398             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1399             return (i == 0)
1400
1401 -- | Check whether one string is a substring of another. @isSubstringOf
1402 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1403 isSubstringOf :: ByteString -- ^ String to search for.
1404               -> ByteString -- ^ String to search in.
1405               -> Bool
1406 isSubstringOf p s = not $ P.null $ findSubstrings p s
1407
1408 -- | Get the first index of a substring in another string,
1409 --   or 'Nothing' if the string is not found.
1410 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1411 findSubstring :: ByteString -- ^ String to search for.
1412               -> ByteString -- ^ String to seach in.
1413               -> Maybe Int
1414 findSubstring = (listToMaybe .) . findSubstrings
1415
1416 -- | Find the indexes of all (possibly overlapping) occurances of a
1417 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1418 -- string matching algorithm.
1419 findSubstrings :: ByteString -- ^ String to search for.
1420                -> ByteString -- ^ String to seach in.
1421                -> [Int]
1422
1423 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1424   where
1425       patc x = pat `unsafeIndex` x
1426       strc x = str `unsafeIndex` x
1427
1428       -- maybe we should make kmpNext a UArray before using it in search?
1429       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1430       kmpNextL p _ | null p = []
1431       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1432                          ps = unsafeTail p
1433                          x = if not (null ps) && unsafeHead ps == patc j'
1434                                 then kmpNext Array.! j' else j'
1435                         in x:kmpNextL ps j'
1436       search i j = match ++ rest -- i: position in string, j: position in pattern
1437         where match = if j == m then [(i - j)] else []
1438               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1439       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1440                | otherwise = j
1441
1442 -- ---------------------------------------------------------------------
1443 -- Zipping
1444
1445 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1446 -- corresponding pairs of bytes. If one input ByteString is short,
1447 -- excess elements of the longer ByteString are discarded. This is
1448 -- equivalent to a pair of 'unpack' operations.
1449 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1450 zip ps qs
1451     | null ps || null qs = []
1452     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1453
1454 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1455 -- the first argument, instead of a tupling function.  For example,
1456 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1457 -- corresponding sums.
1458 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1459 zipWith f ps qs
1460     | null ps || null qs = []
1461     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1462
1463 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1464 -- ByteStrings. Note that this performs two 'pack' operations.
1465 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1466 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1467 {-# INLINE unzip #-}
1468
1469 -- ---------------------------------------------------------------------
1470 -- Special lists
1471
1472 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1473 inits :: ByteString -> [ByteString]
1474 inits (PS x s l) = [PS x s n | n <- [0..l]]
1475
1476 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1477 tails :: ByteString -> [ByteString]
1478 tails p | null p    = [empty]
1479         | otherwise = p : tails (unsafeTail p)
1480
1481 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1482
1483 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1484 elems :: ByteString -> [ByteString]
1485 elems (PS _ _ 0) = []
1486 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1487 {-# INLINE elems #-}
1488
1489 -- ---------------------------------------------------------------------
1490 -- ** Ordered 'ByteString's
1491
1492 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1493 sort :: ByteString -> ByteString
1494 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1495
1496     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1497     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1498
1499     let STRICT2(go)
1500         go 256 _   = return ()
1501         go i   ptr = do n <- peekElemOff arr i
1502                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1503                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1504     go 0 p
1505
1506 -- "countEach counts str l" counts the number of occurences of each Word8 in
1507 -- str, and stores the result in counts.
1508 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1509 STRICT3(countEach)
1510 countEach counts str l = go 0
1511  where
1512     STRICT1(go)
1513     go i | i == l    = return ()
1514          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1515                           x <- peekElemOff counts k
1516                           pokeElemOff counts k (x + 1)
1517                           go (i + 1)
1518
1519 {-
1520 sort :: ByteString -> ByteString
1521 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1522         memcpy p (f `plusPtr` s) l
1523         c_qsort p l -- inplace
1524 -}
1525
1526 {-
1527 sort = pack . List.sort . unpack
1528 -}
1529
1530 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1531 --
1532 -- Try some linear sorts: radix, counting
1533 -- Or mergesort.
1534 --
1535 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1536 -- sortBy f ps = undefined
1537
1538 -- ---------------------------------------------------------------------
1539 --
1540 -- Extensions to the basic interface
1541 --
1542
1543 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1544 -- check for the empty case, so there is an obligation on the programmer
1545 -- to provide a proof that the ByteString is non-empty.
1546 unsafeHead :: ByteString -> Word8
1547 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1548 {-# INLINE unsafeHead #-}
1549
1550 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1551 -- check for the empty case. As with 'unsafeHead', the programmer must
1552 -- provide a separate proof that the ByteString is non-empty.
1553 unsafeTail :: ByteString -> ByteString
1554 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1555 {-# INLINE unsafeTail #-}
1556
1557 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1558 -- This omits the bounds check, which means there is an accompanying
1559 -- obligation on the programmer to ensure the bounds are checked in some
1560 -- other way.
1561 unsafeIndex :: ByteString -> Int -> Word8
1562 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1563 {-# INLINE unsafeIndex #-}
1564
1565 -- ---------------------------------------------------------------------
1566 -- Low level constructors
1567
1568 #if defined(__GLASGOW_HASKELL__)
1569 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1570 -- Addr\# (an arbitrary machine address assumed to point outside the
1571 -- garbage-collected heap) into a @ByteString@. A much faster way to
1572 -- create an Addr\# is with an unboxed string literal, than to pack a
1573 -- boxed string. A unboxed string literal is compiled to a static @char
1574 -- []@ by GHC. Establishing the length of the string requires a call to
1575 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1576 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1577 -- if you know the length of the string statically.
1578 --
1579 -- An example:
1580 --
1581 -- > literalFS = packAddress "literal"#
1582 --
1583 packAddress :: Addr# -> ByteString
1584 packAddress addr# = inlinePerformIO $ do
1585     p <- newForeignPtr_ cstr
1586     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1587   where
1588     cstr = Ptr addr#
1589 {-# INLINE packAddress #-}
1590
1591 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1592 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1593 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1594 -- 'Addr\#' to the string, and the length of the string. Make sure the
1595 -- length is correct, otherwise use the safer 'packAddress' (where the
1596 -- length will be calculated once at runtime).
1597 unsafePackAddress :: Int -> Addr# -> ByteString
1598 unsafePackAddress len addr# = inlinePerformIO $ do
1599     p <- newForeignPtr_ cstr
1600     return $ PS p 0 len
1601     where cstr = Ptr addr#
1602
1603 #endif
1604
1605 -- | /O(1)/ Build a ByteString from a ForeignPtr
1606 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1607 fromForeignPtr fp l = PS fp 0 l
1608
1609 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1610 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1611 toForeignPtr (PS ps s l) = (ps, s, l)
1612
1613 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1614 -- current 'ByteString' from any larger string it was created from, as
1615 -- an 'Int'.
1616 skipIndex :: ByteString -> Int
1617 skipIndex (PS _ s _) = s
1618 {-# INLINE skipIndex #-}
1619
1620 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1621 -- finalizer associated to it. The ByteString length is calculated using
1622 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1623 packCString :: CString -> ByteString
1624 packCString cstr = inlinePerformIO $ do
1625     fp <- newForeignPtr_ (castPtr cstr)
1626     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1627
1628 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1629 -- have /no/ finalizer associated with it. This operation has /O(1)/
1630 -- complexity as we already know the final size, so no /strlen(3)/ is
1631 -- required.
1632 packCStringLen :: CStringLen -> ByteString
1633 packCStringLen (ptr,len) = inlinePerformIO $ do
1634     fp <- newForeignPtr_ (castPtr ptr)
1635     return $ PS fp 0 (fromIntegral len)
1636
1637 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1638 -- have a @free(3)@ finalizer associated to it.
1639 packMallocCString :: CString -> ByteString
1640 packMallocCString cstr = inlinePerformIO $ do
1641     fp <- newForeignFreePtr (castPtr cstr)
1642     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1643
1644 #if defined(__GLASGOW_HASKELL__)
1645 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1646 -- length, and an IO action representing a finalizer. This function is
1647 -- not available on Hugs.
1648 --
1649 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1650 packCStringFinalizer p l f = do
1651     fp <- FC.newForeignPtr p f
1652     return $ PS fp 0 l
1653
1654 -- | Explicitly run the finaliser associated with a 'ByteString'.
1655 -- Further references to this value may generate invalid memory
1656 -- references. This operation is unsafe, as there may be other
1657 -- 'ByteStrings' referring to the same underlying pages. If you use
1658 -- this, you need to have a proof of some kind that all 'ByteString's
1659 -- ever generated from the underlying byte array are no longer live.
1660 unsafeFinalize :: ByteString -> IO ()
1661 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1662
1663 #endif
1664
1665 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1666 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1667 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1668 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1669     where
1670       alloc = withForeignPtr ps $ \p -> do
1671                 buf <- c_malloc (fromIntegral l+1)
1672                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1673                 poke (buf `plusPtr` l) (0::Word8)
1674                 return $ castPtr buf
1675
1676 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1677 -- Warning: modifying the @CString@ will affect the @ByteString@.
1678 -- Why is this function unsafe? It relies on the null byte at the end of
1679 -- the ByteString to be there. This is /not/ the case if your ByteString
1680 -- has been spliced from a larger string (i.e. with take or drop).
1681 -- Unless you can guarantee the null byte, you should use the safe
1682 -- version, which will copy the string first.
1683 --
1684 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1685 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1686
1687 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1688 --   This is mainly useful to allow the rest of the data pointed
1689 --   to by the 'ByteString' to be garbage collected, for example
1690 --   if a large string has been read in, and only a small part of it
1691 --   is needed in the rest of the program.
1692 copy :: ByteString -> ByteString
1693 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1694     memcpy p (f `plusPtr` s) (fromIntegral l)
1695
1696 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1697 -- CString is going to be deallocated from C land.
1698 copyCString :: CString -> ByteString
1699 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1700
1701 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1702 copyCStringLen :: CStringLen -> ByteString
1703 copyCStringLen (cstr, len) = inlinePerformIO $ do
1704     fp <- mallocForeignPtrArray (len+1)
1705     withForeignPtr fp $ \p -> do
1706         memcpy p (castPtr cstr) (fromIntegral len)
1707         poke (p `plusPtr` len) (0 :: Word8)
1708     return $! PS fp 0 len
1709
1710 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1711 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1712 -- This is analogous to unsafeUseAsCString, and comes with the same
1713 -- safety requirements.
1714 --
1715 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1716 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1717
1718 -- | Given the maximum size needed and a function to make the contents
1719 -- of a ByteString, generate makes the 'ByteString'. The generating
1720 -- function is required to return the actual final size (<= the maximum
1721 -- size), and the resulting byte array is realloced to this size.  The
1722 -- string is padded at the end with a null byte.
1723 --
1724 -- generate is the main mechanism for creating custom, efficient
1725 -- ByteString functions, using Haskell or C functions to fill the space.
1726 --
1727 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1728 generate i f = do
1729     p <- mallocArray (i+1)
1730     i' <- f p
1731     p' <- reallocArray p (i'+1)
1732     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1733     fp <- newForeignFreePtr p'
1734     return $ PS fp 0 i'
1735
1736 -- ---------------------------------------------------------------------
1737 -- line IO
1738
1739 #if defined(__GLASGOW_HASKELL__)
1740
1741 -- | getLine, read a line from stdin.
1742 getLine :: IO ByteString
1743 getLine = hGetLine stdin
1744
1745 -- | hGetLine. read a ByteString from a handle
1746 hGetLine :: Handle -> IO ByteString
1747 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1748     case haBufferMode handle_ of
1749        NoBuffering -> error "no buffering"
1750        _other      -> hGetLineBuffered handle_
1751
1752  where
1753     hGetLineBuffered handle_ = do
1754         let ref = haBuffer handle_
1755         buf <- readIORef ref
1756         hGetLineBufferedLoop handle_ ref buf 0 []
1757
1758     hGetLineBufferedLoop handle_ ref
1759             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1760         len `seq` do
1761         off <- findEOL r w raw
1762         let new_len = len + off - r
1763         xs <- mkPS raw r off
1764
1765       -- if eol == True, then off is the offset of the '\n'
1766       -- otherwise off == w and the buffer is now empty.
1767         if off /= w
1768             then do if (w == off + 1)
1769                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1770                             else writeIORef ref buf{ bufRPtr = off + 1 }
1771                     mkBigPS new_len (xs:xss)
1772             else do
1773                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1774                                     buf{ bufWPtr=0, bufRPtr=0 }
1775                  case maybe_buf of
1776                     -- Nothing indicates we caught an EOF, and we may have a
1777                     -- partial line to return.
1778                     Nothing -> do
1779                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1780                          if new_len > 0
1781                             then mkBigPS new_len (xs:xss)
1782                             else ioe_EOF
1783                     Just new_buf ->
1784                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1785
1786     -- find the end-of-line character, if there is one
1787     findEOL r w raw
1788         | r == w = return w
1789         | otherwise =  do
1790             (c,r') <- readCharFromBuffer raw r
1791             if c == '\n'
1792                 then return r -- NB. not r': don't include the '\n'
1793                 else findEOL r' w raw
1794
1795     maybeFillReadBuffer fd is_line is_stream buf = catch
1796         (do buf' <- fillReadBuffer fd is_line is_stream buf
1797             return (Just buf'))
1798         (\e -> if isEOFError e then return Nothing else ioError e)
1799
1800 -- TODO, rewrite to use normal memcpy
1801 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1802 mkPS buf start end = do
1803     let len = end - start
1804     fp <- mallocByteString len
1805     withForeignPtr fp $ \p -> do
1806         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1807         return (PS fp 0 len)
1808
1809 mkBigPS :: Int -> [ByteString] -> IO ByteString
1810 mkBigPS _ [ps] = return ps
1811 mkBigPS _ pss = return $! concat (P.reverse pss)
1812
1813 #endif
1814
1815 -- ---------------------------------------------------------------------
1816 -- Block IO
1817
1818 -- | Outputs a 'ByteString' to the specified 'Handle'.
1819 hPut :: Handle -> ByteString -> IO ()
1820 hPut _ (PS _ _ 0)  = return ()
1821 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1822 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1823
1824 -- | Write a ByteString to stdout
1825 putStr :: ByteString -> IO ()
1826 putStr = hPut stdout
1827
1828 -- | Write a ByteString to stdout, appending a newline byte
1829 putStrLn :: ByteString -> IO ()
1830 putStrLn ps = hPut stdout ps >> hPut stdout nl
1831     where nl = packByte 0x0a
1832
1833 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1834 -- is far more efficient than reading the characters into a 'String'
1835 -- and then using 'pack'.
1836 hGet :: Handle -> Int -> IO ByteString
1837 hGet _ 0 = return empty
1838 hGet h i = do fp <- mallocByteString i
1839               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1840               return $ PS fp 0 l
1841
1842 #if defined(__GLASGOW_HASKELL__)
1843 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1844 -- waiting for data to become available, instead it returns only whatever data
1845 -- is available.
1846 hGetNonBlocking :: Handle -> Int -> IO ByteString
1847 hGetNonBlocking _ 0 = return empty
1848 hGetNonBlocking h i = do
1849     fp <- mallocByteString i
1850     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1851     return $ PS fp 0 l
1852 #endif
1853
1854 -- | Read entire handle contents into a 'ByteString'.
1855 --
1856 -- As with 'hGet', the string representation in the file is assumed to
1857 -- be ISO-8859-1.
1858 --
1859 hGetContents :: Handle -> IO ByteString
1860 hGetContents h = do
1861     let start_size = 1024
1862     p <- mallocArray start_size
1863     i <- hGetBuf h p start_size
1864     if i < start_size
1865         then do p' <- reallocArray p i
1866                 fp <- newForeignFreePtr p'
1867                 return $ PS fp 0 i
1868         else f p start_size
1869     where
1870         f p s = do
1871             let s' = 2 * s
1872             p' <- reallocArray p s'
1873             i  <- hGetBuf h (p' `plusPtr` s) s
1874             if i < s
1875                 then do let i' = s + i
1876                         p'' <- reallocArray p' i'
1877                         fp  <- newForeignFreePtr p''
1878                         return $ PS fp 0 i'
1879                 else f p' s'
1880
1881 -- | getContents. Equivalent to hGetContents stdin
1882 getContents :: IO ByteString
1883 getContents = hGetContents stdin
1884
1885 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1886 -- efficient than reading the characters into a 'String' and then using
1887 -- 'pack'.  It also may be more efficient than opening the file and
1888 -- reading it using hGet.
1889 readFile :: FilePath -> IO ByteString
1890 readFile f = do
1891     h <- openBinaryFile f ReadMode
1892     l <- hFileSize h
1893     s <- hGet h $ fromIntegral l
1894     hClose h
1895     return s
1896
1897 -- | Write a 'ByteString' to a file.
1898 writeFile :: FilePath -> ByteString -> IO ()
1899 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1900     (\h -> hPut h ps)
1901
1902 {-
1903 --
1904 -- Disable until we can move it into a portable .hsc file
1905 --
1906
1907 -- | Like readFile, this reads an entire file directly into a
1908 -- 'ByteString', but it is even more efficient.  It involves directly
1909 -- mapping the file to memory.  This has the advantage that the contents
1910 -- of the file never need to be copied.  Also, under memory pressure the
1911 -- page may simply be discarded, while in the case of readFile it would
1912 -- need to be written to swap.  If you read many small files, mmapFile
1913 -- will be less memory-efficient than readFile, since each mmapFile
1914 -- takes up a separate page of memory.  Also, you can run into bus
1915 -- errors if the file is modified.  As with 'readFile', the string
1916 -- representation in the file is assumed to be ISO-8859-1.
1917 --
1918 -- On systems without mmap, this is the same as a readFile.
1919 --
1920 mmapFile :: FilePath -> IO ByteString
1921 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1922
1923 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1924 mmap f = do
1925     h <- openBinaryFile f ReadMode
1926     l <- fromIntegral `fmap` hFileSize h
1927     -- Don't bother mmaping small files because each mmapped file takes up
1928     -- at least one full VM block.
1929     if l < mmap_limit
1930        then do thefp <- mallocByteString l
1931                withForeignPtr thefp $ \p-> hGetBuf h p l
1932                hClose h
1933                return (thefp, l)
1934        else do
1935                -- unix only :(
1936                fd <- fromIntegral `fmap` handleToFd h
1937                p  <- my_mmap l fd
1938                fp <- if p == nullPtr
1939                      then do thefp <- mallocByteString l
1940                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
1941                              return thefp
1942                      else do
1943                           -- The munmap leads to crashes on OpenBSD.
1944                           -- maybe there's a use after unmap in there somewhere?
1945 #if !defined(__OpenBSD__)
1946                              let unmap = c_munmap p l >> return ()
1947 #else
1948                              let unmap = return ()
1949 #endif
1950                              fp <- FC.newForeignPtr p unmap
1951                              return fp
1952                c_close fd
1953                hClose h
1954                return (fp, l)
1955     where mmap_limit = 16*1024
1956 -}
1957
1958 #if defined(__GLASGOW_HASKELL__)
1959 --
1960 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1961 --
1962 getArgs :: IO [ByteString]
1963 getArgs =
1964   alloca $ \ p_argc ->
1965   alloca $ \ p_argv -> do
1966     getProgArgv p_argc p_argv
1967     p    <- fromIntegral `fmap` peek p_argc
1968     argv <- peek p_argv
1969     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1970 #endif
1971
1972 -- ---------------------------------------------------------------------
1973 -- Internal utilities
1974
1975 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1976 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1977 -- convenience for ByteString construction.
1978 w2c :: Word8 -> Char
1979 #if !defined(__GLASGOW_HASKELL__)
1980 w2c = chr . fromIntegral
1981 #else
1982 w2c = unsafeChr . fromIntegral
1983 #endif
1984 {-# INLINE w2c #-}
1985
1986 c2w :: Char -> Word8
1987 c2w = fromIntegral . ord
1988 {-# INLINE c2w #-}
1989
1990 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1991 -- is padded with a null byte.
1992 mallocByteString :: Int -> IO (ForeignPtr Word8)
1993 mallocByteString l = do
1994     fp <- mallocForeignPtrArray (l+1)
1995     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1996     return fp
1997
1998 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1999 -- argument gives the final size of the ByteString. Unlike 'generate'
2000 -- the ByteString is not reallocated if the final size is less than the
2001 -- estimated size. Also, unlike 'generate' ByteString's created this way
2002 -- are managed on the Haskell heap.
2003 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2004 create l write_ptr = inlinePerformIO $ do
2005     fp <- mallocByteString (l+1)
2006     withForeignPtr fp $ \p -> write_ptr p
2007     return $ PS fp 0 l
2008 {-# INLINE create #-}
2009
2010 -- | Perform an operation with a temporary ByteString
2011 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2012 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2013 {-# INLINE withPtr #-}
2014
2015 -- Common up near identical calls to `error' to reduce the number
2016 -- constant strings created when compiled:
2017 errorEmptyList :: String -> a
2018 errorEmptyList fun = moduleError fun "empty ByteString"
2019 {-# NOINLINE errorEmptyList #-}
2020
2021 moduleError :: String -> String -> a
2022 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2023 {-# NOINLINE moduleError #-}
2024
2025 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2026 -- of the string if no element is found, rather than Nothing.
2027 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2028 STRICT2(findIndexOrEnd)
2029 findIndexOrEnd f ps
2030     | null ps           = 0
2031     | f (unsafeHead ps) = 0
2032     | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
2033 {-# INLINE findIndexOrEnd #-}
2034
2035 -- Find from the end of the string using predicate
2036 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2037 STRICT2(findFromEndUntil)
2038 findFromEndUntil f ps@(PS x s l) =
2039     if null ps then 0
2040     else if f (last ps) then l
2041          else findFromEndUntil f (PS x s (l-1))
2042
2043 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2044 -- it exposes lots of things to further inlining
2045 --
2046 {-# INLINE inlinePerformIO #-}
2047 inlinePerformIO :: IO a -> a
2048 #if defined(__GLASGOW_HASKELL__)
2049 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2050 #else
2051 inlinePerformIO = unsafePerformIO
2052 #endif
2053
2054 {-# INLINE newForeignFreePtr #-}
2055 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2056 #if defined(__GLASGOW_HASKELL__)
2057 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2058 #else
2059 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2060 #endif
2061
2062 -- ---------------------------------------------------------------------
2063 --
2064 -- Standard C functions
2065 --
2066
2067 foreign import ccall unsafe "string.h strlen" c_strlen
2068     :: CString -> CInt
2069
2070 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2071     :: CInt -> IO (Ptr Word8)
2072
2073 foreign import ccall unsafe "static stdlib.h free" c_free
2074     :: Ptr Word8 -> IO ()
2075
2076 #if !defined(__GLASGOW_HASKELL__)
2077 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2078     :: FunPtr (Ptr Word8 -> IO ())
2079 #endif
2080
2081 foreign import ccall unsafe "string.h memset" memset
2082     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2083
2084 foreign import ccall unsafe "string.h memchr" memchr
2085     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2086
2087 foreign import ccall unsafe "string.h memcmp" memcmp
2088     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2089
2090 foreign import ccall unsafe "string.h memcpy" memcpy
2091     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2092
2093 -- ---------------------------------------------------------------------
2094 --
2095 -- Uses our C code
2096 --
2097
2098 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2099     :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2100
2101 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2102     :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2103
2104 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2105     :: Ptr Word8 -> CInt -> Word8
2106
2107 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2108     :: Ptr Word8 -> CInt -> Word8
2109
2110 foreign import ccall unsafe "static fpstring.h count" c_count
2111     :: Ptr Word8 -> CInt -> Word8 -> CInt
2112
2113 -- ---------------------------------------------------------------------
2114 -- MMap
2115
2116 {-
2117 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2118     :: Int -> Int -> IO (Ptr Word8)
2119
2120 foreign import ccall unsafe "static unistd.h close" c_close
2121     :: Int -> IO Int
2122
2123 #  if !defined(__OpenBSD__)
2124 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2125     :: Ptr Word8 -> Int -> IO Int
2126 #  endif
2127 -}
2128
2129 -- ---------------------------------------------------------------------
2130 -- Internal GHC Haskell magic
2131
2132 #if defined(__GLASGOW_HASKELL__)
2133 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2134     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2135
2136 foreign import ccall unsafe "__hscore_memcpy_src_off"
2137    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2138 #endif
2139
2140 -- ---------------------------------------------------------------------
2141 --
2142 -- Functional array fusion for ByteStrings.
2143 --
2144 -- From the Data Parallel Haskell project,
2145 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2146 --
2147
2148 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2149 -- the fact that no bottoms of this type are ever constructed; hence, we can
2150 -- assume @(_ :: NoAL) `seq` x = x@.
2151 --
2152 data NoAL = NoAL
2153
2154 -- | Special forms of loop arguments
2155 --
2156 -- * These are common special cases for the three function arguments of gen
2157 --   and loop; we give them special names to make it easier to trigger RULES
2158 --   applying in the special cases represented by these arguments.  The
2159 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2160 --   two simplifier phases.
2161 --
2162 -- * In the case where the accumulator is not needed, it is better to always
2163 --   explicitly return a value `()', rather than just copy the input to the
2164 --   output, as the former gives GHC better local information.
2165 --
2166
2167 -- | Element function expressing a mapping only
2168 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2169 mapEFL f = \_ e -> (noAL, (Just $ f e))
2170 #if defined(__GLASGOW_HASKELL__)
2171 {-# INLINE [1] mapEFL #-}
2172 #endif
2173
2174 -- | Element function implementing a filter function only
2175 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2176 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2177 #if defined(__GLASGOW_HASKELL__)
2178 {-# INLINE [1] filterEFL #-}
2179 #endif
2180
2181 -- |Element function expressing a reduction only
2182 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2183 foldEFL f = \a e -> (f a e, Nothing)
2184 #if defined(__GLASGOW_HASKELL__)
2185 {-# INLINE [1] foldEFL #-}
2186 #endif
2187
2188 -- | A strict foldEFL.
2189 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2190 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2191 #if defined(__GLASGOW_HASKELL__)
2192 {-# INLINE [1] foldEFL' #-}
2193 #endif
2194
2195 -- | No accumulator
2196 noAL :: NoAL
2197 noAL = NoAL
2198 #if defined(__GLASGOW_HASKELL__)
2199 {-# INLINE [1] noAL #-}
2200 #endif
2201
2202 -- | Projection functions that are fusion friendly (as in, we determine when
2203 -- they are inlined)
2204 loopArr :: (ByteString, acc) -> ByteString
2205 loopArr (arr, _) = arr
2206 #if defined(__GLASGOW_HASKELL__)
2207 {-# INLINE [1] loopArr #-}
2208 #endif
2209
2210 loopAcc :: (ByteString, acc) -> acc
2211 loopAcc (_, acc) = acc
2212 #if defined(__GLASGOW_HASKELL__)
2213 {-# INLINE [1] loopAcc #-}
2214 #endif
2215
2216 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2217 loopSndAcc (arr, (_, acc)) = (arr, acc)
2218 #if defined(__GLASGOW_HASKELL__)
2219 {-# INLINE [1] loopSndAcc #-}
2220 #endif
2221
2222 ------------------------------------------------------------------------
2223
2224 --
2225 -- size, and then percentage.
2226 --
2227
2228 -- | Iteration over over ByteStrings
2229 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2230       -> acc                                   -- ^ initial acc value
2231       -> ByteString                            -- ^ input ByteString
2232       -> (ByteString, acc)
2233
2234 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2235     fp          <- mallocByteString i
2236     (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2237         (acc, i') <- go (a `plusPtr` s) p start
2238         if i' == i
2239             then return (fp,i',acc)                     -- no realloc for map
2240             else do fp_ <- mallocByteString (i'+1)      -- realloc
2241                     withForeignPtr fp_ $ \p' -> do
2242                         memcpy p' p (fromIntegral i')   -- can't avoid this,  right?
2243                         poke (p' `plusPtr` i') (0::Word8)
2244                     return (fp_,i',acc)
2245
2246     return (PS ptr 0 n, acc)
2247   where
2248     go p ma = trans 0 0
2249         where
2250             STRICT3(trans)
2251             trans a_off ma_off acc
2252                 | a_off >= i = return (acc, ma_off)
2253                 | otherwise  = do
2254                     x <- peekByteOff p a_off
2255                     let (acc', oe) = f acc x
2256                     ma_off' <- case oe of
2257                         Nothing  -> return ma_off
2258                         Just e   -> do pokeByteOff ma ma_off e
2259                                        return $ ma_off + 1
2260                     trans (a_off+1) ma_off' acc'
2261
2262 #if defined(__GLASGOW_HASKELL__)
2263 {-# INLINE [1] loopU #-}
2264 #endif
2265
2266 infixr 9 `fuseEFL`
2267
2268 -- |Fuse to flat loop functions
2269 fuseEFL :: (a1 -> Word8  -> (a1, Maybe Word8))
2270         -> (a2 -> Word8  -> (a2, Maybe Word8))
2271         -> (a1, a2)
2272         -> Word8
2273         -> ((a1, a2), Maybe Word8)
2274 fuseEFL f g (acc1, acc2) e1 =
2275     case f acc1 e1 of
2276         (acc1', Nothing) -> ((acc1', acc2), Nothing)
2277         (acc1', Just e2) ->
2278             case g acc2 e2 of
2279                 (acc2', res) -> ((acc1', acc2'), res)
2280
2281 {-# RULES
2282
2283 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2284   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2285     loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2286
2287 "loopArr/loopSndAcc" forall x.
2288   loopArr (loopSndAcc x) = loopArr x
2289
2290 "seq/NoAL" forall (u::NoAL) e.
2291   u `seq` e = e
2292
2293   #-}
2294