Data/ByteString.hs

   1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
   2 --
   3 -- Module      : ByteString
   4 -- Copyright   : (c) The University of Glasgow 2001,
   5 --               (c) David Roundy 2003-2005,
   6 --               (c) Simon Marlow 2005
   7 --               (c) Don Stewart 2005-2006
   8 --               (c) Bjorn Bringert 2006
   9 --
  10 -- Array fusion code:
  11 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
  12 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
  13 --
  14 -- License     : BSD-style
  15 --
  16 -- Maintainer  : dons@cse.unsw.edu.au
  17 -- Stability   : experimental
  18 -- Portability : portable, requires ffi and cpp
  19 -- Tested with : GHC 6.4.1 and Hugs March 2005
  20 --
  21
  22 --
  23 -- | A time and space-efficient implementation of byte vectors using
  24 -- packed Word8 arrays, suitable for high performance use, both in terms
  25 -- of large data quantities, or high speed requirements. Byte vectors
  26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
  27 -- and can be passed between C and Haskell with little effort.
  28 --
  29 -- This module is intended to be imported @qualified@, to avoid name
  30 -- clashes with Prelude functions.  eg.
  31 --
  32 -- > import qualified Data.ByteString as B
  33 --
  34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
  35 -- UArray by Simon Marlow. Rewritten to support slices and use
  36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
  37 --
  38
  39 module Data.ByteString (
  40
  41         -- * The @ByteString@ type
  42         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
  43
  44         -- * Introducing and eliminating 'ByteString's
  45         empty,                  -- :: ByteString
  46         packByte,               -- :: Word8   -> ByteString
  47         pack,                   -- :: [Word8] -> ByteString
  48         unpack,                 -- :: ByteString -> [Word8]
  49         packWith,               -- :: (a -> Word8) -> [a] -> ByteString
  50         unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
  51
  52         -- * Basic interface
  53         cons,                   -- :: Word8 -> ByteString -> ByteString
  54         snoc,                   -- :: ByteString -> Word8 -> ByteString
  55         null,                   -- :: ByteString -> Bool
  56         length,                 -- :: ByteString -> Int
  57         head,                   -- :: ByteString -> Word8
  58         tail,                   -- :: ByteString -> ByteString
  59         last,                   -- :: ByteString -> Word8
  60         init,                   -- :: ByteString -> ByteString
  61         append,                 -- :: ByteString -> ByteString -> ByteString
  62
  63         -- * Special ByteStrings
  64         inits,                  -- :: ByteString -> [ByteString]
  65         tails,                  -- :: ByteString -> [ByteString]
  66         elems,                  -- :: ByteString -> [ByteString]
  67
  68         -- * Transformating ByteStrings
  69         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
  70         reverse,                -- :: ByteString -> ByteString
  71         intersperse,            -- :: Word8 -> ByteString -> ByteString
  72         transpose,              -- :: [ByteString] -> [ByteString]
  73         map',                   -- :: (Word8 -> Word8) -> ByteString -> ByteString
  74
  75         -- * Reducing 'ByteString's
  76         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  77         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  78         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  79         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  80         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  81         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  82
  83         -- ** Special folds
  84         concat,                 -- :: [ByteString] -> ByteString
  85         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  86         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  87         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
  88         maximum,                -- :: ByteString -> Word8
  89         minimum,                -- :: ByteString -> Word8
  90         mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  91
  92         -- * Building ByteStrings
  93         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  94         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  95
  96         -- * Generating and unfolding ByteStrings
  97         replicate,              -- :: Int -> Word8 -> ByteString
  98         unfoldrN,               -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  99
 100         -- * Substrings
 101
 102         -- ** Breaking strings
 103         take,                   -- :: Int -> ByteString -> ByteString
 104         unsafeTake,             -- :: Int -> ByteString -> ByteString
 105         drop,                   -- :: Int -> ByteString -> ByteString
 106         unsafeDrop,             -- :: Int -> ByteString -> ByteString
 107         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
 108         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 109         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
 110         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 111         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 112         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 113
 114         -- ** Breaking and dropping on specific bytes
 115         breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
 116         spanByte,               -- :: Word8 -> ByteString -> (ByteString, ByteString)
 117         breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 118         breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
 119
 120         -- ** Breaking into many substrings
 121         split,                  -- :: Word8 -> ByteString -> [ByteString]
 122         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 123         tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
 124         group,                  -- :: ByteString -> [ByteString]
 125         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 126
 127         -- ** Joining strings
 128         join,                   -- :: ByteString -> [ByteString] -> ByteString
 129         joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
 130
 131         -- * Indexing ByteStrings
 132         index,                  -- :: ByteString -> Int -> Word8
 133         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
 134         elemIndices,            -- :: Word8 -> ByteString -> [Int]
 135         elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
 136         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
 137         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
 138         count,                  -- :: Word8 -> ByteString -> Int
 139         findIndexOrEnd,         -- :: (Word8 -> Bool) -> ByteString -> Int
 140
 141         -- * Ordered ByteStrings
 142         sort,                   -- :: ByteString -> ByteString
 143
 144         -- * Searching ByteStrings
 145
 146         -- ** Searching by equality
 147         -- | These functions use memchr(3) to efficiently search the ByteString
 148
 149         elem,                   -- :: Word8 -> ByteString -> Bool
 150         notElem,                -- :: Word8 -> ByteString -> Bool
 151         filterByte,             -- :: Word8 -> ByteString -> ByteString
 152         filterNotByte,          -- :: Word8 -> ByteString -> ByteString
 153
 154         -- ** Searching with a predicate
 155         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
 156         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 157         filter',                -- :: (Word8 -> Bool) -> ByteString -> ByteString
 158
 159         -- ** Prefixes and suffixes
 160         -- | These functions use memcmp(3) to efficiently compare substrings
 161         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
 162         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
 163
 164         -- ** Search for arbitrary substrings
 165         isSubstringOf,          -- :: ByteString -> ByteString -> Bool
 166         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
 167         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 168
 169         -- * Zipping and unzipping ByteStrings
 170         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
 171         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
 172         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
 173
 174         -- * Unchecked access
 175         unsafeHead,             -- :: ByteString -> Word8
 176         unsafeTail,             -- :: ByteString -> ByteString
 177         unsafeIndex,            -- :: ByteString -> Int -> Word8
 178
 179         -- * Low level introduction and elimination
 180         generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
 181         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
 182         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
 183         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
 184         skipIndex,              -- :: ByteString -> Int
 185
 186         -- ** Packing CStrings and pointers
 187         packCString,            -- :: CString -> ByteString
 188         packCStringLen,         -- :: CString -> ByteString
 189         packMallocCString,      -- :: CString -> ByteString
 190
 191 #if defined(__GLASGOW_HASKELL__)
 192         packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
 193         packAddress,            -- :: Addr# -> ByteString
 194         unsafePackAddress,      -- :: Int -> Addr# -> ByteString
 195         unsafeFinalize,         -- :: ByteString -> IO ()
 196 #endif
 197
 198         -- ** Using ByteStrings as CStrings
 199         useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
 200         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
 201         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
 202
 203         -- ** Copying ByteStrings
 204         -- | These functions perform memcpy(3) operations
 205         copy,                   -- :: ByteString -> ByteString
 206         copyCString,            -- :: CString -> ByteString
 207         copyCStringLen,         -- :: CStringLen -> ByteString
 208
 209         -- * I\/O with @ByteString@s
 210
 211         -- ** Standard input and output
 212
 213 #if defined(__GLASGOW_HASKELL__)
 214         getLine,                -- :: IO ByteString
 215 #endif
 216         getContents,            -- :: IO ByteString
 217         putStr,                 -- :: ByteString -> IO ()
 218         putStrLn,               -- :: ByteString -> IO ()
 219
 220         -- ** Files
 221         readFile,               -- :: FilePath -> IO ByteString
 222         writeFile,              -- :: FilePath -> ByteString -> IO ()
 223 --      mmapFile,               -- :: FilePath -> IO ByteString
 224
 225         -- ** I\/O with Handles
 226 #if defined(__GLASGOW_HASKELL__)
 227         getArgs,                -- :: IO [ByteString]
 228         hGetLine,               -- :: Handle -> IO ByteString
 229         hGetLines,              -- :: Handle -> IO [ByteString]
 230         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
 231 #endif
 232         hGetContents,           -- :: Handle -> IO ByteString
 233         hGet,                   -- :: Handle -> Int -> IO ByteString
 234         hPut,                   -- :: Handle -> ByteString -> IO ()
 235
 236         -- * Fusion utilities
 237 #if defined(__GLASGOW_HASKELL__)
 238         unpackList, -- eek, otherwise it gets thrown away by the simplifier
 239 #endif
 240
 241         noAL, NoAL, loopArr, loopAcc, loopSndAcc,
 242         loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL,
 243
 244   ) where
 245
 246 import qualified Prelude as P
 247 import Prelude hiding           (reverse,head,tail,last,init,null
 248                                 ,length,map,lines,foldl,foldr,unlines
 249                                 ,concat,any,take,drop,splitAt,takeWhile
 250                                 ,dropWhile,span,break,elem,filter,maximum
 251                                 ,minimum,all,concatMap,foldl1,foldr1
 252                                 ,scanl,scanl1,readFile,writeFile,replicate
 253                                 ,getContents,getLine,putStr,putStrLn
 254                                 ,zip,zipWith,unzip,notElem)
 255
 256 import qualified Data.List as List
 257
 258 import Data.Char
 259 import Data.Word                (Word8)
 260 import Data.Maybe               (listToMaybe)
 261 import Data.Array               (listArray)
 262 import qualified Data.Array as Array ((!))
 263
 264 -- Control.Exception.bracket not available in yhc or nhc
 265 import Control.Exception        (bracket, assert)
 266 import Control.Monad            (when)
 267
 268 import Foreign.C.String         (CString, CStringLen)
 269 import Foreign.C.Types          (CSize,CInt)
 270 import Foreign.ForeignPtr
 271 import Foreign.Marshal.Array
 272 import Foreign.Ptr
 273 import Foreign.Storable         (Storable(..))
 274
 275 -- hGetBuf and hPutBuf not available in yhc or nhc
 276 import System.IO                (stdin,stdout,hClose,hFileSize
 277                                 ,hGetBuf,hPutBuf,openBinaryFile
 278                                 ,Handle,IOMode(..))
 279
 280 #if !defined(__GLASGOW_HASKELL__)
 281 import System.IO.Unsafe
 282 #endif
 283
 284 #if defined(__GLASGOW_HASKELL__)
 285
 286 import Data.Generics            (Data(..), Typeable(..))
 287
 288 import System.IO                (hGetBufNonBlocking)
 289 import System.IO.Error          (isEOFError)
 290
 291 import Foreign.Marshal          (alloca)
 292 import qualified Foreign.Concurrent as FC (newForeignPtr)
 293
 294 import GHC.Handle
 295 import GHC.Prim                 (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
 296 import GHC.Base                 (build, unsafeChr)
 297 import GHC.Word hiding (Word8)
 298 import GHC.Ptr                  (Ptr(..))
 299 import GHC.ST                   (ST(..))
 300 import GHC.IOBase
 301
 302 #endif
 303
 304 -- CFILES stuff is Hugs only
 305 {-# CFILES cbits/fpstring.c #-}
 306
 307 -- -----------------------------------------------------------------------------
 308 --
 309 -- Useful macros, until we have bang patterns
 310 --
 311
 312 #define STRICT1(f) f a | a `seq` False = undefined
 313 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
 314 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
 315 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
 316 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
 317
 318 -- -----------------------------------------------------------------------------
 319
 320 -- | A space-efficient representation of a Word8 vector, supporting many
 321 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
 322 --
 323 -- Instances of Eq, Ord, Read, Show, Data, Typeable
 324 --
 325 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
 326                      {-# UNPACK #-} !Int
 327                      {-# UNPACK #-} !Int
 328
 329 #if defined(__GLASGOW_HASKELL__)
 330     deriving (Data, Typeable)
 331 #endif
 332
 333 instance Eq  ByteString
 334     where (==)    = eq
 335
 336 instance Ord ByteString
 337     where compare = compareBytes
 338
 339 instance Show ByteString where
 340     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
 341
 342 instance Read ByteString where
 343     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
 344
 345 {-
 346 instance Arbitrary PackedString where
 347     arbitrary = P.pack `fmap` arbitrary
 348     coarbitrary s = coarbitrary (P.unpack s)
 349 -}
 350
 351 -- | /O(n)/ Equality on the 'ByteString' type.
 352 eq :: ByteString -> ByteString -> Bool
 353 eq a@(PS p s l) b@(PS p' s' l')
 354     | l /= l'            = False    -- short cut on length
 355     | p == p' && s == s' = True     -- short cut for the same string
 356     | otherwise          = compareBytes a b == EQ
 357 {-# INLINE eq #-}
 358
 359 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
 360 compareBytes :: ByteString -> ByteString -> Ordering
 361 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
 362     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
 363     | x1 == x2 && s1 == s2 && l1 == l2  = EQ  -- short cut for the same string
 364     | otherwise                         = inlinePerformIO $
 365         withForeignPtr x1 $ \p1 ->
 366         withForeignPtr x2 $ \p2 -> do
 367             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
 368             return $ case i `compare` 0 of
 369                         EQ  -> l1 `compare` l2
 370                         x   -> x
 371 {-# INLINE compareBytes #-}
 372
 373 {-
 374 --
 375 -- About 4x slower over 32M
 376 --
 377 compareBytes :: ByteString -> ByteString -> Ordering
 378 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
 379     withForeignPtr fp1 $ \p1 ->
 380         withForeignPtr fp2 $ \p2 ->
 381             cmp (p1 `plusPtr` off1)
 382                 (p2 `plusPtr` off2) 0 len1 len2
 383
 384 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
 385 STRICT5(cmp)
 386 cmp p1 p2 n len1 len2
 387       | n == len1 = if n == len2 then return EQ else return LT
 388       | n == len2 = return GT
 389       | otherwise = do
 390           (a :: Word8) <- peekByteOff p1 n
 391           (b :: Word8) <- peekByteOff p2 n
 392           case a `compare` b of
 393                 EQ -> cmp p1 p2 (n+1) len1 len2
 394                 LT -> return LT
 395                 GT -> return GT
 396 {-# INLINE compareBytes #-}
 397 -}
 398
 399 -- -----------------------------------------------------------------------------
 400 -- Introducing and eliminating 'ByteString's
 401
 402 -- | /O(1)/ The empty 'ByteString'
 403 empty :: ByteString
 404 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
 405 {-# NOINLINE empty #-}
 406
 407 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
 408 packByte :: Word8 -> ByteString
 409 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
 410     withForeignPtr fp $ \p -> poke p c
 411     return $ PS fp 0 1
 412 {-# INLINE packByte #-}
 413
 414 --
 415 -- XXX The unsafePerformIO is critical!
 416 --
 417 -- Otherwise:
 418 --
 419 --  packByte 255 `compare` packByte 127
 420 --
 421 -- is compiled to:
 422 --
 423 --  case mallocByteString 2 of
 424 --      ForeignPtr f internals ->
 425 --           case writeWord8OffAddr# f 0 255 of _ ->
 426 --           case writeWord8OffAddr# f 0 127 of _ ->
 427 --           case eqAddr# f f of
 428 --                  False -> case compare (GHC.Prim.plusAddr# f 0)
 429 --                                        (GHC.Prim.plusAddr# f 0)
 430 --
 431 --
 432
 433 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
 434 --
 435 -- For applications with large numbers of string literals, pack can be a
 436 -- bottleneck. In such cases, consider using packAddress (GHC only).
 437 pack :: [Word8] -> ByteString
 438
 439 #if !defined(__GLASGOW_HASKELL__)
 440
 441 pack str = create (P.length str) $ \p -> go p str
 442     where
 443         go _ []     = return ()
 444         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 445
 446 #else /* hack away */
 447
 448 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
 449     where
 450         go _ _ []        = return ()
 451         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
 452
 453         writeByte p i c = ST $ \s# ->
 454             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
 455
 456 #endif
 457
 458 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
 459 unpack :: ByteString -> [Word8]
 460
 461 #if !defined(__GLASGOW_HASKELL__)
 462
 463 unpack (PS _  _ 0) = []
 464 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 465         go (p `plusPtr` s) (l - 1) []
 466     where
 467         STRICT3(go)
 468         go p 0 acc = peek p          >>= \e -> return (e : acc)
 469         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
 470 {-# INLINE unpack #-}
 471
 472 #else
 473
 474 unpack ps = build (unpackFoldr ps)
 475 {-# INLINE unpack #-}
 476
 477 unpackList :: ByteString -> [Word8]
 478 unpackList (PS fp off len) = withPtr fp $ \p -> do
 479     let STRICT3(loop)
 480         loop _ (-1) acc = return acc
 481         loop q n acc = do
 482            a <- peekByteOff q n
 483            loop q (n-1) (a : acc)
 484     loop (p `plusPtr` off) (len-1) []
 485
 486 {-# RULES
 487 "unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 488  #-}
 489
 490 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
 491 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
 492     let STRICT3(loop)
 493         loop _ (-1) acc = return acc
 494         loop q n    acc = do
 495            a <- peekByteOff q n
 496            loop q (n-1) (a `f` acc)
 497     loop (p `plusPtr` off) (len-1) ch
 498 {-# INLINE [0] unpackFoldr #-}
 499
 500 -- TODO just use normal foldr here.
 501
 502 #endif
 503
 504 ------------------------------------------------------------------------
 505
 506 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
 507 -- conversion function
 508 packWith :: (a -> Word8) -> [a] -> ByteString
 509 packWith k str = create (P.length str) $ \p -> go p str
 510     where
 511         STRICT2(go)
 512         go _ []     = return ()
 513         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
 514 {-# INLINE packWith #-}
 515 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
 516
 517 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
 518 unpackWith :: (Word8 -> a) -> ByteString -> [a]
 519 unpackWith _ (PS _  _ 0) = []
 520 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
 521         go (p `plusPtr` s) (l - 1) []
 522     where
 523         STRICT3(go)
 524         go p 0 acc = peek p          >>= \e -> return (k e : acc)
 525         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
 526 {-# INLINE unpackWith #-}
 527 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
 528
 529 -- ---------------------------------------------------------------------
 530 -- Basic interface
 531
 532 -- | /O(1)/ Test whether a ByteString is empty.
 533 null :: ByteString -> Bool
 534 null (PS _ _ l) = l == 0
 535 {-# INLINE null #-}
 536
 537 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
 538 length :: ByteString -> Int
 539 length (PS _ _ l) = l
 540
 541 #if defined(__GLASGOW_HASKELL__)
 542 {-# INLINE [1] length #-}
 543 #endif
 544
 545 {-#
 546
 547 -- Translate length into a loop.
 548 -- Performace ok, but allocates too much, so disable for now.
 549
 550   "length/loop" forall f acc s .
 551   length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
 552
 553   #-}
 554
 555 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 556 -- complexity, as it requires a memcpy.
 557 cons :: Word8 -> ByteString -> ByteString
 558 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 559         poke p c
 560         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
 561 {-# INLINE cons #-}
 562
 563 -- todo fuse
 564
 565 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 566 snoc :: ByteString -> Word8 -> ByteString
 567 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
 568         memcpy p (f `plusPtr` s) (fromIntegral l)
 569         poke (p `plusPtr` l) c
 570 {-# INLINE snoc #-}
 571
 572 -- todo fuse
 573
 574 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
 575 head :: ByteString -> Word8
 576 head ps@(PS x s _)
 577     | null ps   = errorEmptyList "head"
 578     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
 579 {-# INLINE head #-}
 580
 581 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
 582 tail :: ByteString -> ByteString
 583 tail (PS p s l)
 584     | l <= 0    = errorEmptyList "tail"
 585     | otherwise = PS p (s+1) (l-1)
 586 {-# INLINE tail #-}
 587
 588 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
 589 last :: ByteString -> Word8
 590 last ps@(PS x s l)
 591     | null ps   = errorEmptyList "last"
 592     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
 593 {-# INLINE last #-}
 594
 595 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
 596 init :: ByteString -> ByteString
 597 init (PS p s l)
 598     | l <= 0    = errorEmptyList "init"
 599     | otherwise = PS p s (l-1)
 600 {-# INLINE init #-}
 601
 602 -- | /O(n)/ Append two ByteStrings
 603 append :: ByteString -> ByteString -> ByteString
 604 append xs ys | null xs   = ys
 605              | null ys   = xs
 606              | otherwise = concat [xs,ys]
 607 {-# INLINE append #-}
 608
 609 -- ---------------------------------------------------------------------
 610 -- Transformations
 611
 612 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 613 -- element of @xs@. This function is subject to array fusion.
 614 map :: (Word8 -> Word8) -> ByteString -> ByteString
 615 map f = loopArr . loopU (mapEFL f) noAL
 616 {-# INLINE map #-}
 617
 618 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
 619 -- slightly faster for one-shot cases.
 620 map' :: (Word8 -> Word8) -> ByteString -> ByteString
 621 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
 622     np <- mallocByteString (len+1)
 623     withForeignPtr np $ \p -> do
 624         map_ 0 (a `plusPtr` s) p
 625         return (PS np 0 len)
 626   where
 627
 628     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
 629     STRICT3(map_)
 630     map_ n p1 p2
 631        | n >= len = return ()
 632        | otherwise = do
 633             x <- peekByteOff p1 n
 634             pokeByteOff p2 n (f x)
 635             map_ (n+1) p1 p2
 636 {-# INLINE map' #-}
 637
 638 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 639 reverse :: ByteString -> ByteString
 640 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
 641         c_reverse p (f `plusPtr` s) (fromIntegral l)
 642
 643 {-
 644 reverse = pack . P.reverse . unpack
 645 -}
 646
 647 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 648 -- 'ByteString' and \`intersperses\' that byte between the elements of
 649 -- the 'ByteString'.  It is analogous to the intersperse function on
 650 -- Lists.
 651 intersperse :: Word8 -> ByteString -> ByteString
 652 intersperse c ps@(PS x s l)
 653     | length ps < 2  = ps
 654     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
 655         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
 656
 657 {-
 658 intersperse c = pack . List.intersperse c . unpack
 659 -}
 660
 661 -- | The 'transpose' function transposes the rows and columns of its
 662 -- 'ByteString' argument.
 663 transpose :: [ByteString] -> [ByteString]
 664 transpose ps = P.map pack (List.transpose (P.map unpack ps))
 665
 666 -- ---------------------------------------------------------------------
 667 -- Reducing 'ByteString's
 668
 669 -- | 'foldl', applied to a binary operator, a starting value (typically
 670 -- the left-identity of the operator), and a ByteString, reduces the
 671 -- ByteString using the binary operator, from left to right.
 672 -- This function is subject to array fusion.
 673 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
 674 foldl f z = loopAcc . loopU (foldEFL f) z
 675 {-# INLINE foldl #-}
 676
 677 {-
 678 --
 679 -- About twice as fast with 6.4.1, but not fuseable
 680 -- A simple fold . map is enough to make it worth while.
 681 --
 682 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 683         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 684     where
 685         STRICT3(lgo)
 686         lgo z p q | p == q    = return z
 687                   | otherwise = do c <- peek p
 688                                    lgo (f z c) (p `plusPtr` 1) q
 689 -}
 690
 691 -- | 'foldl\'' is like foldl, but strict in the accumulator.
 692 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
 693 foldl' f z = loopAcc . loopU (foldEFL' f) z
 694 {-# INLINE foldl' #-}
 695
 696 -- | 'foldr', applied to a binary operator, a starting value
 697 -- (typically the right-identity of the operator), and a ByteString,
 698 -- reduces the ByteString using the binary operator, from right to left.
 699 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
 700 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 701         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 702     where
 703         STRICT2(go)
 704         go p q | p == q    = return z
 705                | otherwise = do c  <- peek p
 706                                 ws <- go (p `plusPtr` 1) q
 707                                 return $ c `k` ws
 708
 709 -- | 'foldl1' is a variant of 'foldl' that has no starting value
 710 -- argument, and thus must be applied to non-empty 'ByteStrings'.
 711 -- This function is subject to array fusion.
 712 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 713 foldl1 f ps
 714     | null ps   = errorEmptyList "foldl1"
 715     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
 716
 717 -- | A strict version of 'foldl1'
 718 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 719 foldl1' f ps
 720     | null ps   = errorEmptyList "foldl1'"
 721     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
 722
 723 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
 724 -- and thus must be applied to non-empty 'ByteString's
 725 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
 726 foldr1 f ps
 727     | null ps        = errorEmptyList "foldr1"
 728     | otherwise      = foldr f (last ps) (init ps)
 729
 730 -- ---------------------------------------------------------------------
 731 -- Special folds
 732
 733 -- | /O(n)/ Concatenate a list of ByteStrings.
 734 concat :: [ByteString] -> ByteString
 735 concat []     = empty
 736 concat [ps]   = ps
 737 concat xs     = create len $ \ptr -> go xs ptr
 738   where len = P.sum . P.map length $ xs
 739         STRICT2(go)
 740         go []            _   = return ()
 741         go (PS p s l:ps) ptr = do
 742                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 743                 go ps (ptr `plusPtr` l)
 744
 745 -- | Map a function over a 'ByteString' and concatenate the results
 746 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
 747 concatMap f = foldr (append . f) empty
 748 -- A silly function for ByteStrings anyway.
 749
 750 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
 751 -- any element of the 'ByteString' satisfies the predicate.
 752 any :: (Word8 -> Bool) -> ByteString -> Bool
 753 any _ (PS _ _ 0) = False
 754 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 755         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 756     where
 757         STRICT2(go)
 758         go p q | p == q    = return False
 759                | otherwise = do c <- peek p
 760                                 if f c then return True
 761                                        else go (p `plusPtr` 1) q
 762
 763 -- todo fuse
 764
 765 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
 766 -- if all elements of the 'ByteString' satisfy the predicate.
 767 all :: (Word8 -> Bool) -> ByteString -> Bool
 768 all _ (PS _ _ 0) = True
 769 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
 770         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
 771     where
 772         STRICT2(go)
 773         go p q | p == q     = return True  -- end of list
 774                | otherwise  = do c <- peek p
 775                                  if f c
 776                                     then go (p `plusPtr` 1) q
 777                                     else return False
 778 -- todo fuse
 779
 780 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
 781 maximum :: ByteString -> Word8
 782 maximum xs@(PS x s l)
 783     | null xs   = errorEmptyList "maximum"
 784     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 785                     return $ c_maximum (p `plusPtr` s) (fromIntegral l)
 786 {-# INLINE maximum #-}
 787
 788 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
 789 minimum :: ByteString -> Word8
 790 minimum xs@(PS x s l)
 791     | null xs   = errorEmptyList "minimum"
 792     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
 793                     return $ c_minimum (p `plusPtr` s) (fromIntegral l)
 794 {-# INLINE minimum #-}
 795
 796 -- fusion is too slow here (10x)
 797
 798 {-
 799 maximum xs@(PS x s l)
 800     | null xs   = errorEmptyList "maximum"
 801     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 802                         w <- peek p
 803                         maximum_ (p `plusPtr` s) 0 l w
 804
 805 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 806 STRICT4(maximum_)
 807 maximum_ ptr n m c
 808     | n >= m    = return c
 809     | otherwise = do w <- peekByteOff ptr n
 810                      maximum_ ptr (n+1) m (if w > c then w else c)
 811
 812 minimum xs@(PS x s l)
 813     | null xs   = errorEmptyList "minimum"
 814     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
 815                         w <- peek p
 816                         minimum_ (p `plusPtr` s) 0 l w
 817
 818 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
 819 STRICT4(minimum_)
 820 minimum_ ptr n m c
 821     | n >= m    = return c
 822     | otherwise = do w <- peekByteOff ptr n
 823                      minimum_ ptr (n+1) m (if w < c then w else c)
 824 -}
 825
 826 -- | /O(n)/ map Word8 functions, provided with the index at each position
 827 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
 828 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
 829     go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
 830   where
 831     go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 832     STRICT4(go)
 833     go n f t p | f == p    = return ()
 834                | otherwise = do w <- peek f
 835                                 ((poke t) . k n) w
 836                                 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
 837
 838 -- ---------------------------------------------------------------------
 839 -- Building ByteStrings
 840
 841 -- | 'scanl' is similar to 'foldl', but returns a list of successive
 842 -- reduced values from the left. This function will fuse.
 843 --
 844 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 845 --
 846 -- Note that
 847 --
 848 -- > last (scanl f z xs) == foldl f z xs.
 849 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
 850 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space
 851 {-# INLINE scanl #-}
 852
 853 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
 854 -- This function will fuse.
 855 --
 856 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 857 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
 858 scanl1 f ps
 859     | null ps   = empty
 860     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
 861 {-# INLINE scanl1 #-}
 862
 863 -- ---------------------------------------------------------------------
 864 -- Unfolds and replicates
 865
 866 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
 867 -- the value of every element. The following holds:
 868 --
 869 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
 870 --
 871 -- This implemenation uses @memset(3)@
 872 replicate :: Int -> Word8 -> ByteString
 873 replicate w c | w <= 0    = empty
 874               | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
 875
 876 {-
 877 -- About 5x slower
 878 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
 879     where
 880         STRICT2(go)
 881         go _   0 = return w
 882         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
 883 -}
 884
 885 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
 886 -- 'unfoldrN' builds a ByteString from a seed value.  The function takes
 887 -- the element and returns 'Nothing' if it is done producing the
 888 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
 889 -- prepending to the ByteString and @b@ is used as the next element in a
 890 -- recursive call.
 891 --
 892 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
 893 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
 894 -- final size of the ByteString as an argument. 'cons' can then be
 895 -- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
 896 -- linear complexity. The depth of the recursion is limited to this
 897 -- size, but may be less. For lazy, infinite unfoldr, use
 898 -- 'Data.List.unfoldr' (from 'Data.List').
 899 --
 900 -- Examples:
 901 --
 902 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
 903 --
 904 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
 905 --
 906 -- > unfoldrN n == take n $ List.unfoldr
 907 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
 908 unfoldrN i f w
 909     | i <= 0    = empty
 910     | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
 911     where
 912         STRICT3(go)
 913         go q c n | n == i    = return n      -- stop if we reach `i'
 914                  | otherwise = case f c of
 915                                    Nothing        -> return n
 916                                    Just (a,new_c) -> do
 917                                         poke q a
 918                                         go (q `plusPtr` 1) new_c (n+1)
 919
 920 -- ---------------------------------------------------------------------
 921 -- Substrings
 922
 923 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
 924 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
 925 take :: Int -> ByteString -> ByteString
 926 take n ps@(PS x s l)
 927     | n <= 0    = empty
 928     | n >= l    = ps
 929     | otherwise = PS x s n
 930 {-# INLINE take #-}
 931
 932 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 933 -- elements, or @[]@ if @n > 'length' xs@.
 934 drop  :: Int -> ByteString -> ByteString
 935 drop n ps@(PS x s l)
 936     | n <= 0    = ps
 937     | n >= l    = empty
 938     | otherwise = PS x (s+n) (l-n)
 939 {-# INLINE drop #-}
 940
 941 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 942 splitAt :: Int -> ByteString -> (ByteString, ByteString)
 943 splitAt n ps@(PS x s l)
 944     | n <= 0    = (empty, ps)
 945     | n >= l    = (ps, empty)
 946     | otherwise = (PS x s n, PS x (s+n) (l-n))
 947 {-# INLINE splitAt #-}
 948
 949 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
 950 -- returns the longest prefix (possibly empty) of @xs@ of elements that
 951 -- satisfy @p@.
 952 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 953 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
 954 {-# INLINE takeWhile #-}
 955
 956 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 957 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 958 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
 959 {-# INLINE dropWhile #-}
 960
 961 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 962 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 963 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
 964 {-# INLINE break #-}
 965
 966 -- | 'breakByte' breaks its ByteString argument at the first occurence
 967 -- of the specified byte. It is more efficient than 'break' as it is
 968 -- implemented with @memchr(3)@. I.e.
 969 --
 970 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
 971 --
 972 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
 973 breakByte c p = case elemIndex c p of
 974     Nothing -> (p,empty)
 975     Just n  -> (unsafeTake n p, unsafeDrop n p)
 976 {-# INLINE breakByte #-}
 977
 978 -- | 'spanByte' breaks its ByteString argument at the first
 979 -- occurence of a byte other than its argument. It is more efficient
 980 -- than 'span (==)'
 981 --
 982 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 983 --
 984 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 985 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 986     go (p `plusPtr` s) 0
 987   where
 988     STRICT2(go)
 989     go p i | i >= l    = return (ps, empty)
 990            | otherwise = do c' <- peekByteOff p i
 991                             if c /= c'
 992                                 then return (unsafeTake i ps, unsafeDrop i ps)
 993                                 else go p (i+1)
 994 {-# INLINE spanByte #-}
 995
 996 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
 997 -- occurence of @w@. It behaves like 'break', except the delimiter is
 998 -- not returned, and @Nothing@ is returned if the delimiter is not in
 999 -- the ByteString. I.e.
1000 --
1001 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
1002 --
1003 -- > breakFirst c xs ==
1004 -- > let (x,y) = break (== c) xs
1005 -- > in if null y then Nothing else Just (x, drop 1 y))
1006 --
1007 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
1008 breakFirst c p = case elemIndex c p of
1009    Nothing -> Nothing
1010    Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p)
1011 {-# INLINE breakFirst #-}
1012
1013 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
1014 -- ByteString.
1015 --
1016 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
1017 --
1018 -- and the following are equivalent:
1019 --
1020 -- > breakLast 'c' "abcdef"
1021 -- > let (x,y) = break (=='c') (reverse "abcdef")
1022 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
1023 --
1024 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
1025 breakLast c p = case elemIndexLast c p of
1026     Nothing -> Nothing
1027     Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p)
1028 {-# INLINE breakLast #-}
1029
1030 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1031 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1032 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1033 span p ps = break (not . p) ps
1034 {-# INLINE span #-}
1035
1036 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1037 -- We have
1038 --
1039 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1040 --
1041 -- and
1042 --
1043 -- > spanEnd (not . isSpace) ps
1044 -- >    ==
1045 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1046 --
1047 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1048 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
1049
1050 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1051 -- separators, where the predicate returns True for a separator element.
1052 -- The resulting components do not contain the separators.  Two adjacent
1053 -- separators result in an empty component in the output.  eg.
1054 --
1055 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1056 -- > splitWith (=='a') []        == []
1057 --
1058 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1059
1060 #if defined(__GLASGOW_HASKELL__)
1061 splitWith _pred (PS _  _   0) = []
1062 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1063   where pred# c# = pred_ (W8# c#)
1064
1065         STRICT4(splitWith0)
1066         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1067             splitLoop pred' p 0 off' len' fp'
1068
1069         splitLoop :: (Word# -> Bool)
1070                   -> Ptr Word8
1071                   -> Int -> Int -> Int
1072                   -> ForeignPtr Word8
1073                   -> IO [ByteString]
1074
1075         splitLoop pred' p idx' off' len' fp'
1076             | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1077             | idx' >= len'  = return [PS fp' off' idx']
1078             | otherwise = do
1079                 w <- peekElemOff p (off'+idx')
1080                 if pred' (case w of W8# w# -> w#)
1081                    then return (PS fp' off' idx' :
1082                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1083                    else splitLoop pred' p (idx'+1) off' len' fp'
1084 {-# INLINE splitWith #-}
1085
1086 #else
1087 splitWith _ (PS _ _ 0) = []
1088 splitWith p ps = loop p ps
1089     where
1090         STRICT2(loop)
1091         loop q qs = if null rest then [chunk]
1092                                  else chunk : loop q (unsafeTail rest)
1093             where (chunk,rest) = break q qs
1094 #endif
1095
1096 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1097 -- argument, consuming the delimiter. I.e.
1098 --
1099 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1100 -- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
1101 -- > split 'x'  "x"          == ["",""]
1102 --
1103 -- and
1104 --
1105 -- > join [c] . split c == id
1106 -- > split == splitWith . (==)
1107 --
1108 -- As for all splitting functions in this library, this function does
1109 -- not copy the substrings, it just constructs new 'ByteStrings' that
1110 -- are slices of the original.
1111 --
1112 split :: Word8 -> ByteString -> [ByteString]
1113 split _ (PS _ _ 0) = []
1114 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1115     let ptr = p `plusPtr` s
1116
1117         STRICT1(loop)
1118         loop n = do
1119             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1120             if q == nullPtr
1121                 then [PS x (s+n) (l-n)]
1122                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1123
1124     return (loop 0)
1125 {-# INLINE split #-}
1126
1127 {-
1128 -- slower. but stays inside Haskell.
1129 split _ (PS _  _   0) = []
1130 split (W8# w#) (PS fp off len) = splitWith' off len fp
1131     where
1132         splitWith' off' len' fp' = withPtr fp $ \p ->
1133             splitLoop p 0 off' len' fp'
1134
1135         splitLoop :: Ptr Word8
1136                   -> Int -> Int -> Int
1137                   -> ForeignPtr Word8
1138                   -> IO [ByteString]
1139
1140         STRICT5(splitLoop)
1141         splitLoop p idx' off' len' fp'
1142             | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1143             | idx' >= len'  = return [PS fp' off' idx']
1144             | otherwise = do
1145                 (W8# x#) <- peekElemOff p (off'+idx')
1146                 if word2Int# w# ==# word2Int# x#
1147                    then return (PS fp' off' idx' :
1148                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1149                    else splitLoop p (idx'+1) off' len' fp'
1150 -}
1151
1152 -- | Like 'splitWith', except that sequences of adjacent separators are
1153 -- treated as a single separator. eg.
1154 --
1155 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1156 --
1157 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1158 tokens f = P.filter (not.null) . splitWith f
1159 {-# INLINE tokens #-}
1160
1161 -- | The 'group' function takes a ByteString and returns a list of
1162 -- ByteStrings such that the concatenation of the result is equal to the
1163 -- argument.  Moreover, each sublist in the result contains only equal
1164 -- elements.  For example,
1165 --
1166 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1167 --
1168 -- It is a special case of 'groupBy', which allows the programmer to
1169 -- supply their own equality test. It is about 40% faster than
1170 -- /groupBy (==)/
1171 group :: ByteString -> [ByteString]
1172 group xs
1173     | null xs   = []
1174     | otherwise = ys : group zs
1175     where
1176         (ys, zs) = spanByte (unsafeHead xs) xs
1177
1178 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1179 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1180 groupBy k xs
1181     | null xs   = []
1182     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1183     where
1184         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1185
1186 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1187 -- 'ByteString's and concatenates the list after interspersing the first
1188 -- argument between each element of the list.
1189 join :: ByteString -> [ByteString] -> ByteString
1190 join filler pss = concat (splice pss)
1191     where
1192         splice []  = []
1193         splice [x] = [x]
1194         splice (x:y:xs) = x:filler:splice (y:xs)
1195 {-# INLINE join #-}
1196
1197 --
1198 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1199 -- with a char. Around 4 times faster than the generalised join.
1200 --
1201 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1202 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1203     withForeignPtr ffp $ \fp ->
1204     withForeignPtr fgp $ \gp -> do
1205         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1206         poke (ptr `plusPtr` l) c
1207         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1208     where
1209       len = length f + length g + 1
1210 {-# INLINE joinWithByte #-}
1211
1212 -- ---------------------------------------------------------------------
1213 -- Indexing ByteStrings
1214
1215 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1216 index :: ByteString -> Int -> Word8
1217 index ps n
1218     | n < 0          = moduleError "index" ("negative index: " ++ show n)
1219     | n >= length ps = moduleError "index" ("index too large: " ++ show n
1220                                          ++ ", length = " ++ show (length ps))
1221     | otherwise      = ps `unsafeIndex` n
1222 {-# INLINE index #-}
1223
1224 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1225 -- element in the given 'ByteString' which is equal to the query
1226 -- element, or 'Nothing' if there is no such element.
1227 -- This implementation uses memchr(3).
1228 elemIndex :: Word8 -> ByteString -> Maybe Int
1229 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1230     let p' = p `plusPtr` s
1231         q  = memchr p' c (fromIntegral l)
1232     return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1233 {-# INLINE elemIndex #-}
1234
1235 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1236 -- element in the given 'ByteString' which is equal to the query
1237 -- element, or 'Nothing' if there is no such element. The following
1238 -- holds:
1239 --
1240 -- > elemIndexLast c xs ==
1241 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1242 --
1243 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1244 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1245     go (p `plusPtr` s) (l-1)
1246   where
1247     STRICT2(go)
1248     go p i | i < 0     = return Nothing
1249            | otherwise = do ch' <- peekByteOff p i
1250                             if ch == ch'
1251                                 then return $ Just i
1252                                 else go p (i-1)
1253 {-# INLINE elemIndexLast #-}
1254
1255 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1256 -- the indices of all elements equal to the query element, in ascending order.
1257 -- This implementation uses memchr(3).
1258 elemIndices :: Word8 -> ByteString -> [Int]
1259 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1260     let ptr = p `plusPtr` s
1261
1262         STRICT1(loop)
1263         loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1264                  in if q == nullPtr
1265                         then []
1266                         else let i = q `minusPtr` ptr
1267                              in i : loop (i+1)
1268     return (loop 0)
1269 {-# INLINE elemIndices #-}
1270
1271 {-
1272 -- much slower
1273 elemIndices :: Word8 -> ByteString -> [Int]
1274 elemIndices c ps = loop 0 ps
1275    where STRICT2(loop)
1276          loop _ ps' | null ps'            = []
1277          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1278                     | otherwise           = loop (n+1) (unsafeTail ps')
1279 -}
1280
1281 -- | count returns the number of times its argument appears in the ByteString
1282 --
1283 -- > count = length . elemIndices
1284 --
1285 -- But more efficiently than using length on the intermediate list.
1286 count :: Word8 -> ByteString -> Int
1287 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1288     return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1289 {-# INLINE count #-}
1290
1291 {-
1292 --
1293 -- around 30% slower
1294 --
1295 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1296      go (p `plusPtr` s) (fromIntegral m) 0
1297     where
1298         go :: Ptr Word8 -> CSize -> Int -> IO Int
1299         STRICT3(go)
1300         go p l i = do
1301             let q = memchr p w l
1302             if q == nullPtr
1303                 then return i
1304                 else do let k = fromIntegral $ q `minusPtr` p
1305                         go (q `plusPtr` 1) (l-k-1) (i+1)
1306 -}
1307
1308 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1309 -- returns the index of the first element in the ByteString
1310 -- satisfying the predicate.
1311 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1312 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1313   where
1314     STRICT2(go)
1315     go ptr n | n >= l    = return Nothing
1316              | otherwise = do w <- peek ptr
1317                               if k w
1318                                 then return (Just n)
1319                                 else go (ptr `plusPtr` 1) (n+1)
1320 {-# INLINE findIndex #-}
1321
1322 -- | The 'findIndices' function extends 'findIndex', by returning the
1323 -- indices of all elements satisfying the predicate, in ascending order.
1324 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1325 findIndices p ps = loop 0 ps
1326    where
1327      STRICT2(loop)
1328      loop n qs | null qs           = []
1329                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1330                | otherwise         =     loop (n+1) (unsafeTail qs)
1331
1332 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1333 -- of the string if no element is found, rather than Nothing.
1334 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1335 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1336   where
1337     STRICT2(go)
1338     go ptr n | n >= l    = return l
1339              | otherwise = do w <- peek ptr
1340                               if k w
1341                                 then return n
1342                                 else go (ptr `plusPtr` 1) (n+1)
1343 {-# INLINE findIndexOrEnd #-}
1344
1345 -- ---------------------------------------------------------------------
1346 -- Searching ByteStrings
1347
1348 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1349 elem :: Word8 -> ByteString -> Bool
1350 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1351 {-# INLINE elem #-}
1352
1353 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1354 notElem :: Word8 -> ByteString -> Bool
1355 notElem c ps = not (elem c ps)
1356 {-# INLINE notElem #-}
1357
1358 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1359 -- returns a ByteString containing those characters that satisfy the
1360 -- predicate. This function is subject to array fusion.
1361 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1362 filter p  = loopArr . loopU (filterEFL p) noAL
1363 {-# INLINE filter #-}
1364
1365 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1366 -- around 2x faster for some one-shot applications.
1367 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1368 filter' k ps@(PS x s l)
1369     | null ps   = ps
1370     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1371         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1372         return (t `minusPtr` p) -- actual length
1373     where
1374         STRICT3(go)
1375         go f t end | f == end  = return t
1376                    | otherwise = do
1377                         w <- peek f
1378                         if k w
1379                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1380                             else             go (f `plusPtr` 1) t               end
1381 {-# INLINE filter' #-}
1382
1383 --
1384 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1385 -- case of filtering a single byte. It is more efficient to use
1386 -- /filterByte/ in this case.
1387 --
1388 -- > filterByte == filter . (==)
1389 --
1390 -- filterByte is around 10x faster, and uses much less space, than its
1391 -- filter equivalent
1392 filterByte :: Word8 -> ByteString -> ByteString
1393 filterByte w ps = replicate (count w ps) w
1394 {-# INLINE filterByte #-}
1395
1396 --
1397 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1398 -- case of filtering a single byte out of a list. It is more efficient
1399 -- to use /filterNotByte/ in this case.
1400 --
1401 -- > filterNotByte == filter . (/=)
1402 --
1403 -- filterNotByte is around 2x faster than its filter equivalent.
1404 filterNotByte :: Word8 -> ByteString -> ByteString
1405 filterNotByte w = filter' (/= w)
1406 {-# INLINE filterNotByte #-}
1407
1408 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1409 -- and returns the first element in matching the predicate, or 'Nothing'
1410 -- if there is no such element.
1411 --
1412 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1413 --
1414 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1415 find f p = case findIndex f p of
1416                     Just n -> Just (p `unsafeIndex` n)
1417                     _      -> Nothing
1418 {-# INLINE find #-}
1419
1420 {-
1421 --
1422 -- fuseable, but we don't want to walk the whole array.
1423 --
1424 find k = foldl findEFL Nothing
1425     where findEFL a@(Just _) _ = a
1426           findEFL _          c | k c       = Just c
1427                                | otherwise = Nothing
1428 -}
1429
1430 -- ---------------------------------------------------------------------
1431 -- Searching for substrings
1432
1433 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1434 -- iff the first is a prefix of the second.
1435 isPrefixOf :: ByteString -> ByteString -> Bool
1436 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1437     | l1 == 0   = True
1438     | l2 < l1   = False
1439     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1440         withForeignPtr x2 $ \p2 -> do
1441             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1442             return (i == 0)
1443
1444 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1445 -- iff the first is a suffix of the second.
1446 --
1447 -- The following holds:
1448 --
1449 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1450 --
1451 -- However, the real implemenation uses memcmp to compare the end of the
1452 -- string only, with no reverse required..
1453 isSuffixOf :: ByteString -> ByteString -> Bool
1454 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1455     | l1 == 0   = True
1456     | l2 < l1   = False
1457     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1458         withForeignPtr x2 $ \p2 -> do
1459             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1460             return (i == 0)
1461
1462 -- | Check whether one string is a substring of another. @isSubstringOf
1463 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1464 isSubstringOf :: ByteString -- ^ String to search for.
1465               -> ByteString -- ^ String to search in.
1466               -> Bool
1467 isSubstringOf p s = not $ P.null $ findSubstrings p s
1468
1469 -- | Get the first index of a substring in another string,
1470 --   or 'Nothing' if the string is not found.
1471 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1472 findSubstring :: ByteString -- ^ String to search for.
1473               -> ByteString -- ^ String to seach in.
1474               -> Maybe Int
1475 findSubstring = (listToMaybe .) . findSubstrings
1476
1477 -- | Find the indexes of all (possibly overlapping) occurances of a
1478 -- substring in a string.  This function uses the Knuth-Morris-Pratt
1479 -- string matching algorithm.
1480 findSubstrings :: ByteString -- ^ String to search for.
1481                -> ByteString -- ^ String to seach in.
1482                -> [Int]
1483
1484 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1485   where
1486       patc x = pat `unsafeIndex` x
1487       strc x = str `unsafeIndex` x
1488
1489       -- maybe we should make kmpNext a UArray before using it in search?
1490       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1491       kmpNextL p _ | null p = []
1492       kmpNextL p j = let j' = next (unsafeHead p) j + 1
1493                          ps = unsafeTail p
1494                          x = if not (null ps) && unsafeHead ps == patc j'
1495                                 then kmpNext Array.! j' else j'
1496                         in x:kmpNextL ps j'
1497       search i j = match ++ rest -- i: position in string, j: position in pattern
1498         where match = if j == m then [(i - j)] else []
1499               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1500       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1501                | otherwise = j
1502
1503 -- ---------------------------------------------------------------------
1504 -- Zipping
1505
1506 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1507 -- corresponding pairs of bytes. If one input ByteString is short,
1508 -- excess elements of the longer ByteString are discarded. This is
1509 -- equivalent to a pair of 'unpack' operations.
1510 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1511 zip ps qs
1512     | null ps || null qs = []
1513     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1514
1515 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1516 -- the first argument, instead of a tupling function.  For example,
1517 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1518 -- corresponding sums.
1519 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1520 zipWith f ps qs
1521     | null ps || null qs = []
1522     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1523
1524 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1525 -- ByteStrings. Note that this performs two 'pack' operations.
1526 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1527 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1528 {-# INLINE unzip #-}
1529
1530 -- ---------------------------------------------------------------------
1531 -- Special lists
1532
1533 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1534 inits :: ByteString -> [ByteString]
1535 inits (PS x s l) = [PS x s n | n <- [0..l]]
1536
1537 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1538 tails :: ByteString -> [ByteString]
1539 tails p | null p    = [empty]
1540         | otherwise = p : tails (unsafeTail p)
1541
1542 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1543
1544 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1545 elems :: ByteString -> [ByteString]
1546 elems (PS _ _ 0) = []
1547 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1548 {-# INLINE elems #-}
1549
1550 -- ---------------------------------------------------------------------
1551 -- ** Ordered 'ByteString's
1552
1553 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1554 sort :: ByteString -> ByteString
1555 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1556
1557     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1558     withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1559
1560     let STRICT2(go)
1561         go 256 _   = return ()
1562         go i   ptr = do n <- peekElemOff arr i
1563                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1564                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
1565     go 0 p
1566
1567 -- "countEach counts str l" counts the number of occurences of each Word8 in
1568 -- str, and stores the result in counts.
1569 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1570 STRICT3(countEach)
1571 countEach counts str l = go 0
1572  where
1573     STRICT1(go)
1574     go i | i == l    = return ()
1575          | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1576                           x <- peekElemOff counts k
1577                           pokeElemOff counts k (x + 1)
1578                           go (i + 1)
1579
1580 {-
1581 sort :: ByteString -> ByteString
1582 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1583         memcpy p (f `plusPtr` s) l
1584         c_qsort p l -- inplace
1585 -}
1586
1587 {-
1588 sort = pack . List.sort . unpack
1589 -}
1590
1591 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1592 --
1593 -- Try some linear sorts: radix, counting
1594 -- Or mergesort.
1595 --
1596 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1597 -- sortBy f ps = undefined
1598
1599 -- ---------------------------------------------------------------------
1600 --
1601 -- Extensions to the basic interface
1602 --
1603
1604 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1605 -- check for the empty case, so there is an obligation on the programmer
1606 -- to provide a proof that the ByteString is non-empty.
1607 unsafeHead :: ByteString -> Word8
1608 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1609 {-# INLINE unsafeHead #-}
1610
1611 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1612 -- check for the empty case. As with 'unsafeHead', the programmer must
1613 -- provide a separate proof that the ByteString is non-empty.
1614 unsafeTail :: ByteString -> ByteString
1615 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1616 {-# INLINE unsafeTail #-}
1617
1618 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1619 -- This omits the bounds check, which means there is an accompanying
1620 -- obligation on the programmer to ensure the bounds are checked in some
1621 -- other way.
1622 unsafeIndex :: ByteString -> Int -> Word8
1623 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1624 {-# INLINE unsafeIndex #-}
1625
1626 -- | A variety of 'take' which omits the checks on @n@ so there is an
1627 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1628 unsafeTake :: Int -> ByteString -> ByteString
1629 unsafeTake n (PS x s l) =
1630   assert (0 <= n && n <= l) $ PS x s n
1631 {-# INLINE unsafeTake #-}
1632
1633 -- | A variety of 'drop' which omits the checks on @n@ so there is an
1634 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1635 unsafeDrop  :: Int -> ByteString -> ByteString
1636 unsafeDrop n (PS x s l) =
1637   assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
1638 {-# INLINE unsafeDrop #-}
1639
1640 -- ---------------------------------------------------------------------
1641 -- Low level constructors
1642
1643 #if defined(__GLASGOW_HASKELL__)
1644 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1645 -- Addr\# (an arbitrary machine address assumed to point outside the
1646 -- garbage-collected heap) into a @ByteString@. A much faster way to
1647 -- create an Addr\# is with an unboxed string literal, than to pack a
1648 -- boxed string. A unboxed string literal is compiled to a static @char
1649 -- []@ by GHC. Establishing the length of the string requires a call to
1650 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1651 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1652 -- if you know the length of the string statically.
1653 --
1654 -- An example:
1655 --
1656 -- > literalFS = packAddress "literal"#
1657 --
1658 packAddress :: Addr# -> ByteString
1659 packAddress addr# = inlinePerformIO $ do
1660     p <- newForeignPtr_ cstr
1661     return $ PS p 0 (fromIntegral $ c_strlen cstr)
1662   where
1663     cstr = Ptr addr#
1664 {-# INLINE packAddress #-}
1665
1666 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1667 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1668 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1669 -- 'Addr\#' to the string, and the length of the string. Make sure the
1670 -- length is correct, otherwise use the safer 'packAddress' (where the
1671 -- length will be calculated once at runtime).
1672 unsafePackAddress :: Int -> Addr# -> ByteString
1673 unsafePackAddress len addr# = inlinePerformIO $ do
1674     p <- newForeignPtr_ cstr
1675     return $ PS p 0 len
1676     where cstr = Ptr addr#
1677
1678 #endif
1679
1680 -- | /O(1)/ Build a ByteString from a ForeignPtr
1681 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1682 fromForeignPtr fp l = PS fp 0 l
1683
1684 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1685 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1686 toForeignPtr (PS ps s l) = (ps, s, l)
1687
1688 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1689 -- current 'ByteString' from any larger string it was created from, as
1690 -- an 'Int'.
1691 skipIndex :: ByteString -> Int
1692 skipIndex (PS _ s _) = s
1693 {-# INLINE skipIndex #-}
1694
1695 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1696 -- finalizer associated to it. The ByteString length is calculated using
1697 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1698 packCString :: CString -> ByteString
1699 packCString cstr = inlinePerformIO $ do
1700     fp <- newForeignPtr_ (castPtr cstr)
1701     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1702
1703 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1704 -- have /no/ finalizer associated with it. This operation has /O(1)/
1705 -- complexity as we already know the final size, so no /strlen(3)/ is
1706 -- required.
1707 packCStringLen :: CStringLen -> ByteString
1708 packCStringLen (ptr,len) = inlinePerformIO $ do
1709     fp <- newForeignPtr_ (castPtr ptr)
1710     return $ PS fp 0 (fromIntegral len)
1711
1712 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1713 -- have a @free(3)@ finalizer associated to it.
1714 packMallocCString :: CString -> ByteString
1715 packMallocCString cstr = inlinePerformIO $ do
1716     fp <- newForeignFreePtr (castPtr cstr)
1717     return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1718
1719 #if defined(__GLASGOW_HASKELL__)
1720 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1721 -- length, and an IO action representing a finalizer. This function is
1722 -- not available on Hugs.
1723 --
1724 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1725 packCStringFinalizer p l f = do
1726     fp <- FC.newForeignPtr p f
1727     return $ PS fp 0 l
1728
1729 -- | Explicitly run the finaliser associated with a 'ByteString'.
1730 -- Further references to this value may generate invalid memory
1731 -- references. This operation is unsafe, as there may be other
1732 -- 'ByteStrings' referring to the same underlying pages. If you use
1733 -- this, you need to have a proof of some kind that all 'ByteString's
1734 -- ever generated from the underlying byte array are no longer live.
1735 unsafeFinalize :: ByteString -> IO ()
1736 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1737
1738 #endif
1739
1740 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1741 --   The @CString@ should not be freed afterwards. This is a memcpy(3).
1742 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1743 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1744     where
1745       alloc = withForeignPtr ps $ \p -> do
1746                 buf <- c_malloc (fromIntegral l+1)
1747                 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1748                 poke (buf `plusPtr` l) (0::Word8)
1749                 return $ castPtr buf
1750
1751 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1752 -- Warning: modifying the @CString@ will affect the @ByteString@.
1753 -- Why is this function unsafe? It relies on the null byte at the end of
1754 -- the ByteString to be there. This is /not/ the case if your ByteString
1755 -- has been spliced from a larger string (i.e. with take or drop).
1756 -- Unless you can guarantee the null byte, you should use the safe
1757 -- version, which will copy the string first.
1758 --
1759 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1760 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1761
1762 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1763 --   This is mainly useful to allow the rest of the data pointed
1764 --   to by the 'ByteString' to be garbage collected, for example
1765 --   if a large string has been read in, and only a small part of it
1766 --   is needed in the rest of the program.
1767 copy :: ByteString -> ByteString
1768 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1769     memcpy p (f `plusPtr` s) (fromIntegral l)
1770
1771 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1772 -- CString is going to be deallocated from C land.
1773 copyCString :: CString -> IO ByteString
1774 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1775
1776 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1777 copyCStringLen :: CStringLen -> IO ByteString
1778 copyCStringLen (cstr, len) = do
1779     fp <- mallocForeignPtrArray (len+1)
1780     withForeignPtr fp $ \p -> do
1781         memcpy p (castPtr cstr) (fromIntegral len)
1782         poke (p `plusPtr` len) (0 :: Word8)
1783     return $! PS fp 0 len
1784
1785 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1786 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1787 -- This is analogous to unsafeUseAsCString, and comes with the same
1788 -- safety requirements.
1789 --
1790 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1791 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1792
1793 -- | Given the maximum size needed and a function to make the contents
1794 -- of a ByteString, generate makes the 'ByteString'. The generating
1795 -- function is required to return the actual final size (<= the maximum
1796 -- size), and the resulting byte array is realloced to this size.  The
1797 -- string is padded at the end with a null byte.
1798 --
1799 -- generate is the main mechanism for creating custom, efficient
1800 -- ByteString functions, using Haskell or C functions to fill the space.
1801 --
1802 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1803 generate i f = do
1804     fp      <- mallocByteString i
1805     (ptr,n) <- withForeignPtr fp $ \p -> do
1806         i' <- f p
1807         if i' == i
1808             then return (fp,i')
1809             else do fp_ <- mallocByteString i'      -- realloc
1810                     withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1811                     return (fp_,i')
1812     return (PS ptr 0 n)
1813
1814 {-
1815 --
1816 -- On the C malloc heap. Less fun.
1817 --
1818 generate i f = do
1819     p <- mallocArray (i+1)
1820     i' <- f p
1821     p' <- reallocArray p (i'+1)
1822     poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
1823     fp <- newForeignFreePtr p'
1824     return $ PS fp 0 i'
1825 -}
1826
1827 -- ---------------------------------------------------------------------
1828 -- line IO
1829
1830 #if defined(__GLASGOW_HASKELL__)
1831
1832 -- | getLine, read a line from stdin.
1833 getLine :: IO ByteString
1834 getLine = hGetLine stdin
1835
1836 -- | Lazily construct a list of lines of ByteStrings. This will be much
1837 -- better on memory consumption than using lines =<< getContents.
1838 hGetLines :: Handle -> IO [ByteString]
1839 hGetLines h = go
1840     where
1841         go = unsafeInterleaveIO $ do
1842                 ms <- catch (hGetLine h >>= return . Just)
1843                             (\_ -> return Nothing)
1844                 case ms of
1845                     Nothing -> return []
1846                     Just s  -> do ss <- go
1847                                   return (s:ss)
1848
1849 -- | hGetLine. read a ByteString from a handle
1850 hGetLine :: Handle -> IO ByteString
1851 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1852     case haBufferMode handle_ of
1853        NoBuffering -> error "no buffering"
1854        _other      -> hGetLineBuffered handle_
1855
1856  where
1857     hGetLineBuffered handle_ = do
1858         let ref = haBuffer handle_
1859         buf <- readIORef ref
1860         hGetLineBufferedLoop handle_ ref buf 0 []
1861
1862     hGetLineBufferedLoop handle_ ref
1863             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1864         len `seq` do
1865         off <- findEOL r w raw
1866         let new_len = len + off - r
1867         xs <- mkPS raw r off
1868
1869       -- if eol == True, then off is the offset of the '\n'
1870       -- otherwise off == w and the buffer is now empty.
1871         if off /= w
1872             then do if (w == off + 1)
1873                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1874                             else writeIORef ref buf{ bufRPtr = off + 1 }
1875                     mkBigPS new_len (xs:xss)
1876             else do
1877                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1878                                     buf{ bufWPtr=0, bufRPtr=0 }
1879                  case maybe_buf of
1880                     -- Nothing indicates we caught an EOF, and we may have a
1881                     -- partial line to return.
1882                     Nothing -> do
1883                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1884                          if new_len > 0
1885                             then mkBigPS new_len (xs:xss)
1886                             else ioe_EOF
1887                     Just new_buf ->
1888                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1889
1890     -- find the end-of-line character, if there is one
1891     findEOL r w raw
1892         | r == w = return w
1893         | otherwise =  do
1894             (c,r') <- readCharFromBuffer raw r
1895             if c == '\n'
1896                 then return r -- NB. not r': don't include the '\n'
1897                 else findEOL r' w raw
1898
1899     maybeFillReadBuffer fd is_line is_stream buf = catch
1900         (do buf' <- fillReadBuffer fd is_line is_stream buf
1901             return (Just buf'))
1902         (\e -> if isEOFError e then return Nothing else ioError e)
1903
1904 -- TODO, rewrite to use normal memcpy
1905 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1906 mkPS buf start end = do
1907     let len = end - start
1908     fp <- mallocByteString len
1909     withForeignPtr fp $ \p -> do
1910         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1911         return (PS fp 0 len)
1912
1913 mkBigPS :: Int -> [ByteString] -> IO ByteString
1914 mkBigPS _ [ps] = return ps
1915 mkBigPS _ pss = return $! concat (P.reverse pss)
1916
1917 #endif
1918
1919 -- ---------------------------------------------------------------------
1920 -- Block IO
1921
1922 -- | Outputs a 'ByteString' to the specified 'Handle'.
1923 hPut :: Handle -> ByteString -> IO ()
1924 hPut _ (PS _ _ 0)  = return ()
1925 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1926 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1927
1928 -- | Write a ByteString to stdout
1929 putStr :: ByteString -> IO ()
1930 putStr = hPut stdout
1931
1932 -- | Write a ByteString to stdout, appending a newline byte
1933 putStrLn :: ByteString -> IO ()
1934 putStrLn ps = hPut stdout ps >> hPut stdout nl
1935     where nl = packByte 0x0a
1936
1937 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
1938 -- is far more efficient than reading the characters into a 'String'
1939 -- and then using 'pack'.
1940 hGet :: Handle -> Int -> IO ByteString
1941 hGet _ 0 = return empty
1942 hGet h i = do fp <- mallocByteString i
1943               l  <- withForeignPtr fp $ \p-> hGetBuf h p i
1944               return $ PS fp 0 l
1945
1946 #if defined(__GLASGOW_HASKELL__)
1947 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1948 -- waiting for data to become available, instead it returns only whatever data
1949 -- is available.
1950 hGetNonBlocking :: Handle -> Int -> IO ByteString
1951 hGetNonBlocking _ 0 = return empty
1952 hGetNonBlocking h i = do
1953     fp <- mallocByteString i
1954     l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1955     return $ PS fp 0 l
1956 #endif
1957
1958 -- | Read entire handle contents into a 'ByteString'.
1959 -- This function reads chunks at a time, doubling the chunksize on each
1960 -- read. The final buffer is then realloced to the appropriate size. For
1961 -- files > half of available memory, this may lead to memory exhaustion.
1962 -- Consider using 'readFile' in this case.
1963 --
1964 -- As with 'hGet', the string representation in the file is assumed to
1965 -- be ISO-8859-1.
1966 --
1967 hGetContents :: Handle -> IO ByteString
1968 hGetContents h = do
1969     let start_size = 1024
1970     p <- mallocArray start_size
1971     i <- hGetBuf h p start_size
1972     if i < start_size
1973         then do p' <- reallocArray p i
1974                 fp <- newForeignFreePtr p'
1975                 return $ PS fp 0 i
1976         else f p start_size
1977     where
1978         f p s = do
1979             let s' = 2 * s
1980             p' <- reallocArray p s'
1981             i  <- hGetBuf h (p' `plusPtr` s) s
1982             if i < s
1983                 then do let i' = s + i
1984                         p'' <- reallocArray p' i'
1985                         fp  <- newForeignFreePtr p''
1986                         return $ PS fp 0 i'
1987                 else f p' s'
1988
1989 -- | getContents. Equivalent to hGetContents stdin
1990 getContents :: IO ByteString
1991 getContents = hGetContents stdin
1992
1993 -- | Read an entire file strictly into a 'ByteString'.  This is far more
1994 -- efficient than reading the characters into a 'String' and then using
1995 -- 'pack'.  It also may be more efficient than opening the file and
1996 -- reading it using hGet.
1997 readFile :: FilePath -> IO ByteString
1998 readFile f = do
1999     h <- openBinaryFile f ReadMode
2000     l <- hFileSize h
2001     s <- hGet h $ fromIntegral l
2002     hClose h
2003     return s
2004
2005 -- | Write a 'ByteString' to a file.
2006 writeFile :: FilePath -> ByteString -> IO ()
2007 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
2008     (\h -> hPut h ps)
2009
2010 {-
2011 --
2012 -- Disable until we can move it into a portable .hsc file
2013 --
2014
2015 -- | Like readFile, this reads an entire file directly into a
2016 -- 'ByteString', but it is even more efficient.  It involves directly
2017 -- mapping the file to memory.  This has the advantage that the contents
2018 -- of the file never need to be copied.  Also, under memory pressure the
2019 -- page may simply be discarded, while in the case of readFile it would
2020 -- need to be written to swap.  If you read many small files, mmapFile
2021 -- will be less memory-efficient than readFile, since each mmapFile
2022 -- takes up a separate page of memory.  Also, you can run into bus
2023 -- errors if the file is modified.  As with 'readFile', the string
2024 -- representation in the file is assumed to be ISO-8859-1.
2025 --
2026 -- On systems without mmap, this is the same as a readFile.
2027 --
2028 mmapFile :: FilePath -> IO ByteString
2029 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
2030
2031 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
2032 mmap f = do
2033     h <- openBinaryFile f ReadMode
2034     l <- fromIntegral `fmap` hFileSize h
2035     -- Don't bother mmaping small files because each mmapped file takes up
2036     -- at least one full VM block.
2037     if l < mmap_limit
2038        then do thefp <- mallocByteString l
2039                withForeignPtr thefp $ \p-> hGetBuf h p l
2040                hClose h
2041                return (thefp, l)
2042        else do
2043                -- unix only :(
2044                fd <- fromIntegral `fmap` handleToFd h
2045                p  <- my_mmap l fd
2046                fp <- if p == nullPtr
2047                      then do thefp <- mallocByteString l
2048                              withForeignPtr thefp $ \p' -> hGetBuf h p' l
2049                              return thefp
2050                      else do
2051                           -- The munmap leads to crashes on OpenBSD.
2052                           -- maybe there's a use after unmap in there somewhere?
2053 #if !defined(__OpenBSD__)
2054                              let unmap = c_munmap p l >> return ()
2055 #else
2056                              let unmap = return ()
2057 #endif
2058                              fp <- FC.newForeignPtr p unmap
2059                              return fp
2060                c_close fd
2061                hClose h
2062                return (fp, l)
2063     where mmap_limit = 16*1024
2064 -}
2065
2066 #if defined(__GLASGOW_HASKELL__)
2067 --
2068 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
2069 --
2070 getArgs :: IO [ByteString]
2071 getArgs =
2072   alloca $ \ p_argc ->
2073   alloca $ \ p_argv -> do
2074     getProgArgv p_argc p_argv
2075     p    <- fromIntegral `fmap` peek p_argc
2076     argv <- peek p_argv
2077     P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
2078 #endif
2079
2080 -- ---------------------------------------------------------------------
2081 -- Internal utilities
2082
2083 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
2084 -- silently truncate to 8 bits Chars > '\255'. They are provided as
2085 -- convenience for ByteString construction.
2086 w2c :: Word8 -> Char
2087 #if !defined(__GLASGOW_HASKELL__)
2088 w2c = chr . fromIntegral
2089 #else
2090 w2c = unsafeChr . fromIntegral
2091 #endif
2092 {-# INLINE w2c #-}
2093
2094 c2w :: Char -> Word8
2095 c2w = fromIntegral . ord
2096 {-# INLINE c2w #-}
2097
2098 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2099 -- is padded with a null byte.
2100 mallocByteString :: Int -> IO (ForeignPtr Word8)
2101 mallocByteString l = do
2102     fp <- mallocForeignPtrArray (l+1)
2103     withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2104     return fp
2105
2106 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2107 -- argument gives the final size of the ByteString. Unlike 'generate'
2108 -- the ByteString is not reallocated if the final size is less than the
2109 -- estimated size. Also, unlike 'generate' ByteString's created this way
2110 -- are managed on the Haskell heap.
2111 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2112 create l write_ptr = inlinePerformIO $ do
2113     fp <- mallocByteString (l+1)
2114     withForeignPtr fp $ \p -> write_ptr p
2115     return $ PS fp 0 l
2116 {-# INLINE create #-}
2117
2118 -- | Perform an operation with a temporary ByteString
2119 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2120 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2121 {-# INLINE withPtr #-}
2122
2123 -- Common up near identical calls to `error' to reduce the number
2124 -- constant strings created when compiled:
2125 errorEmptyList :: String -> a
2126 errorEmptyList fun = moduleError fun "empty ByteString"
2127 {-# NOINLINE errorEmptyList #-}
2128
2129 moduleError :: String -> String -> a
2130 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2131 {-# NOINLINE moduleError #-}
2132
2133 -- Find from the end of the string using predicate
2134 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2135 STRICT2(findFromEndUntil)
2136 findFromEndUntil f ps@(PS x s l) =
2137     if null ps then 0
2138     else if f (last ps) then l
2139          else findFromEndUntil f (PS x s (l-1))
2140
2141 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2142 -- it exposes lots of things to further inlining
2143 --
2144 {-# INLINE inlinePerformIO #-}
2145 inlinePerformIO :: IO a -> a
2146 #if defined(__GLASGOW_HASKELL__)
2147 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2148 #else
2149 inlinePerformIO = unsafePerformIO
2150 #endif
2151
2152 {-# INLINE newForeignFreePtr #-}
2153 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2154 #if defined(__GLASGOW_HASKELL__)
2155 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2156 #else
2157 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2158 #endif
2159
2160 -- ---------------------------------------------------------------------
2161 --
2162 -- Standard C functions
2163 --
2164
2165 foreign import ccall unsafe "string.h strlen" c_strlen
2166     :: CString -> CInt
2167
2168 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2169     :: CInt -> IO (Ptr Word8)
2170
2171 foreign import ccall unsafe "static stdlib.h free" c_free
2172     :: Ptr Word8 -> IO ()
2173
2174 #if !defined(__GLASGOW_HASKELL__)
2175 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2176     :: FunPtr (Ptr Word8 -> IO ())
2177 #endif
2178
2179 foreign import ccall unsafe "string.h memset" memset
2180     :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2181
2182 foreign import ccall unsafe "string.h memchr" memchr
2183     :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2184
2185 foreign import ccall unsafe "string.h memcmp" memcmp
2186     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2187
2188 foreign import ccall unsafe "string.h memcpy" memcpy
2189     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2190
2191 -- ---------------------------------------------------------------------
2192 --
2193 -- Uses our C code
2194 --
2195
2196 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2197     :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2198
2199 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2200     :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2201
2202 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2203     :: Ptr Word8 -> CInt -> Word8
2204
2205 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2206     :: Ptr Word8 -> CInt -> Word8
2207
2208 foreign import ccall unsafe "static fpstring.h count" c_count
2209     :: Ptr Word8 -> CInt -> Word8 -> CInt
2210
2211 -- ---------------------------------------------------------------------
2212 -- MMap
2213
2214 {-
2215 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2216     :: Int -> Int -> IO (Ptr Word8)
2217
2218 foreign import ccall unsafe "static unistd.h close" c_close
2219     :: Int -> IO Int
2220
2221 #  if !defined(__OpenBSD__)
2222 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2223     :: Ptr Word8 -> Int -> IO Int
2224 #  endif
2225 -}
2226
2227 -- ---------------------------------------------------------------------
2228 -- Internal GHC Haskell magic
2229
2230 #if defined(__GLASGOW_HASKELL__)
2231 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2232     getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2233
2234 foreign import ccall unsafe "__hscore_memcpy_src_off"
2235    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2236 #endif
2237
2238 -- ---------------------------------------------------------------------
2239 --
2240 -- Functional array fusion for ByteStrings.
2241 --
2242 -- From the Data Parallel Haskell project,
2243 --      http://www.cse.unsw.edu.au/~chak/project/dph/
2244 --
2245
2246 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2247 -- the fact that no bottoms of this type are ever constructed; hence, we can
2248 -- assume @(_ :: NoAL) `seq` x = x@.
2249 --
2250 data NoAL = NoAL
2251
2252 -- | Special forms of loop arguments
2253 --
2254 -- * These are common special cases for the three function arguments of gen
2255 --   and loop; we give them special names to make it easier to trigger RULES
2256 --   applying in the special cases represented by these arguments.  The
2257 --   "INLINE [1]" makes sure that these functions are only inlined in the last
2258 --   two simplifier phases.
2259 --
2260 -- * In the case where the accumulator is not needed, it is better to always
2261 --   explicitly return a value `()', rather than just copy the input to the
2262 --   output, as the former gives GHC better local information.
2263 --
2264
2265 -- | Element function expressing a mapping only
2266 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2267 mapEFL f = \_ e -> (noAL, (Just $ f e))
2268 #if defined(__GLASGOW_HASKELL__)
2269 {-# INLINE [1] mapEFL #-}
2270 #endif
2271
2272 -- | Element function implementing a filter function only
2273 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2274 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2275 #if defined(__GLASGOW_HASKELL__)
2276 {-# INLINE [1] filterEFL #-}
2277 #endif
2278
2279 -- |Element function expressing a reduction only
2280 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2281 foldEFL f = \a e -> (f a e, Nothing)
2282 #if defined(__GLASGOW_HASKELL__)
2283 {-# INLINE [1] foldEFL #-}
2284 #endif
2285
2286 -- | A strict foldEFL.
2287 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2288 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2289 #if defined(__GLASGOW_HASKELL__)
2290 {-# INLINE [1] foldEFL' #-}
2291 #endif
2292
2293 -- | Element function expressing a prefix reduction only
2294 --
2295 scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8)
2296 scanEFL f = \a e -> (f a e, Just a)
2297 #if defined(__GLASGOW_HASKELL__)
2298 {-# INLINE [1] scanEFL #-}
2299 #endif
2300
2301 -- | No accumulator
2302 noAL :: NoAL
2303 noAL = NoAL
2304 #if defined(__GLASGOW_HASKELL__)
2305 {-# INLINE [1] noAL #-}
2306 #endif
2307
2308 -- | Projection functions that are fusion friendly (as in, we determine when
2309 -- they are inlined)
2310 loopArr :: (ByteString, acc) -> ByteString
2311 loopArr (arr, _) = arr
2312 #if defined(__GLASGOW_HASKELL__)
2313 {-# INLINE [1] loopArr #-}
2314 #endif
2315
2316 loopAcc :: (ByteString, acc) -> acc
2317 loopAcc (_, acc) = acc
2318 #if defined(__GLASGOW_HASKELL__)
2319 {-# INLINE [1] loopAcc #-}
2320 #endif
2321
2322 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2323 loopSndAcc (arr, (_, acc)) = (arr, acc)
2324 #if defined(__GLASGOW_HASKELL__)
2325 {-# INLINE [1] loopSndAcc #-}
2326 #endif
2327
2328 ------------------------------------------------------------------------
2329
2330 --
2331 -- size, and then percentage.
2332 --
2333
2334 -- | Iteration over over ByteStrings
2335 loopU :: (acc -> Word8 -> (acc, Maybe Word8))  -- ^ mapping & folding, once per elem
2336       -> acc                                   -- ^ initial acc value
2337       -> ByteString                            -- ^ input ByteString
2338       -> (ByteString, acc)
2339
2340 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2341     fp          <- mallocByteString i
2342     (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2343         (acc, i') <- go (a `plusPtr` s) p start
2344         if i' == i
2345             then return (fp,i',acc)                 -- no realloc for map
2346             else do fp_ <- mallocByteString i'      -- realloc
2347                     withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2348                     return (fp_,i',acc)
2349
2350     return (PS ptr 0 n, acc)
2351   where
2352     go p ma = trans 0 0
2353         where
2354             STRICT3(trans)
2355             trans a_off ma_off acc
2356                 | a_off >= i = return (acc, ma_off)
2357                 | otherwise  = do
2358                     x <- peekByteOff p a_off
2359                     let (acc', oe) = f acc x
2360                     ma_off' <- case oe of
2361                         Nothing  -> return ma_off
2362                         Just e   -> do pokeByteOff ma ma_off e
2363                                        return $ ma_off + 1
2364                     trans (a_off+1) ma_off' acc'
2365
2366 #if defined(__GLASGOW_HASKELL__)
2367 {-# INLINE [1] loopU #-}
2368 #endif
2369
2370 infixr 9 `fuseEFL`
2371
2372 -- |Fuse to flat loop functions
2373 fuseEFL :: (a1 -> Word8  -> (a1, Maybe Word8))
2374         -> (a2 -> Word8  -> (a2, Maybe Word8))
2375         -> (a1, a2)
2376         -> Word8
2377         -> ((a1, a2), Maybe Word8)
2378 fuseEFL f g (acc1, acc2) e1 =
2379     case f acc1 e1 of
2380         (acc1', Nothing) -> ((acc1', acc2), Nothing)
2381         (acc1', Just e2) ->
2382             case g acc2 e2 of
2383                 (acc2', res) -> ((acc1', acc2'), res)
2384
2385 {-# RULES
2386
2387 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2388   loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2389     loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2390
2391 "loopArr/loopSndAcc" forall x.
2392   loopArr (loopSndAcc x) = loopArr x
2393
2394 "seq/NoAL" forall (u::NoAL) e.
2395   u `seq` e = e
2396
2397   #-}
2398