-{-# OPTIONS_GHC -cpp -fffi #-}
+{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
--
-- Module : ByteString
-- Copyright : (c) The University of Glasgow 2001,
-- (c) Simon Marlow 2005
-- (c) Don Stewart 2005-2006
-- (c) Bjorn Bringert 2006
+--
+-- Array fusion code:
+-- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
+-- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
+--
-- License : BSD-style
--
-- Maintainer : dons@cse.unsw.edu.au
-- | A time and space-efficient implementation of byte vectors using
-- packed Word8 arrays, suitable for high performance use, both in terms
-- of large data quantities, or high speed requirements. Byte vectors
--- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
--- be passed between C and Haskell with little effort.
+-- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
+-- and can be passed between C and Haskell with little effort.
--
-- This module is intended to be imported @qualified@, to avoid name
-- clashes with Prelude functions. eg.
-- * Basic interface
cons, -- :: Word8 -> ByteString -> ByteString
- snoc, -- :: Word8 -> ByteString -> ByteString
+ snoc, -- :: ByteString -> Word8 -> ByteString
null, -- :: ByteString -> Bool
length, -- :: ByteString -> Int
head, -- :: ByteString -> Word8
foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+ foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
-- ** Special folds
concat, -- :: [ByteString] -> ByteString
-- * Generating and unfolding ByteStrings
replicate, -- :: Int -> Word8 -> ByteString
- unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
+ unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
-- * Substrings
-- ** Breaking and dropping on specific bytes
breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
+ spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
split, -- :: Word8 -> ByteString -> [ByteString]
splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
+ group, -- :: ByteString -> [ByteString]
+ groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
hGet, -- :: Handle -> Int -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
+ -- * Fusion utilities
#if defined(__GLASGOW_HASKELL__)
- -- * Miscellaneous
unpackList, -- eek, otherwise it gets thrown away by the simplifier
#endif
+ noAL, NoAL, loopArr, loopAcc, loopSndAcc,
+ loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL,
+ filterF, mapF
+
) where
import qualified Prelude as P
import Data.Array (listArray)
import qualified Data.Array as Array ((!))
+-- Control.Exception.bracket not available in yhc or nhc
import Control.Exception (bracket)
+import Control.Monad (when)
import Foreign.C.String (CString, CStringLen)
import Foreign.C.Types (CSize, CInt)
import Foreign.Ptr
import Foreign.Storable (Storable(..))
+-- hGetBuf and hPutBuf not available in yhc or nhc
import System.IO (stdin,stdout,hClose,hFileSize
,hGetBuf,hPutBuf,openBinaryFile
,Handle,IOMode(..))
-- | /O(n)/ Equality on the 'ByteString' type.
eq :: ByteString -> ByteString -> Bool
-eq a b = (compareBytes a b) == EQ
+eq a@(PS p s l) b@(PS p' s' l')
+ | l /= l' = False -- short cut on length
+ | p == p' && s == s' = True -- short cut for the same string
+ | otherwise = compareBytes a b == EQ
{-# INLINE eq #-}
-- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
compareBytes :: ByteString -> ByteString -> Ordering
-compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
-compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
- withForeignPtr x1 $ \p1 ->
- withForeignPtr x2 $ \p2 -> do
- i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
- return $ case i `compare` 0 of
- EQ -> l1 `compare` l2
- x -> x
+compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
+ | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
+ | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
+ | otherwise = inlinePerformIO $
+ withForeignPtr x1 $ \p1 ->
+ withForeignPtr x2 $ \p2 -> do
+ i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
+ return $ case i `compare` 0 of
+ EQ -> l1 `compare` l2
+ x -> x
{-# INLINE compareBytes #-}
{-
-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
packByte :: Word8 -> ByteString
-packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
+packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
withForeignPtr fp $ \p -> poke p c
return $ PS fp 0 1
-{-# NOINLINE packByte #-}
+{-# INLINE packByte #-}
+
+--
+-- XXX The unsafePerformIO is critical!
+--
+-- Otherwise:
+--
+-- packByte 255 `compare` packByte 127
+--
+-- is compiled to:
+--
+-- case mallocByteString 2 of
+-- ForeignPtr f internals ->
+-- case writeWord8OffAddr# f 0 255 of _ ->
+-- case writeWord8OffAddr# f 0 127 of _ ->
+-- case eqAddr# f f of
+-- False -> case compare (GHC.Prim.plusAddr# f 0)
+-- (GHC.Prim.plusAddr# f 0)
+--
+--
-- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
--
-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
length :: ByteString -> Int
length (PS _ _ l) = l
-{-# INLINE length #-}
+
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] length #-}
+#endif
+
+{-#
+
+-- Translate length into a loop.
+-- Performace ok, but allocates too much, so disable for now.
+
+ "length/loop" forall f acc s .
+ length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
+
+ #-}
-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
-- complexity, as it requires a memcpy.
cons :: Word8 -> ByteString -> ByteString
cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
- memcpy (p `plusPtr` 1) (f `plusPtr` s) l
poke p c
+ memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
{-# INLINE cons #-}
+-- todo fuse
+
-- | /O(n)/ Append a byte to the end of a 'ByteString'
snoc :: ByteString -> Word8 -> ByteString
snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
- memcpy p (f `plusPtr` s) l
+ memcpy p (f `plusPtr` s) (fromIntegral l)
poke (p `plusPtr` l) c
{-# INLINE snoc #-}
+-- todo fuse
+
-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
head :: ByteString -> Word8
head ps@(PS x s _)
| otherwise = concat [xs,ys]
{-# INLINE append #-}
-{-
---
--- About 30% faster, but allocating in a big chunk isn't good for memory use
---
-append :: ByteString -> ByteString -> ByteString
-append xs@(PS ffp s l) ys@(PS fgp t m)
- | null xs = ys
- | null ys = xs
- | otherwise = create len $ \ptr ->
- withForeignPtr ffp $ \fp ->
- withForeignPtr fgp $ \gp -> do
- memcpy ptr (fp `plusPtr` s) l
- memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
- where len = length xs + length ys
--}
-
-- ---------------------------------------------------------------------
-- Transformations
-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
--- element of @xs@
---
+-- element of @xs@. This function is subject to array fusion.
map :: (Word8 -> Word8) -> ByteString -> ByteString
-map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
- new_fp <- mallocByteString len
- withForeignPtr new_fp $ \new_p -> do
- map_ f (len-1) (p `plusPtr` start) new_p
- return (PS new_fp 0 len)
+map f = loopArr . loopU (mapEFL f) noAL
{-# INLINE map #-}
-map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
-STRICT4(map_)
-map_ f n p1 p2
- | n < 0 = return ()
- | otherwise = do
- x <- peekByteOff p1 n
- pokeByteOff p2 n (f x)
- map_ f (n-1) p1 p2
-{-# INLINE map_ #-}
+-- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
+-- slightly faster for one-shot cases.
+mapF :: (Word8 -> Word8) -> ByteString -> ByteString
+STRICT2(mapF)
+mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
+ np <- mallocByteString (len+1)
+ withForeignPtr np $ \p -> do
+ map_ 0 (a `plusPtr` s) p
+ return (PS np 0 len)
+ where
+ map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
+ STRICT3(map_)
+ map_ n p1 p2
+ | n >= len = return ()
+ | otherwise = do
+ x <- peekByteOff p1 n
+ pokeByteOff p2 n (f x)
+ map_ (n+1) p1 p2
+{-# INLINE mapF #-}
-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
reverse :: ByteString -> ByteString
reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
- c_reverse p (f `plusPtr` s) l
+ c_reverse p (f `plusPtr` s) (fromIntegral l)
{-
reverse = pack . P.reverse . unpack
intersperse c ps@(PS x s l)
| length ps < 2 = ps
| otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
- c_intersperse p (f `plusPtr` s) l c
+ c_intersperse p (f `plusPtr` s) (fromIntegral l) c
{-
intersperse c = pack . List.intersperse c . unpack
-- | 'foldl', applied to a binary operator, a starting value (typically
-- the left-identity of the operator), and a ByteString, reduces the
-- ByteString using the binary operator, from left to right.
+-- This function is subject to array fusion.
foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
+foldl f z = loopAcc . loopU (foldEFL f) z
+{-# INLINE foldl #-}
+
+{-
+--
+-- About twice as fast with 6.4.1, but not fuseable
+-- A simple fold . map is enough to make it worth while.
+--
foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
where
lgo z p q | p == q = return z
| otherwise = do c <- peek p
lgo (f z c) (p `plusPtr` 1) q
+-}
+
+-- | 'foldl\'' is like foldl, but strict in the accumulator.
+foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
+foldl' f z = loopAcc . loopU (foldEFL' f) z
+{-# INLINE foldl' #-}
-- | 'foldr', applied to a binary operator, a starting value
-- (typically the right-identity of the operator), and a ByteString,
-- | 'foldl1' is a variant of 'foldl' that has no starting value
-- argument, and thus must be applied to non-empty 'ByteStrings'.
+-- This function is subject to array fusion.
foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1 f ps
| null ps = errorEmptyList "foldl1"
foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1 f ps
| null ps = errorEmptyList "foldr1"
- | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
+ | otherwise = foldr f (last ps) (init ps)
-- ---------------------------------------------------------------------
-- Special folds
concat :: [ByteString] -> ByteString
concat [] = empty
concat [ps] = ps
-concat xs = inlinePerformIO $ do
- let start_size = 1024
- p <- mallocArray start_size
- f p 0 1024 xs
-
- where f ptr len _ [] = do
- ptr' <- reallocArray ptr (len+1)
- poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
- fp <- newForeignFreePtr ptr'
- return $ PS fp 0 len
-
- f ptr len to_go pss@(PS p s l:pss')
- | l <= to_go = do withForeignPtr p $ \pf ->
- memcpy (ptr `plusPtr` len)
- (pf `plusPtr` s) l
- f ptr (len + l) (to_go - l) pss'
-
- | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
- ptr' <- reallocArray ptr new_total
- f ptr' len (new_total - len) pss
+concat xs = create len $ \ptr -> go xs ptr
+ where len = P.sum . P.map length $ xs
+ STRICT2(go)
+ go [] _ = return ()
+ go (PS p s l:ps) ptr = do
+ withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
+ go ps (ptr `plusPtr` l)
-- | Map a function over a 'ByteString' and concatenate the results
concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
if f c then return True
else go (p `plusPtr` 1) q
+-- todo fuse
+
-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
-- if all elements of the 'ByteString' satisfy the predicate.
all :: (Word8 -> Bool) -> ByteString -> Bool
if f c
then go (p `plusPtr` 1) q
else return False
+-- todo fuse
-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
maximum :: ByteString -> Word8
maximum xs@(PS x s l)
| null xs = errorEmptyList "maximum"
| otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
- return $ c_maximum (p `plusPtr` s) l
+ return $ c_maximum (p `plusPtr` s) (fromIntegral l)
{-# INLINE maximum #-}
-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
minimum xs@(PS x s l)
| null xs = errorEmptyList "minimum"
| otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
- return $ c_minimum (p `plusPtr` s) l
+ return $ c_minimum (p `plusPtr` s) (fromIntegral l)
{-# INLINE minimum #-}
+-- fusion is too slow here (10x)
+
{-
maximum xs@(PS x s l)
| null xs = errorEmptyList "maximum"
-- The following equation connects the depth-limited unfoldr to the List unfoldr:
--
-- > unfoldrN n == take n $ List.unfoldr
-unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
+unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
where
STRICT3(go)
Just n -> (take n p, drop n p)
{-# INLINE breakByte #-}
+-- | 'spanByte' breaks its ByteString argument at the first
+-- occurence of a byte other than its argument. It is more efficient
+-- than 'span (==)'
+--
+-- > span (=='c') "abcd" == spanByte 'c' "abcd"
+--
+spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
+spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
+ go (p `plusPtr` s) 0
+ where
+ STRICT2(go)
+ go p i | i >= l = return (ps, empty)
+ | otherwise = do c' <- peekByteOff p i
+ if c /= c'
+ then return (take i ps, drop i ps)
+ else go p (i+1)
+{-# INLINE spanByte #-}
+
-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
-- occurence of @w@. It behaves like 'break', except the delimiter is
-- not returned, and @Nothing@ is returned if the delimiter is not in
-- | 'span' @p xs@ breaks the ByteString into two segments. It is
-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
-span p ps = break (not . p) ps
+span p ps = break (not . p) ps
{-# INLINE span #-}
-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
#if defined(__GLASGOW_HASKELL__)
splitWith _pred (PS _ _ 0) = []
-splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
+splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
where pred# c# = pred_ (W8# c#)
- splitWith' pred' off' len' fp' = withPtr fp $ \p ->
+ STRICT4(splitWith0)
+ splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
splitLoop pred' p 0 off' len' fp'
splitLoop :: (Word# -> Bool)
w <- peekElemOff p (off'+idx')
if pred' (case w of W8# w# -> w#)
then return (PS fp' off' idx' :
- splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
+ splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
else splitLoop pred' p (idx'+1) off' len' fp'
{-# INLINE splitWith #-}
#else
splitWith _ (PS _ _ 0) = []
-splitWith p ps = splitWith' p ps
+splitWith p ps = loop p ps
where
- STRICT2(splitWith')
- splitWith' q qs = if null rest then [chunk]
- else chunk : splitWith' q (unsafeTail rest)
+ STRICT2(loop)
+ loop q qs = if null rest then [chunk]
+ else chunk : loop q (unsafeTail rest)
where (chunk,rest) = break q qs
#endif
loop n = do
let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
if q == nullPtr
- then return [PS x (s+n) (l-n)]
- else do let i = q `minusPtr` ptr
- ls <- loop (i+1)
- return $! PS x (s+n) (i-n) : ls
- loop 0
+ then [PS x (s+n) (l-n)]
+ else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
+
+ return (loop 0)
{-# INLINE split #-}
{-
--
tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
tokens f = P.filter (not.null) . splitWith f
+{-# INLINE tokens #-}
+
+-- | The 'group' function takes a ByteString and returns a list of
+-- ByteStrings such that the concatenation of the result is equal to the
+-- argument. Moreover, each sublist in the result contains only equal
+-- elements. For example,
+--
+-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
+--
+-- It is a special case of 'groupBy', which allows the programmer to
+-- supply their own equality test. It is about 40% faster than
+-- /groupBy (==)/
+group :: ByteString -> [ByteString]
+group xs
+ | null xs = []
+ | otherwise = ys : group zs
+ where
+ (ys, zs) = spanByte (unsafeHead xs) xs
+
+-- | The 'groupBy' function is the non-overloaded version of 'group'.
+groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
+groupBy k xs
+ | null xs = []
+ | otherwise = take n xs : groupBy k (drop n xs)
+ where
+ n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
-- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
-- 'ByteString's and concatenates the list after interspersing the first
splice [] = []
splice [x] = [x]
splice (x:y:xs) = x:filler:splice (y:xs)
+{-# INLINE join #-}
--
-- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
withForeignPtr ffp $ \fp ->
withForeignPtr fgp $ \gp -> do
- memcpy ptr (fp `plusPtr` s) l
+ memcpy ptr (fp `plusPtr` s) (fromIntegral l)
poke (ptr `plusPtr` l) c
- memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
+ memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
where
len = length f + length g + 1
{-# INLINE joinWithByte #-}
-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
index :: ByteString -> Int -> Word8
index ps n
- | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
- | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
- ++ ", length = " ++ show (length ps)
+ | n < 0 = moduleError "index" ("negative index: " ++ show n)
+ | n >= length ps = moduleError "index" ("index too large: " ++ show n
+ ++ ", length = " ++ show (length ps))
| otherwise = ps `unsafeIndex` n
{-# INLINE index #-}
let ptr = p `plusPtr` s
STRICT1(loop)
- loop n = do
- let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
- if q == nullPtr
- then return []
- else do let i = q `minusPtr` ptr
- ls <- loop (i+1)
- return $! i:ls
- loop 0
+ loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
+ in if q == nullPtr
+ then []
+ else let i = q `minusPtr` ptr
+ in i : loop (i+1)
+ return (loop 0)
+{-# INLINE elemIndices #-}
{-
-- much slower
-- But more efficiently than using length on the intermediate list.
count :: Word8 -> ByteString -> Int
count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
- return $ c_count (p `plusPtr` s) (fromIntegral m) w
+ return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
{-# INLINE count #-}
{-
-- returns the index of the first element in the ByteString
-- satisfying the predicate.
findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
-findIndex = (listToMaybe .) . findIndices
+findIndex k ps@(PS x s l)
+ | null ps = Nothing
+ | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
+ where
+ STRICT2(go)
+ go ptr n | n >= l = return Nothing
+ | otherwise = do w <- peek ptr
+ if k w
+ then return (Just n)
+ else go (ptr `plusPtr` 1) (n+1)
+{-# INLINE findIndex #-}
-- | The 'findIndices' function extends 'findIndex', by returning the
-- indices of all elements satisfying the predicate, in ascending order.
findIndices p ps = loop 0 ps
where
STRICT2(loop)
- loop _ qs | null qs = []
- loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
+ loop n qs | null qs = []
+ | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
| otherwise = loop (n+1) (unsafeTail qs)
-- ---------------------------------------------------------------------
-- | /O(n)/ 'notElem' is the inverse of 'elem'
notElem :: Word8 -> ByteString -> Bool
-notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
+notElem c ps = not (elem c ps)
{-# INLINE notElem #-}
+-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
+-- returns a ByteString containing those characters that satisfy the
+-- predicate. This function is subject to array fusion.
+filter :: (Word8 -> Bool) -> ByteString -> ByteString
+filter p = loopArr . loopU (filterEFL p) noAL
+{-# INLINE filter #-}
+
+-- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
+-- around 2x faster for some one-shot applications.
+filterF :: (Word8 -> Bool) -> ByteString -> ByteString
+filterF k ps@(PS x s l)
+ | null ps = ps
+ | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+ t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
+ return (t `minusPtr` p) -- actual length
+ where
+ STRICT3(go)
+ go f t end | f == end = return t
+ | otherwise = do
+ w <- peek f
+ if k w
+ then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
+ else go (f `plusPtr` 1) t end
+{-# INLINE filterF #-}
+
--
-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
-- case of filtering a single byte. It is more efficient to use
-- filter equivalent
filterByte :: Word8 -> ByteString -> ByteString
filterByte w ps = replicate (count w ps) w
-
-{-
--- slower than the replicate version
-
-filterByte ch ps@(PS x s l)
- | null ps = ps
- | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
- t <- go (f `plusPtr` s) p l
- return (t `minusPtr` p) -- actual length
- where
- STRICT3(go)
- go _ t 0 = return t
- go f t e = do w <- peek f
- if w == ch
- then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
- else go (f `plusPtr` 1) t (e-1)
--}
+{-# INLINE filterByte #-}
--
-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
--
-- > filterNotByte == filter . (/=)
--
--- filterNotByte is around 3x faster, and uses much less space, than its
--- filter equivalent
+-- filterNotByte is around 2x faster than its filter equivalent.
filterNotByte :: Word8 -> ByteString -> ByteString
-filterNotByte ch ps@(PS x s l)
- | null ps = ps
- | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
- t <- go (f `plusPtr` s) p l
- return (t `minusPtr` p) -- actual length
- where
- STRICT3(go)
- go _ t 0 = return t
- go f t e = do w <- peek f
- if w /= ch
- then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
- else go (f `plusPtr` 1) t (e-1)
-
--- | /O(n)/ 'filter', applied to a predicate and a ByteString,
--- returns a ByteString containing those characters that satisfy the
--- predicate.
-filter :: (Word8 -> Bool) -> ByteString -> ByteString
-filter k ps@(PS x s l)
- | null ps = ps
- | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
- t <- go (f `plusPtr` s) p l
- return (t `minusPtr` p) -- actual length
- where
- STRICT3(go)
- go _ t 0 = return t
- go f t e = do w <- peek f
- if k w
- then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
- else go (f `plusPtr` 1) t (e - 1)
-
--- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
+filterNotByte w = filterF (/= w)
+{-# INLINE filterNotByte #-}
-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
-- and returns the first element in matching the predicate, or 'Nothing'
-- if there is no such element.
+--
+-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
+--
find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
-find p ps = case filter p ps of
- q | null q -> Nothing
- | otherwise -> Just (unsafeHead q)
+find f p = case findIndex f p of
+ Just n -> Just (p `unsafeIndex` n)
+ _ -> Nothing
+{-# INLINE find #-}
+
+{-
+--
+-- fuseable, but we don't want to walk the whole array.
+--
+find k = foldl findEFL Nothing
+ where findEFL a@(Just _) _ = a
+ findEFL _ c | k c = Just c
+ | otherwise = Nothing
+-}
-- ---------------------------------------------------------------------
-- Searching for substrings
| l2 < l1 = False
| otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
withForeignPtr x2 $ \p2 -> do
- i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
+ i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
return (i == 0)
-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
| l2 < l1 = False
| otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
withForeignPtr x2 $ \p2 -> do
- i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
+ i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
return (i == 0)
-- | Check whether one string is a substring of another. @isSubstringOf
-- ---------------------------------------------------------------------
-- ** Ordered 'ByteString's
--- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
+-- | /O(n)/ Sort a ByteString efficiently, using counting sort.
+sort :: ByteString -> ByteString
+sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
+
+ memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
+ withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
+
+ let STRICT2(go)
+ go 256 _ = return ()
+ go i ptr = do n <- peekElemOff arr i
+ when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
+ go (i + 1) (ptr `plusPtr` (fromIntegral n))
+ go 0 p
+
+-- "countEach counts str l" counts the number of occurences of each Word8 in
+-- str, and stores the result in counts.
+countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
+STRICT3(countEach)
+countEach counts str l = go 0
+ where
+ STRICT1(go)
+ go i | i == l = return ()
+ | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
+ x <- peekElemOff counts k
+ pokeElemOff counts k (x + 1)
+ go (i + 1)
+
+{-
sort :: ByteString -> ByteString
sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
memcpy p (f `plusPtr` s) l
c_qsort p l -- inplace
+-}
{-
sort = pack . List.sort . unpack
-}
+-- | The 'sortBy' function is the non-overloaded version of 'sort'.
+--
+-- Try some linear sorts: radix, counting
+-- Or mergesort.
+--
+-- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
+-- sortBy f ps = undefined
+
-- ---------------------------------------------------------------------
--
-- Extensions to the basic interface
-- if a large string has been read in, and only a small part of it
-- is needed in the rest of the program.
copy :: ByteString -> ByteString
-copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
+copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
+ memcpy p (f `plusPtr` s) (fromIntegral l)
-- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
-- CString is going to be deallocated from C land.
copyCStringLen (cstr, len) = inlinePerformIO $ do
fp <- mallocForeignPtrArray (len+1)
withForeignPtr fp $ \p -> do
- memcpy p (castPtr cstr) len
+ memcpy p (castPtr cstr) (fromIntegral len)
poke (p `plusPtr` len) (0 :: Word8)
return $! PS fp 0 len
--
generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
generate i f = do
- p <- mallocArray i
+ fp <- mallocByteString i
+ (ptr,n) <- withForeignPtr fp $ \p -> do
+ i' <- f p
+ if i' == i
+ then return (fp,i')
+ else do fp_ <- mallocByteString i' -- realloc
+ withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
+ return (fp_,i')
+ return (PS ptr 0 n)
+
+{-
+--
+-- On the C malloc heap. Less fun.
+--
+generate i f = do
+ p <- mallocArray (i+1)
i' <- f p
p' <- reallocArray p (i'+1)
poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
fp <- newForeignFreePtr p'
return $ PS fp 0 i'
+-}
-- ---------------------------------------------------------------------
-- line IO
mkPS :: RawBuffer -> Int -> Int -> IO ByteString
mkPS buf start end = do
let len = end - start
- fp <- mallocByteString (len `quot` 8)
+ fp <- mallocByteString len
withForeignPtr fp $ \p -> do
- memcpy_ptr_baoff p buf start (fromIntegral len)
+ memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
return (PS fp 0 len)
mkBigPS :: Int -> [ByteString] -> IO ByteString
getContents :: IO ByteString
getContents = hGetContents stdin
--- | Read an entire file directly into a 'ByteString'. This is far more
+-- | Read an entire file strictly into a 'ByteString'. This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'pack'. It also may be more efficient than opening the file and
-- reading it using hGet.
-- | Write a 'ByteString' to a file.
writeFile :: FilePath -> ByteString -> IO ()
-writeFile f ps = do
- h <- openBinaryFile f WriteMode
- hPut h ps
- hClose h
+writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
+ (\h -> hPut h ps)
{-
--
-- | A way of creating ForeignPtrs outside the IO monad. The @Int@
-- argument gives the final size of the ByteString. Unlike 'generate'
--- the ByteString is no reallocated if the final size is less than the
--- estimated size.
+-- the ByteString is not reallocated if the final size is less than the
+-- estimated size. Also, unlike 'generate' ByteString's created this way
+-- are managed on the Haskell heap.
create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
create l write_ptr = inlinePerformIO $ do
fp <- mallocByteString (l+1)
-- Common up near identical calls to `error' to reduce the number
-- constant strings created when compiled:
errorEmptyList :: String -> a
-errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
-{-# INLINE errorEmptyList #-}
+errorEmptyList fun = moduleError fun "empty ByteString"
+{-# NOINLINE errorEmptyList #-}
+
+moduleError :: String -> String -> a
+moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
+{-# NOINLINE moduleError #-}
-- 'findIndexOrEnd' is a variant of findIndex, that returns the length
-- of the string if no element is found, rather than Nothing.
:: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
foreign import ccall unsafe "string.h memcmp" memcmp
- :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
+ :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
foreign import ccall unsafe "string.h memcpy" memcpy
- :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+ :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
-- ---------------------------------------------------------------------
--
--
foreign import ccall unsafe "static fpstring.h reverse" c_reverse
- :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+ :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
- :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
+ :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
foreign import ccall unsafe "static fpstring.h maximum" c_maximum
- :: Ptr Word8 -> Int -> Word8
+ :: Ptr Word8 -> CInt -> Word8
foreign import ccall unsafe "static fpstring.h minimum" c_minimum
- :: Ptr Word8 -> Int -> Word8
+ :: Ptr Word8 -> CInt -> Word8
foreign import ccall unsafe "static fpstring.h count" c_count
- :: Ptr Word8 -> Int -> Word8 -> Int
-
-foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
- :: Ptr Word8 -> Int -> IO ()
+ :: Ptr Word8 -> CInt -> Word8 -> CInt
-- ---------------------------------------------------------------------
-- MMap
getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
foreign import ccall unsafe "__hscore_memcpy_src_off"
- memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
+ memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
#endif
+
+-- ---------------------------------------------------------------------
+--
+-- Functional array fusion for ByteStrings.
+--
+-- From the Data Parallel Haskell project,
+-- http://www.cse.unsw.edu.au/~chak/project/dph/
+--
+
+-- |Data type for accumulators which can be ignored. The rewrite rules rely on
+-- the fact that no bottoms of this type are ever constructed; hence, we can
+-- assume @(_ :: NoAL) `seq` x = x@.
+--
+data NoAL = NoAL
+
+-- | Special forms of loop arguments
+--
+-- * These are common special cases for the three function arguments of gen
+-- and loop; we give them special names to make it easier to trigger RULES
+-- applying in the special cases represented by these arguments. The
+-- "INLINE [1]" makes sure that these functions are only inlined in the last
+-- two simplifier phases.
+--
+-- * In the case where the accumulator is not needed, it is better to always
+-- explicitly return a value `()', rather than just copy the input to the
+-- output, as the former gives GHC better local information.
+--
+
+-- | Element function expressing a mapping only
+mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
+mapEFL f = \_ e -> (noAL, (Just $ f e))
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] mapEFL #-}
+#endif
+
+-- | Element function implementing a filter function only
+filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
+filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] filterEFL #-}
+#endif
+
+-- |Element function expressing a reduction only
+foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
+foldEFL f = \a e -> (f a e, Nothing)
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] foldEFL #-}
+#endif
+
+-- | A strict foldEFL.
+foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
+foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] foldEFL' #-}
+#endif
+
+-- | No accumulator
+noAL :: NoAL
+noAL = NoAL
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] noAL #-}
+#endif
+
+-- | Projection functions that are fusion friendly (as in, we determine when
+-- they are inlined)
+loopArr :: (ByteString, acc) -> ByteString
+loopArr (arr, _) = arr
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] loopArr #-}
+#endif
+
+loopAcc :: (ByteString, acc) -> acc
+loopAcc (_, acc) = acc
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] loopAcc #-}
+#endif
+
+loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
+loopSndAcc (arr, (_, acc)) = (arr, acc)
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] loopSndAcc #-}
+#endif
+
+------------------------------------------------------------------------
+
+--
+-- size, and then percentage.
+--
+
+-- | Iteration over over ByteStrings
+loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
+ -> acc -- ^ initial acc value
+ -> ByteString -- ^ input ByteString
+ -> (ByteString, acc)
+
+loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
+ fp <- mallocByteString i
+ (ptr,n,acc) <- withForeignPtr fp $ \p -> do
+ (acc, i') <- go (a `plusPtr` s) p start
+ if i' == i
+ then return (fp,i',acc) -- no realloc for map
+ else do fp_ <- mallocByteString i' -- realloc
+ withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
+ return (fp_,i',acc)
+
+ return (PS ptr 0 n, acc)
+ where
+ go p ma = trans 0 0
+ where
+ STRICT3(trans)
+ trans a_off ma_off acc
+ | a_off >= i = return (acc, ma_off)
+ | otherwise = do
+ x <- peekByteOff p a_off
+ let (acc', oe) = f acc x
+ ma_off' <- case oe of
+ Nothing -> return ma_off
+ Just e -> do pokeByteOff ma ma_off e
+ return $ ma_off + 1
+ trans (a_off+1) ma_off' acc'
+
+#if defined(__GLASGOW_HASKELL__)
+{-# INLINE [1] loopU #-}
+#endif
+
+infixr 9 `fuseEFL`
+
+-- |Fuse to flat loop functions
+fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
+ -> (a2 -> Word8 -> (a2, Maybe Word8))
+ -> (a1, a2)
+ -> Word8
+ -> ((a1, a2), Maybe Word8)
+fuseEFL f g (acc1, acc2) e1 =
+ case f acc1 e1 of
+ (acc1', Nothing) -> ((acc1', acc2), Nothing)
+ (acc1', Just e2) ->
+ case g acc2 e2 of
+ (acc2', res) -> ((acc1', acc2'), res)
+
+{-# RULES
+
+"loop/loop fusion!" forall em1 em2 start1 start2 arr.
+ loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
+ loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
+
+"loopArr/loopSndAcc" forall x.
+ loopArr (loopSndAcc x) = loopArr x
+
+"seq/NoAL" forall (u::NoAL) e.
+ u `seq` e = e
+
+ #-}
+