X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FByteString.hs;h=a822c41fd1c2296beb067d2dd355fdfc3bce2ee7;hb=47b3fc37153d12425851fbca1965103f7b1870da;hp=61ed8875960e816769428dc4eb1332f27abf7aab;hpb=39387cbf531ac4de18994726d30650b391fdae65;p=haskell-directory.git diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 61ed887..a822c41 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -1,4 +1,4 @@ -{-# OPTIONS_GHC -cpp -fffi #-} +{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-} -- -- Module : ByteString -- Copyright : (c) The University of Glasgow 2001, @@ -6,6 +6,11 @@ -- (c) Simon Marlow 2005 -- (c) Don Stewart 2005-2006 -- (c) Bjorn Bringert 2006 +-- +-- Array fusion code: +-- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller +-- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy +-- -- License : BSD-style -- -- Maintainer : dons@cse.unsw.edu.au @@ -18,8 +23,8 @@ -- | A time and space-efficient implementation of byte vectors using -- packed Word8 arrays, suitable for high performance use, both in terms -- of large data quantities, or high speed requirements. Byte vectors --- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can --- be passed between C and Haskell with little effort. +-- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr, +-- and can be passed between C and Haskell with little effort. -- -- This module is intended to be imported @qualified@, to avoid name -- clashes with Prelude functions. eg. @@ -46,7 +51,7 @@ module Data.ByteString ( -- * Basic interface cons, -- :: Word8 -> ByteString -> ByteString - snoc, -- :: Word8 -> ByteString -> ByteString + snoc, -- :: ByteString -> Word8 -> ByteString null, -- :: ByteString -> Bool length, -- :: ByteString -> Int head, -- :: ByteString -> Word8 @@ -71,6 +76,7 @@ module Data.ByteString ( foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a -- ** Special folds concat, -- :: [ByteString] -> ByteString @@ -83,7 +89,7 @@ module Data.ByteString ( -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Word8 -> ByteString - unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString + unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString -- * Substrings @@ -99,6 +105,7 @@ module Data.ByteString ( -- ** Breaking and dropping on specific bytes breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) + spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString) breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString) @@ -106,6 +113,8 @@ module Data.ByteString ( split, -- :: Word8 -> ByteString -> [ByteString] splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] + group, -- :: ByteString -> [ByteString] + groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -- ** Joining strings join, -- :: ByteString -> [ByteString] -> ByteString @@ -213,11 +222,15 @@ module Data.ByteString ( hGet, -- :: Handle -> Int -> IO ByteString hPut, -- :: Handle -> ByteString -> IO () + -- * Fusion utilities #if defined(__GLASGOW_HASKELL__) - -- * Miscellaneous unpackList, -- eek, otherwise it gets thrown away by the simplifier #endif + noAL, NoAL, loopArr, loopAcc, loopSndAcc, + loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, + filterF, mapF + ) where import qualified Prelude as P @@ -238,7 +251,9 @@ import Data.Maybe (listToMaybe) import Data.Array (listArray) import qualified Data.Array as Array ((!)) +-- Control.Exception.bracket not available in yhc or nhc import Control.Exception (bracket) +import Control.Monad (when) import Foreign.C.String (CString, CStringLen) import Foreign.C.Types (CSize, CInt) @@ -247,6 +262,7 @@ import Foreign.Marshal.Array import Foreign.Ptr import Foreign.Storable (Storable(..)) +-- hGetBuf and hPutBuf not available in yhc or nhc import System.IO (stdin,stdout,hClose,hFileSize ,hGetBuf,hPutBuf,openBinaryFile ,Handle,IOMode(..)) @@ -324,19 +340,24 @@ instance Arbitrary PackedString where -- | /O(n)/ Equality on the 'ByteString' type. eq :: ByteString -> ByteString -> Bool -eq a b = (compareBytes a b) == EQ +eq a@(PS p s l) b@(PS p' s' l') + | l /= l' = False -- short cut on length + | p == p' && s == s' = True -- short cut for the same string + | otherwise = compareBytes a b == EQ {-# INLINE eq #-} -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. compareBytes :: ByteString -> ByteString -> Ordering -compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings -compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $ - withForeignPtr x1 $ \p1 -> - withForeignPtr x2 $ \p2 -> do - i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2) - return $ case i `compare` 0 of - EQ -> l1 `compare` l2 - x -> x +compareBytes (PS x1 s1 l1) (PS x2 s2 l2) + | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings + | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string + | otherwise = inlinePerformIO $ + withForeignPtr x1 $ \p1 -> + withForeignPtr x2 $ \p2 -> do + i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2) + return $ case i `compare` 0 of + EQ -> l1 `compare` l2 + x -> x {-# INLINE compareBytes #-} {- @@ -375,10 +396,29 @@ empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0 -- | /O(1)/ Convert a 'Word8' into a 'ByteString' packByte :: Word8 -> ByteString -packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do +packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do withForeignPtr fp $ \p -> poke p c return $ PS fp 0 1 -{-# NOINLINE packByte #-} +{-# INLINE packByte #-} + +-- +-- XXX The unsafePerformIO is critical! +-- +-- Otherwise: +-- +-- packByte 255 `compare` packByte 127 +-- +-- is compiled to: +-- +-- case mallocByteString 2 of +-- ForeignPtr f internals -> +-- case writeWord8OffAddr# f 0 255 of _ -> +-- case writeWord8OffAddr# f 0 127 of _ -> +-- case eqAddr# f f of +-- False -> case compare (GHC.Prim.plusAddr# f 0) +-- (GHC.Prim.plusAddr# f 0) +-- +-- -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. -- @@ -485,23 +525,40 @@ null (PS _ _ l) = l == 0 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. length :: ByteString -> Int length (PS _ _ l) = l -{-# INLINE length #-} + +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] length #-} +#endif + +{-# + +-- Translate length into a loop. +-- Performace ok, but allocates too much, so disable for now. + + "length/loop" forall f acc s . + length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s)) + + #-} -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different -- complexity, as it requires a memcpy. cons :: Word8 -> ByteString -> ByteString cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do - memcpy (p `plusPtr` 1) (f `plusPtr` s) l poke p c + memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) {-# INLINE cons #-} +-- todo fuse + -- | /O(n)/ Append a byte to the end of a 'ByteString' snoc :: ByteString -> Word8 -> ByteString snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do - memcpy p (f `plusPtr` s) l + memcpy p (f `plusPtr` s) (fromIntegral l) poke (p `plusPtr` l) c {-# INLINE snoc #-} +-- todo fuse + -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. head :: ByteString -> Word8 head ps@(PS x s _) @@ -537,50 +594,39 @@ append xs ys | null xs = ys | otherwise = concat [xs,ys] {-# INLINE append #-} -{- --- --- About 30% faster, but allocating in a big chunk isn't good for memory use --- -append :: ByteString -> ByteString -> ByteString -append xs@(PS ffp s l) ys@(PS fgp t m) - | null xs = ys - | null ys = xs - | otherwise = create len $ \ptr -> - withForeignPtr ffp $ \fp -> - withForeignPtr fgp $ \gp -> do - memcpy ptr (fp `plusPtr` s) l - memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m - where len = length xs + length ys --} - -- --------------------------------------------------------------------- -- Transformations -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each --- element of @xs@ --- +-- element of @xs@. This function is subject to array fusion. map :: (Word8 -> Word8) -> ByteString -> ByteString -map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do - new_fp <- mallocByteString len - withForeignPtr new_fp $ \new_p -> do - map_ f (len-1) (p `plusPtr` start) new_p - return (PS new_fp 0 len) +map f = loopArr . loopU (mapEFL f) noAL {-# INLINE map #-} -map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO () -STRICT4(map_) -map_ f n p1 p2 - | n < 0 = return () - | otherwise = do - x <- peekByteOff p1 n - pokeByteOff p2 n (f x) - map_ f (n-1) p1 p2 -{-# INLINE map_ #-} +-- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is +-- slightly faster for one-shot cases. +mapF :: (Word8 -> Word8) -> ByteString -> ByteString +STRICT2(mapF) +mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do + np <- mallocByteString (len+1) + withForeignPtr np $ \p -> do + map_ 0 (a `plusPtr` s) p + return (PS np 0 len) + where + map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () + STRICT3(map_) + map_ n p1 p2 + | n >= len = return () + | otherwise = do + x <- peekByteOff p1 n + pokeByteOff p2 n (f x) + map_ (n+1) p1 p2 +{-# INLINE mapF #-} -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. reverse :: ByteString -> ByteString reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> - c_reverse p (f `plusPtr` s) l + c_reverse p (f `plusPtr` s) (fromIntegral l) {- reverse = pack . P.reverse . unpack @@ -594,7 +640,7 @@ intersperse :: Word8 -> ByteString -> ByteString intersperse c ps@(PS x s l) | length ps < 2 = ps | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f -> - c_intersperse p (f `plusPtr` s) l c + c_intersperse p (f `plusPtr` s) (fromIntegral l) c {- intersperse c = pack . List.intersperse c . unpack @@ -611,7 +657,16 @@ transpose ps = P.map pack (List.transpose (P.map unpack ps)) -- | 'foldl', applied to a binary operator, a starting value (typically -- the left-identity of the operator), and a ByteString, reduces the -- ByteString using the binary operator, from left to right. +-- This function is subject to array fusion. foldl :: (a -> Word8 -> a) -> a -> ByteString -> a +foldl f z = loopAcc . loopU (foldEFL f) z +{-# INLINE foldl #-} + +{- +-- +-- About twice as fast with 6.4.1, but not fuseable +-- A simple fold . map is enough to make it worth while. +-- foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) where @@ -619,6 +674,12 @@ foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> lgo z p q | p == q = return z | otherwise = do c <- peek p lgo (f z c) (p `plusPtr` 1) q +-} + +-- | 'foldl\'' is like foldl, but strict in the accumulator. +foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a +foldl' f z = loopAcc . loopU (foldEFL' f) z +{-# INLINE foldl' #-} -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a ByteString, @@ -635,6 +696,7 @@ foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'ByteStrings'. +-- This function is subject to array fusion. foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldl1 f ps | null ps = errorEmptyList "foldl1" @@ -645,7 +707,7 @@ foldl1 f ps foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldr1 f ps | null ps = errorEmptyList "foldr1" - | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps)) + | otherwise = foldr f (last ps) (init ps) -- --------------------------------------------------------------------- -- Special folds @@ -654,26 +716,13 @@ foldr1 f ps concat :: [ByteString] -> ByteString concat [] = empty concat [ps] = ps -concat xs = inlinePerformIO $ do - let start_size = 1024 - p <- mallocArray start_size - f p 0 1024 xs - - where f ptr len _ [] = do - ptr' <- reallocArray ptr (len+1) - poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work - fp <- newForeignFreePtr ptr' - return $ PS fp 0 len - - f ptr len to_go pss@(PS p s l:pss') - | l <= to_go = do withForeignPtr p $ \pf -> - memcpy (ptr `plusPtr` len) - (pf `plusPtr` s) l - f ptr (len + l) (to_go - l) pss' - - | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l) - ptr' <- reallocArray ptr new_total - f ptr' len (new_total - len) pss +concat xs = create len $ \ptr -> go xs ptr + where len = P.sum . P.map length $ xs + STRICT2(go) + go [] _ = return () + go (PS p s l:ps) ptr = do + withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l) + go ps (ptr `plusPtr` l) -- | Map a function over a 'ByteString' and concatenate the results concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString @@ -692,6 +741,8 @@ any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> if f c then return True else go (p `plusPtr` 1) q +-- todo fuse + -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines -- if all elements of the 'ByteString' satisfy the predicate. all :: (Word8 -> Bool) -> ByteString -> Bool @@ -705,13 +756,14 @@ all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> if f c then go (p `plusPtr` 1) q else return False +-- todo fuse -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' maximum :: ByteString -> Word8 maximum xs@(PS x s l) | null xs = errorEmptyList "maximum" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> - return $ c_maximum (p `plusPtr` s) l + return $ c_maximum (p `plusPtr` s) (fromIntegral l) {-# INLINE maximum #-} -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' @@ -719,9 +771,11 @@ minimum :: ByteString -> Word8 minimum xs@(PS x s l) | null xs = errorEmptyList "minimum" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> - return $ c_minimum (p `plusPtr` s) l + return $ c_minimum (p `plusPtr` s) (fromIntegral l) {-# INLINE minimum #-} +-- fusion is too slow here (10x) + {- maximum xs@(PS x s l) | null xs = errorEmptyList "maximum" @@ -805,7 +859,7 @@ replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w -- The following equation connects the depth-limited unfoldr to the List unfoldr: -- -- > unfoldrN n == take n $ List.unfoldr -unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString +unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0 where STRICT3(go) @@ -871,6 +925,24 @@ breakByte c p = case elemIndex c p of Just n -> (take n p, drop n p) {-# INLINE breakByte #-} +-- | 'spanByte' breaks its ByteString argument at the first +-- occurence of a byte other than its argument. It is more efficient +-- than 'span (==)' +-- +-- > span (=='c') "abcd" == spanByte 'c' "abcd" +-- +spanByte :: Word8 -> ByteString -> (ByteString, ByteString) +spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> + go (p `plusPtr` s) 0 + where + STRICT2(go) + go p i | i >= l = return (ps, empty) + | otherwise = do c' <- peekByteOff p i + if c /= c' + then return (take i ps, drop i ps) + else go p (i+1) +{-# INLINE spanByte #-} + -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first -- occurence of @w@. It behaves like 'break', except the delimiter is -- not returned, and @Nothing@ is returned if the delimiter is not in @@ -908,7 +980,7 @@ breakLast c p = case elemIndexLast c p of -- | 'span' @p xs@ breaks the ByteString into two segments. It is -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -span p ps = break (not . p) ps +span p ps = break (not . p) ps {-# INLINE span #-} -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. @@ -937,10 +1009,11 @@ splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] #if defined(__GLASGOW_HASKELL__) splitWith _pred (PS _ _ 0) = [] -splitWith pred_ (PS fp off len) = splitWith' pred# off len fp +splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp where pred# c# = pred_ (W8# c#) - splitWith' pred' off' len' fp' = withPtr fp $ \p -> + STRICT4(splitWith0) + splitWith0 pred' off' len' fp' = withPtr fp $ \p -> splitLoop pred' p 0 off' len' fp' splitLoop :: (Word# -> Bool) @@ -956,17 +1029,17 @@ splitWith pred_ (PS fp off len) = splitWith' pred# off len fp w <- peekElemOff p (off'+idx') if pred' (case w of W8# w# -> w#) then return (PS fp' off' idx' : - splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp') + splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp') else splitLoop pred' p (idx'+1) off' len' fp' {-# INLINE splitWith #-} #else splitWith _ (PS _ _ 0) = [] -splitWith p ps = splitWith' p ps +splitWith p ps = loop p ps where - STRICT2(splitWith') - splitWith' q qs = if null rest then [chunk] - else chunk : splitWith' q (unsafeTail rest) + STRICT2(loop) + loop q qs = if null rest then [chunk] + else chunk : loop q (unsafeTail rest) where (chunk,rest) = break q qs #endif @@ -995,11 +1068,10 @@ split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do loop n = do let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n)) if q == nullPtr - then return [PS x (s+n) (l-n)] - else do let i = q `minusPtr` ptr - ls <- loop (i+1) - return $! PS x (s+n) (i-n) : ls - loop 0 + then [PS x (s+n) (l-n)] + else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1) + + return (loop 0) {-# INLINE split #-} {- @@ -1034,6 +1106,32 @@ split (W8# w#) (PS fp off len) = splitWith' off len fp -- tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] tokens f = P.filter (not.null) . splitWith f +{-# INLINE tokens #-} + +-- | The 'group' function takes a ByteString and returns a list of +-- ByteStrings such that the concatenation of the result is equal to the +-- argument. Moreover, each sublist in the result contains only equal +-- elements. For example, +-- +-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] +-- +-- It is a special case of 'groupBy', which allows the programmer to +-- supply their own equality test. It is about 40% faster than +-- /groupBy (==)/ +group :: ByteString -> [ByteString] +group xs + | null xs = [] + | otherwise = ys : group zs + where + (ys, zs) = spanByte (unsafeHead xs) xs + +-- | The 'groupBy' function is the non-overloaded version of 'group'. +groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] +groupBy k xs + | null xs = [] + | otherwise = take n xs : groupBy k (drop n xs) + where + n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs) -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of -- 'ByteString's and concatenates the list after interspersing the first @@ -1044,6 +1142,7 @@ join filler pss = concat (splice pss) splice [] = [] splice [x] = [x] splice (x:y:xs) = x:filler:splice (y:xs) +{-# INLINE join #-} -- -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings @@ -1053,9 +1152,9 @@ joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr -> withForeignPtr ffp $ \fp -> withForeignPtr fgp $ \gp -> do - memcpy ptr (fp `plusPtr` s) l + memcpy ptr (fp `plusPtr` s) (fromIntegral l) poke (ptr `plusPtr` l) c - memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m + memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m) where len = length f + length g + 1 {-# INLINE joinWithByte #-} @@ -1066,9 +1165,9 @@ joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr -> -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. index :: ByteString -> Int -> Word8 index ps n - | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n - | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n - ++ ", length = " ++ show (length ps) + | n < 0 = moduleError "index" ("negative index: " ++ show n) + | n >= length ps = moduleError "index" ("index too large: " ++ show n + ++ ", length = " ++ show (length ps)) | otherwise = ps `unsafeIndex` n {-# INLINE index #-} @@ -1111,14 +1210,13 @@ elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do let ptr = p `plusPtr` s STRICT1(loop) - loop n = do - let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n)) - if q == nullPtr - then return [] - else do let i = q `minusPtr` ptr - ls <- loop (i+1) - return $! i:ls - loop 0 + loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n)) + in if q == nullPtr + then [] + else let i = q `minusPtr` ptr + in i : loop (i+1) + return (loop 0) +{-# INLINE elemIndices #-} {- -- much slower @@ -1137,7 +1235,7 @@ elemIndices c ps = loop 0 ps -- But more efficiently than using length on the intermediate list. count :: Word8 -> ByteString -> Int count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> - return $ c_count (p `plusPtr` s) (fromIntegral m) w + return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w {-# INLINE count #-} {- @@ -1161,7 +1259,17 @@ count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> -- returns the index of the first element in the ByteString -- satisfying the predicate. findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int -findIndex = (listToMaybe .) . findIndices +findIndex k ps@(PS x s l) + | null ps = Nothing + | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 + where + STRICT2(go) + go ptr n | n >= l = return Nothing + | otherwise = do w <- peek ptr + if k w + then return (Just n) + else go (ptr `plusPtr` 1) (n+1) +{-# INLINE findIndex #-} -- | The 'findIndices' function extends 'findIndex', by returning the -- indices of all elements satisfying the predicate, in ascending order. @@ -1169,8 +1277,8 @@ findIndices :: (Word8 -> Bool) -> ByteString -> [Int] findIndices p ps = loop 0 ps where STRICT2(loop) - loop _ qs | null qs = [] - loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) + loop n qs | null qs = [] + | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) | otherwise = loop (n+1) (unsafeTail qs) -- --------------------------------------------------------------------- @@ -1183,9 +1291,34 @@ elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True -- | /O(n)/ 'notElem' is the inverse of 'elem' notElem :: Word8 -> ByteString -> Bool -notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False +notElem c ps = not (elem c ps) {-# INLINE notElem #-} +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. This function is subject to array fusion. +filter :: (Word8 -> Bool) -> ByteString -> ByteString +filter p = loopArr . loopU (filterEFL p) noAL +{-# INLINE filter #-} + +-- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be +-- around 2x faster for some one-shot applications. +filterF :: (Word8 -> Bool) -> ByteString -> ByteString +filterF k ps@(PS x s l) + | null ps = ps + | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do + t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) + return (t `minusPtr` p) -- actual length + where + STRICT3(go) + go f t end | f == end = return t + | otherwise = do + w <- peek f + if k w + then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end + else go (f `plusPtr` 1) t end +{-# INLINE filterF #-} + -- -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common -- case of filtering a single byte. It is more efficient to use @@ -1197,23 +1330,7 @@ notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False -- filter equivalent filterByte :: Word8 -> ByteString -> ByteString filterByte w ps = replicate (count w ps) w - -{- --- slower than the replicate version - -filterByte ch ps@(PS x s l) - | null ps = ps - | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do - t <- go (f `plusPtr` s) p l - return (t `minusPtr` p) -- actual length - where - STRICT3(go) - go _ t 0 = return t - go f t e = do w <- peek f - if w == ch - then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1) - else go (f `plusPtr` 1) t (e-1) --} +{-# INLINE filterByte #-} -- -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common @@ -1222,48 +1339,32 @@ filterByte ch ps@(PS x s l) -- -- > filterNotByte == filter . (/=) -- --- filterNotByte is around 3x faster, and uses much less space, than its --- filter equivalent +-- filterNotByte is around 2x faster than its filter equivalent. filterNotByte :: Word8 -> ByteString -> ByteString -filterNotByte ch ps@(PS x s l) - | null ps = ps - | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do - t <- go (f `plusPtr` s) p l - return (t `minusPtr` p) -- actual length - where - STRICT3(go) - go _ t 0 = return t - go f t e = do w <- peek f - if w /= ch - then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1) - else go (f `plusPtr` 1) t (e-1) - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. -filter :: (Word8 -> Bool) -> ByteString -> ByteString -filter k ps@(PS x s l) - | null ps = ps - | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do - t <- go (f `plusPtr` s) p l - return (t `minusPtr` p) -- actual length - where - STRICT3(go) - go _ t 0 = return t - go f t e = do w <- peek f - if k w - then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1) - else go (f `plusPtr` 1) t (e - 1) - --- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps +filterNotByte w = filterF (/= w) +{-# INLINE filterNotByte #-} -- | /O(n)/ The 'find' function takes a predicate and a ByteString, -- and returns the first element in matching the predicate, or 'Nothing' -- if there is no such element. +-- +-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing +-- find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 -find p ps = case filter p ps of - q | null q -> Nothing - | otherwise -> Just (unsafeHead q) +find f p = case findIndex f p of + Just n -> Just (p `unsafeIndex` n) + _ -> Nothing +{-# INLINE find #-} + +{- +-- +-- fuseable, but we don't want to walk the whole array. +-- +find k = foldl findEFL Nothing + where findEFL a@(Just _) _ = a + findEFL _ c | k c = Just c + | otherwise = Nothing +-} -- --------------------------------------------------------------------- -- Searching for substrings @@ -1276,7 +1377,7 @@ isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2) | l2 < l1 = False | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> withForeignPtr x2 $ \p2 -> do - i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1 + i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1) return (i == 0) -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' @@ -1294,7 +1395,7 @@ isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2) | l2 < l1 = False | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> withForeignPtr x2 $ \p2 -> do - i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1 + i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1) return (i == 0) -- | Check whether one string is a substring of another. @isSubstringOf @@ -1388,16 +1489,52 @@ elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1))) -- --------------------------------------------------------------------- -- ** Ordered 'ByteString's --- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3). +-- | /O(n)/ Sort a ByteString efficiently, using counting sort. +sort :: ByteString -> ByteString +sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do + + memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) + withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l) + + let STRICT2(go) + go 256 _ = return () + go i ptr = do n <- peekElemOff arr i + when (n /= 0) $ memset ptr (fromIntegral i) n >> return () + go (i + 1) (ptr `plusPtr` (fromIntegral n)) + go 0 p + +-- "countEach counts str l" counts the number of occurences of each Word8 in +-- str, and stores the result in counts. +countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO () +STRICT3(countEach) +countEach counts str l = go 0 + where + STRICT1(go) + go i | i == l = return () + | otherwise = do k <- fromIntegral `fmap` peekElemOff str i + x <- peekElemOff counts k + pokeElemOff counts k (x + 1) + go (i + 1) + +{- sort :: ByteString -> ByteString sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do memcpy p (f `plusPtr` s) l c_qsort p l -- inplace +-} {- sort = pack . List.sort . unpack -} +-- | The 'sortBy' function is the non-overloaded version of 'sort'. +-- +-- Try some linear sorts: radix, counting +-- Or mergesort. +-- +-- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString +-- sortBy f ps = undefined + -- --------------------------------------------------------------------- -- -- Extensions to the basic interface @@ -1553,7 +1690,8 @@ unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plu -- if a large string has been read in, and only a small part of it -- is needed in the rest of the program. copy :: ByteString -> ByteString -copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l +copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> + memcpy p (f `plusPtr` s) (fromIntegral l) -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the -- CString is going to be deallocated from C land. @@ -1565,7 +1703,7 @@ copyCStringLen :: CStringLen -> ByteString copyCStringLen (cstr, len) = inlinePerformIO $ do fp <- mallocForeignPtrArray (len+1) withForeignPtr fp $ \p -> do - memcpy p (castPtr cstr) len + memcpy p (castPtr cstr) (fromIntegral len) poke (p `plusPtr` len) (0 :: Word8) return $! PS fp 0 len @@ -1588,12 +1726,28 @@ unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p ` -- generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString generate i f = do - p <- mallocArray i + fp <- mallocByteString i + (ptr,n) <- withForeignPtr fp $ \p -> do + i' <- f p + if i' == i + then return (fp,i') + else do fp_ <- mallocByteString i' -- realloc + withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i') + return (fp_,i') + return (PS ptr 0 n) + +{- +-- +-- On the C malloc heap. Less fun. +-- +generate i f = do + p <- mallocArray (i+1) i' <- f p p' <- reallocArray p (i'+1) poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work fp <- newForeignFreePtr p' return $ PS fp 0 i' +-} -- --------------------------------------------------------------------- -- line IO @@ -1663,9 +1817,9 @@ hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do mkPS :: RawBuffer -> Int -> Int -> IO ByteString mkPS buf start end = do let len = end - start - fp <- mallocByteString (len `quot` 8) + fp <- mallocByteString len withForeignPtr fp $ \p -> do - memcpy_ptr_baoff p buf start (fromIntegral len) + memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len) return (PS fp 0 len) mkBigPS :: Int -> [ByteString] -> IO ByteString @@ -1744,7 +1898,7 @@ hGetContents h = do getContents :: IO ByteString getContents = hGetContents stdin --- | Read an entire file directly into a 'ByteString'. This is far more +-- | Read an entire file strictly into a 'ByteString'. This is far more -- efficient than reading the characters into a 'String' and then using -- 'pack'. It also may be more efficient than opening the file and -- reading it using hGet. @@ -1758,10 +1912,8 @@ readFile f = do -- | Write a 'ByteString' to a file. writeFile :: FilePath -> ByteString -> IO () -writeFile f ps = do - h <- openBinaryFile f WriteMode - hPut h ps - hClose h +writeFile f ps = bracket (openBinaryFile f WriteMode) hClose + (\h -> hPut h ps) {- -- @@ -1861,8 +2013,9 @@ mallocByteString l = do -- | A way of creating ForeignPtrs outside the IO monad. The @Int@ -- argument gives the final size of the ByteString. Unlike 'generate' --- the ByteString is no reallocated if the final size is less than the --- estimated size. +-- the ByteString is not reallocated if the final size is less than the +-- estimated size. Also, unlike 'generate' ByteString's created this way +-- are managed on the Haskell heap. create :: Int -> (Ptr Word8 -> IO ()) -> ByteString create l write_ptr = inlinePerformIO $ do fp <- mallocByteString (l+1) @@ -1878,8 +2031,12 @@ withPtr fp io = inlinePerformIO (withForeignPtr fp io) -- Common up near identical calls to `error' to reduce the number -- constant strings created when compiled: errorEmptyList :: String -> a -errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString") -{-# INLINE errorEmptyList #-} +errorEmptyList fun = moduleError fun "empty ByteString" +{-# NOINLINE errorEmptyList #-} + +moduleError :: String -> String -> a +moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg) +{-# NOINLINE moduleError #-} -- 'findIndexOrEnd' is a variant of findIndex, that returns the length -- of the string if no element is found, rather than Nothing. @@ -1944,10 +2101,10 @@ foreign import ccall unsafe "string.h memchr" memchr :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8 foreign import ccall unsafe "string.h memcmp" memcmp - :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt foreign import ccall unsafe "string.h memcpy" memcpy - :: Ptr Word8 -> Ptr Word8 -> Int -> IO () + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () -- --------------------------------------------------------------------- -- @@ -1955,22 +2112,19 @@ foreign import ccall unsafe "string.h memcpy" memcpy -- foreign import ccall unsafe "static fpstring.h reverse" c_reverse - :: Ptr Word8 -> Ptr Word8 -> Int -> IO () + :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse - :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO () + :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO () foreign import ccall unsafe "static fpstring.h maximum" c_maximum - :: Ptr Word8 -> Int -> Word8 + :: Ptr Word8 -> CInt -> Word8 foreign import ccall unsafe "static fpstring.h minimum" c_minimum - :: Ptr Word8 -> Int -> Word8 + :: Ptr Word8 -> CInt -> Word8 foreign import ccall unsafe "static fpstring.h count" c_count - :: Ptr Word8 -> Int -> Word8 -> Int - -foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort - :: Ptr Word8 -> Int -> IO () + :: Ptr Word8 -> CInt -> Word8 -> CInt -- --------------------------------------------------------------------- -- MMap @@ -1996,5 +2150,159 @@ foreign import ccall unsafe "RtsAPI.h getProgArgv" getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO () foreign import ccall unsafe "__hscore_memcpy_src_off" - memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ()) + memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) #endif + +-- --------------------------------------------------------------------- +-- +-- Functional array fusion for ByteStrings. +-- +-- From the Data Parallel Haskell project, +-- http://www.cse.unsw.edu.au/~chak/project/dph/ +-- + +-- |Data type for accumulators which can be ignored. The rewrite rules rely on +-- the fact that no bottoms of this type are ever constructed; hence, we can +-- assume @(_ :: NoAL) `seq` x = x@. +-- +data NoAL = NoAL + +-- | Special forms of loop arguments +-- +-- * These are common special cases for the three function arguments of gen +-- and loop; we give them special names to make it easier to trigger RULES +-- applying in the special cases represented by these arguments. The +-- "INLINE [1]" makes sure that these functions are only inlined in the last +-- two simplifier phases. +-- +-- * In the case where the accumulator is not needed, it is better to always +-- explicitly return a value `()', rather than just copy the input to the +-- output, as the former gives GHC better local information. +-- + +-- | Element function expressing a mapping only +mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8)) +mapEFL f = \_ e -> (noAL, (Just $ f e)) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] mapEFL #-} +#endif + +-- | Element function implementing a filter function only +filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8)) +filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] filterEFL #-} +#endif + +-- |Element function expressing a reduction only +foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8)) +foldEFL f = \a e -> (f a e, Nothing) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] foldEFL #-} +#endif + +-- | A strict foldEFL. +foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8)) +foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] foldEFL' #-} +#endif + +-- | No accumulator +noAL :: NoAL +noAL = NoAL +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] noAL #-} +#endif + +-- | Projection functions that are fusion friendly (as in, we determine when +-- they are inlined) +loopArr :: (ByteString, acc) -> ByteString +loopArr (arr, _) = arr +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopArr #-} +#endif + +loopAcc :: (ByteString, acc) -> acc +loopAcc (_, acc) = acc +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopAcc #-} +#endif + +loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2) +loopSndAcc (arr, (_, acc)) = (arr, acc) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopSndAcc #-} +#endif + +------------------------------------------------------------------------ + +-- +-- size, and then percentage. +-- + +-- | Iteration over over ByteStrings +loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem + -> acc -- ^ initial acc value + -> ByteString -- ^ input ByteString + -> (ByteString, acc) + +loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do + fp <- mallocByteString i + (ptr,n,acc) <- withForeignPtr fp $ \p -> do + (acc, i') <- go (a `plusPtr` s) p start + if i' == i + then return (fp,i',acc) -- no realloc for map + else do fp_ <- mallocByteString i' -- realloc + withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i') + return (fp_,i',acc) + + return (PS ptr 0 n, acc) + where + go p ma = trans 0 0 + where + STRICT3(trans) + trans a_off ma_off acc + | a_off >= i = return (acc, ma_off) + | otherwise = do + x <- peekByteOff p a_off + let (acc', oe) = f acc x + ma_off' <- case oe of + Nothing -> return ma_off + Just e -> do pokeByteOff ma ma_off e + return $ ma_off + 1 + trans (a_off+1) ma_off' acc' + +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] loopU #-} +#endif + +infixr 9 `fuseEFL` + +-- |Fuse to flat loop functions +fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8)) + -> (a2 -> Word8 -> (a2, Maybe Word8)) + -> (a1, a2) + -> Word8 + -> ((a1, a2), Maybe Word8) +fuseEFL f g (acc1, acc2) e1 = + case f acc1 e1 of + (acc1', Nothing) -> ((acc1', acc2), Nothing) + (acc1', Just e2) -> + case g acc2 e2 of + (acc2', res) -> ((acc1', acc2'), res) + +{-# RULES + +"loop/loop fusion!" forall em1 em2 start1 start2 arr. + loopU em2 start2 (loopArr (loopU em1 start1 arr)) = + loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr) + +"loopArr/loopSndAcc" forall x. + loopArr (loopSndAcc x) = loopArr x + +"seq/NoAL" forall (u::NoAL) e. + u `seq` e = e + + #-} +