From 8252da330a3557f889ff39704c4819c66ea0e6de Mon Sep 17 00:00:00 2001 From: Don Stewart Date: Sat, 20 May 2006 03:04:36 +0000 Subject: [PATCH] Sync with FPS head, including the following patches: Thu May 18 15:45:46 EST 2006 sjanssen@cse.unl.edu * Export unsafeTake and unsafeDrop Fri May 19 11:53:08 EST 2006 Don Stewart * Add foldl1' Fri May 19 13:41:24 EST 2006 Don Stewart * Add fuseable scanl, scanl1 + properties Fri May 19 18:20:40 EST 2006 Don Stewart * Spotted another chance to use unsafeTake,Drop (in groupBy) Thu May 18 09:24:25 EST 2006 Duncan Coutts * More effecient findIndexOrEnd based on the impl of findIndex Thu May 18 09:22:49 EST 2006 Duncan Coutts * Eliminate special case in findIndex since it's handled anyway. Thu May 18 09:19:08 EST 2006 Duncan Coutts * Add unsafeTake and unsafeDrop These versions assume the n is in the bounds of the bytestring, saving two comparison tests. Then use them in varous places where we think this holds. These cases need double checking (and there are a few remaining internal uses of take / drop that might be possible to convert). Not exported for the moment. Tue May 16 23:15:11 EST 2006 Don Stewart * Handle n < 0 in drop and splitAt. Spotted by QC. Tue May 16 22:46:22 EST 2006 Don Stewart * Handle n <= 0 cases for unfoldr and replicate. Spotted by QC Tue May 16 21:34:11 EST 2006 Don Stewart * mapF -> map', filterF -> filter' --- Data/ByteString.hs | 142 +++++++++++++++++++++++++++++++++++++--------- Data/ByteString/Char8.hs | 33 ++++++++++- 2 files changed, 144 insertions(+), 31 deletions(-) diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 139609c..2d4caa7 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -76,6 +76,7 @@ module Data.ByteString ( foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a @@ -88,6 +89,10 @@ module Data.ByteString ( minimum, -- :: ByteString -> Word8 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString + -- * Building ByteStrings + scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString + scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString + -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Word8 -> ByteString unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString @@ -96,7 +101,9 @@ module Data.ByteString ( -- ** Breaking strings take, -- :: Int -> ByteString -> ByteString + unsafeTake, -- :: Int -> ByteString -> ByteString drop, -- :: Int -> ByteString -> ByteString + unsafeDrop, -- :: Int -> ByteString -> ByteString splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString @@ -129,6 +136,7 @@ module Data.ByteString ( findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] count, -- :: Word8 -> ByteString -> Int + findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int -- * Ordered ByteStrings sort, -- :: ByteString -> ByteString @@ -218,6 +226,7 @@ module Data.ByteString ( #if defined(__GLASGOW_HASKELL__) getArgs, -- :: IO [ByteString] hGetLine, -- :: Handle -> IO ByteString + hGetLines, -- :: Handle -> IO [ByteString] hGetNonBlocking, -- :: Handle -> Int -> IO ByteString #endif hGetContents, -- :: Handle -> IO ByteString @@ -230,7 +239,7 @@ module Data.ByteString ( #endif noAL, NoAL, loopArr, loopAcc, loopSndAcc, - loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, + loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL, ) where @@ -240,7 +249,7 @@ import Prelude hiding (reverse,head,tail,last,init,null ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,maximum ,minimum,all,concatMap,foldl1,foldr1 - ,readFile,writeFile,replicate + ,scanl,scanl1,readFile,writeFile,replicate ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) @@ -253,11 +262,11 @@ import Data.Array (listArray) import qualified Data.Array as Array ((!)) -- Control.Exception.bracket not available in yhc or nhc -import Control.Exception (bracket) +import Control.Exception (bracket, assert) import Control.Monad (when) import Foreign.C.String (CString, CStringLen) -import Foreign.C.Types (CSize, CInt) +import Foreign.C.Types (CSize,CInt) import Foreign.ForeignPtr import Foreign.Marshal.Array import Foreign.Ptr @@ -488,6 +497,8 @@ unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do loop (p `plusPtr` off) (len-1) ch {-# INLINE [0] unpackFoldr #-} +-- TODO just use normal foldr here. + #endif ------------------------------------------------------------------------ @@ -703,6 +714,12 @@ foldl1 f ps | null ps = errorEmptyList "foldl1" | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) +-- | A strict version of 'foldl1' +foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldl1' f ps + | null ps = errorEmptyList "foldl1'" + | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) + -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'ByteString's foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 @@ -728,6 +745,7 @@ concat xs = create len $ \ptr -> go xs ptr -- | Map a function over a 'ByteString' and concatenate the results concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString concatMap f = foldr (append . f) empty +-- A silly function for ByteStrings anyway. -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if -- any element of the 'ByteString' satisfies the predicate. @@ -818,6 +836,31 @@ mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f -> go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p -- --------------------------------------------------------------------- +-- Building ByteStrings + +-- | 'scanl' is similar to 'foldl', but returns a list of successive +-- reduced values from the left. This function will fuse. +-- +-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] +-- +-- Note that +-- +-- > last (scanl f z xs) == foldl f z xs. +scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString +scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space +{-# INLINE scanl #-} + +-- | 'scanl1' is a variant of 'scanl' that has no starting value argument. +-- This function will fuse. +-- +-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] +scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString +scanl1 f ps + | null ps = empty + | otherwise = scanl f (unsafeHead ps) (unsafeTail ps) +{-# INLINE scanl1 #-} + +-- --------------------------------------------------------------------- -- Unfolds and replicates -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ @@ -881,7 +924,7 @@ unfoldrN i f w -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. take :: Int -> ByteString -> ByteString take n ps@(PS x s l) - | n < 0 = empty + | n <= 0 = empty | n >= l = ps | otherwise = PS x s n {-# INLINE take #-} @@ -891,30 +934,33 @@ take n ps@(PS x s l) drop :: Int -> ByteString -> ByteString drop n ps@(PS x s l) | n <= 0 = ps - | n > l = empty + | n >= l = empty | otherwise = PS x (s+n) (l-n) {-# INLINE drop #-} -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. splitAt :: Int -> ByteString -> (ByteString, ByteString) -splitAt n ps = (take n ps, drop n ps) +splitAt n ps@(PS x s l) + | n <= 0 = (empty, ps) + | n >= l = (ps, empty) + | otherwise = (PS x s n, PS x (s+n) (l-n)) {-# INLINE splitAt #-} -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, -- returns the longest prefix (possibly empty) of @xs@ of elements that -- satisfy @p@. takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString -takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps +takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps {-# INLINE takeWhile #-} -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString -dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps +dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps {-# INLINE dropWhile #-} -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) -break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps) +break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) {-# INLINE break #-} -- | 'breakByte' breaks its ByteString argument at the first occurence @@ -926,7 +972,7 @@ break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps) breakByte :: Word8 -> ByteString -> (ByteString, ByteString) breakByte c p = case elemIndex c p of Nothing -> (p,empty) - Just n -> (take n p, drop n p) + Just n -> (unsafeTake n p, unsafeDrop n p) {-# INLINE breakByte #-} -- | 'spanByte' breaks its ByteString argument at the first @@ -943,7 +989,7 @@ spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> go p i | i >= l = return (ps, empty) | otherwise = do c' <- peekByteOff p i if c /= c' - then return (take i ps, drop i ps) + then return (unsafeTake i ps, unsafeDrop i ps) else go p (i+1) {-# INLINE spanByte #-} @@ -961,7 +1007,7 @@ spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString) breakFirst c p = case elemIndex c p of Nothing -> Nothing - Just n -> Just (take n p, drop (n+1) p) + Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p) {-# INLINE breakFirst #-} -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the @@ -978,7 +1024,7 @@ breakFirst c p = case elemIndex c p of breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString) breakLast c p = case elemIndexLast c p of Nothing -> Nothing - Just n -> Just (take n p, drop (n+1) p) + Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p) {-# INLINE breakLast #-} -- | 'span' @p xs@ breaks the ByteString into two segments. It is @@ -1133,7 +1179,7 @@ group xs groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] groupBy k xs | null xs = [] - | otherwise = take n xs : groupBy k (drop n xs) + | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs) where n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs) @@ -1263,9 +1309,7 @@ count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> -- returns the index of the first element in the ByteString -- satisfying the predicate. findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int -findIndex k ps@(PS x s l) - | null ps = Nothing - | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 +findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 where STRICT2(go) go ptr n | n >= l = return Nothing @@ -1285,6 +1329,19 @@ findIndices p ps = loop 0 ps | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) | otherwise = loop (n+1) (unsafeTail qs) +-- | 'findIndexOrEnd' is a variant of findIndex, that returns the length +-- of the string if no element is found, rather than Nothing. +findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int +findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 + where + STRICT2(go) + go ptr n | n >= l = return l + | otherwise = do w <- peek ptr + if k w + then return n + else go (ptr `plusPtr` 1) (n+1) +{-# INLINE findIndexOrEnd #-} + -- --------------------------------------------------------------------- -- Searching ByteStrings @@ -1566,6 +1623,20 @@ unsafeIndex :: ByteString -> Int -> Word8 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i) {-# INLINE unsafeIndex #-} +-- | A variety of 'take' which omits the checks on @n@ so there is an +-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. +unsafeTake :: Int -> ByteString -> ByteString +unsafeTake n (PS x s l) = + assert (0 <= n && n <= l) $ PS x s n +{-# INLINE unsafeTake #-} + +-- | A variety of 'drop' which omits the checks on @n@ so there is an +-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. +unsafeDrop :: Int -> ByteString -> ByteString +unsafeDrop n (PS x s l) = + assert (0 <= n && n <= l) $ PS x (s+n) (l-n) +{-# INLINE unsafeDrop #-} + -- --------------------------------------------------------------------- -- Low level constructors @@ -1762,6 +1833,19 @@ generate i f = do getLine :: IO ByteString getLine = hGetLine stdin +-- | Lazily construct a list of lines of ByteStrings. This will be much +-- better on memory consumption than using lines =<< getContents. +hGetLines :: Handle -> IO [ByteString] +hGetLines h = go + where + go = unsafeInterleaveIO $ do + ms <- catch (hGetLine h >>= return . Just) + (\_ -> return Nothing) + case ms of + Nothing -> return [] + Just s -> do ss <- go + return (s:ss) + -- | hGetLine. read a ByteString from a handle hGetLine :: Handle -> IO ByteString hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do @@ -1872,6 +1956,10 @@ hGetNonBlocking h i = do #endif -- | Read entire handle contents into a 'ByteString'. +-- This function reads chunks at a time, doubling the chunksize on each +-- read. The final buffer is then realloced to the appropriate size. For +-- files > half of available memory, this may lead to memory exhaustion. +-- Consider using 'readFile' in this case. -- -- As with 'hGet', the string representation in the file is assumed to -- be ISO-8859-1. @@ -2042,16 +2130,6 @@ moduleError :: String -> String -> a moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg) {-# NOINLINE moduleError #-} --- 'findIndexOrEnd' is a variant of findIndex, that returns the length --- of the string if no element is found, rather than Nothing. -findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int -STRICT2(findIndexOrEnd) -findIndexOrEnd f ps - | null ps = 0 - | f (unsafeHead ps) = 0 - | otherwise = 1 + findIndexOrEnd f (unsafeTail ps) -{-# INLINE findIndexOrEnd #-} - -- Find from the end of the string using predicate findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int STRICT2(findFromEndUntil) @@ -2212,6 +2290,14 @@ foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing) {-# INLINE [1] foldEFL' #-} #endif +-- | Element function expressing a prefix reduction only +-- +scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8) +scanEFL f = \a e -> (f a e, Just a) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] scanEFL #-} +#endif + -- | No accumulator noAL :: NoAL noAL = NoAL diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index e33b15f..c4fd8af 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -69,6 +69,7 @@ module Data.ByteString.Char8 ( foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char + foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a @@ -81,6 +82,10 @@ module Data.ByteString.Char8 ( minimum, -- :: ByteString -> Char mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString + -- * Building ByteStrings + scanl, + scanl1, + -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Char -> ByteString unfoldrN, -- :: (a -> Maybe (Char, a)) -> a -> ByteString @@ -221,7 +226,7 @@ module Data.ByteString.Char8 ( unpackList, #endif noAL, NoAL, loopArr, loopAcc, loopSndAcc, - loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, + loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL, filter', map' ) where @@ -231,7 +236,7 @@ import Prelude hiding (reverse,head,tail,last,init,null ,length,map,lines,foldl,foldr,unlines ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,unwords - ,words,maximum,minimum,all,concatMap + ,words,maximum,minimum,all,concatMap,scanl,scanl1 ,foldl1,foldr1,readFile,writeFile,replicate ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) @@ -255,7 +260,7 @@ import Data.ByteString (ByteString(..) ,unpackList #endif ,noAL, NoAL, loopArr, loopAcc, loopSndAcc - ,loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL + ,loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL ,useAsCString, unsafeUseAsCString ) @@ -382,6 +387,11 @@ foldl1 :: (Char -> Char -> Char) -> ByteString -> Char foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) {-# INLINE foldl1 #-} +-- | A strict version of 'foldl1' +foldl1' :: (Char -> Char -> Char) -> ByteString -> Char +foldl1' f ps = w2c (B.foldl1' (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldl1' #-} + -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'ByteString's foldr1 :: (Char -> Char -> Char) -> ByteString -> Char @@ -420,6 +430,23 @@ mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c))) {-# INLINE mapIndexed #-} +-- | 'scanl' is similar to 'foldl', but returns a list of successive +-- reduced values from the left: +-- +-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] +-- +-- Note that +-- +-- > last (scanl f z xs) == foldl f z xs. +scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +scanl f z = B.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) + +-- | 'scanl1' is a variant of 'scanl' that has no starting value argument: +-- +-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] +scanl1 :: (Char -> Char -> Char) -> ByteString -> ByteString +scanl1 f = B.scanl1 (\a b -> c2w (f (w2c a) (w2c b))) + -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ -- the value of every element. The following holds: -- -- 1.7.10.4