From: Don Stewart Date: Thu, 25 May 2006 06:50:12 +0000 (+0000) Subject: Merge in changes from fps head. Highlights: X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=a14a8400e73713d5ac8c1b3405ac13f4cfe86acf;p=haskell-directory.git Merge in changes from fps head. Highlights: Wed May 24 15:49:38 EST 2006 sjanssen@cse.unl.edu * instance Monoid ByteString Wed May 24 15:04:04 EST 2006 Duncan Coutts * Rearrange export lists for the .Char8 modules Wed May 24 14:59:56 EST 2006 Duncan Coutts * Implement mapAccumL and reimplement mapIndexed using loopU Wed May 24 14:47:32 EST 2006 Duncan Coutts * Change the implementation of the unfoldr(N) functions. Use a more compact implementation for unfoldrN and change its behaviour to only return Just in the case that it actually 'overflowed' the N, so the boundary case of unfolding exactly N gives Nothing. Implement unfoldr and Lazy.unfoldr in terms of unfoldrN. Use Fibonacci growth for the chunk size in unfoldr Wed May 24 08:32:29 EST 2006 sjanssen@cse.unl.edu * Add unfoldr to ByteString and .Char8 A preliminary implementation of unfoldr. Wed May 24 01:39:41 EST 2006 Duncan Coutts * Reorder the export lists to better match the Data.List api Tue May 23 14:04:32 EST 2006 Don Stewart * pack{Byte,Char} -> singleton. As per fptools convention Tue May 23 14:00:51 EST 2006 Don Stewart * elemIndexLast -> elemIndexEnd Tue May 23 13:57:34 EST 2006 Don Stewart * In the search for a more orthogonal api, we kill breakFirst/breakLast, which were of dubious value Tue May 23 12:24:09 EST 2006 Don Stewart * Abolish elems. Its name implied it was unpack, but its type didn't. It made no sense Tue May 23 10:42:09 EST 2006 Duncan Coutts * Minor doc tidyup. Use haddock markup better. Tue May 23 11:00:31 EST 2006 Don Stewart * Simplify the join() implementation. Spotted by Duncan. 
--- diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 2d4caa7..2001110 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -23,11 +23,11 @@ -- | A time and space-efficient implementation of byte vectors using -- packed Word8 arrays, suitable for high performance use, both in terms -- of large data quantities, or high speed requirements. Byte vectors --- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr, +-- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr', -- and can be passed between C and Haskell with little effort. -- -- This module is intended to be imported @qualified@, to avoid name --- clashes with Prelude functions. eg. +-- clashes with "Prelude" functions. eg. -- -- > import qualified Data.ByteString as B -- @@ -43,7 +43,7 @@ module Data.ByteString ( -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - packByte, -- :: Word8 -> ByteString + singleton, -- :: Word8 -> ByteString pack, -- :: [Word8] -> ByteString unpack, -- :: ByteString -> [Word8] packWith, -- :: (a -> Word8) -> [a] -> ByteString @@ -52,33 +52,28 @@ module Data.ByteString ( -- * Basic interface cons, -- :: Word8 -> ByteString -> ByteString snoc, -- :: ByteString -> Word8 -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int + append, -- :: ByteString -> ByteString -> ByteString head, -- :: ByteString -> Word8 - tail, -- :: ByteString -> ByteString last, -- :: ByteString -> Word8 + tail, -- :: ByteString -> ByteString init, -- :: ByteString -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - - -- * Special ByteStrings - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - elems, -- :: ByteString -> [ByteString] + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int -- * Transformating ByteStrings map, -- :: (Word8 -> Word8) -> ByteString -> ByteString + map', -- :: (Word8 -> Word8) -> ByteString -> ByteString reverse, -- :: 
ByteString -> ByteString intersperse, -- :: Word8 -> ByteString -> ByteString transpose, -- :: [ByteString] -> [ByteString] - map', -- :: (Word8 -> Word8) -> ByteString -> ByteString - -- * Reducing 'ByteString's + -- * Reducing 'ByteString's (folds) foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a - foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a + foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 - foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a -- ** Special folds concat, -- :: [ByteString] -> ByteString @@ -87,94 +82,99 @@ module Data.ByteString ( all, -- :: (Word8 -> Bool) -> ByteString -> Bool maximum, -- :: ByteString -> Word8 minimum, -- :: ByteString -> Word8 - mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString -- * Building ByteStrings + -- ** Scans scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString - -- * Generating and unfolding ByteStrings + -- ** Accumulating maps + mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) +-- mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) + mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString + + -- ** Unfolding ByteStrings replicate, -- :: Int -> Word8 -> ByteString - unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString + unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString + unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString -- * Substrings -- ** Breaking strings take, -- :: Int -> ByteString -> ByteString - unsafeTake, -- :: Int -> ByteString -> ByteString drop, -- :: Int -> ByteString -> ByteString - unsafeDrop, -- :: 
Int -> ByteString -> ByteString splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString - break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + group, -- :: ByteString -> [ByteString] + groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] -- ** Breaking and dropping on specific bytes breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) - breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString) - breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString) -- ** Breaking into many substrings split, -- :: Word8 -> ByteString -> [ByteString] splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -- ** Joining strings join, -- :: ByteString -> [ByteString] -> ByteString joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString - -- * Indexing ByteStrings - index, -- :: ByteString -> Int -> Word8 - elemIndex, -- :: Word8 -> ByteString -> Maybe Int - elemIndices, -- :: Word8 -> ByteString -> [Int] - elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int - findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int - findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] - count, -- :: Word8 -> ByteString -> Int - findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int + -- * Predicates + isPrefixOf, -- :: ByteString -> 
ByteString -> Bool + isSuffixOf, -- :: ByteString -> ByteString -> Bool - -- * Ordered ByteStrings - sort, -- :: ByteString -> ByteString + -- ** Search for arbitrary substrings + isSubstringOf, -- :: ByteString -> ByteString -> Bool + findSubstring, -- :: ByteString -> ByteString -> Maybe Int + findSubstrings, -- :: ByteString -> ByteString -> [Int] -- * Searching ByteStrings -- ** Searching by equality -- | These functions use memchr(3) to efficiently search the ByteString - elem, -- :: Word8 -> ByteString -> Bool notElem, -- :: Word8 -> ByteString -> Bool filterByte, -- :: Word8 -> ByteString -> ByteString filterNotByte, -- :: Word8 -> ByteString -> ByteString -- ** Searching with a predicate - filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 + filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString +-- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) - -- ** Prefixes and suffixes - -- | These functions use memcmp(3) to efficiently compare substrings - isPrefixOf, -- :: ByteString -> ByteString -> Bool - isSuffixOf, -- :: ByteString -> ByteString -> Bool - - -- ** Search for arbitrary substrings - isSubstringOf, -- :: ByteString -> ByteString -> Bool - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] + -- * Indexing ByteStrings + index, -- :: ByteString -> Int -> Word8 + elemIndex, -- :: Word8 -> ByteString -> Maybe Int + elemIndices, -- :: Word8 -> ByteString -> [Int] + elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int + findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int + findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] + count, -- :: Word8 -> ByteString -> Int + findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int -- * Zipping and unzipping ByteStrings zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] zipWith, 
-- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) + -- * Ordered ByteStrings + sort, -- :: ByteString -> ByteString + -- * Unchecked access unsafeHead, -- :: ByteString -> Word8 unsafeTail, -- :: ByteString -> ByteString unsafeIndex, -- :: ByteString -> Int -> Word8 + unsafeTake, -- :: Int -> ByteString -> ByteString + unsafeDrop, -- :: Int -> ByteString -> ByteString -- * Low level introduction and elimination generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString @@ -206,7 +206,7 @@ module Data.ByteString ( copyCString, -- :: CString -> ByteString copyCStringLen, -- :: CStringLen -> ByteString - -- * I\/O with @ByteString@s + -- * I\/O with 'ByteString's -- ** Standard input and output @@ -240,6 +240,7 @@ module Data.ByteString ( noAL, NoAL, loopArr, loopAcc, loopSndAcc, loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL, + mapAccumEFL, mapIndexEFL, ) where @@ -277,6 +278,8 @@ import System.IO (stdin,stdout,hClose,hFileSize ,hGetBuf,hPutBuf,openBinaryFile ,Handle,IOMode(..)) +import Data.Monoid (Monoid, mempty, mappend, mconcat) + #if !defined(__GLASGOW_HASKELL__) import System.IO.Unsafe #endif @@ -342,6 +345,11 @@ instance Show ByteString where instance Read ByteString where readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ] +instance Monoid ByteString where + mempty = empty + mappend = append + mconcat = concat + {- instance Arbitrary PackedString where arbitrary = P.pack `fmap` arbitrary @@ -405,18 +413,18 @@ empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0 {-# NOINLINE empty #-} -- | /O(1)/ Convert a 'Word8' into a 'ByteString' -packByte :: Word8 -> ByteString -packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do +singleton :: Word8 -> ByteString +singleton c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do withForeignPtr fp $ \p -> poke p c return $ PS fp 0 1 -{-# INLINE packByte #-} +{-# INLINE singleton #-} -- 
-- XXX The unsafePerformIO is critical! -- -- Otherwise: -- --- packByte 255 `compare` packByte 127 +-- singleton 255 `compare` singleton 127 -- -- is compiled to: -- @@ -688,7 +696,7 @@ foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> lgo (f z c) (p `plusPtr` 1) q -} --- | 'foldl\'' is like foldl, but strict in the accumulator. +-- | 'foldl\'' is like 'foldl', but strict in the accumulator. foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a foldl' f z = loopAcc . loopU (foldEFL' f) z {-# INLINE foldl' #-} @@ -714,7 +722,7 @@ foldl1 f ps | null ps = errorEmptyList "foldl1" | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) --- | A strict version of 'foldl1' +-- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 foldl1' f ps | null ps = errorEmptyList "foldl1'" @@ -823,17 +831,14 @@ minimum_ ptr n m c minimum_ ptr (n+1) m (if w < c then w else c) -} +mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) +mapAccumL f z = loopU (mapAccumEFL f) z + +--mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) + -- | /O(n)/ map Word8 functions, provided with the index at each position mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString -mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f -> - go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l) - where - go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () - STRICT4(go) - go n f t p | f == p = return () - | otherwise = do w <- peek f - ((poke t) . k n) w - go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p +mapIndexed f = loopArr . 
loopU (mapIndexEFL f) 0 -- --------------------------------------------------------------------- -- Building ByteStrings @@ -882,40 +887,49 @@ replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1) -} --- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'. --- 'unfoldrN' builds a ByteString from a seed value. The function takes --- the element and returns 'Nothing' if it is done producing the --- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a --- prepending to the ByteString and @b@ is used as the next element in a --- recursive call. --- --- To preven unfoldrN having /O(n^2)/ complexity (as prepending a --- character to a ByteString is /O(n)/, this unfoldr requires a maximum --- final size of the ByteString as an argument. 'cons' can then be --- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has --- linear complexity. The depth of the recursion is limited to this --- size, but may be less. For lazy, infinite unfoldr, use --- 'Data.List.unfoldr' (from 'Data.List'). +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- ByteString from a seed value. The function takes the element and +-- returns 'Nothing' if it is done producing the ByteString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, +-- and @b@ is the seed value for further production. 
-- -- Examples: -- --- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789" --- --- The following equation connects the depth-limited unfoldr to the List unfoldr: --- --- > unfoldrN n == take n $ List.unfoldr -unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString -unfoldrN i f w - | i <= 0 = empty - | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0 - where - STRICT3(go) - go q c n | n == i = return n -- stop if we reach `i' - | otherwise = case f c of - Nothing -> return n - Just (a,new_c) -> do - poke q a - go (q `plusPtr` 1) new_c (n+1) +-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 +-- > == pack [0, 1, 2, 3, 4, 5] +-- +unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString +unfoldr f = concat . unfoldChunk 32 64 + where unfoldChunk n n' x = + case unfoldrN n f x of + (s, Nothing) -> s : [] + (s, Just x') -> s : unfoldChunk n' (n+n') x' + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. 
+-- +-- The following equation relates 'unfoldrN' and 'unfoldr': +-- +-- > unfoldrN n f s == take n (unfoldr f s) +-- +unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) +unfoldrN i f x0 + | i < 0 = (empty, Just x0) + | otherwise = inlinePerformIO $ do + fp <- mallocByteString i + withForeignPtr fp (\p -> go fp p x0 0) + where STRICT4(go) + go fp p x n = + case f x of + Nothing -> let s = copy (PS fp 0 n) + in s `seq` return (s, Nothing) + Just (w,x') + | n == i -> return (PS fp 0 i, Just x) + | otherwise -> do poke p w + go fp (p `plusPtr` 1) x' (n+1) -- --------------------------------------------------------------------- -- Substrings @@ -993,40 +1007,6 @@ spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> else go p (i+1) {-# INLINE spanByte #-} --- | /O(n)/ 'breakFirst' breaks the given ByteString on the first --- occurence of @w@. It behaves like 'break', except the delimiter is --- not returned, and @Nothing@ is returned if the delimiter is not in --- the ByteString. I.e. --- --- > breakFirst 'b' "aabbcc" == Just ("aa","bcc") --- --- > breakFirst c xs == --- > let (x,y) = break (== c) xs --- > in if null y then Nothing else Just (x, drop 1 y)) --- -breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString) -breakFirst c p = case elemIndex c p of - Nothing -> Nothing - Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p) -{-# INLINE breakFirst #-} - --- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the --- ByteString. 
--- --- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc") --- --- and the following are equivalent: --- --- > breakLast 'c' "abcdef" --- > let (x,y) = break (=='c') (reverse "abcdef") --- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x) --- -breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString) -breakLast c p = case elemIndexLast c p of - Nothing -> Nothing - Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p) -{-# INLINE breakLast #-} - -- | 'span' @p xs@ breaks the ByteString into two segments. It is -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) @@ -1187,11 +1167,7 @@ groupBy k xs -- 'ByteString's and concatenates the list after interspersing the first -- argument between each element of the list. join :: ByteString -> [ByteString] -> ByteString -join filler pss = concat (splice pss) - where - splice [] = [] - splice [x] = [x] - splice (x:y:xs) = x:filler:splice (y:xs) +join s = concat . (List.intersperse s) {-# INLINE join #-} -- @@ -1232,16 +1208,16 @@ elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p' {-# INLINE elemIndex #-} --- | /O(n)/ The 'elemIndexLast' function returns the last index of the +-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the -- element in the given 'ByteString' which is equal to the query -- element, or 'Nothing' if there is no such element. 
The following -- holds: -- --- > elemIndexLast c xs == +-- > elemIndexEnd c xs == -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) -- -elemIndexLast :: Word8 -> ByteString -> Maybe Int -elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> +elemIndexEnd :: Word8 -> ByteString -> Maybe Int +elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> go (p `plusPtr` s) (l-1) where STRICT2(go) @@ -1250,7 +1226,7 @@ elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> if ch == ch' then return $ Just i else go p (i-1) -{-# INLINE elemIndexLast #-} +{-# INLINE elemIndexEnd #-} -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. @@ -1541,12 +1517,6 @@ tails p | null p = [empty] -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]] --- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each. -elems :: ByteString -> [ByteString] -elems (PS _ _ 0) = [] -elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1))) -{-# INLINE elems #-} - -- --------------------------------------------------------------------- -- ** Ordered 'ByteString's @@ -1932,7 +1902,7 @@ putStr = hPut stdout -- | Write a ByteString to stdout, appending a newline byte putStrLn :: ByteString -> IO () putStrLn ps = hPut stdout ps >> hPut stdout nl - where nl = packByte 0x0a + where nl = singleton 0x0a -- | Read a 'ByteString' directly from the specified 'Handle'. 
This -- is far more efficient than reading the characters into a 'String' @@ -2298,6 +2268,22 @@ scanEFL f = \a e -> (f a e, Just a) {-# INLINE [1] scanEFL #-} #endif +-- | Element function implementing a map and fold +-- +mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> acc -> Word8 -> (acc, Maybe Word8) +mapAccumEFL f = \a e -> case f a e of (a', e') -> (a', Just e') +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] mapAccumEFL #-} +#endif + +-- | Element function implementing a map with index +-- +mapIndexEFL :: (Int -> Word8 -> Word8) -> Int -> Word8 -> (Int, Maybe Word8) +mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i', Just $ f i e) +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] mapIndexEFL #-} +#endif + -- | No accumulator noAL :: NoAL noAL = NoAL @@ -2307,20 +2293,20 @@ noAL = NoAL -- | Projection functions that are fusion friendly (as in, we determine when -- they are inlined) -loopArr :: (ByteString, acc) -> ByteString -loopArr (arr, _) = arr +loopArr :: (acc, byteString) -> byteString +loopArr (_, arr) = arr #if defined(__GLASGOW_HASKELL__) {-# INLINE [1] loopArr #-} #endif -loopAcc :: (ByteString, acc) -> acc -loopAcc (_, acc) = acc +loopAcc :: (acc, byteString) -> acc +loopAcc (acc, _) = acc #if defined(__GLASGOW_HASKELL__) {-# INLINE [1] loopAcc #-} #endif -loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2) -loopSndAcc (arr, (_, acc)) = (arr, acc) +loopSndAcc :: ((acc1, acc2), byteString) -> (acc2, byteString) +loopSndAcc ((_, acc), arr) = (acc, arr) #if defined(__GLASGOW_HASKELL__) {-# INLINE [1] loopSndAcc #-} #endif @@ -2335,7 +2321,7 @@ loopSndAcc (arr, (_, acc)) = (arr, acc) loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem -> acc -- ^ initial acc value -> ByteString -- ^ input ByteString - -> (ByteString, acc) + -> (acc, ByteString) loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do fp <- mallocByteString i @@ -2347,7 +2333,7 @@ loopU f start (PS z s i) = inlinePerformIO 
$ withForeignPtr z $ \a -> do withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i') return (fp_,i',acc) - return (PS ptr 0 n, acc) + return (acc, PS ptr 0 n) where go p ma = trans 0 0 where diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index c4fd8af..86916f2 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -10,9 +10,9 @@ -- -- --- | Manipulate ByteStrings using Char operations. All Chars will be +-- | Manipulate 'ByteString's using 'Char' operations. All Chars will be -- truncated to 8 bits. It can be expected that these functions will run --- at identical speeds to their Word8 equivalents in @Data.ByteString@. +-- at identical speeds to their 'Word8' equivalents in "Data.ByteString". -- -- More specifically these byte strings are taken to be in the -- subset of Unicode covered by code points 0-255. This covers @@ -27,7 +27,7 @@ -- * -- -- This module is intended to be imported @qualified@, to avoid name --- clashes with Prelude functions. eg. +-- clashes with "Prelude" functions. eg. 
-- -- > import qualified Data.ByteString.Char8 as B -- @@ -39,25 +39,20 @@ module Data.ByteString.Char8 ( -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - packChar, -- :: Char -> ByteString + singleton, -- :: Char -> ByteString pack, -- :: String -> ByteString unpack, -- :: ByteString -> String -- * Basic interface cons, -- :: Char -> ByteString -> ByteString - snoc, -- :: Char -> ByteString -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int + snoc, -- :: ByteString -> Char -> ByteString + append, -- :: ByteString -> ByteString -> ByteString head, -- :: ByteString -> Char - tail, -- :: ByteString -> ByteString last, -- :: ByteString -> Char + tail, -- :: ByteString -> ByteString init, -- :: ByteString -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - - -- * Special ByteStrings - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - elems, -- :: ByteString -> [ByteString] + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int -- * Transformating ByteStrings map, -- :: (Char -> Char) -> ByteString -> ByteString @@ -65,13 +60,13 @@ module Data.ByteString.Char8 ( intersperse, -- :: Char -> ByteString -> ByteString transpose, -- :: [ByteString] -> [ByteString] - -- * Reducing 'ByteString's + -- * Reducing 'ByteString's (folds) foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a - foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a + foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char + foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char - foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a -- ** Special folds concat, -- :: [ByteString] -> ByteString @@ -80,15 +75,23 @@ module Data.ByteString.Char8 ( all, -- :: (Char -> Bool) -> ByteString -> Bool maximum, -- :: 
ByteString -> Char minimum, -- :: ByteString -> Char - mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString -- * Building ByteStrings - scanl, - scanl1, + -- ** Scans + scanl, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString + scanl1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString +-- scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +-- scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString + + -- ** Accumulating maps +-- mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) +-- mapAccumR, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) + mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Char -> ByteString - unfoldrN, -- :: (a -> Maybe (Char, a)) -> a -> ByteString + unfoldr, -- :: (a -> Maybe (Char, a)) -> a -> ByteString + unfoldrN, -- :: Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a) -- * Substrings @@ -98,16 +101,18 @@ module Data.ByteString.Char8 ( splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString - break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + group, -- :: ByteString -> [ByteString] + groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString] + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] -- ** Breaking and dropping on specific Chars breakChar, -- :: Char -> ByteString -> (ByteString, ByteString) - spanChar, -- :: Char -> ByteString -> (ByteString, ByteString) - breakFirst, -- :: Char -> ByteString -> Maybe 
(ByteString,ByteString) - breakLast, -- :: Char -> ByteString -> Maybe (ByteString,ByteString) - breakSpace, -- :: ByteString -> Maybe (ByteString,ByteString) + spanChar, -- :: Char -> ByteString -> (ByteString, ByteString) + breakSpace, -- :: ByteString -> (ByteString,ByteString) dropSpace, -- :: ByteString -> ByteString dropSpaceEnd, -- :: ByteString -> ByteString @@ -115,8 +120,6 @@ module Data.ByteString.Char8 ( split, -- :: Char -> ByteString -> [ByteString] splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString] - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -- ** Breaking into lines and words lines, -- :: ByteString -> [ByteString] @@ -138,17 +141,13 @@ module Data.ByteString.Char8 ( join, -- :: ByteString -> [ByteString] -> ByteString joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString - -- * Indexing ByteStrings - index, -- :: ByteString -> Int -> Char - elemIndex, -- :: Char -> ByteString -> Maybe Int - elemIndexLast, -- :: Char -> ByteString -> Maybe Int - elemIndices, -- :: Char -> ByteString -> [Int] - findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int - findIndices, -- :: (Char -> Bool) -> ByteString -> [Int] - count, -- :: Char -> ByteString -> Int - -- * Ordered ByteStrings - sort, -- :: ByteString -> ByteString + -- ** Searching for substrings + isPrefixOf, -- :: ByteString -> ByteString -> Bool + isSuffixOf, -- :: ByteString -> ByteString -> Bool + isSubstringOf, -- :: ByteString -> ByteString -> Bool + findSubstring, -- :: ByteString -> ByteString -> Maybe Int + findSubstrings, -- :: ByteString -> ByteString -> [Int] -- * Searching ByteStrings @@ -159,21 +158,27 @@ module Data.ByteString.Char8 ( filterNotChar, -- :: Char -> ByteString -> ByteString -- ** Searching with a predicate - filter, -- :: (Char -> Bool) -> ByteString -> ByteString find, -- :: (Char -> Bool) -> ByteString -> Maybe Char + 
filter, -- :: (Char -> Bool) -> ByteString -> ByteString +-- partition -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - -- ** Searching for substrings - isPrefixOf, -- :: ByteString -> ByteString -> Bool - isSuffixOf, -- :: ByteString -> ByteString -> Bool - isSubstringOf, -- :: ByteString -> ByteString -> Bool - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] + -- * Indexing ByteStrings + index, -- :: ByteString -> Int -> Char + elemIndex, -- :: Char -> ByteString -> Maybe Int + elemIndices, -- :: Char -> ByteString -> [Int] + elemIndexEnd, -- :: Char -> ByteString -> Maybe Int + findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int + findIndices, -- :: (Char -> Bool) -> ByteString -> [Int] + count, -- :: Char -> ByteString -> Int - -- * Zipping and unzipping ByteString + -- * Zipping and unzipping ByteStrings zip, -- :: ByteString -> ByteString -> [(Char,Char)] zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c] unzip, -- :: [(Char,Char)] -> (ByteString,ByteString) + -- * Ordered ByteStrings + sort, -- :: ByteString -> ByteString + -- * Unchecked access unsafeHead, -- :: ByteString -> Char unsafeTail, -- :: ByteString -> ByteString @@ -201,8 +206,8 @@ module Data.ByteString.Char8 ( -- ** Files readFile, -- :: FilePath -> IO ByteString --- mmapFile, -- :: FilePath -> IO ByteString writeFile, -- :: FilePath -> ByteString -> IO () +-- mmapFile, -- :: FilePath -> IO ByteString -- ** I\/O with Handles #if defined(__GLASGOW_HASKELL__) @@ -216,7 +221,7 @@ module Data.ByteString.Char8 ( #if defined(__GLASGOW_HASKELL__) -- * Low level construction - -- | For constructors from foreign language types see /Data.ByteString/ + -- | For constructors from foreign language types see "Data.ByteString" packAddress, -- :: Addr# -> ByteString unsafePackAddress, -- :: Int -> Addr# -> ByteString #endif @@ -246,7 +251,7 @@ import qualified Data.ByteString as B -- Listy functions 
transparently exported import Data.ByteString (ByteString(..) ,empty,null,length,tail,init,append - ,inits,tails,elems,reverse,transpose + ,inits,tails,reverse,transpose ,concat,take,drop,splitAt,join ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring ,findSubstrings,unsafeTail,copy,group @@ -287,9 +292,9 @@ import GHC.ST (ST(..)) ------------------------------------------------------------------------ -- | /O(1)/ Convert a 'Char' into a 'ByteString' -packChar :: Char -> ByteString -packChar = B.packByte . c2w -{-# INLINE packChar #-} +singleton :: Char -> ByteString +singleton = B.singleton . c2w +{-# INLINE singleton #-} -- | /O(n)/ Convert a 'String' into a 'ByteString' -- @@ -457,30 +462,29 @@ replicate :: Int -> Char -> ByteString replicate w = B.replicate w . c2w {-# INLINE replicate #-} --- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'. --- 'unfoldrN' builds a ByteString from a seed value. The function takes --- the element and returns 'Nothing' if it is done producing the --- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a --- prepending to the ByteString and @b@ is used as the next element in a --- recursive call. --- --- To preven unfoldrN having /O(n^2)/ complexity (as prepending a --- character to a ByteString is /O(n)/, this unfoldr requires a maximum --- final size of the ByteString as an argument. 'cons' can then be --- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has --- linear complexity. The depth of the recursion is limited to this --- size, but may be less. For lazy, infinite unfoldr, use --- 'Data.List.unfoldr' (from 'Data.List'). +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- ByteString from a seed value. 
The function takes the element and +-- returns 'Nothing' if it is done producing the ByteString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next character in the string, +-- and @b@ is the seed value for further production. -- -- Examples: -- --- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789" --- --- The following equation connects the depth-limited unfoldr to the List unfoldr: +-- > unfoldr (\x -> if x <= '9' then Just (x, succ x) else Nothing) '0' == "0123456789" +unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString +unfoldr f x0 = B.unfoldr (fmap k . f) x0 + where k (i, j) = (c2w i, j) + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. -- --- > unfoldrN n == take n $ List.unfoldr +-- The following equation relates 'unfoldrN' and 'unfoldr': -- -unfoldrN :: Int -> (a -> Maybe (Char, a)) -> a -> ByteString +-- > unfoldrN n f s == take n (unfoldr f s) +unfoldrN :: Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a) unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f) w where k (i,j) = (c2w i, j) {-# INLINE unfoldrN #-} @@ -543,36 +547,6 @@ spanChar :: Char -> ByteString -> (ByteString, ByteString) spanChar = B.spanByte . c2w {-# INLINE spanChar #-} --- | /O(n)/ 'breakFirst' breaks the given ByteString on the first --- occurence of @w@. It behaves like 'break', except the delimiter is --- not returned, and @Nothing@ is returned if the delimiter is not in --- the ByteString. I.e. --- --- > breakFirst 'b' "aabbcc" == Just ("aa","bcc") --- --- > breakFirst c xs == --- > let (x,y) = break (== c) xs --- > in if null y then Nothing else Just (x, drop 1 y)) --- -breakFirst :: Char -> ByteString -> Maybe (ByteString,ByteString) -breakFirst = B.breakFirst . 
c2w -{-# INLINE breakFirst #-} - --- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the --- ByteString. --- --- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc") --- --- and the following are equivalent: --- --- > breakLast 'c' "abcdef" --- > let (x,y) = break (=='c') (reverse "abcdef") --- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x) --- -breakLast :: Char -> ByteString -> Maybe (ByteString,ByteString) -breakLast = B.breakLast . c2w -{-# INLINE breakLast #-} - -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte -- argument, consuming the delimiter. I.e. -- @@ -637,17 +611,17 @@ elemIndex :: Char -> ByteString -> Maybe Int elemIndex = B.elemIndex . c2w {-# INLINE elemIndex #-} --- | /O(n)/ The 'elemIndexLast' function returns the last index of the +-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the -- element in the given 'ByteString' which is equal to the query -- element, or 'Nothing' if there is no such element. The following -- holds: -- --- > elemIndexLast c xs == +-- > elemIndexEnd c xs == -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) -- -elemIndexLast :: Char -> ByteString -> Maybe Int -elemIndexLast = B.elemIndexLast . c2w -{-# INLINE elemIndexLast #-} +elemIndexEnd :: Char -> ByteString -> Maybe Int +elemIndexEnd = B.elemIndexEnd . c2w +{-# INLINE elemIndexEnd #-} -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. @@ -860,7 +834,7 @@ lines ps where search = elemIndex '\n' {-# INLINE lines #-} -{-# RULES +{-# Bogus rule, wrong if there's not \n at end of line "length.lines/count" P.length . 
lines = count '\n' @@ -890,7 +864,7 @@ lines (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do unlines :: [ByteString] -> ByteString unlines [] = empty unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space - where nl = packChar '\n' + where nl = singleton '\n' -- | 'words' breaks a ByteString up into a list of words, which -- were delimited by Chars representing white space. And @@ -903,7 +877,7 @@ words = B.tokens isSpaceWord8 -- | The 'unwords' function is analogous to the 'unlines' function, on words. unwords :: [ByteString] -> ByteString -unwords = join (packChar ' ') +unwords = join (singleton ' ') {-# INLINE unwords #-} -- | /O(n)/ Indicies of newlines. Shorthand for @@ -948,7 +922,7 @@ unlines' :: [ByteString] -> ByteString unlines' ss = concat $ intersperse_newlines ss where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s) intersperse_newlines s = s - newline = packChar '\n' + newline = singleton '\n' -- | 'unlines\'' behaves like 'unlines', except that it also correctly -- retores lines that do not have terminating newlines (see the