From 1d644d67d64186aeb2b840adf4d1cceed27a5bc6 Mon Sep 17 00:00:00 2001 From: Don Stewart Date: Sat, 9 Sep 2006 05:01:11 +0000 Subject: [PATCH] Sync Data.ByteString with stable branch This patch: * hides the LPS constructor (its in .Base if you need it) * adds functions to convert between strict and lazy bytestrings * and adds readInteger --- Data/ByteString.hs | 35 +------------------ Data/ByteString/Base.hs | 48 +++++++++++++++++++++++++- Data/ByteString/Char8.hs | 48 +++++++++++++++++++++++++- Data/ByteString/Lazy.hs | 41 ++++++++++------------ Data/ByteString/Lazy/Char8.hs | 75 ++++++++++++++++++++++++++++++++++------- 5 files changed, 175 insertions(+), 72 deletions(-) diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 927a91f..64554b8 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -291,12 +291,6 @@ instance Eq ByteString instance Ord ByteString where compare = compareBytes -instance Show ByteString where - showsPrec p ps r = showsPrec p (unpackWith w2c ps) r - -instance Read ByteString where - readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ] - instance Monoid ByteString where mempty = empty mappend = append @@ -458,31 +452,6 @@ unpackList (PS fp off len) = withPtr fp $ \p -> do #endif ------------------------------------------------------------------------- - --- | /O(n)/ Convert a '[a]' into a 'ByteString' using some --- conversion function -packWith :: (a -> Word8) -> [a] -> ByteString -packWith k str = unsafeCreate (P.length str) $ \p -> go p str - where - STRICT2(go) - go _ [] = return () - go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff -{-# INLINE packWith #-} -{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} - --- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. -unpackWith :: (Word8 -> a) -> ByteString -> [a] -unpackWith _ (PS _ _ 0) = [] -unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> - go (p `plusPtr` s) (l - 1) [] - where - STRICT3(go) - go p 0 acc = peek p >>= \e -> return (k e : acc) - go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc) -{-# INLINE unpackWith #-} -{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} - -- --------------------------------------------------------------------- -- Basic interface @@ -1778,9 +1747,7 @@ hGetLines h = go hGetLine :: Handle -> IO ByteString #if !defined(__GLASGOW_HASKELL__) -hGetLine h = do - string <- System.IO.hGetLine h - return $ packWith c2w string +hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w #else hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do case haBufferMode handle_ of diff --git a/Data/ByteString/Base.hs b/Data/ByteString/Base.hs index f3c869d..b96a56b 100644 --- a/Data/ByteString/Base.hs +++ b/Data/ByteString/Base.hs @@ -17,7 +17,8 @@ module Data.ByteString.Base ( -- * The @ByteString@ type and representation - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + LazyByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable -- * Unchecked access unsafeHead, -- :: ByteString -> Word8 @@ -149,6 +150,51 @@ data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) deriving (Data, Typeable) #endif +instance Show ByteString where + showsPrec p ps r = showsPrec p (unpackWith w2c ps) r + +instance Read ByteString where + readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ] + +-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. +unpackWith :: (Word8 -> a) -> ByteString -> [a] +unpackWith _ (PS _ _ 0) = [] +unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> + go (p `plusPtr` s) (l - 1) [] + where + STRICT3(go) + go p 0 acc = peek p >>= \e -> return (k e : acc) + go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc) +{-# INLINE unpackWith #-} +{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} + +-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some +-- conversion function +packWith :: (a -> Word8) -> [a] -> ByteString +packWith k str = unsafeCreate (length str) $ \p -> go p str + where + STRICT2(go) + go _ [] = return () + go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff +{-# INLINE packWith #-} +{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} + +------------------------------------------------------------------------ + +-- | A space-efficient representation of a Word8 vector, supporting many +-- efficient operations. A 'ByteString' contains 8-bit characters only. +-- +-- Instances of Eq, Ord, Read, Show, Data, Typeable +-- +newtype LazyByteString = LPS [ByteString] -- LPS for lazy packed string + deriving (Show,Read +#if defined(__GLASGOW_HASKELL__) + ,Data, Typeable +#endif + ) + +------------------------------------------------------------------------ + -- | /O(1)/ The empty 'ByteString' empty :: ByteString empty = PS nullForeignPtr 0 0 diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index bd4b31a..03b492d 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -162,7 +162,8 @@ module Data.ByteString.Char8 ( sort, -- :: ByteString -> ByteString -- * Reading from ByteStrings - readInt, -- :: ByteString -> Maybe Int + readInt, -- :: ByteString -> Maybe (Int, ByteString) + readInteger, -- :: ByteString -> Maybe (Integer, ByteString) -- * Low level CString conversions @@ -931,6 +932,51 @@ readInt as end True _ n ps = Just (negate n, ps) end _ _ n ps = Just (n, ps) +-- | readInteger reads an Integer from the beginning of the ByteString. If +-- there is no integer at the beginning of the string, it returns Nothing, +-- otherwise it just returns the int read, and the rest of the string. +readInteger :: ByteString -> Maybe (Integer, ByteString) +readInteger as + | null as = Nothing + | otherwise = + case unsafeHead as of + '-' -> first (unsafeTail as) >>= \(n, bs) -> return (-n, bs) + '+' -> first (unsafeTail as) + _ -> first as + + where first ps | null ps = Nothing + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 && w <= 0x39 -> Just $ + loop 1 (fromIntegral w - 0x30) [] (unsafeTail ps) + | otherwise -> Nothing + + loop :: Int -> Int -> [Integer] + -> ByteString -> (Integer, ByteString) + STRICT4(loop) + loop d acc ns ps + | null ps = combine d acc ns empty + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 && w <= 0x39 -> + if d == 9 then loop 1 (fromIntegral w - 0x30) + (toInteger acc : ns) + (unsafeTail ps) + else loop (d+1) + (10*acc + (fromIntegral w - 0x30)) + ns (unsafeTail ps) + | otherwise -> combine d acc ns ps + + combine _ acc [] ps = (toInteger acc, ps) + combine d acc ns ps = + ((10^d * combine1 1000000000 ns + toInteger acc), ps) + + combine1 _ [n] = n + combine1 b ns = combine1 (b*b) $ combine2 b ns + + combine2 b (n:m:ns) = let t = m*b + n in t `seq` (t : combine2 b ns) + combine2 _ ns = ns + -- | Read an entire file strictly into a 'ByteString'. This is far more -- efficient than reading the characters into a 'String' and then using -- 'pack'. It also may be more efficient than opening the file and diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index eb4ba61..80b80ea 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -20,7 +20,7 @@ -- without requiring the entire vector be resident in memory. -- -- Some operations, such as concat, append, reverse and cons, have --- better complexity than their "Data.ByteString" equivalents, as due to +-- better complexity than their "Data.ByteString" equivalents, due to -- optimisations resulting from the list spine structure. And for other -- operations Lazy ByteStrings are usually within a few percent of -- strict ones, but with better heap usage. For data larger than the @@ -43,13 +43,15 @@ module Data.ByteString.Lazy ( -- * The @ByteString@ type - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - singleton, -- :: Word8 -> ByteString + singleton, -- :: Word8 -> ByteString pack, -- :: [Word8] -> ByteString unpack, -- :: ByteString -> [Word8] + fromChunks, -- :: [Strict.ByteString] -> ByteString + toChunks, -- :: ByteString -> [Strict.ByteString] -- * Basic interface cons, -- :: Word8 -> ByteString -> ByteString @@ -93,7 +95,6 @@ module Data.ByteString.Lazy ( -- ** Accumulating maps mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) - mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString -- ** Infinite ByteStrings @@ -123,7 +124,6 @@ module Data.ByteString.Lazy ( -- ** Breaking into many substrings split, -- :: Word8 -> ByteString -> [ByteString] splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] - tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] -- ** Joining strings join, -- :: ByteString -> [ByteString] -> ByteString @@ -179,6 +179,7 @@ module Data.ByteString.Lazy ( hGet, -- :: Handle -> Int -> IO ByteString hPut, -- :: Handle -> ByteString -> IO () hGetNonBlocking, -- :: Handle -> IO ByteString + -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString -- hGetContentsN, -- :: Int -> Handle -> IO ByteString -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString @@ -196,6 +197,7 @@ import Prelude hiding import qualified Data.List as L -- L for list/lazy import qualified Data.ByteString as P -- P for packed import qualified Data.ByteString.Base as P +import Data.ByteString.Base (LazyByteString(..)) import qualified Data.ByteString.Fusion as P import Data.ByteString.Fusion (PairS(..),loopL) @@ -212,10 +214,6 @@ import Foreign.ForeignPtr (withForeignPtr) import Foreign.Ptr import Foreign.Storable -#if defined(__GLASGOW_HASKELL__) -import Data.Generics (Data(..), Typeable(..)) -#endif - -- ----------------------------------------------------------------------------- -- -- Useful macros, until we have bang patterns @@ -229,17 +227,7 @@ import Data.Generics (Data(..), Typeable(..)) -- ----------------------------------------------------------------------------- --- | A space-efficient representation of a Word8 vector, supporting many --- efficient operations. A 'ByteString' contains 8-bit characters only. --- --- Instances of Eq, Ord, Read, Show, Data, Typeable --- -newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string - deriving (Show,Read -#if defined(__GLASGOW_HASKELL__) - ,Data, Typeable -#endif - ) +type ByteString = LazyByteString -- -- hmm, what about getting the PS constructor unpacked into the cons cell? @@ -367,6 +355,14 @@ unpack :: ByteString -> [Word8] unpack (LPS ss) = L.concatMap P.unpack ss {-# INLINE unpack #-} +-- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString' +fromChunks :: [P.ByteString] -> ByteString +fromChunks ls = LPS $ L.filter (not . P.null) ls + +-- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString' +toChunks :: ByteString -> [P.ByteString] +toChunks (LPS s) = s + ------------------------------------------------------------------------ {- @@ -589,9 +585,6 @@ minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS -mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumR = error "mapAccumR unimplemented" - -- | /O(n)/ map Word8 functions, provided with the index at each position mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS @@ -834,6 +827,7 @@ split c (LPS (a:as)) = comb [] (P.split c a) as {-# INLINE cons' #-} {-# INLINE split #-} +{- -- | Like 'splitWith', except that sequences of adjacent separators are -- treated as a single separator. eg. -- @@ -841,6 +835,7 @@ split c (LPS (a:as)) = comb [] (P.split c a) as -- tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] tokens f = L.filter (not.null) . splitWith f +-} -- | The 'group' function takes a ByteString and returns a list of -- ByteStrings such that the concatenation of the result is equal to the diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs index ada949b..beec2ba 100644 --- a/Data/ByteString/Lazy/Char8.hs +++ b/Data/ByteString/Lazy/Char8.hs @@ -24,13 +24,15 @@ module Data.ByteString.Lazy.Char8 ( -- * The @ByteString@ type - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString singleton, -- :: Char -> ByteString pack, -- :: String -> ByteString unpack, -- :: ByteString -> String + fromChunks, -- :: [Strict.ByteString] -> ByteString + toChunks, -- :: ByteString -> [Strict.ByteString] -- * Basic interface cons, -- :: Char -> ByteString -> ByteString @@ -103,7 +105,6 @@ module Data.ByteString.Lazy.Char8 ( -- ** Breaking into many substrings split, -- :: Char -> ByteString -> [ByteString] splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] - tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString] -- ** Breaking into lines and words lines, -- :: ByteString -> [ByteString] @@ -149,6 +150,7 @@ module Data.ByteString.Lazy.Char8 ( -- * Reading from ByteStrings readInt, + readInteger, -- * I\/O with 'ByteString's @@ -176,7 +178,7 @@ module Data.ByteString.Lazy.Char8 ( -- Functions transparently exported import Data.ByteString.Lazy - (ByteString(..) + (ByteString, fromChunks, toChunks ,empty,null,length,tail,init,append,reverse,transpose ,concat,take,drop,splitAt,join,isPrefixOf,group,inits,tails,copy ,hGetContents, hGet, hPut, getContents @@ -187,6 +189,8 @@ import Data.ByteString.Lazy import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as B import qualified Data.ByteString.Base as B +import Data.ByteString.Base (LazyByteString(LPS)) + import Data.ByteString.Base (w2c, c2w, isSpaceWord8) import Data.Int (Int64) @@ -455,15 +459,6 @@ splitWith :: (Char -> Bool) -> ByteString -> [ByteString] splitWith f = L.splitWith (f . w2c) {-# INLINE splitWith #-} --- | Like 'splitWith', except that sequences of adjacent separators are --- treated as a single separator. eg. --- --- > tokens (=='a') "aabbaca" == ["bb","c"] --- -tokens :: (Char -> Bool) -> ByteString -> [ByteString] -tokens f = L.tokens (f . w2c) -{-# INLINE tokens #-} - -- | The 'groupBy' function is the non-overloaded version of 'group'. groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString] groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b)) @@ -643,7 +638,7 @@ unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space -- > tokens isSpace = words -- words :: ByteString -> [ByteString] -words = L.tokens isSpaceWord8 +words = P.filter (not . L.null) . L.splitWith isSpaceWord8 {-# INLINE words #-} -- | The 'unwords' function is analogous to the 'unlines' function, on words. @@ -685,6 +680,60 @@ readInt (LPS (x:xs)) = in n' `seq` ps' `seq` Just $! (n', LPS ps') +-- | readInteger reads an Integer from the beginning of the ByteString. If +-- there is no integer at the beginning of the string, it returns Nothing, +-- otherwise it just returns the int read, and the rest of the string. +readInteger :: ByteString -> Maybe (Integer, ByteString) +readInteger (LPS []) = Nothing +readInteger (LPS (x:xs)) = + case w2c (B.unsafeHead x) of + '-' -> first (B.unsafeTail x) xs >>= \(n, bs) -> return (-n, bs) + '+' -> first (B.unsafeTail x) xs + _ -> first x xs + + where first ps pss + | B.null ps = case pss of + [] -> Nothing + (ps':pss') -> first' ps' pss' + | otherwise = first' ps pss + + first' ps pss = case B.unsafeHead ps of + w | w >= 0x30 && w <= 0x39 -> Just $ + loop 1 (fromIntegral w - 0x30) [] (B.unsafeTail ps) pss + | otherwise -> Nothing + + loop :: Int -> Int -> [Integer] + -> B.ByteString -> [B.ByteString] -> (Integer, ByteString) + STRICT5(loop) + loop d acc ns ps pss + | B.null ps = case pss of + [] -> combine d acc ns ps pss + (ps':pss') -> loop d acc ns ps' pss' + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 && w <= 0x39 -> + if d < 9 then loop (d+1) + (10*acc + (fromIntegral w - 0x30)) + ns (B.unsafeTail ps) pss + else loop 1 (fromIntegral w - 0x30) + (fromIntegral acc : ns) + (B.unsafeTail ps) pss + | otherwise -> combine d acc ns ps pss + + combine _ acc [] ps pss = end (fromIntegral acc) ps pss + combine d acc ns ps pss = + end (10^d * combine1 1000000000 ns + fromIntegral acc) ps pss + + combine1 _ [n] = n + combine1 b ns = combine1 (b*b) $ combine2 b ns + + combine2 b (n:m:ns) = let t = n+m*b in t `seq` (t : combine2 b ns) + combine2 _ ns = ns + + end n ps pss = let ps' | B.null ps = pss + | otherwise = ps:pss + in ps' `seq` (n, LPS ps') + -- | Read an entire file /lazily/ into a 'ByteString'. Use 'text mode' -- on Windows to interpret newlines readFile :: FilePath -> IO ByteString -- 1.7.10.4