instance Ord ByteString
where compare = compareBytes
-instance Show ByteString where
- showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
-
-instance Read ByteString where
- readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
-
instance Monoid ByteString where
mempty = empty
mappend = append
#endif
-------------------------------------------------------------------------
-
--- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
--- conversion function
-packWith :: (a -> Word8) -> [a] -> ByteString
-packWith k str = unsafeCreate (P.length str) $ \p -> go p str
- where
- STRICT2(go)
- go _ [] = return ()
- go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
-{-# INLINE packWith #-}
-{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
-
--- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
-unpackWith :: (Word8 -> a) -> ByteString -> [a]
-unpackWith _ (PS _ _ 0) = []
-unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
- go (p `plusPtr` s) (l - 1) []
- where
- STRICT3(go)
- go p 0 acc = peek p >>= \e -> return (k e : acc)
- go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
-{-# INLINE unpackWith #-}
-{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
-
-- ---------------------------------------------------------------------
-- Basic interface
hGetLine :: Handle -> IO ByteString
#if !defined(__GLASGOW_HASKELL__)
-hGetLine h = do
- string <- System.IO.hGetLine h
- return $ packWith c2w string
+hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
#else
hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
case haBufferMode handle_ of
module Data.ByteString.Base (
-- * The @ByteString@ type and representation
- ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+ ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+ LazyByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
-- * Unchecked access
unsafeHead, -- :: ByteString -> Word8
deriving (Data, Typeable)
#endif
+-- Shows the list obtained by converting every byte with 'w2c', using that
+-- list's own 'showsPrec' at the same precedence.
+instance Show ByteString where
+    showsPrec prec bytes rest = showsPrec prec chars rest
+      where chars = unpackWith w2c bytes
+
+-- Reads a list with the list 'readsPrec' and packs every parse result,
+-- converting each element with 'c2w'; the unconsumed input is passed on
+-- unchanged.
+instance Read ByteString where
+    readsPrec prec input = do
+        (chars, rest) <- readsPrec prec input
+        return (packWith c2w chars, rest)
+
+-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
+--
+-- The conversion function is applied to every byte of the string. A
+-- 'ByteString' whose length field is 0 yields @[]@ without touching the
+-- underlying pointer.
+unpackWith :: (Word8 -> a) -> ByteString -> [a]
+unpackWith _ (PS _ _ 0) = []
+unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
+ go (p `plusPtr` s) (l - 1) []
+ where
+ -- go walks from the last byte (offset l-1) down to offset 0, consing
+ -- each converted byte onto acc, so the list comes out in forward order
+ -- without a final reverse.
+ -- STRICT3 is a macro defined elsewhere; presumably it makes go strict
+ -- in its three arguments -- TODO confirm.
+ STRICT3(go)
+ go p 0 acc = peek p >>= \e -> return (k e : acc)
+ go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
+{-# INLINE unpackWith #-}
+{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
+
+-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
+-- conversion function
+--
+-- The list is traversed twice: once by 'length' to obtain the size that
+-- is passed to 'unsafeCreate', and once to write the converted elements.
+packWith :: (a -> Word8) -> [a] -> ByteString
+packWith k str = unsafeCreate (length str) $ \p -> go p str
+ where
+ -- go writes one converted element at the current pointer and then
+ -- advances the pointer by one byte, stopping at the end of the list.
+ -- STRICT2 is a macro defined elsewhere; presumably it makes go strict
+ -- in both arguments -- TODO confirm.
+ STRICT2(go)
+ go _ [] = return ()
+ go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
+{-# INLINE packWith #-}
+{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
+
+------------------------------------------------------------------------
+
+-- | A space-efficient representation of a Word8 vector, supporting many
+-- efficient operations. A 'LazyByteString' contains 8-bit characters only.
+--
+-- The value is a list of strict 'ByteString' chunks wrapped in the 'LPS'
+-- constructor.
+--
+-- Instances of Eq, Ord, Read, Show, Data, Typeable
+--
+-- 'Show' and 'Read' are derived by this declaration; 'Data' and
+-- 'Typeable' are derived only when @__GLASGOW_HASKELL__@ is defined. The
+-- 'Eq' and 'Ord' instances are not part of this declaration.
+--
+newtype LazyByteString = LPS [ByteString] -- LPS for lazy packed string
+ deriving (Show,Read
+#if defined(__GLASGOW_HASKELL__)
+ ,Data, Typeable
+#endif
+ )
+
+------------------------------------------------------------------------
+
-- | /O(1)/ The empty 'ByteString'
empty :: ByteString
empty = PS nullForeignPtr 0 0
sort, -- :: ByteString -> ByteString
-- * Reading from ByteStrings
- readInt, -- :: ByteString -> Maybe Int
+ readInt, -- :: ByteString -> Maybe (Int, ByteString)
+ readInteger, -- :: ByteString -> Maybe (Integer, ByteString)
-- * Low level CString conversions
end True _ n ps = Just (negate n, ps)
end _ _ n ps = Just (n, ps)
+-- | readInteger reads an Integer from the beginning of the ByteString. If
+-- there is no integer at the beginning of the string, it returns Nothing,
+-- otherwise it just returns the integer read, and the rest of the string.
+--
+-- A single leading @\'-\'@ or @\'+\'@ is accepted. At least one decimal
+-- digit (byte 0x30 to 0x39) must follow, otherwise the result is Nothing.
+-- Reading stops at the first byte that is not a decimal digit.
+readInteger :: ByteString -> Maybe (Integer, ByteString)
+readInteger as
+ | null as = Nothing
+ | otherwise =
+ case unsafeHead as of
+ '-' -> first (unsafeTail as) >>= \(n, bs) -> return (-n, bs)
+ '+' -> first (unsafeTail as)
+ _ -> first as
+
+ -- first demands one digit before handing over to loop, so a bare sign
+ -- or a non-digit start gives Nothing.
+ where first ps | null ps = Nothing
+ | otherwise =
+ case B.unsafeHead ps of
+ w | w >= 0x30 && w <= 0x39 -> Just $
+ loop 1 (fromIntegral w - 0x30) [] (unsafeTail ps)
+ | otherwise -> Nothing
+
+ -- loop d acc ns ps: acc is an Int holding the d most recent digits
+ -- (d runs from 1 to 9). When acc already holds 9 digits it is pushed
+ -- onto ns as an Integer and a fresh accumulator is started, so ns
+ -- lists the completed 9-digit groups with the least significant first.
+ loop :: Int -> Int -> [Integer]
+ -> ByteString -> (Integer, ByteString)
+ STRICT4(loop)
+ loop d acc ns ps
+ | null ps = combine d acc ns empty
+ | otherwise =
+ case B.unsafeHead ps of
+ w | w >= 0x30 && w <= 0x39 ->
+ if d == 9 then loop 1 (fromIntegral w - 0x30)
+ (toInteger acc : ns)
+ (unsafeTail ps)
+ else loop (d+1)
+ (10*acc + (fromIntegral w - 0x30))
+ ns (unsafeTail ps)
+ | otherwise -> combine d acc ns ps
+
+ -- combine joins the completed groups with the trailing d-digit
+ -- accumulator; with no completed groups the accumulator is the result.
+ combine _ acc [] ps = (toInteger acc, ps)
+ combine d acc ns ps =
+ ((10^d * combine1 1000000000 ns + toInteger acc), ps)
+
+ -- combine1 merges neighbouring groups pairwise, squaring the base on
+ -- every pass, until a single value is left. It is only called with a
+ -- non-empty list.
+ combine1 _ [n] = n
+ combine1 b ns = combine1 (b*b) $ combine2 b ns
+
+ -- combine2 performs one pairwise pass: n is the less significant
+ -- group and m the more significant one. An odd trailing element is
+ -- kept as it is.
+ combine2 b (n:m:ns) = let t = m*b + n in t `seq` (t : combine2 b ns)
+ combine2 _ ns = ns
+
-- | Read an entire file strictly into a 'ByteString'. This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'pack'. It also may be more efficient than opening the file and
-- without requiring the entire vector be resident in memory.
--
-- Some operations, such as concat, append, reverse and cons, have
--- better complexity than their "Data.ByteString" equivalents, as due to
+-- better complexity than their "Data.ByteString" equivalents, due to
-- optimisations resulting from the list spine structure. And for other
-- operations Lazy ByteStrings are usually within a few percent of
-- strict ones, but with better heap usage. For data larger than the
module Data.ByteString.Lazy (
-- * The @ByteString@ type
- ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+ ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable
-- * Introducing and eliminating 'ByteString's
empty, -- :: ByteString
- singleton, -- :: Word8 -> ByteString
+ singleton, -- :: Word8 -> ByteString
pack, -- :: [Word8] -> ByteString
unpack, -- :: ByteString -> [Word8]
+ fromChunks, -- :: [Strict.ByteString] -> ByteString
+ toChunks, -- :: ByteString -> [Strict.ByteString]
-- * Basic interface
cons, -- :: Word8 -> ByteString -> ByteString
-- ** Accumulating maps
mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
- mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
-- ** Infinite ByteStrings
-- ** Breaking into many substrings
split, -- :: Word8 -> ByteString -> [ByteString]
splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
- tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
hGet, -- :: Handle -> Int -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
hGetNonBlocking, -- :: Handle -> IO ByteString
+
-- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
-- hGetContentsN, -- :: Int -> Handle -> IO ByteString
-- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
import qualified Data.List as L -- L for list/lazy
import qualified Data.ByteString as P -- P for packed
import qualified Data.ByteString.Base as P
+import Data.ByteString.Base (LazyByteString(..))
import qualified Data.ByteString.Fusion as P
import Data.ByteString.Fusion (PairS(..),loopL)
import Foreign.Ptr
import Foreign.Storable
-#if defined(__GLASGOW_HASKELL__)
-import Data.Generics (Data(..), Typeable(..))
-#endif
-
-- -----------------------------------------------------------------------------
--
-- Useful macros, until we have bang patterns
-- -----------------------------------------------------------------------------
--- | A space-efficient representation of a Word8 vector, supporting many
--- efficient operations. A 'ByteString' contains 8-bit characters only.
---
--- Instances of Eq, Ord, Read, Show, Data, Typeable
---
-newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string
- deriving (Show,Read
-#if defined(__GLASGOW_HASKELL__)
- ,Data, Typeable
-#endif
- )
+-- | The lazy 'ByteString' type of this module is an alias for
+-- 'LazyByteString', which is imported from "Data.ByteString.Base".
+type ByteString = LazyByteString
--
-- hmm, what about getting the PS constructor unpacked into the cons cell?
unpack (LPS ss) = L.concatMap P.unpack ss
{-# INLINE unpack #-}
+-- | /O(c)/ Build a lazy 'ByteString' from a list of strict 'ByteString'
+-- chunks. Chunks for which 'P.null' holds are dropped.
+fromChunks :: [P.ByteString] -> ByteString
+fromChunks chunks = LPS (L.filter nonEmpty chunks)
+  where nonEmpty = not . P.null
+
+-- | /O(1)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
+-- chunks. This only removes the 'LPS' newtype wrapper; the chunk list is
+-- returned as it is stored, without being traversed or copied.
+toChunks :: ByteString -> [P.ByteString]
+toChunks (LPS chunks) = chunks
+
------------------------------------------------------------------------
{-
mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
-mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
-mapAccumR = error "mapAccumR unimplemented"
-
-- | /O(n)/ map Word8 functions, provided with the index at each position
mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
{-# INLINE cons' #-}
{-# INLINE split #-}
+{-
-- | Like 'splitWith', except that sequences of adjacent separators are
-- treated as a single separator. eg.
--
--
tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
tokens f = L.filter (not.null) . splitWith f
+-}
-- | The 'group' function takes a ByteString and returns a list of
-- ByteStrings such that the concatenation of the result is equal to the
module Data.ByteString.Lazy.Char8 (
-- * The @ByteString@ type
- ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+ ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable
-- * Introducing and eliminating 'ByteString's
empty, -- :: ByteString
singleton, -- :: Char -> ByteString
pack, -- :: String -> ByteString
unpack, -- :: ByteString -> String
+ fromChunks, -- :: [Strict.ByteString] -> ByteString
+ toChunks, -- :: ByteString -> [Strict.ByteString]
-- * Basic interface
cons, -- :: Char -> ByteString -> ByteString
-- ** Breaking into many substrings
split, -- :: Char -> ByteString -> [ByteString]
splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString]
- tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString]
-- ** Breaking into lines and words
lines, -- :: ByteString -> [ByteString]
-- * Reading from ByteStrings
readInt,
+ readInteger,
-- * I\/O with 'ByteString's
-- Functions transparently exported
import Data.ByteString.Lazy
- (ByteString(..)
+ (ByteString, fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose
,concat,take,drop,splitAt,join,isPrefixOf,group,inits,tails,copy
,hGetContents, hGet, hPut, getContents
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as B
import qualified Data.ByteString.Base as B
+import Data.ByteString.Base (LazyByteString(LPS))
+
import Data.ByteString.Base (w2c, c2w, isSpaceWord8)
import Data.Int (Int64)
splitWith f = L.splitWith (f . w2c)
{-# INLINE splitWith #-}
--- | Like 'splitWith', except that sequences of adjacent separators are
--- treated as a single separator. eg.
---
--- > tokens (=='a') "aabbaca" == ["bb","c"]
---
-tokens :: (Char -> Bool) -> ByteString -> [ByteString]
-tokens f = L.tokens (f . w2c)
-{-# INLINE tokens #-}
-
-- | The 'groupBy' function is the non-overloaded version of 'group'.
groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b))
-- > tokens isSpace = words
--
words :: ByteString -> [ByteString]
-words = L.tokens isSpaceWord8
+words = P.filter (not . L.null) . L.splitWith isSpaceWord8
{-# INLINE words #-}
-- | The 'unwords' function is analogous to the 'unlines' function, on words.
in n' `seq` ps' `seq` Just $! (n', LPS ps')
+-- | readInteger reads an Integer from the beginning of the ByteString. If
+-- there is no integer at the beginning of the string, it returns Nothing,
+-- otherwise it just returns the integer read, and the rest of the string.
+--
+-- A single leading @\'-\'@ or @\'+\'@ is accepted. At least one decimal
+-- digit (byte 0x30 to 0x39) must follow, otherwise the result is Nothing.
+-- Digits may continue across chunk boundaries.
+readInteger :: ByteString -> Maybe (Integer, ByteString)
+readInteger (LPS []) = Nothing
+-- NOTE(review): unsafeHead is applied to the first chunk without a null
+-- check, so this assumes the first chunk is non-empty -- confirm.
+readInteger (LPS (x:xs)) =
+ case w2c (B.unsafeHead x) of
+ '-' -> first (B.unsafeTail x) xs >>= \(n, bs) -> return (-n, bs)
+ '+' -> first (B.unsafeTail x) xs
+ _ -> first x xs
+
+ -- first moves on to the next chunk when the current one is exhausted
+ -- (for example when the sign was the last byte of its chunk); first'
+ -- then demands one digit before handing over to loop.
+ where first ps pss
+ | B.null ps = case pss of
+ [] -> Nothing
+ (ps':pss') -> first' ps' pss'
+ | otherwise = first' ps pss
+
+ first' ps pss = case B.unsafeHead ps of
+ w | w >= 0x30 && w <= 0x39 -> Just $
+ loop 1 (fromIntegral w - 0x30) [] (B.unsafeTail ps) pss
+ | otherwise -> Nothing
+
+ -- loop d acc ns ps pss: acc is an Int holding the d most recent digits
+ -- (d runs from 1 to 9). When acc already holds 9 digits it is pushed
+ -- onto ns as an Integer and a fresh accumulator is started, so ns
+ -- lists the completed 9-digit groups with the least significant first.
+ -- ps is the current chunk and pss the chunks still to come.
+ loop :: Int -> Int -> [Integer]
+ -> B.ByteString -> [B.ByteString] -> (Integer, ByteString)
+ STRICT5(loop)
+ loop d acc ns ps pss
+ | B.null ps = case pss of
+ [] -> combine d acc ns ps pss
+ (ps':pss') -> loop d acc ns ps' pss'
+ | otherwise =
+ case B.unsafeHead ps of
+ w | w >= 0x30 && w <= 0x39 ->
+ if d < 9 then loop (d+1)
+ (10*acc + (fromIntegral w - 0x30))
+ ns (B.unsafeTail ps) pss
+ else loop 1 (fromIntegral w - 0x30)
+ (fromIntegral acc : ns)
+ (B.unsafeTail ps) pss
+ | otherwise -> combine d acc ns ps pss
+
+ -- combine joins the completed groups with the trailing d-digit
+ -- accumulator; with no completed groups the accumulator is the result.
+ combine _ acc [] ps pss = end (fromIntegral acc) ps pss
+ combine d acc ns ps pss =
+ end (10^d * combine1 1000000000 ns + fromIntegral acc) ps pss
+
+ -- combine1 merges neighbouring groups pairwise, squaring the base on
+ -- every pass, until a single value is left. It is only called with a
+ -- non-empty list.
+ combine1 _ [n] = n
+ combine1 b ns = combine1 (b*b) $ combine2 b ns
+
+ -- combine2 performs one pairwise pass: n is the less significant
+ -- group and m the more significant one. An odd trailing element is
+ -- kept as it is.
+ combine2 b (n:m:ns) = let t = n+m*b in t `seq` (t : combine2 b ns)
+ combine2 _ ns = ns
+
+ -- end rebuilds the remaining lazy string, leaving out the current
+ -- chunk when it is empty.
+ end n ps pss = let ps' | B.null ps = pss
+ | otherwise = ps:pss
+ in ps' `seq` (n, LPS ps')
+
-- | Read an entire file /lazily/ into a 'ByteString'. Use 'text mode'
-- on Windows to interpret newlines
readFile :: FilePath -> IO ByteString