Sync Data.ByteString with stable branch

author Don Stewart <dons@cse.unsw.edu.au>

Sat, 9 Sep 2006 05:01:11 +0000 (05:01 +0000)

committer Don Stewart <dons@cse.unsw.edu.au>

Sat, 9 Sep 2006 05:01:11 +0000 (05:01 +0000)
author Don Stewart <dons@cse.unsw.edu.au>
Sat, 9 Sep 2006 05:01:11 +0000 (05:01 +0000)
committer Don Stewart <dons@cse.unsw.edu.au>
Sat, 9 Sep 2006 05:01:11 +0000 (05:01 +0000)
diff --git a/Data/ByteString.hs b/Data/ByteString.hs

index 927a91f..64554b8 100644 (file)
--- a/Data/ByteString.hs
+++ b/Data/ByteString.hs
@@ -291,12 +291,6 @@ instance Eq  ByteString
  instance Ord ByteString
      where compare = compareBytes
  
-instance Show ByteString where
-    showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
-
-instance Read ByteString where
-    readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
-
  instance Monoid ByteString where
      mempty  = empty
      mappend = append
@@ -458,31 +452,6 @@ unpackList (PS fp off len) = withPtr fp $ \p -> do
  
  #endif
  
-------------------------------------------------------------------------
-
--- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
--- conversion function
-packWith :: (a -> Word8) -> [a] -> ByteString
-packWith k str = unsafeCreate (P.length str) $ \p -> go p str
-    where
-        STRICT2(go)
-        go _ []     = return ()
-        go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
-{-# INLINE packWith #-}
-{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
-
--- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
-unpackWith :: (Word8 -> a) -> ByteString -> [a]
-unpackWith _ (PS _  _ 0) = []
-unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
-        go (p `plusPtr` s) (l - 1) []
-    where
-        STRICT3(go)
-        go p 0 acc = peek p          >>= \e -> return (k e : acc)
-        go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
-{-# INLINE unpackWith #-}
-{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
-
  -- ---------------------------------------------------------------------
  -- Basic interface
  
@@ -1778,9 +1747,7 @@ hGetLines h = go
  
  hGetLine :: Handle -> IO ByteString
  #if !defined(__GLASGOW_HASKELL__)
-hGetLine h = do
-  string <- System.IO.hGetLine h
-  return $ packWith c2w string
+hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
  #else
  hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
      case haBufferMode handle_ of
diff --git a/Data/ByteString/Base.hs b/Data/ByteString/Base.hs

index f3c869d..b96a56b 100644 (file)
--- a/Data/ByteString/Base.hs
+++ b/Data/ByteString/Base.hs
@@ -17,7 +17,8 @@
  module Data.ByteString.Base (
  
          -- * The @ByteString@ type and representation
-        ByteString(..),             -- instances: Eq, Ord, Show, Read, Data, Typeable
+        ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
+        LazyByteString(..),     -- instances: Eq, Ord, Show, Read, Data, Typeable      
  
          -- * Unchecked access
          unsafeHead,             -- :: ByteString -> Word8
@@ -149,6 +150,51 @@ data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
      deriving (Data, Typeable)
  #endif
  
+instance Show ByteString where
+    showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
+
+instance Read ByteString where
+    readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
+
+-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
+unpackWith :: (Word8 -> a) -> ByteString -> [a]
+unpackWith _ (PS _  _ 0) = []
+unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
+        go (p `plusPtr` s) (l - 1) []
+    where
+        STRICT3(go)
+        go p 0 acc = peek p          >>= \e -> return (k e : acc)
+        go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
+{-# INLINE unpackWith #-}
+{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
+
+-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
+-- conversion function
+packWith :: (a -> Word8) -> [a] -> ByteString
+packWith k str = unsafeCreate (length str) $ \p -> go p str
+    where
+        STRICT2(go)
+        go _ []     = return ()
+        go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
+{-# INLINE packWith #-}
+{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
+
+------------------------------------------------------------------------
+
+-- | A space-efficient representation of a Word8 vector, supporting many
+-- efficient operations.  A 'ByteString' contains 8-bit characters only.
+--
+-- Instances of Eq, Ord, Read, Show, Data, Typeable
+--
+newtype LazyByteString = LPS [ByteString] -- LPS for lazy packed string
+    deriving (Show,Read
+#if defined(__GLASGOW_HASKELL__)
+                        ,Data, Typeable
+#endif
+             )
+
+------------------------------------------------------------------------
+
  -- | /O(1)/ The empty 'ByteString'
  empty :: ByteString
  empty = PS nullForeignPtr 0 0
diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs

index bd4b31a..03b492d 100644 (file)
--- a/Data/ByteString/Char8.hs
+++ b/Data/ByteString/Char8.hs
@@ -162,7 +162,8 @@ module Data.ByteString.Char8 (
          sort,                   -- :: ByteString -> ByteString
  
          -- * Reading from ByteStrings
-        readInt,                -- :: ByteString -> Maybe Int
+        readInt,                -- :: ByteString -> Maybe (Int, ByteString)
+        readInteger,            -- :: ByteString -> Maybe (Integer, ByteString)
  
          -- * Low level CString conversions
  
@@ -931,6 +932,51 @@ readInt as
            end True _ n ps = Just (negate n, ps)
            end _    _ n ps = Just (n, ps)
  
+-- | readInteger reads an Integer from the beginning of the ByteString.  If
+-- there is no integer at the beginning of the string, it returns Nothing,
+-- otherwise it just returns the int read, and the rest of the string.
+readInteger :: ByteString -> Maybe (Integer, ByteString)
+readInteger as
+    | null as   = Nothing
+    | otherwise =
+        case unsafeHead as of
+            '-' -> first (unsafeTail as) >>= \(n, bs) -> return (-n, bs)
+            '+' -> first (unsafeTail as)
+            _   -> first as
+
+    where first ps | null ps   = Nothing
+                   | otherwise =
+                       case B.unsafeHead ps of
+                        w | w >= 0x30 && w <= 0x39 -> Just $
+                            loop 1 (fromIntegral w - 0x30) [] (unsafeTail ps)
+                          | otherwise              -> Nothing
+
+          loop :: Int -> Int -> [Integer]
+               -> ByteString -> (Integer, ByteString)
+          STRICT4(loop)
+          loop d acc ns ps
+              | null ps   = combine d acc ns empty
+              | otherwise =
+                  case B.unsafeHead ps of
+                   w | w >= 0x30 && w <= 0x39 ->
+                       if d == 9 then loop 1 (fromIntegral w - 0x30)
+                                           (toInteger acc : ns)
+                                           (unsafeTail ps)
+                                 else loop (d+1)
+                                           (10*acc + (fromIntegral w - 0x30))
+                                           ns (unsafeTail ps)
+                     | otherwise -> combine d acc ns ps
+
+          combine _ acc [] ps = (toInteger acc, ps)
+          combine d acc ns ps =
+              ((10^d * combine1 1000000000 ns + toInteger acc), ps)
+
+          combine1 _ [n] = n
+          combine1 b ns  = combine1 (b*b) $ combine2 b ns
+
+          combine2 b (n:m:ns) = let t = m*b + n in t `seq` (t : combine2 b ns)
+          combine2 _ ns       = ns
+
  -- | Read an entire file strictly into a 'ByteString'.  This is far more
  -- efficient than reading the characters into a 'String' and then using
  -- 'pack'.  It also may be more efficient than opening the file and
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs

index eb4ba61..80b80ea 100644 (file)
--- a/Data/ByteString/Lazy.hs
+++ b/Data/ByteString/Lazy.hs
@@ -20,7 +20,7 @@
  -- without requiring the entire vector be resident in memory.
  --
  -- Some operations, such as concat, append, reverse and cons, have
--- better complexity than their "Data.ByteString" equivalents, as due to
+-- better complexity than their "Data.ByteString" equivalents, due to
  -- optimisations resulting from the list spine structure. And for other
  -- operations Lazy ByteStrings are usually within a few percent of
  -- strict ones, but with better heap usage. For data larger than the
@@ -43,13 +43,15 @@
  module Data.ByteString.Lazy (
  
          -- * The @ByteString@ type
-        ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
+        ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable
  
          -- * Introducing and eliminating 'ByteString's
          empty,                  -- :: ByteString
-        singleton,               -- :: Word8   -> ByteString
+        singleton,              -- :: Word8   -> ByteString
          pack,                   -- :: [Word8] -> ByteString
          unpack,                 -- :: ByteString -> [Word8]
+        fromChunks,             -- :: [Strict.ByteString] -> ByteString
+        toChunks,               -- :: ByteString -> [Strict.ByteString]
  
          -- * Basic interface
          cons,                   -- :: Word8 -> ByteString -> ByteString
@@ -93,7 +95,6 @@ module Data.ByteString.Lazy (
  
          -- ** Accumulating maps
          mapAccumL,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
-        mapAccumR,  -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
          mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
  
          -- ** Infinite ByteStrings
@@ -123,7 +124,6 @@ module Data.ByteString.Lazy (
          -- ** Breaking into many substrings
          split,                  -- :: Word8 -> ByteString -> [ByteString]
          splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
-        tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  
          -- ** Joining strings
          join,                   -- :: ByteString -> [ByteString] -> ByteString
@@ -179,6 +179,7 @@ module Data.ByteString.Lazy (
          hGet,                   -- :: Handle -> Int -> IO ByteString
          hPut,                   -- :: Handle -> ByteString -> IO ()
          hGetNonBlocking,        -- :: Handle -> IO ByteString
+
  --      hGetN,                  -- :: Int -> Handle -> Int -> IO ByteString
  --      hGetContentsN,          -- :: Int -> Handle -> IO ByteString
  --      hGetNonBlockingN,       -- :: Int -> Handle -> IO ByteString
@@ -196,6 +197,7 @@ import Prelude hiding
  import qualified Data.List              as L  -- L for list/lazy
  import qualified Data.ByteString        as P  -- P for packed
  import qualified Data.ByteString.Base   as P
+import Data.ByteString.Base (LazyByteString(..))
  import qualified Data.ByteString.Fusion as P
  import Data.ByteString.Fusion (PairS(..),loopL)
  
@@ -212,10 +214,6 @@ import Foreign.ForeignPtr       (withForeignPtr)
  import Foreign.Ptr
  import Foreign.Storable
  
-#if defined(__GLASGOW_HASKELL__)
-import Data.Generics            (Data(..), Typeable(..))
-#endif
-
  -- -----------------------------------------------------------------------------
  --
  -- Useful macros, until we have bang patterns
@@ -229,17 +227,7 @@ import Data.Generics            (Data(..), Typeable(..))
  
  -- -----------------------------------------------------------------------------
  
--- | A space-efficient representation of a Word8 vector, supporting many
--- efficient operations.  A 'ByteString' contains 8-bit characters only.
---
--- Instances of Eq, Ord, Read, Show, Data, Typeable
---
-newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string
-    deriving (Show,Read
-#if defined(__GLASGOW_HASKELL__)
-                        ,Data, Typeable
-#endif
-             )
+type ByteString = LazyByteString
  
  --
  -- hmm, what about getting the PS constructor unpacked into the cons cell?
@@ -367,6 +355,14 @@ unpack :: ByteString -> [Word8]
  unpack (LPS ss) = L.concatMap P.unpack ss
  {-# INLINE unpack #-}
  
+-- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
+fromChunks :: [P.ByteString] -> ByteString
+fromChunks ls = LPS $ L.filter (not . P.null) ls
+
+-- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
+toChunks :: ByteString -> [P.ByteString]
+toChunks (LPS s) = s
+
  ------------------------------------------------------------------------
  
  {-
@@ -589,9 +585,6 @@ minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
  mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
  
-mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
-mapAccumR = error "mapAccumR unimplemented"
-
  -- | /O(n)/ map Word8 functions, provided with the index at each position
  mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
  mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
@@ -834,6 +827,7 @@ split c (LPS (a:as)) = comb [] (P.split c a) as
          {-# INLINE cons' #-}
  {-# INLINE split #-}
  
+{-
  -- | Like 'splitWith', except that sequences of adjacent separators are
  -- treated as a single separator. eg.
  -- 
@@ -841,6 +835,7 @@ split c (LPS (a:as)) = comb [] (P.split c a) as
  --
  tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
  tokens f = L.filter (not.null) . splitWith f
+-}
  
  -- | The 'group' function takes a ByteString and returns a list of
  -- ByteStrings such that the concatenation of the result is equal to the
diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs

index ada949b..beec2ba 100644 (file)
--- a/Data/ByteString/Lazy/Char8.hs
+++ b/Data/ByteString/Lazy/Char8.hs
@@ -24,13 +24,15 @@
  module Data.ByteString.Lazy.Char8 (
  
          -- * The @ByteString@ type
-        ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
+        ByteString,            -- instances: Eq, Ord, Show, Read, Data, Typeable
  
          -- * Introducing and eliminating 'ByteString's
          empty,                  -- :: ByteString
          singleton,               -- :: Char   -> ByteString
          pack,                   -- :: String -> ByteString
          unpack,                 -- :: ByteString -> String
+        fromChunks,             -- :: [Strict.ByteString] -> ByteString
+        toChunks,               -- :: ByteString -> [Strict.ByteString]
  
          -- * Basic interface
          cons,                   -- :: Char -> ByteString -> ByteString
@@ -103,7 +105,6 @@ module Data.ByteString.Lazy.Char8 (
          -- ** Breaking into many substrings
          split,                  -- :: Char -> ByteString -> [ByteString]
          splitWith,              -- :: (Char -> Bool) -> ByteString -> [ByteString]
-        tokens,                 -- :: (Char -> Bool) -> ByteString -> [ByteString]
  
          -- ** Breaking into lines and words
          lines,                  -- :: ByteString -> [ByteString]
@@ -149,6 +150,7 @@ module Data.ByteString.Lazy.Char8 (
  
          -- * Reading from ByteStrings
          readInt,
+        readInteger,
  
          -- * I\/O with 'ByteString's
  
@@ -176,7 +178,7 @@ module Data.ByteString.Lazy.Char8 (
  
  -- Functions transparently exported
  import Data.ByteString.Lazy 
-        (ByteString(..)
+        (ByteString, fromChunks, toChunks
          ,empty,null,length,tail,init,append,reverse,transpose
          ,concat,take,drop,splitAt,join,isPrefixOf,group,inits,tails,copy
          ,hGetContents, hGet, hPut, getContents
@@ -187,6 +189,8 @@ import Data.ByteString.Lazy
  import qualified Data.ByteString.Lazy as L
  import qualified Data.ByteString as B
  import qualified Data.ByteString.Base as B
+import Data.ByteString.Base (LazyByteString(LPS))
+
  import Data.ByteString.Base (w2c, c2w, isSpaceWord8)
  
  import Data.Int (Int64)
@@ -455,15 +459,6 @@ splitWith :: (Char -> Bool) -> ByteString -> [ByteString]
  splitWith f = L.splitWith (f . w2c)
  {-# INLINE splitWith #-}
  
--- | Like 'splitWith', except that sequences of adjacent separators are
--- treated as a single separator. eg.
--- 
--- > tokens (=='a') "aabbaca" == ["bb","c"]
---
-tokens :: (Char -> Bool) -> ByteString -> [ByteString]
-tokens f = L.tokens (f . w2c)
-{-# INLINE tokens #-}
-
  -- | The 'groupBy' function is the non-overloaded version of 'group'.
  groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
  groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b))
@@ -643,7 +638,7 @@ unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space
  -- > tokens isSpace = words
  --
  words :: ByteString -> [ByteString]
-words = L.tokens isSpaceWord8
+words = P.filter (not . L.null) . L.splitWith isSpaceWord8
  {-# INLINE words #-}
  
  -- | The 'unwords' function is analogous to the 'unlines' function, on words.
@@ -685,6 +680,60 @@ readInt (LPS (x:xs)) =
                                  in n' `seq` ps' `seq` Just $! (n', LPS ps')
  
  
+-- | readInteger reads an Integer from the beginning of the ByteString.  If
+-- there is no integer at the beginning of the string, it returns Nothing,
+-- otherwise it just returns the int read, and the rest of the string.
+readInteger :: ByteString -> Maybe (Integer, ByteString)
+readInteger (LPS []) = Nothing
+readInteger (LPS (x:xs)) =
+        case w2c (B.unsafeHead x) of
+            '-' -> first (B.unsafeTail x) xs >>= \(n, bs) -> return (-n, bs)
+            '+' -> first (B.unsafeTail x) xs
+            _   -> first x xs
+
+    where first ps pss
+              | B.null ps = case pss of
+                  []         -> Nothing
+                  (ps':pss') -> first' ps' pss'
+              | otherwise = first' ps pss
+
+          first' ps pss = case B.unsafeHead ps of
+              w | w >= 0x30 && w <= 0x39 -> Just $
+                  loop 1 (fromIntegral w - 0x30) [] (B.unsafeTail ps) pss
+                | otherwise              -> Nothing
+
+          loop :: Int -> Int -> [Integer]
+               -> B.ByteString -> [B.ByteString] -> (Integer, ByteString)
+          STRICT5(loop)
+          loop d acc ns ps pss
+              | B.null ps = case pss of
+                                []         -> combine d acc ns ps pss
+                                (ps':pss') -> loop d acc ns ps' pss'
+              | otherwise =
+                  case B.unsafeHead ps of
+                   w | w >= 0x30 && w <= 0x39 ->
+                       if d < 9 then loop (d+1)
+                                          (10*acc + (fromIntegral w - 0x30))
+                                          ns (B.unsafeTail ps) pss
+                                else loop 1 (fromIntegral w - 0x30)
+                                          (fromIntegral acc : ns)
+                                          (B.unsafeTail ps) pss
+                     | otherwise -> combine d acc ns ps pss
+
+          combine _ acc [] ps pss = end (fromIntegral acc) ps pss
+          combine d acc ns ps pss =
+              end (10^d * combine1 1000000000 ns + fromIntegral acc) ps pss
+
+          combine1 _ [n] = n
+          combine1 b ns  = combine1 (b*b) $ combine2 b ns
+
+          combine2 b (n:m:ns) = let t = n+m*b in t `seq` (t : combine2 b ns)
+          combine2 _ ns       = ns
+
+          end n ps pss = let ps' | B.null ps =    pss
+                                 | otherwise = ps:pss
+                          in ps' `seq` (n, LPS ps')
+
  -- | Read an entire file /lazily/ into a 'ByteString'. Use 'text mode'
  -- on Windows to interpret newlines
  readFile :: FilePath -> IO ByteString
author	Don Stewart <dons@cse.unsw.edu.au>
	Sat, 9 Sep 2006 05:01:11 +0000 (05:01 +0000)
committer	Don Stewart <dons@cse.unsw.edu.au>
	Sat, 9 Sep 2006 05:01:11 +0000 (05:01 +0000)
Data/ByteString.hs		patch \| blob \| history
Data/ByteString/Base.hs		patch \| blob \| history
Data/ByteString/Char8.hs		patch \| blob \| history
Data/ByteString/Lazy.hs		patch \| blob \| history
Data/ByteString/Lazy/Char8.hs		patch \| blob \| history