X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FByteString%2FLazy.hs;h=c9d3bdba28ed8f4a987736e3b65cb555464b1e31;hb=6b1a36a595eddf1e124529646afdb75c76a9966d;hp=eb4ba61942ebbb4ed9007244cea048cc8f041997;hpb=a87eeb296e699c8000b371c9676158499be3d17f;p=haskell-directory.git diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index eb4ba61..c9d3bdb 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -1,18 +1,15 @@ {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-} --- --- Module : ByteString.Lazy +-- | +-- Module : Data.ByteString.Lazy -- Copyright : (c) Don Stewart 2006 -- (c) Duncan Coutts 2006 -- License : BSD-style -- -- Maintainer : dons@cse.unsw.edu.au -- Stability : experimental --- Portability : portable, requires ffi and cpp --- Tested with : GHC 6.4.1 and Hugs March 2005 +-- Portability : non-portable (instance of type synonym) -- - --- --- | A time and space-efficient implementation of lazy byte vectors +-- A time and space-efficient implementation of lazy byte vectors -- using lists of packed 'Word8' arrays, suitable for high performance -- use, both in terms of large data quantities, or high speed -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8' @@ -20,9 +17,9 @@ -- without requiring the entire vector be resident in memory. -- -- Some operations, such as concat, append, reverse and cons, have --- better complexity than their "Data.ByteString" equivalents, as due to +-- better complexity than their "Data.ByteString" equivalents, due to -- optimisations resulting from the list spine structure. And for other --- operations Lazy ByteStrings are usually within a few percent of +-- operations lazy ByteStrings are usually within a few percent of -- strict ones, but with better heap usage. For data larger than the -- available memory, or if you have tight memory constraints, this -- module will be the only option. The default chunk size is 64k, which @@ -34,22 +31,26 @@ -- -- > import qualified Data.ByteString.Lazy as B -- --- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use --- UArray by Simon Marlow. Rewritten to support slices and use --- ForeignPtr by David Roundy. Polished and extended by Don Stewart. +-- Original GHC implementation by Bryan O\'Sullivan. +-- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. +-- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr' +-- by David Roundy. +-- Polished and extended by Don Stewart. -- Lazy variant by Duncan Coutts and Don Stewart. -- module Data.ByteString.Lazy ( -- * The @ByteString@ type - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - singleton, -- :: Word8 -> ByteString + singleton, -- :: Word8 -> ByteString pack, -- :: [Word8] -> ByteString unpack, -- :: ByteString -> [Word8] + fromChunks, -- :: [Strict.ByteString] -> ByteString + toChunks, -- :: ByteString -> [Strict.ByteString] -- * Basic interface cons, -- :: Word8 -> ByteString -> ByteString @@ -93,7 +94,6 @@ module Data.ByteString.Lazy ( -- ** Accumulating maps mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) - mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString -- ** Infinite ByteStrings @@ -123,7 +123,6 @@ module Data.ByteString.Lazy ( -- ** Breaking into many substrings split, -- :: Word8 -> ByteString -> [ByteString] splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] - tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] -- ** Joining strings join, -- :: ByteString -> [ByteString] -> ByteString @@ -179,6 +178,7 @@ module Data.ByteString.Lazy ( hGet, -- :: Handle -> Int -> IO ByteString hPut, -- :: Handle -> ByteString -> IO () hGetNonBlocking, -- :: Handle -> IO ByteString + -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString -- hGetContentsN, -- :: Int -> Handle -> IO ByteString -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString @@ -196,6 +196,7 @@ import Prelude hiding import qualified Data.List as L -- L for list/lazy import qualified Data.ByteString as P -- P for packed import qualified Data.ByteString.Base as P +import Data.ByteString.Base (LazyByteString(..)) import qualified Data.ByteString.Fusion as P import Data.ByteString.Fusion (PairS(..),loopL) @@ -212,10 +213,6 @@ import Foreign.ForeignPtr (withForeignPtr) import Foreign.Ptr import Foreign.Storable -#if defined(__GLASGOW_HASKELL__) -import Data.Generics (Data(..), Typeable(..)) -#endif - -- ----------------------------------------------------------------------------- -- -- Useful macros, until we have bang patterns @@ -229,17 +226,7 @@ import Data.Generics (Data(..), Typeable(..)) -- ----------------------------------------------------------------------------- --- | A space-efficient representation of a Word8 vector, supporting many --- efficient operations. A 'ByteString' contains 8-bit characters only. --- --- Instances of Eq, Ord, Read, Show, Data, Typeable --- -newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string - deriving (Show,Read -#if defined(__GLASGOW_HASKELL__) - ,Data, Typeable -#endif - ) +type ByteString = LazyByteString -- -- hmm, what about getting the PS constructor unpacked into the cons cell? @@ -367,6 +354,14 @@ unpack :: ByteString -> [Word8] unpack (LPS ss) = L.concatMap P.unpack ss {-# INLINE unpack #-} +-- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString' +fromChunks :: [P.ByteString] -> ByteString +fromChunks ls = LPS $ L.filter (not . P.null) ls + +-- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString' +toChunks :: ByteString -> [P.ByteString] +toChunks (LPS s) = s + ------------------------------------------------------------------------ {- @@ -415,8 +410,8 @@ length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings. -- cons :: Word8 -> ByteString -> ByteString -cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss) -cons c (LPS ss) = LPS (P.singleton c : ss) +cons c (LPS (s:ss)) | P.length s < 16 = LPS (P.cons c s : ss) +cons c (LPS ss) = LPS (P.singleton c : ss) {-# INLINE cons #-} -- | /O(n\/c)/ Append a byte to the end of a 'ByteString' @@ -589,9 +584,6 @@ minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS -mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumR = error "mapAccumR unimplemented" - -- | /O(n)/ map Word8 functions, provided with the index at each position mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS @@ -670,10 +662,10 @@ unfoldr f = LPS . unfoldChunk 32 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. take :: Int64 -> ByteString -> ByteString -take n _ | n < 0 = empty -take i (LPS ps) = LPS (take' i ps) - where take' _ [] = [] - take' 0 _ = [] +take i _ | i <= 0 = empty +take i (LPS ps) = LPS (take' i ps) + where take' 0 _ = [] + take' _ [] = [] take' n (x:xs) = if n < fromIntegral (P.length x) then P.take (fromIntegral n) x : [] @@ -684,8 +676,8 @@ take i (LPS ps) = LPS (take' i ps) drop :: Int64 -> ByteString -> ByteString drop i p | i <= 0 = p drop i (LPS ps) = LPS (drop' i ps) - where drop' _ [] = [] - drop' 0 xs = xs + where drop' 0 xs = xs + drop' _ [] = [] drop' n (x:xs) = if n < fromIntegral (P.length x) then P.drop (fromIntegral n) x : xs @@ -695,8 +687,8 @@ drop i (LPS ps) = LPS (drop' i ps) splitAt :: Int64 -> ByteString -> (ByteString, ByteString) splitAt i p | i <= 0 = (empty, p) splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b) - where splitAt' _ [] = ([], []) - splitAt' 0 xs = ([], xs) + where splitAt' 0 xs = ([], xs) + splitAt' _ [] = ([], []) splitAt' n (x:xs) = if n < fromIntegral (P.length x) then (P.take (fromIntegral n) x : [], @@ -808,7 +800,7 @@ splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as -- argument, consuming the delimiter. I.e. -- -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] --- > split 'a' "aXaXaXa" == ["","X","X","X"] +-- > split 'a' "aXaXaXa" == ["","X","X","X",""] -- > split 'x' "x" == ["",""] -- -- and @@ -834,6 +826,7 @@ split c (LPS (a:as)) = comb [] (P.split c a) as {-# INLINE cons' #-} {-# INLINE split #-} +{- -- | Like 'splitWith', except that sequences of adjacent separators are -- treated as a single separator. eg. -- @@ -841,6 +834,7 @@ split c (LPS (a:as)) = comb [] (P.split c a) as -- tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] tokens f = L.filter (not.null) . splitWith f +-} -- | The 'group' function takes a ByteString and returns a list of -- ByteStrings such that the concatenation of the result is equal to the