X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FByteString%2FChar8.hs;h=03b492d3d7e3da9ad48ba7578b24597495dd751b;hb=1d644d67d64186aeb2b840adf4d1cceed27a5bc6;hp=530dda88be6b887352ade69276499b6e283d52fd;hpb=3bdb7f5a1cf6de821f032f9507dd35433afd1d3d;p=haskell-directory.git diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index 530dda8..03b492d 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -1,4 +1,4 @@ -{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-} +{-# OPTIONS_GHC -cpp -fglasgow-exts #-} -- -- Module : Data.ByteString.Char8 -- Copyright : (c) Don Stewart 2006 @@ -10,9 +10,9 @@ -- -- --- | Manipulate ByteStrings using Char operations. All Chars will be +-- | Manipulate 'ByteString's using 'Char' operations. All Chars will be -- truncated to 8 bits. It can be expected that these functions will run --- at identical speeds to their Word8 equivalents in @Data.ByteString@. +-- at identical speeds to their 'Word8' equivalents in "Data.ByteString". -- -- More specifically these byte strings are taken to be in the -- subset of Unicode covered by code points 0-255. This covers @@ -27,7 +27,7 @@ -- * -- -- This module is intended to be imported @qualified@, to avoid name --- clashes with Prelude functions. eg. +-- clashes with "Prelude" functions. eg. -- -- > import qualified Data.ByteString.Char8 as B -- @@ -35,29 +35,24 @@ module Data.ByteString.Char8 ( -- * The @ByteString@ type - ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid -- * Introducing and eliminating 'ByteString's empty, -- :: ByteString - packChar, -- :: Char -> ByteString + singleton, -- :: Char -> ByteString pack, -- :: String -> ByteString unpack, -- :: ByteString -> String -- * Basic interface cons, -- :: Char -> ByteString -> ByteString - snoc, -- :: Char -> ByteString -> ByteString - null, -- :: ByteString -> Bool - length, -- :: ByteString -> Int + snoc, -- :: ByteString -> Char -> ByteString + append, -- :: ByteString -> ByteString -> ByteString head, -- :: ByteString -> Char - tail, -- :: ByteString -> ByteString last, -- :: ByteString -> Char + tail, -- :: ByteString -> ByteString init, -- :: ByteString -> ByteString - append, -- :: ByteString -> ByteString -> ByteString - - -- * Special ByteStrings - inits, -- :: ByteString -> [ByteString] - tails, -- :: ByteString -> [ByteString] - elems, -- :: ByteString -> [ByteString] + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int -- * Transformating ByteStrings map, -- :: (Char -> Char) -> ByteString -> ByteString @@ -65,11 +60,16 @@ module Data.ByteString.Char8 ( intersperse, -- :: Char -> ByteString -> ByteString transpose, -- :: [ByteString] -> [ByteString] - -- * Reducing 'ByteString's + -- * Reducing 'ByteString's (folds) foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a - foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a + foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char + foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char + + foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a + foldr', -- :: (Char -> a -> a) -> a -> ByteString -> a foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char + foldr1', -- :: (Char -> Char -> Char) -> ByteString -> Char -- ** Special folds concat, -- :: [ByteString] -> ByteString @@ -78,11 +78,23 @@ module Data.ByteString.Char8 ( all, -- :: (Char -> Bool) -> ByteString -> Bool maximum, -- :: ByteString -> Char minimum, -- :: ByteString -> Char + + -- * Building ByteStrings + -- ** Scans + scanl, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString + scanl1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString + scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString + scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString + + -- ** Accumulating maps + mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) + mapAccumR, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Char -> ByteString - unfoldrN, -- :: (Char -> Maybe (Char, Char)) -> Char -> ByteString + unfoldr, -- :: (a -> Maybe (Char, a)) -> a -> ByteString + unfoldrN, -- :: Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a) -- * Substrings @@ -92,25 +104,18 @@ module Data.ByteString.Char8 ( splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString - break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - - -- ** Breaking and dropping on specific Chars - breakChar, -- :: Char -> ByteString -> (ByteString, ByteString) - spanChar, -- :: Char -> ByteString -> (ByteString, ByteString) - breakFirst, -- :: Char -> ByteString -> Maybe (ByteString,ByteString) - breakLast, -- :: Char -> ByteString -> Maybe (ByteString,ByteString) - breakSpace, -- :: ByteString -> Maybe (ByteString,ByteString) - dropSpace, -- :: ByteString -> ByteString - dropSpaceEnd, -- :: ByteString -> ByteString + break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + breakEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + group, -- :: ByteString -> [ByteString] + groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString] + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] -- ** Breaking into many substrings split, -- :: Char -> ByteString -> [ByteString] splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] - tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString] - group, -- :: ByteString -> [ByteString] - groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] -- ** Breaking into lines and words lines, -- :: ByteString -> [ByteString] @@ -118,103 +123,100 @@ module Data.ByteString.Char8 ( unlines, -- :: [ByteString] -> ByteString unwords, -- :: ByteString -> [ByteString] - lines', -- :: ByteString -> [ByteString] - unlines', -- :: [ByteString] -> ByteString - linesCRLF', -- :: ByteString -> [ByteString] - unlinesCRLF', -- :: [ByteString] -> ByteString - words', -- :: ByteString -> [ByteString] - unwords', -- :: ByteString -> [ByteString] - - lineIndices, -- :: ByteString -> [Int] - betweenLines, -- :: ByteString -> ByteString -> ByteString -> Maybe (ByteString) - -- ** Joining strings join, -- :: ByteString -> [ByteString] -> ByteString - joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString - -- * Indexing ByteStrings - index, -- :: ByteString -> Int -> Char - elemIndex, -- :: Char -> ByteString -> Maybe Int - elemIndexLast, -- :: Char -> ByteString -> Maybe Int - elemIndices, -- :: Char -> ByteString -> [Int] - findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int - findIndices, -- :: (Char -> Bool) -> ByteString -> [Int] - count, -- :: Char -> ByteString -> Int - - -- * Ordered ByteStrings - sort, -- :: ByteString -> ByteString + -- ** Searching for substrings + isPrefixOf, -- :: ByteString -> ByteString -> Bool + isSuffixOf, -- :: ByteString -> ByteString -> Bool + isSubstringOf, -- :: ByteString -> ByteString -> Bool + findSubstring, -- :: ByteString -> ByteString -> Maybe Int + findSubstrings, -- :: ByteString -> ByteString -> [Int] -- * Searching ByteStrings -- ** Searching by equality elem, -- :: Char -> ByteString -> Bool notElem, -- :: Char -> ByteString -> Bool - filterChar, -- :: Char -> ByteString -> ByteString - filterNotChar, -- :: Char -> ByteString -> ByteString -- ** Searching with a predicate - filter, -- :: (Char -> Bool) -> ByteString -> ByteString find, -- :: (Char -> Bool) -> ByteString -> Maybe Char + filter, -- :: (Char -> Bool) -> ByteString -> ByteString +-- partition -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) - -- ** Searching for substrings - isPrefixOf, -- :: ByteString -> ByteString -> Bool - isSuffixOf, -- :: ByteString -> ByteString -> Bool - isSubstringOf, -- :: ByteString -> ByteString -> Bool - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] + -- * Indexing ByteStrings + index, -- :: ByteString -> Int -> Char + elemIndex, -- :: Char -> ByteString -> Maybe Int + elemIndices, -- :: Char -> ByteString -> [Int] + elemIndexEnd, -- :: Char -> ByteString -> Maybe Int + findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int + findIndices, -- :: (Char -> Bool) -> ByteString -> [Int] + count, -- :: Char -> ByteString -> Int - -- * Zipping and unzipping ByteString + -- * Zipping and unzipping ByteStrings zip, -- :: ByteString -> ByteString -> [(Char,Char)] zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c] unzip, -- :: [(Char,Char)] -> (ByteString,ByteString) - -- * Unchecked access - unsafeHead, -- :: ByteString -> Char - unsafeTail, -- :: ByteString -> ByteString - unsafeIndex, -- :: ByteString -> Int -> Char - w2c, -- :: Word8 -> Char - c2w, -- :: Char -> Word8 + -- * Ordered ByteStrings + sort, -- :: ByteString -> ByteString -- * Reading from ByteStrings - readInt, -- :: ByteString -> Maybe Int - unsafeReadInt, -- :: ByteString -> Maybe Int + readInt, -- :: ByteString -> Maybe (Int, ByteString) + readInteger, -- :: ByteString -> Maybe (Integer, ByteString) + + -- * Low level CString conversions + + -- ** Packing CStrings and pointers + packCString, -- :: CString -> ByteString + packCStringLen, -- :: CString -> ByteString + packMallocCString, -- :: CString -> ByteString + + -- ** Using ByteStrings as CStrings + useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a + useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a -- * Copying ByteStrings copy, -- :: ByteString -> ByteString + copyCString, -- :: CString -> IO ByteString + copyCStringLen, -- :: CStringLen -> IO ByteString -- * I\/O with @ByteString@s -- ** Standard input and output - -#if defined(__GLASGOW_HASKELL__) getLine, -- :: IO ByteString -#endif getContents, -- :: IO ByteString putStr, -- :: ByteString -> IO () putStrLn, -- :: ByteString -> IO () + interact, -- :: (ByteString -> ByteString) -> IO () -- ** Files readFile, -- :: FilePath -> IO ByteString --- mmapFile, -- :: FilePath -> IO ByteString writeFile, -- :: FilePath -> ByteString -> IO () + appendFile, -- :: FilePath -> ByteString -> IO () +-- mmapFile, -- :: FilePath -> IO ByteString -- ** I\/O with Handles -#if defined(__GLASGOW_HASKELL__) - getArgs, -- :: IO [ByteString] hGetLine, -- :: Handle -> IO ByteString hGetNonBlocking, -- :: Handle -> Int -> IO ByteString -#endif hGetContents, -- :: Handle -> IO ByteString hGet, -- :: Handle -> Int -> IO ByteString hPut, -- :: Handle -> ByteString -> IO () + hPutStr, -- :: Handle -> ByteString -> IO () + hPutStrLn, -- :: Handle -> ByteString -> IO () #if defined(__GLASGOW_HASKELL__) -- * Low level construction - -- | For constructors from foreign language types see /Data.ByteString/ + -- | For constructors from foreign language types see "Data.ByteString" packAddress, -- :: Addr# -> ByteString unsafePackAddress, -- :: Int -> Addr# -> ByteString #endif + -- * Utilities (needed for array fusion) +#if defined(__GLASGOW_HASKELL__) + unpackList, +#endif + ) where import qualified Prelude as P @@ -223,42 +225,51 @@ import Prelude hiding (reverse,head,tail,last,init,null ,concat,any,take,drop,splitAt,takeWhile ,dropWhile,span,break,elem,filter,unwords ,words,maximum,minimum,all,concatMap - ,foldl1,foldr1,readFile,writeFile,replicate - ,getContents,getLine,putStr,putStrLn + ,scanl,scanl1,scanr,scanr1 + ,appendFile,readFile,writeFile + ,foldl1,foldr1,replicate + ,getContents,getLine,putStr,putStrLn,interact ,zip,zipWith,unzip,notElem) import qualified Data.ByteString as B +import qualified Data.ByteString.Base as B -- Listy functions transparently exported -import Data.ByteString (ByteString(..) - ,empty,null,length,tail,init,append - ,inits,tails,elems,reverse,transpose +import Data.ByteString (empty,null,length,tail,init,append + ,inits,tails,reverse,transpose ,concat,take,drop,splitAt,join ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring - ,findSubstrings,unsafeTail,copy,group + ,findSubstrings,copy,group - ,getContents, putStr, putStrLn - ,readFile, {-mmapFile,-} writeFile - ,hGetContents, hGet, hPut + ,getLine, getContents, putStr, putStrLn, interact + ,hGetContents, hGet, hPut, hPutStr, hPutStrLn + ,hGetLine, hGetNonBlocking + ,packCString,packCStringLen, packMallocCString + ,useAsCString,useAsCStringLen, copyCString,copyCStringLen #if defined(__GLASGOW_HASKELL__) - ,getLine, getArgs, hGetLine, hGetNonBlocking - ,packAddress, unsafePackAddress + ,unpackList #endif - ,useAsCString, unsafeUseAsCString ) -import Data.Char +import Data.ByteString.Base ( + ByteString(..) +#if defined(__GLASGOW_HASKELL__) + ,packAddress, unsafePackAddress +#endif + ,c2w, w2c, unsafeTail, isSpaceWord8, inlinePerformIO + ) +import Data.Char ( isSpace ) import qualified Data.List as List (intersperse) +import System.IO (openFile,hClose,hFileSize,IOMode(..)) +import Control.Exception (bracket) import Foreign -import Foreign.C.Types (CLong) -import Foreign.Marshal.Utils (with) #if defined(__GLASGOW_HASKELL__) -import GHC.Base (Char(..),unsafeChr,unpackCString#,unsafeCoerce#) +import GHC.Base (Char(..),unpackCString#,unsafeCoerce#) import GHC.IOBase (IO(..),stToIO) -import GHC.Prim (Addr#,writeWord8OffAddr#,realWorld#,plusAddr#) +import GHC.Prim (Addr#,writeWord8OffAddr#,plusAddr#) import GHC.Ptr (Ptr(..)) import GHC.ST (ST(..)) #endif @@ -266,13 +277,14 @@ import GHC.ST (ST(..)) #define STRICT1(f) f a | a `seq` False = undefined #define STRICT2(f) f a b | a `seq` b `seq` False = undefined #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined ------------------------------------------------------------------------ -- | /O(1)/ Convert a 'Char' into a 'ByteString' -packChar :: Char -> ByteString -packChar = B.packByte . c2w -{-# INLINE packChar #-} +singleton :: Char -> ByteString +singleton = B.singleton . c2w +{-# INLINE singleton #-} -- | /O(n)/ Convert a 'String' into a 'ByteString' -- @@ -281,13 +293,13 @@ packChar = B.packByte . c2w pack :: String -> ByteString #if !defined(__GLASGOW_HASKELL__) -pack str = B.create (P.length str) $ \p -> go p str +pack str = B.unsafeCreate (P.length str) $ \p -> go p str where go _ [] = return () go p (x:xs) = poke p (c2w x) >> go (p `plusPtr` 1) xs #else /* hack away */ -pack str = B.create (P.length str) $ \(Ptr p) -> stToIO (go p str) +pack str = B.unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p str) where go :: Addr# -> [Char] -> ST a () go _ [] = return () @@ -296,19 +308,18 @@ pack str = B.create (P.length str) $ \(Ptr p) -> stToIO (go p str) writeByte p c = ST $ \s# -> case writeWord8OffAddr# p 0# c s# of s2# -> (# s2#, () #) {-# INLINE writeByte #-} +{-# INLINE [1] pack #-} {-# RULES -"pack/packAddress" forall s# . - pack (unpackCString# s#) = B.packAddress s# + "FPS pack/packAddress" forall s . + pack (unpackCString# s) = B.packAddress s #-} #endif -{-# INLINE pack #-} - -- | /O(n)/ Converts a 'ByteString' to a 'String'. unpack :: ByteString -> [Char] -unpack = B.unpackWith w2c +unpack = P.map w2c . B.unpack {-# INLINE unpack #-} -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different @@ -352,6 +363,11 @@ foldl :: (a -> Char -> a) -> a -> ByteString -> a foldl f = B.foldl (\a c -> f a (w2c c)) {-# INLINE foldl #-} +-- | 'foldl\'' is like foldl, but strict in the accumulator. +foldl' :: (a -> Char -> a) -> a -> ByteString -> a +foldl' f = B.foldl' (\a c -> f a (w2c c)) +{-# INLINE foldl' #-} + -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a packed string, -- reduces the packed string using the binary operator, from right to left. @@ -359,18 +375,33 @@ foldr :: (Char -> a -> a) -> a -> ByteString -> a foldr f = B.foldr (\c a -> f (w2c c) a) {-# INLINE foldr #-} +-- | 'foldr\'' is a strict variant of foldr +foldr' :: (Char -> a -> a) -> a -> ByteString -> a +foldr' f = B.foldr' (\c a -> f (w2c c) a) +{-# INLINE foldr' #-} + -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'ByteStrings'. foldl1 :: (Char -> Char -> Char) -> ByteString -> Char foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) {-# INLINE foldl1 #-} +-- | A strict version of 'foldl1' +foldl1' :: (Char -> Char -> Char) -> ByteString -> Char +foldl1' f ps = w2c (B.foldl1' (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldl1' #-} + -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'ByteString's foldr1 :: (Char -> Char -> Char) -> ByteString -> Char foldr1 f ps = w2c (B.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps) {-# INLINE foldr1 #-} +-- | A strict variant of foldr1 +foldr1' :: (Char -> Char -> Char) -> ByteString -> Char +foldr1' f ps = w2c (B.foldr1' (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldr1' #-} + -- | Map a function over a 'ByteString' and concatenate the results concatMap :: (Char -> ByteString) -> ByteString -> ByteString concatMap f = B.concatMap (f . w2c) @@ -403,6 +434,45 @@ mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c))) {-# INLINE mapIndexed #-} +-- | The 'mapAccumL' function behaves like a combination of 'map' and +-- 'foldl'; it applies a function to each element of a ByteString, +-- passing an accumulating parameter from left to right, and returning a +-- final value of this accumulator together with the new list. +mapAccumL :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) +mapAccumL f = B.mapAccumL (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c)) + +-- | The 'mapAccumR' function behaves like a combination of 'map' and +-- 'foldr'; it applies a function to each element of a ByteString, +-- passing an accumulating parameter from right to left, and returning a +-- final value of this accumulator together with the new ByteString. +mapAccumR :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString) +mapAccumR f = B.mapAccumR (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c)) + +-- | 'scanl' is similar to 'foldl', but returns a list of successive +-- reduced values from the left: +-- +-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] +-- +-- Note that +-- +-- > last (scanl f z xs) == foldl f z xs. +scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +scanl f z = B.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) + +-- | 'scanl1' is a variant of 'scanl' that has no starting value argument: +-- +-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] +scanl1 :: (Char -> Char -> Char) -> ByteString -> ByteString +scanl1 f = B.scanl1 (\a b -> c2w (f (w2c a) (w2c b))) + +-- | scanr is the right-to-left dual of scanl. +scanr :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString +scanr f z = B.scanr (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) + +-- | 'scanr1' is a variant of 'scanr' that has no starting value argument. +scanr1 :: (Char -> Char -> Char) -> ByteString -> ByteString +scanr1 f = B.scanr1 (\a b -> c2w (f (w2c a) (w2c b))) + -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ -- the value of every element. The following holds: -- @@ -413,32 +483,31 @@ replicate :: Int -> Char -> ByteString replicate w = B.replicate w . c2w {-# INLINE replicate #-} --- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'. --- 'unfoldrN' builds a ByteString from a seed value. The function takes --- the element and returns 'Nothing' if it is done producing the --- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a --- prepending to the ByteString and @b@ is used as the next element in a --- recursive call. --- --- To preven unfoldrN having /O(n^2)/ complexity (as prepending a --- character to a ByteString is /O(n)/, this unfoldr requires a maximum --- final size of the ByteString as an argument. 'cons' can then be --- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has --- linear complexity. The depth of the recursion is limited to this --- size, but may be less. For lazy, infinite unfoldr, use --- 'Data.List.unfoldr' (from 'Data.List'). +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- ByteString from a seed value. The function takes the element and +-- returns 'Nothing' if it is done producing the ByteString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next character in the string, +-- and @b@ is the seed value for further production. -- -- Examples: -- --- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789" --- --- The following equation connects the depth-limited unfoldr to the List unfoldr: +-- > unfoldr (\x -> if x <= '9' then Just (x, succ x) else Nothing) '0' == "0123456789" +unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString +unfoldr f x0 = B.unfoldr (fmap k . f) x0 + where k (i, j) = (c2w i, j) + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. -- --- > unfoldrN n == take n $ List.unfoldr +-- The following equation relates 'unfoldrN' and 'unfoldr': -- -unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString -unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f . w2c) (c2w w) - where k (i,j) = (c2w i, c2w j) -- (c2w *** c2w) +-- > unfoldrN n f s == take n (unfoldr f s) +unfoldrN :: Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a) +unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f) w + where k (i,j) = (c2w i, j) {-# INLINE unfoldrN #-} -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, @@ -451,12 +520,16 @@ takeWhile f = B.takeWhile (f . w2c) -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. dropWhile :: (Char -> Bool) -> ByteString -> ByteString dropWhile f = B.dropWhile (f . w2c) -{-# INLINE dropWhile #-} +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] dropWhile #-} +#endif -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) break f = B.break (f . w2c) -{-# INLINE break #-} +#if defined(__GLASGOW_HASKELL__) +{-# INLINE [1] break #-} +#endif -- | 'span' @p xs@ breaks the ByteString into two segments. It is -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ @@ -479,6 +552,14 @@ spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) spanEnd f = B.spanEnd (f . w2c) {-# INLINE spanEnd #-} +-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' +-- +-- breakEnd p == spanEnd (not.p) +breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +breakEnd f = B.breakEnd (f . w2c) +{-# INLINE breakEnd #-} + +{- -- | 'breakChar' breaks its ByteString argument at the first occurence -- of the specified Char. It is more efficient than 'break' as it is -- implemented with @memchr(3)@. I.e. @@ -498,36 +579,7 @@ breakChar = B.breakByte . c2w spanChar :: Char -> ByteString -> (ByteString, ByteString) spanChar = B.spanByte . c2w {-# INLINE spanChar #-} - --- | /O(n)/ 'breakFirst' breaks the given ByteString on the first --- occurence of @w@. It behaves like 'break', except the delimiter is --- not returned, and @Nothing@ is returned if the delimiter is not in --- the ByteString. I.e. --- --- > breakFirst 'b' "aabbcc" == Just ("aa","bcc") --- --- > breakFirst c xs == --- > let (x,y) = break (== c) xs --- > in if null y then Nothing else Just (x, drop 1 y)) --- -breakFirst :: Char -> ByteString -> Maybe (ByteString,ByteString) -breakFirst = B.breakFirst . c2w -{-# INLINE breakFirst #-} - --- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the --- ByteString. --- --- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc") --- --- and the following are equivalent: --- --- > breakLast 'c' "abcdef" --- > let (x,y) = break (=='c') (reverse "abcdef") --- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x) --- -breakLast :: Char -> ByteString -> Maybe (ByteString,ByteString) -breakLast = B.breakLast . c2w -{-# INLINE breakLast #-} +-} -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte -- argument, consuming the delimiter. I.e. @@ -561,6 +613,7 @@ splitWith f = B.splitWith (f . w2c) {-# INLINE splitWith #-} -- the inline makes a big difference here. +{- -- | Like 'splitWith', except that sequences of adjacent separators are -- treated as a single separator. eg. -- @@ -569,17 +622,20 @@ splitWith f = B.splitWith (f . w2c) tokens :: (Char -> Bool) -> ByteString -> [ByteString] tokens f = B.tokens (f . w2c) {-# INLINE tokens #-} +-} -- | The 'groupBy' function is the non-overloaded version of 'group'. groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString] groupBy k = B.groupBy (\a b -> k (w2c a) (w2c b)) +{- -- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a -- char. Around 4 times faster than the generalised join. -- joinWithChar :: Char -> ByteString -> ByteString -> ByteString joinWithChar = B.joinWithByte . c2w {-# INLINE joinWithChar #-} +-} -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. index :: ByteString -> Int -> Char @@ -593,17 +649,17 @@ elemIndex :: Char -> ByteString -> Maybe Int elemIndex = B.elemIndex . c2w {-# INLINE elemIndex #-} --- | /O(n)/ The 'elemIndexLast' function returns the last index of the +-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the -- element in the given 'ByteString' which is equal to the query -- element, or 'Nothing' if there is no such element. The following -- holds: -- --- > elemIndexLast c xs == +-- > elemIndexEnd c xs == -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) -- -elemIndexLast :: Char -> ByteString -> Maybe Int -elemIndexLast = B.elemIndexLast . c2w -{-# INLINE elemIndexLast #-} +elemIndexEnd :: Char -> ByteString -> Maybe Int +elemIndexEnd = B.elemIndexEnd . c2w +{-# INLINE elemIndexEnd #-} -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. @@ -659,6 +715,7 @@ find :: (Char -> Bool) -> ByteString -> Maybe Char find f ps = w2c `fmap` B.find (f . w2c) ps {-# INLINE find #-} +{- -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common -- case of filtering a single Char. It is more efficient to use -- filterChar in this case. @@ -684,6 +741,7 @@ filterChar c = B.filterByte (c2w c) filterNotChar :: Char -> ByteString -> ByteString filterNotChar c = B.filterNotByte (c2w c) {-# INLINE filterNotChar #-} +-} -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of -- corresponding pairs of Chars. If one input ByteString is short, @@ -716,33 +774,14 @@ unsafeHead :: ByteString -> Char unsafeHead = w2c . B.unsafeHead {-# INLINE unsafeHead #-} --- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a Char. --- This omits the bounds check, which means there is an accompanying --- obligation on the programmer to ensure the bounds are checked in some --- other way. -unsafeIndex :: ByteString -> Int -> Char -unsafeIndex = (w2c .) . B.unsafeIndex -{-# INLINE unsafeIndex #-} - --- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. -w2c :: Word8 -> Char -#if !defined(__GLASGOW_HASKELL__) -w2c = chr . fromIntegral -#else -w2c = unsafeChr . fromIntegral -#endif -{-# INLINE w2c #-} - --- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and --- silently truncates to 8 bits Chars > '\255'. It is provided as --- convenience for ByteString construction. -c2w :: Char -> Word8 -c2w = fromIntegral . ord -{-# INLINE c2w #-} - -- --------------------------------------------------------------------- -- Things that depend on the encoding +{-# RULES + "FPS specialise break -> breakSpace" + break isSpace = breakSpace + #-} + -- | 'breakSpace' returns the pair of ByteStrings when the argument is -- broken at the first whitespace byte. I.e. -- @@ -751,7 +790,7 @@ c2w = fromIntegral . ord breakSpace :: ByteString -> (ByteString,ByteString) breakSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do i <- firstspace (p `plusPtr` s) 0 l - return $ case () of {_ + return $! case () of {_ | i == 0 -> (empty, PS x s l) | i == l -> (PS x s l, empty) | otherwise -> (PS x s i, PS x (s+i) (l-i)) @@ -765,6 +804,11 @@ firstspace ptr n m | otherwise = do w <- peekByteOff ptr n if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n +{-# RULES + "FPS specialise dropWhile isSpace -> dropSpace" + dropWhile isSpace = dropSpace + #-} + -- | 'dropSpace' efficiently returns the 'ByteString' argument with -- white space Chars removed from the front. It is more efficient than -- calling dropWhile for removing whitespace. I.e. @@ -774,7 +818,7 @@ firstspace ptr n m dropSpace :: ByteString -> ByteString dropSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do i <- firstnonspace (p `plusPtr` s) 0 l - return $ if i == l then empty else PS x (s+i) (l-i) + return $! if i == l then empty else PS x (s+i) (l-i) {-# INLINE dropSpace #-} firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int @@ -784,6 +828,7 @@ firstnonspace ptr n m | otherwise = do w <- peekElemOff ptr n if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n +{- -- | 'dropSpaceEnd' efficiently returns the 'ByteString' argument with -- white space removed from the end. I.e. -- @@ -794,7 +839,7 @@ firstnonspace ptr n m dropSpaceEnd :: ByteString -> ByteString dropSpaceEnd (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do i <- lastnonspace (p `plusPtr` s) (l-1) - return $ if i == (-1) then empty else PS x s (i+1) + return $! if i == (-1) then empty else PS x s (i+1) {-# INLINE dropSpaceEnd #-} lastnonspace :: Ptr Word8 -> Int -> IO Int @@ -803,6 +848,7 @@ lastnonspace ptr n | n < 0 = return n | otherwise = do w <- peekElemOff ptr n if isSpaceWord8 w then lastnonspace ptr (n-1) else return n +-} -- | 'lines' breaks a ByteString up into a list of ByteStrings at -- newline Chars. The resulting strings do not contain newlines. @@ -839,7 +885,7 @@ lines (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do unlines :: [ByteString] -> ByteString unlines [] = empty unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space - where nl = packChar '\n' + where nl = singleton '\n' -- | 'words' breaks a ByteString up into a list of words, which -- were delimited by Chars representing white space. And @@ -847,183 +893,105 @@ unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space -- > tokens isSpace = words -- words :: ByteString -> [ByteString] -words = B.tokens isSpaceWord8 +words = P.filter (not . B.null) . B.splitWith isSpaceWord8 +{-# INLINE words #-} -- | The 'unwords' function is analogous to the 'unlines' function, on words. unwords :: [ByteString] -> ByteString -unwords = join (packChar ' ') - --- | /O(n)/ Indicies of newlines. Shorthand for --- --- > elemIndices '\n' --- -lineIndices :: ByteString -> [Int] -lineIndices = elemIndices '\n' - --- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on --- newline Chars. However, unlike the Prelude functions, 'lines\'' and --- 'unlines\'' correctly reconstruct lines that are missing terminating --- newlines characters. I.e. --- --- > unlines (lines "a\nb\nc") == "a\nb\nc\n" --- > unlines' (lines' "a\nb\nc") == "a\nb\nc" --- --- Note that this means: --- --- > lines "a\nb\nc\n" == ["a","b","c"] --- > lines' "a\nb\nc\n" == ["a","b","c",""] --- -lines' :: ByteString -> [ByteString] -lines' ps = ps `seq` case elemIndex '\n' ps of - Nothing -> [ps] - Just n -> take n ps : lines' (drop (n+1) ps) - --- | 'linesCRLF\'' behaves like 'lines\'', but breaks on (\\cr?\\lf) -linesCRLF' :: ByteString -> [ByteString] -linesCRLF' ps = ps `seq` case elemIndex '\n' ps of - Nothing -> [ps] - Just 0 -> empty : linesCRLF' (drop 1 ps) - Just n -> let k = if ps `unsafeIndex` (n-1) == '\r' then n-1 else n - in take k ps : linesCRLF' (drop (n+1) ps) - --- | 'unlines\'' behaves like 'unlines', except that it also correctly --- retores lines that do not have terminating newlines (see the --- description for 'lines\''). --- -unlines' :: [ByteString] -> ByteString -unlines' ss = concat $ intersperse_newlines ss - where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s) - intersperse_newlines s = s - newline = packChar '\n' - --- | 'unlines\'' behaves like 'unlines', except that it also correctly --- retores lines that do not have terminating newlines (see the --- description for 'lines\''). Uses CRLF instead of LF. --- -unlinesCRLF' :: [ByteString] -> ByteString -unlinesCRLF' ss = concat $ intersperse_newlines ss - where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s) - intersperse_newlines s = s - newline = pack "\r\n" - --- | 'words\'' behaves like 'words', with the exception that it produces --- output on ByteStrings with trailing whitespace that can be --- correctly inverted by 'unwords'. I.e. --- --- > words "a b c " == ["a","b","c"] --- > words' "a b c " == ["a","b","c",""] --- --- > unwords $ words "a b c " == "a b c" --- > unwords $ words' "a b c " == "a b c " --- -words' :: ByteString -> [ByteString] -words' = B.splitWith isSpaceWord8 - --- | 'unwords\'' behaves like 'unwords'. It is provided for consistency --- with the other invertable words and lines functions. -unwords' :: [ByteString] -> ByteString -unwords' = unwords - --- | 'betweenLines' returns the ByteString between the two lines given, --- or Nothing if they do not appear. The returned string is the first --- and shortest string such that the line before it is the given first --- line, and the line after it is the given second line. -betweenLines :: ByteString -- ^ First line to look for - -> ByteString -- ^ Second line to look for - -> ByteString -- ^ 'ByteString' to look in - -> Maybe (ByteString) - -betweenLines start end ps = - case P.break (start ==) (lines ps) of - (_, _:rest@(PS ps1 s1 _:_)) -> - case P.break (end ==) rest of - (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1) - _ -> Nothing - _ -> Nothing +unwords = join (singleton ' ') +{-# INLINE unwords #-} -- --------------------------------------------------------------------- -- Reading from ByteStrings --- | readInt skips any whitespace at the beginning of its argument, and --- reads an Int from the beginning of the ByteString. If there is no +-- | readInt reads an Int from the beginning of the ByteString. If there is no -- integer at the beginning of the string, it returns Nothing, otherwise -- it just returns the int read, and the rest of the string. readInt :: ByteString -> Maybe (Int, ByteString) -readInt p@(PS x s l) = inlinePerformIO $ useAsCString p $ \cstr -> - with (castPtr cstr) $ \endpp -> do - val <- c_strtol (castPtr cstr) endpp 0 - skipped <- (`minusPtr` cstr) `fmap` peek endpp - return $ if skipped == 0 - then Nothing - else Just (fromIntegral val, PS x (s+skipped) (l-skipped)) - --- | unsafeReadInt is like readInt, but requires a null terminated --- ByteString. It avoids a copy if this is the case. It returns the Int --- read, if any, and the rest of the string. -unsafeReadInt :: ByteString -> Maybe (Int, ByteString) -unsafeReadInt p@(PS x s l) = inlinePerformIO $ unsafeUseAsCString p $ \cstr -> - with (castPtr cstr) $ \endpp -> do - val <- c_strtol (castPtr cstr) endpp 0 - skipped <- (`minusPtr` cstr) `fmap` peek endpp - return $ if skipped == 0 - then Nothing - else Just (fromIntegral val, PS x (s+skipped) (l-skipped)) - -foreign import ccall unsafe "stdlib.h strtol" c_strtol - :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong - -{- --- --- not quite there yet --- -readInt :: ByteString -> Maybe (Int, ByteString) -readInt = go 0 - where - STRICT2(go) - go i ps - | B.null ps = Nothing - | x == '-' = neg 0 xs - | otherwise = pos (parse x) xs - where (x, xs) = (ps `unsafeIndex` 0, unsafeTail ps) - - STRICT2(neg) - neg n qs | isSpace x = return $ Just ((i-n),xs) - | otherwise = neg (parse x + (10 * n)) xs - where (x, xs) = (qs `unsafeIndex` 0, unsafeTail qs) - - STRICT2(pos) - pos n qs | isSpace x = go (i+n) xs - | otherwise = pos (parse x + (10 * n)) xs - where (x, xs) = (qs `unsafeIndexWord8` 0, unsafeTail qs) - - parse w = fromIntegral (w - 48) :: Int - {-# INLINE parse #-} --} - --- --------------------------------------------------------------------- --- Internals - --- Just like inlinePerformIO, but we inline it. Big performance gains as --- it exposes lots of things to further inlining --- -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -#if defined(__GLASGOW_HASKELL__) -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -#else -inlinePerformIO = unsafePerformIO -#endif - --- ordered by frequency --- Idea from Ketil -isSpaceWord8 :: Word8 -> Bool -isSpaceWord8 w = case w of - 0x20 -> True -- SPACE - 0x0A -> True -- LF, \n - 0x09 -> True -- HT, \t - 0x0C -> True -- FF, \f - 0x0D -> True -- CR, \r - 0x0B -> True -- VT, \v - _ -> False -{-# INLINE isSpaceWord8 #-} +readInt as + | null as = Nothing + | otherwise = + case unsafeHead as of + '-' -> loop True 0 0 (unsafeTail as) + '+' -> loop False 0 0 (unsafeTail as) + _ -> loop False 0 0 as + + where loop :: Bool -> Int -> Int -> ByteString -> Maybe (Int, ByteString) + STRICT4(loop) + loop neg i n ps + | null ps = end neg i n ps + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 + && w <= 0x39 -> loop neg (i+1) + (n * 10 + (fromIntegral w - 0x30)) + (unsafeTail ps) + | otherwise -> end neg i n ps + + end _ 0 _ _ = Nothing + end True _ n ps = Just (negate n, ps) + end _ _ n ps = Just (n, ps) + +-- | readInteger reads an Integer from the beginning of the ByteString. If +-- there is no integer at the beginning of the string, it returns Nothing, +-- otherwise it just returns the int read, and the rest of the string. +readInteger :: ByteString -> Maybe (Integer, ByteString) +readInteger as + | null as = Nothing + | otherwise = + case unsafeHead as of + '-' -> first (unsafeTail as) >>= \(n, bs) -> return (-n, bs) + '+' -> first (unsafeTail as) + _ -> first as + + where first ps | null ps = Nothing + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 && w <= 0x39 -> Just $ + loop 1 (fromIntegral w - 0x30) [] (unsafeTail ps) + | otherwise -> Nothing + + loop :: Int -> Int -> [Integer] + -> ByteString -> (Integer, ByteString) + STRICT4(loop) + loop d acc ns ps + | null ps = combine d acc ns empty + | otherwise = + case B.unsafeHead ps of + w | w >= 0x30 && w <= 0x39 -> + if d == 9 then loop 1 (fromIntegral w - 0x30) + (toInteger acc : ns) + (unsafeTail ps) + else loop (d+1) + (10*acc + (fromIntegral w - 0x30)) + ns (unsafeTail ps) + | otherwise -> combine d acc ns ps + + combine _ acc [] ps = (toInteger acc, ps) + combine d acc ns ps = + ((10^d * combine1 1000000000 ns + toInteger acc), ps) + + combine1 _ [n] = n + combine1 b ns = combine1 (b*b) $ combine2 b ns + + combine2 b (n:m:ns) = let t = m*b + n in t `seq` (t : combine2 b ns) + combine2 _ ns = ns + +-- | Read an entire file strictly into a 'ByteString'. This is far more +-- efficient than reading the characters into a 'String' and then using +-- 'pack'. It also may be more efficient than opening the file and +-- reading it using hGet. +readFile :: FilePath -> IO ByteString +readFile f = bracket (openFile f ReadMode) hClose + (\h -> hFileSize h >>= hGet h . fromIntegral) + +-- | Write a 'ByteString' to a file. +writeFile :: FilePath -> ByteString -> IO () +writeFile f txt = bracket (openFile f WriteMode) hClose + (\h -> hPut h txt) + +-- | Append a 'ByteString' to a file. +appendFile :: FilePath -> ByteString -> IO () +appendFile f txt = bracket (openFile f AppendMode) hClose + (\h -> hPut h txt)