-{-# OPTIONS_GHC -cpp -optc-O1 -fno-warn-orphans #-}
---
--- -optc-O2 breaks with 4.0.4 gcc on debian
---
+{-# OPTIONS_GHC -cpp -fno-warn-orphans #-}
+-- |
-- Module : Data.ByteString.Lazy.Char8
-- Copyright : (c) Don Stewart 2006
-- License : BSD-style
--
-- Maintainer : dons@cse.unsw.edu.au
-- Stability : experimental
--- Portability : portable (tested with GHC>=6.4.1 and Hugs 2005)
---
-
+-- Portability : non-portable (imports Data.ByteString.Lazy)
--
--- | Manipulate /lazy/ 'ByteString's using 'Char' operations. All Chars will
+-- Manipulate /lazy/ 'ByteString's using 'Char' operations. All Chars will
-- be truncated to 8 bits. It can be expected that these functions will
--- run at identical speeds to their Word8 equivalents in
+-- run at identical speeds to their 'Data.Word.Word8' equivalents in
-- "Data.ByteString.Lazy".
--
-- This module is intended to be imported @qualified@, to avoid name
module Data.ByteString.Lazy.Char8 (
-- * The @ByteString@ type
- ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
+ ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable
-- * Introducing and eliminating 'ByteString's
empty, -- :: ByteString
singleton, -- :: Char -> ByteString
pack, -- :: String -> ByteString
unpack, -- :: ByteString -> String
+ fromChunks, -- :: [Strict.ByteString] -> ByteString
+ toChunks, -- :: ByteString -> [Strict.ByteString]
-- * Basic interface
cons, -- :: Char -> ByteString -> ByteString
inits, -- :: ByteString -> [ByteString]
tails, -- :: ByteString -> [ByteString]
- -- ** Breaking and dropping on specific Chars
- breakChar, -- :: Char -> ByteString -> (ByteString, ByteString)
- spanChar, -- :: Char -> ByteString -> (ByteString, ByteString)
-
-- ** Breaking into many substrings
split, -- :: Char -> ByteString -> [ByteString]
splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString]
- tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString]
-- ** Breaking into lines and words
lines, -- :: ByteString -> [ByteString]
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
- joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString
-- * Predicates
isPrefixOf, -- :: ByteString -> ByteString -> Bool
-- ** Searching by equality
elem, -- :: Char -> ByteString -> Bool
notElem, -- :: Char -> ByteString -> Bool
- filterChar, -- :: Char -> ByteString -> ByteString
- filterNotChar, -- :: Char -> ByteString -> ByteString
-- ** Searching with a predicate
find, -- :: (Char -> Bool) -> ByteString -> Maybe Char
-- * Ordered ByteStrings
-- sort, -- :: ByteString -> ByteString
+ copy, -- :: ByteString -> ByteString
+
-- * Reading from ByteStrings
readInt,
+ readInteger,
-- * I\/O with 'ByteString's
-- ** I\/O with Handles
hGetContents, -- :: Handle -> IO ByteString
- hGetContentsN, -- :: Int -> Handle -> IO ByteString
hGet, -- :: Handle -> Int64 -> IO ByteString
- hGetN, -- :: Int -> Handle -> Int64 -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
-#if defined(__GLASGOW_HASKELL__)
hGetNonBlocking, -- :: Handle -> IO ByteString
- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
-#endif
+
+-- hGetN, -- :: Int -> Handle -> Int64 -> IO ByteString
+-- hGetContentsN, -- :: Int -> Handle -> IO ByteString
+-- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
) where
-- Functions transparently exported
import Data.ByteString.Lazy
- (ByteString(..)
- ,empty,null,length,tail,init,append,reverse,transpose
- ,concat,take,drop,splitAt,join,isPrefixOf,group,inits, tails
- ,hGetContentsN, hGetN, hGetContents, hGet, hPut, getContents
-#if defined(__GLASGOW_HASKELL__)
- ,hGetNonBlocking, hGetNonBlockingN
-#endif
- ,putStr, putStrLn
- ,readFile, writeFile, appendFile)
+ (ByteString, fromChunks, toChunks
+ ,empty,null,length,tail,init,append,reverse,transpose,cycle
+ ,concat,take,drop,splitAt,join,isPrefixOf,group,inits,tails,copy
+ ,hGetContents, hGet, hPut, getContents
+ ,hGetNonBlocking
+ ,putStr, putStrLn, interact)
-- Functions we need to wrap.
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as B
import qualified Data.ByteString.Base as B
+import Data.ByteString.Base (LazyByteString(LPS))
+
import Data.ByteString.Base (w2c, c2w, isSpaceWord8)
import Data.Int (Int64)
,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter
,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,foldl1,foldr1
,readFile,writeFile,appendFile,replicate,getContents,getLine,putStr,putStrLn
- ,zip,zipWith,unzip,notElem,repeat,iterate)
+ ,zip,zipWith,unzip,notElem,repeat,iterate,interact,cycle)
+
+import System.IO (hClose,openFile,IOMode(..))
+import Control.Exception (bracket)
#define STRICT1(f) f a | a `seq` False = undefined
#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
-- | /O(n)/ Convert a 'String' into a 'ByteString'.
+--
+-- Every 'Char' is first converted to a byte with 'c2w' and the resulting
+-- list is then packed.
pack :: [Char] -> ByteString
-pack = L.packWith c2w
+pack = L.pack. P.map c2w
-- | /O(n)/ Converts a 'ByteString' to a 'String'.
+--
+-- The bytes are unpacked to a list and each one is then turned into a
+-- 'Char' with 'w2c'.
unpack :: ByteString -> [Char]
-unpack = L.unpackWith w2c
+unpack = P.map w2c . L.unpack
{-# INLINE unpack #-}
-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
span f = L.span (f . w2c)
{-# INLINE span #-}
+{-
-- | 'breakChar' breaks its ByteString argument at the first occurence
-- of the specified Char. It is more efficient than 'break' as it is
-- implemented with @memchr(3)@. I.e.
spanChar :: Char -> ByteString -> (ByteString, ByteString)
spanChar = L.spanByte . c2w
{-# INLINE spanChar #-}
+-}
+
+--
+-- TODO, more rules for breakChar*
+--
-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
-- argument, consuming the delimiter. I.e.
splitWith f = L.splitWith (f . w2c)
{-# INLINE splitWith #-}
--- | Like 'splitWith', except that sequences of adjacent separators are
--- treated as a single separator. eg.
---
--- > tokens (=='a') "aabbaca" == ["bb","c"]
---
-tokens :: (Char -> Bool) -> ByteString -> [ByteString]
-tokens f = L.tokens (f . w2c)
-{-# INLINE tokens #-}
-
-- | The 'groupBy' function is the non-overloaded version of 'group'.
+--
+-- The predicate is given 'Char's: both bytes are converted with 'w2c'
+-- before it is applied, and the grouping itself is done by @L.groupBy@.
groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b))
--- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a
--- char. Around 4 times faster than the generalised join.
---
-joinWithChar :: Char -> ByteString -> ByteString -> ByteString
-joinWithChar = L.joinWithByte . c2w
-{-# INLINE joinWithChar #-}
-
-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
+--
+-- Defined point-free: @L.index@ is applied to both arguments and the
+-- resulting byte is converted to a 'Char' with 'w2c'.
index :: ByteString -> Int64 -> Char
index = (w2c .) . L.index
find f ps = w2c `fmap` L.find (f . w2c) ps
{-# INLINE find #-}
+{-
-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
-- case of filtering a single Char. It is more efficient to use
-- filterChar in this case.
filterNotChar :: Char -> ByteString -> ByteString
filterNotChar c = L.filterNotByte (c2w c)
{-# INLINE filterNotChar #-}
+-}
-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
-- corresponding pairs of Chars. If one input ByteString is short,
--- > tokens isSpace = words
+-- > words = Prelude.filter (not . null) . splitWith isSpace
--
words :: ByteString -> [ByteString]
-words = L.tokens isSpaceWord8
+words = P.filter (not . L.null) . L.splitWith isSpaceWord8
{-# INLINE words #-}
-- | The 'unwords' function is analogous to the 'unlines' function, on words.
| otherwise = ps:pss
in n' `seq` ps' `seq` Just $! (n', LPS ps')
+
+-- | readInteger reads an Integer from the beginning of the ByteString. If
+-- there is no integer at the beginning of the string, it returns Nothing,
+-- otherwise it just returns the integer read, and the rest of the string.
+--
+-- A single leading @-@ or @+@ is accepted; a @-@ negates the result. Only
+-- the bytes 0x30 to 0x39 (the ASCII decimal digits) count as digits, and
+-- at least one digit is required. Reading continues across chunk
+-- boundaries.
+readInteger :: ByteString -> Maybe (Integer, ByteString)
+readInteger (LPS []) = Nothing
+readInteger (LPS (x:xs)) =
+ -- Look at the first byte for an optional sign. unsafeHead assumes that
+ -- the chunk x is non-empty (TODO confirm this holds for every LPS value).
+ case w2c (B.unsafeHead x) of
+ '-' -> first (B.unsafeTail x) xs >>= \(n, bs) -> return (-n, bs)
+ '+' -> first (B.unsafeTail x) xs
+ _ -> first x xs
+
+ -- first: if the current chunk is empty (possible once the sign byte has
+ -- been dropped), move on to the next chunk; Nothing when none is left.
+ where first ps pss
+ | B.null ps = case pss of
+ [] -> Nothing
+ (ps':pss') -> first' ps' pss'
+ | otherwise = first' ps pss
+
+ -- The first byte must be a digit. The loop is started with that one
+ -- digit in acc (d = 1) and no completed groups.
+ first' ps pss = case B.unsafeHead ps of
+ w | w >= 0x30 && w <= 0x39 -> Just $
+ loop 1 (fromIntegral w - 0x30) [] (B.unsafeTail ps) pss
+ | otherwise -> Nothing
+
+ -- loop arguments: d is the number of digits currently held in acc, acc
+ -- is the value of those digits as an Int, ns holds the completed groups
+ -- of nine digits (most recently completed first), ps and pss are the
+ -- current chunk and the remaining chunks.
+ loop :: Int -> Int -> [Integer]
+ -> B.ByteString -> [B.ByteString] -> (Integer, ByteString)
+ -- STRICT5 is a CPP macro that is not defined in the visible part of this
+ -- file; presumably it forces all five arguments in the same way as
+ -- STRICT1 and STRICT2 (TODO confirm).
+ STRICT5(loop)
+ loop d acc ns ps pss
+ | B.null ps = case pss of
+ [] -> combine d acc ns ps pss
+ (ps':pss') -> loop d acc ns ps' pss'
+ | otherwise =
+ case B.unsafeHead ps of
+ w | w >= 0x30 && w <= 0x39 ->
+ -- Fewer than nine digits in acc: append this digit. Otherwise push acc
+ -- onto ns as a finished group and start a new group with this digit.
+ if d < 9 then loop (d+1)
+ (10*acc + (fromIntegral w - 0x30))
+ ns (B.unsafeTail ps) pss
+ else loop 1 (fromIntegral w - 0x30)
+ (fromIntegral acc : ns)
+ (B.unsafeTail ps) pss
+ | otherwise -> combine d acc ns ps pss
+
+ -- With no completed groups acc alone is the result. Otherwise the
+ -- groups are merged by combine1 (starting with base 1000000000), the
+ -- result is shifted by the d digits held in acc, and acc is added.
+ combine _ acc [] ps pss = end (fromIntegral acc) ps pss
+ combine d acc ns ps pss =
+ end (10^d * combine1 1000000000 ns + fromIntegral acc) ps pss
+
+ -- Merge the groups pairwise, squaring the base after each pass, until a
+ -- single value remains. Only reached from combine with a non-empty list.
+ combine1 _ [n] = n
+ combine1 b ns = combine1 (b*b) $ combine2 b ns
+
+ -- Merge adjacent pairs as n + m*b, forcing each sum before it is consed.
+ -- A trailing unpaired element (or the empty list) is returned unchanged.
+ combine2 b (n:m:ns) = let t = n+m*b in t `seq` (t : combine2 b ns)
+ combine2 _ ns = ns
+
+ -- Pair the result with the remaining input, leaving out the current
+ -- chunk when it is empty.
+ end n ps pss = let ps' | B.null ps = pss
+ | otherwise = ps:pss
+ in ps' `seq` (n, LPS ps')
+
+-- | Lazily read the complete contents of a file into a 'ByteString'.
+-- The file is opened in text mode, so that newlines are interpreted on
+-- Windows.
+readFile :: FilePath -> IO ByteString
+readFile path = do
+    hdl <- openFile path ReadMode
+    hGetContents hdl
+
+-- | Write a 'ByteString' to a file. The handle is closed again even if
+-- the write raises an exception.
+writeFile :: FilePath -> ByteString -> IO ()
+writeFile path contents = bracket acquire hClose emit
+  where
+    acquire  = openFile path WriteMode
+    emit hdl = hPut hdl contents
+
+-- | Append a 'ByteString' to a file. The handle is closed again even if
+-- the write raises an exception.
+appendFile :: FilePath -> ByteString -> IO ()
+appendFile path contents =
+    bracket (openFile path AppendMode) hClose (`hPut` contents)