singleton, -- :: Word8 -> ByteString
pack, -- :: [Word8] -> ByteString
unpack, -- :: ByteString -> [Word8]
- packWith, -- :: (a -> Word8) -> [a] -> ByteString
- unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
-- * Basic interface
cons, -- :: Word8 -> ByteString -> ByteString
-- * Transformating ByteStrings
map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
- map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
reverse, -- :: ByteString -> ByteString
intersperse, -- :: Word8 -> ByteString -> ByteString
transpose, -- :: [ByteString] -> [ByteString]
foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+
foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
+ foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+ foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
-- ** Special folds
concat, -- :: [ByteString] -> ByteString
-- ** Unfolding ByteStrings
replicate, -- :: Int -> Word8 -> ByteString
unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
- unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
+ unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
-- * Substrings
inits, -- :: ByteString -> [ByteString]
tails, -- :: ByteString -> [ByteString]
- -- ** Breaking and dropping on specific bytes
- breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
- spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
-
-- ** Breaking into many substrings
split, -- :: Word8 -> ByteString -> [ByteString]
splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
- tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
- joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
-- * Predicates
isPrefixOf, -- :: ByteString -> ByteString -> Bool
-- | These functions use memchr(3) to efficiently search the ByteString
elem, -- :: Word8 -> ByteString -> Bool
notElem, -- :: Word8 -> ByteString -> Bool
- filterByte, -- :: Word8 -> ByteString -> ByteString
- filterNotByte, -- :: Word8 -> ByteString -> ByteString
-- ** Searching with a predicate
find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
- filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
-- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
-- * Indexing ByteStrings
findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
count, -- :: Word8 -> ByteString -> Int
- findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int
-- * Zipping and unzipping ByteStrings
zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
- zipWith',
unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
-- * Ordered ByteStrings
-- * I\/O with 'ByteString's
-- ** Standard input and output
-
-#if defined(__GLASGOW_HASKELL__)
getLine, -- :: IO ByteString
-#endif
getContents, -- :: IO ByteString
putStr, -- :: ByteString -> IO ()
putStrLn, -- :: ByteString -> IO ()
+ interact, -- :: (ByteString -> ByteString) -> IO ()
-- ** Files
readFile, -- :: FilePath -> IO ByteString
-- mmapFile, -- :: FilePath -> IO ByteString
-- ** I\/O with Handles
-#if defined(__GLASGOW_HASKELL__)
- getArgs, -- :: IO [ByteString]
hGetLine, -- :: Handle -> IO ByteString
- hGetLines, -- :: Handle -> IO [ByteString]
- hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
-#endif
hGetContents, -- :: Handle -> IO ByteString
hGet, -- :: Handle -> Int -> IO ByteString
+ hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
hPutStr, -- :: Handle -> ByteString -> IO ()
hPutStrLn, -- :: Handle -> ByteString -> IO ()
- -- * Fusion utilities
#if defined(__GLASGOW_HASKELL__)
+ -- * Fusion utilities
unpackList, -- eek, otherwise it gets thrown away by the simplifier
-#endif
lengthU, maximumU, minimumU
+#endif
+
) where
import qualified Prelude as P
,minimum,all,concatMap,foldl1,foldr1
,scanl,scanl1,scanr,scanr1
,readFile,writeFile,appendFile,replicate
- ,getContents,getLine,putStr,putStrLn
+ ,getContents,getLine,putStr,putStrLn,interact
,zip,zipWith,unzip,notElem)
import Data.ByteString.Base
-- Control.Exception.bracket not available in yhc or nhc
import Control.Exception (bracket, assert)
+import qualified Control.Exception as Exception
import Control.Monad (when)
import Foreign.C.String (CString, CStringLen)
#if !defined(__GLASGOW_HASKELL__)
import System.IO.Unsafe
+import qualified System.Environment
+import qualified System.IO (hGetLine)
#endif
#if defined(__GLASGOW_HASKELL__)
import System.IO (hGetBufNonBlocking)
import System.IO.Error (isEOFError)
-import Foreign.Marshal (alloca)
-import qualified Foreign.Concurrent as FC (newForeignPtr)
-
import GHC.Handle
import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
import GHC.Base (build)
-- -----------------------------------------------------------------------------
-- Introducing and eliminating 'ByteString's
--- | /O(1)/ The empty 'ByteString'
-empty :: ByteString
-empty = unsafeCreate 0 $ const $ return ()
-{-# NOINLINE empty #-}
-
-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
singleton :: Word8 -> ByteString
singleton c = unsafeCreate 1 $ \p -> poke p c
-{-# INLINE singleton #-}
+{-# INLINE [1] singleton #-}
--
-- XXX The unsafePerformIO is critical!
#else
---
--- Interacting with head/build fusion rule in ghc 6.5. Disable for now
---
-
unpack ps = build (unpackFoldr ps)
{-# INLINE unpack #-}
--
-- critical this isn't strict in the acc
-- as it will break in the presence of list fusion. this is a known
--- issue with seq and rewrite rules
+-- issue with seq and build/foldr rewrite rules, which rely on lazy
+-- demanding to avoid bottoms in the list.
--
unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
#endif
{-# INLINE map #-}
+{-
-- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
-- slightly faster for one-shot cases.
map' :: (Word8 -> Word8) -> ByteString -> ByteString
pokeByteOff p2 n (f x)
map_ (n+1) p1 p2
{-# INLINE map' #-}
+-}
-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
reverse :: ByteString -> ByteString
foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
{-# INLINE foldr #-}
+-- | 'foldr\'' is like 'foldr', but strict in the accumulator.
+foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
+foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+ go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
+ where
+ STRICT3(go)
+ go z p q | p == q = return z
+ | otherwise = do c <- peek p
+ go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
+{-# INLINE [1] foldr' #-}
+
-- | 'foldl1' is a variant of 'foldl' that has no starting value
-- argument, and thus must be applied to non-empty 'ByteStrings'.
-- This function is subject to array fusion.
| otherwise = foldr f (last ps) (init ps)
{-# INLINE foldr1 #-}
+-- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
+-- accumulator.
+foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+foldr1' f ps
+ | null ps = errorEmptyList "foldr1"
+ | otherwise = foldr' f (last ps) (init ps)
+{-# INLINE [1] foldr1' #-}
+
-- ---------------------------------------------------------------------
-- Special folds
-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
-{-# INLINE break #-}
+{-# INLINE [1] break #-}
--- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
---
--- breakEnd p == spanEnd (not.p)
-breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
-breakEnd p ps = splitAt (findFromEndUntil p ps) ps
+{-# RULES
+"FPS specialise break (x==)" forall x.
+ break ((==) x) = breakByte x
+ #-}
+
+#if __GLASGOW_HASKELL__ >= 605
+-- {-# RULES
+-- "FPS specialise break (==x)" forall x.
+-- break (==x) = breakByte x
+-- #-}
+#endif
-- | 'breakByte' breaks its ByteString argument at the first occurence
-- of the specified byte. It is more efficient than 'break' as it is
Just n -> (unsafeTake n p, unsafeDrop n p)
{-# INLINE breakByte #-}
+-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
+--
+-- breakEnd p == spanEnd (not.p)
+breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+breakEnd p ps = splitAt (findFromEndUntil p ps) ps
+
+-- | 'span' @p xs@ breaks the ByteString into two segments. It is
+-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
+span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+span p ps = break (not . p) ps
+{-# INLINE [1] span #-}
+
-- | 'spanByte' breaks its ByteString argument at the first
-- occurence of a byte other than its argument. It is more efficient
-- than 'span (==)'
else go p (i+1)
{-# INLINE spanByte #-}
--- | 'span' @p xs@ breaks the ByteString into two segments. It is
--- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
-span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
-span p ps = break (not . p) ps
-{-# INLINE span #-}
+{-# RULES
+"FPS specialise span (x==)" forall x.
+ span ((==) x) = spanByte x
+ #-}
+
+#if __GLASGOW_HASKELL__ >= 605
+-- {-# RULES
+-- "FPS specialise span (==x)" forall x.
+-- span (==x) = spanByte x
+-- #-}
+#endif
-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
-- We have
else splitLoop p (idx'+1) off' len' fp'
-}
+{-
-- | Like 'splitWith', except that sequences of adjacent separators are
-- treated as a single separator. eg.
--
tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
tokens f = P.filter (not.null) . splitWith f
{-# INLINE tokens #-}
+-}
-- | The 'group' function takes a ByteString and returns a list of
-- ByteStrings such that the concatenation of the result is equal to the
-- argument between each element of the list.
join :: ByteString -> [ByteString] -> ByteString
join s = concat . (List.intersperse s)
-{-# INLINE join #-}
+{-# INLINE [1] join #-}
+
+{-# RULES
+"FPS specialise join c -> joinByte" forall c s1 s2 .
+ join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
+ #-}
--
-- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
| p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
| otherwise = loop (n+1) (unsafeTail qs)
--- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
--- of the string if no element is found, rather than Nothing.
-findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
-findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
- where
- STRICT2(go)
- go ptr n | n >= l = return l
- | otherwise = do w <- peek ptr
- if k w
- then return n
- else go (ptr `plusPtr` 1) (n+1)
-{-# INLINE findIndexOrEnd #-}
-
-- ---------------------------------------------------------------------
-- Searching ByteStrings
#endif
{-# INLINE filter #-}
+{-
-- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
-- around 2x faster for some one-shot applications.
filter' :: (Word8 -> Bool) -> ByteString -> ByteString
then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
else go (f `plusPtr` 1) t end
{-# INLINE filter' #-}
+-}
--
-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
filterByte w ps = replicate (count w ps) w
{-# INLINE filterByte #-}
+{-# RULES
+ "FPS specialise filter (== x)" forall x.
+ filter ((==) x) = filterByte x
+ #-}
--
-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
-- case of filtering a single byte out of a list. It is more efficient
--
-- filterNotByte is around 2x faster than its filter equivalent.
filterNotByte :: Word8 -> ByteString -> ByteString
-filterNotByte w = filter' (/= w)
+filterNotByte w = filter (/= w)
{-# INLINE filterNotByte #-}
+{-# RULES
+"FPS specialise filter (/= x)" forall x.
+ filter ((/=) x) = filterNotByte x
+ #-}
-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
-- and returns the first element in matching the predicate, or 'Nothing'
-- if there is no such element.
{-# RULES
-"Specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
- pack (zipWith f p q) = zipWith' f p q
+"FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
+ zipWith f p q = unpack (zipWith' f p q)
#-}
-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
-- null-terminated @CString@. The @CString@ will be freed
-- automatically. This is a memcpy(3).
useAsCString :: ByteString -> (CString -> IO a) -> IO a
-useAsCString ps f = useAsCStringLen ps (\(s,_) -> f s)
-
--- | /O(n) construction/ Use a @ByteString@ with a function requiring a
--- @CStringLen@. The @CStringLen@ will be freed automatically. This is a
--- memcpy(3).
+useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
+ where alloc = withForeignPtr ps $ \p -> do
+ buf <- c_malloc (fromIntegral l+1)
+ memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
+ poke (buf `plusPtr` l) (0::Word8) -- n.b.
+ return (castPtr buf)
+
+-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
-useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
- where
- alloc = withForeignPtr ps $ \p -> do
- buf <- c_malloc (fromIntegral l+1)
- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
- poke (buf `plusPtr` l) (0::Word8) -- n.b.
- return $! (castPtr buf, l)
+useAsCStringLen = unsafeUseAsCStringLen
+
+--
+-- why were we doing this?
+--
+-- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
+-- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
+-- where
+-- alloc = withForeignPtr ps $ \p -> do
+-- buf <- c_malloc (fromIntegral l+1)
+-- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
+-- poke (buf `plusPtr` l) (0::Word8) -- n.b.
+-- return $! (castPtr buf, l)
+--
-- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
-- This is mainly useful to allow the rest of the data pointed
-- ---------------------------------------------------------------------
-- line IO
-#if defined(__GLASGOW_HASKELL__)
-
--- | getLine, read a line from stdin.
+-- | Read a line from stdin.
getLine :: IO ByteString
getLine = hGetLine stdin
+{-
-- | Lazily construct a list of lines of ByteStrings. This will be much
--- better on memory consumption than using lines =<< getContents.
+-- better on memory consumption than using 'hGetContents >>= lines'
+-- If you're considering this, a better choice might be to use
+-- Data.ByteString.Lazy
hGetLines :: Handle -> IO [ByteString]
hGetLines h = go
where
go = unsafeInterleaveIO $ do
- ms <- catch (hGetLine h >>= return . Just)
- (\_ -> return Nothing)
- case ms of
- Nothing -> return []
- Just s -> do ss <- go
- return (s:ss)
-
--- | hGetLine. read a ByteString from a handle
+ e <- hIsEOF h
+ if e
+ then return []
+ else do
+ x <- hGetLine h
+ xs <- go
+ return (x:xs)
+-}
+
+-- | Read a line from a handle
+
hGetLine :: Handle -> IO ByteString
+#if !defined(__GLASGOW_HASKELL__)
+hGetLine h = do
+ string <- System.IO.hGetLine h
+ return $ packWith c2w string
+#else
hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
case haBufferMode handle_ of
NoBuffering -> error "no buffering"
hGet _ 0 = return empty
hGet h i = createAndTrim i $ \p -> hGetBuf h p i
-#if defined(__GLASGOW_HASKELL__)
-- | hGetNonBlocking is identical to 'hGet', except that it will never block
-- waiting for data to become available, instead it returns only whatever data
-- is available.
hGetNonBlocking :: Handle -> Int -> IO ByteString
+#if defined(__GLASGOW_HASKELL__)
hGetNonBlocking _ 0 = return empty
hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
+#else
+hGetNonBlocking = hGet
#endif
-- | Read entire handle contents into a 'ByteString'.
getContents :: IO ByteString
getContents = hGetContents stdin
+-- | The interact function takes a function of type @ByteString -> ByteString@
+-- as its argument. The entire input from the standard input device is passed
+-- to this function as its argument, and the resulting string is output on the
+-- standard output device. It's great for writing one line programs!
+interact :: (ByteString -> ByteString) -> IO ()
+interact transformer = putStr . transformer =<< getContents
+
-- | Read an entire file strictly into a 'ByteString'. This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'pack'. It also may be more efficient than opening the file and
--- reading it using hGet.
+-- reading it using hGet. Files are read using 'binary mode' on Windows,
+-- for 'text mode' use the Char8 version of this function.
readFile :: FilePath -> IO ByteString
readFile f = bracket (openBinaryFile f ReadMode) hClose
(\h -> hFileSize h >>= hGet h . fromIntegral)
-- | Write a 'ByteString' to a file.
writeFile :: FilePath -> ByteString -> IO ()
-writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
- (\h -> hPut h ps)
+writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
+ (\h -> hPut h txt)
-- | Append a 'ByteString' to a file.
appendFile :: FilePath -> ByteString -> IO ()
appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
- (\hdl -> hPut hdl txt)
+ (\h -> hPut h txt)
{-
--
#else
let unmap = return ()
#endif
- fp <- FC.newForeignPtr p unmap
+ fp <- newForeignPtr p unmap
return fp
c_close fd
hClose h
where mmap_limit = 16*1024
-}
-#if defined(__GLASGOW_HASKELL__)
---
--- | A ByteString equivalent for getArgs. More efficient for large argument lists
---
-getArgs :: IO [ByteString]
-getArgs =
- alloca $ \ p_argc ->
- alloca $ \ p_argv -> do
- getProgArgv p_argc p_argv
- p <- fromIntegral `fmap` peek p_argc
- argv <- peek p_argv
- P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
-#endif
-
-- ---------------------------------------------------------------------
-- Internal utilities
+-- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
+-- of the string if no element is found, rather than Nothing.
+findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
+findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
+ where
+ STRICT2(go)
+ go ptr n | n >= l = return l
+ | otherwise = do w <- peek ptr
+ if k w
+ then return n
+ else go (ptr `plusPtr` 1) (n+1)
+{-# INLINE findIndexOrEnd #-}
+
-- | Perform an operation with a temporary ByteString
withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
withPtr fp io = inlinePerformIO (withForeignPtr fp io)
{-# INLINE newForeignFreePtr #-}
newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
-#if defined(__GLASGOW_HASKELL__)
-newForeignFreePtr p = FC.newForeignPtr p (c_free p)
-#else
newForeignFreePtr p = newForeignPtr c_free_finalizer p
-#endif
unsafeDrop, -- :: Int -> ByteString -> ByteString
-- * Low level introduction and elimination
+ empty, -- :: ByteString
create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
+ mallocByteString, -- :: Int -> IO (ForeignPtr a)
unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
-- * Utilities
inlinePerformIO, -- :: IO a -> a
+ nullForeignPtr, -- :: ForeignPtr Word8
countOccurrences, -- :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()
c_strlen, -- :: CString -> IO CInt
c_malloc, -- :: CInt -> IO (Ptr Word8)
c_free, -- :: Ptr Word8 -> IO ()
-
-#if !defined(__GLASGOW_HASKELL__)
c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ())
-#endif
memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8
memcmp, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
-- * Internal GHC magic
#if defined(__GLASGOW_HASKELL__)
- getProgArgv, -- :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
memcpy_ptr_baoff, -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
#endif
) where
-import Foreign.ForeignPtr
-import Foreign.Ptr
+import Foreign.ForeignPtr (ForeignPtr, newForeignPtr_, withForeignPtr)
+import Foreign.Ptr (Ptr, FunPtr, plusPtr, castPtr)
import Foreign.Storable (Storable(..))
-import Foreign.C.Types
+import Foreign.C.Types (CInt, CSize, CULong)
import Foreign.C.String (CString, CStringLen)
import Control.Exception (assert)
import Data.Word (Word8)
#if defined(__GLASGOW_HASKELL__)
+import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
import qualified Foreign.Concurrent as FC (newForeignPtr)
import Data.Generics (Data(..), Typeable(..))
import GHC.Prim (Addr#)
import GHC.Ptr (Ptr(..))
import GHC.Base (realWorld#,unsafeChr)
-import GHC.IOBase
+import GHC.IOBase (IO(IO), unsafePerformIO, RawBuffer)
+#else
+import Data.Char (chr)
+import System.IO.Unsafe (unsafePerformIO)
+#endif
-#if defined(__GLASGOW_HASKELL__) && !defined(SLOW_FOREIGN_PTR)
+#if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
+#else
+import Foreign.ForeignPtr (mallocForeignPtrBytes)
#endif
+#if __GLASGOW_HASKELL__>=605
+import GHC.ForeignPtr (ForeignPtr(ForeignPtr))
+import GHC.Base (nullAddr#)
#else
-import Data.Char (chr)
-import System.IO.Unsafe (unsafePerformIO)
+import Foreign.Ptr (nullPtr)
#endif
-- CFILES stuff is Hugs only
deriving (Data, Typeable)
#endif
+-- | /O(1)/ The empty 'ByteString'
+empty :: ByteString
+empty = PS nullForeignPtr 0 0
+
+nullForeignPtr :: ForeignPtr Word8
+#if __GLASGOW_HASKELL__>=605
+nullForeignPtr = ForeignPtr nullAddr# undefined --TODO: should ForeignPtrContents be strict?
+#else
+nullForeignPtr = unsafePerformIO $ newForeignPtr_ nullPtr
+{-# NOINLINE nullForeignPtr #-}
+#endif
+
-- ---------------------------------------------------------------------
--
-- Extensions to the basic interface
unsafeCreate l f = unsafePerformIO (create l f)
{-# INLINE unsafeCreate #-}
--- | Wrapper of mallocForeignPtrBytes.
+-- | Create ByteString of size @l@ and use action @f@ to fill it's contents.
create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
create l f = do
-#if defined(SLOW_FOREIGN_PTR) || !defined(__GLASGOW_HASKELL__)
- fp <- mallocForeignPtrBytes l
-#else
- fp <- mallocPlainForeignPtrBytes l
-#endif
+ fp <- mallocByteString l
withForeignPtr fp $ \p -> f p
return $! PS fp 0 l
--
createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createAndTrim l f = do
-#if defined(SLOW_FOREIGN_PTR) || !defined(__GLASGOW_HASKELL__)
- fp <- mallocForeignPtrBytes l
-#else
- fp <- mallocPlainForeignPtrBytes l
-#endif
+ fp <- mallocByteString l
withForeignPtr fp $ \p -> do
l' <- f p
if assert (l' <= l) $ l' >= l
createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
createAndTrim' l f = do
-#if defined(SLOW_FOREIGN_PTR) || !defined(__GLASGOW_HASKELL__)
- fp <- mallocForeignPtrBytes l
-#else
- fp <- mallocPlainForeignPtrBytes l
-#endif
+ fp <- mallocByteString l
withForeignPtr fp $ \p -> do
(off, l', res) <- f p
if assert (l' <= l) $ l' >= l
memcpy p' (p `plusPtr` off) (fromIntegral l')
return $! (ps, res)
+-- | Wrapper of mallocForeignPtrBytes with faster implementation
+-- for GHC 6.5 builds newer than 06/06/06
+mallocByteString :: Int -> IO (ForeignPtr a)
+mallocByteString l = do
+#if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
+ mallocPlainForeignPtrBytes l
+#else
+ mallocForeignPtrBytes l
+#endif
+
#if defined(__GLASGOW_HASKELL__)
-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
-- Addr\# (an arbitrary machine address assumed to point outside the
-- this, you need to have a proof of some kind that all 'ByteString's
-- ever generated from the underlying byte array are no longer live.
unsafeFinalize :: ByteString -> IO ()
-unsafeFinalize (PS p _ _) = finalizeForeignPtr p
+unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p
#endif
unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
--- @CStringLen@. Warning: modifying the @CStringLen@ will affect the
--- @ByteString@. This is analogous to unsafeUseAsCString, and comes
--- with the same safety requirements. The user must ensure there is a
--- null byte at the end of the string.
+-- @CStringLen@.
unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
-unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
+unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l)
-- ---------------------------------------------------------------------
--
--
foreign import ccall unsafe "string.h strlen" c_strlen
- :: CString -> IO CInt
+ :: CString -> IO CSize
foreign import ccall unsafe "stdlib.h malloc" c_malloc
- :: CInt -> IO (Ptr Word8)
+ :: CSize -> IO (Ptr Word8)
foreign import ccall unsafe "static stdlib.h free" c_free
:: Ptr Word8 -> IO ()
-#if !defined(__GLASGOW_HASKELL__)
foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
:: FunPtr (Ptr Word8 -> IO ())
-#endif
foreign import ccall unsafe "string.h memchr" memchr
:: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
--
foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
+ :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
+ :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
- :: Ptr Word8 -> CInt -> IO Word8
+ :: Ptr Word8 -> CULong -> IO Word8
foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
- :: Ptr Word8 -> CInt -> IO Word8
+ :: Ptr Word8 -> CULong -> IO Word8
foreign import ccall unsafe "static fpstring.h fps_count" c_count
- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
+ :: Ptr Word8 -> CULong -> Word8 -> IO CULong
-- ---------------------------------------------------------------------
-- MMap
-- Internal GHC Haskell magic
#if defined(__GLASGOW_HASKELL__)
-foreign import ccall unsafe "RtsAPI.h getProgArgv"
- getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
-
foreign import ccall unsafe "__hscore_memcpy_src_off"
memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
#endif
foldl', -- :: (a -> Char -> a) -> a -> ByteString -> a
foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char
foldl1', -- :: (Char -> Char -> Char) -> ByteString -> Char
+
foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a
+ foldr', -- :: (Char -> a -> a) -> a -> ByteString -> a
foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char
+ foldr1', -- :: (Char -> Char -> Char) -> ByteString -> Char
-- ** Special folds
concat, -- :: [ByteString] -> ByteString
-- ** Scans
scanl, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
scanl1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString
--- scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
--- scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString
+ scanr, -- :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
+ scanr1, -- :: (Char -> Char -> Char) -> ByteString -> ByteString
-- ** Accumulating maps
--- mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
--- mapAccumR, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
+ mapAccumL, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
+ mapAccumR, -- :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString
-- * Generating and unfolding ByteStrings
inits, -- :: ByteString -> [ByteString]
tails, -- :: ByteString -> [ByteString]
- -- ** Breaking and dropping on specific Chars
- breakChar, -- :: Char -> ByteString -> (ByteString, ByteString)
- spanChar, -- :: Char -> ByteString -> (ByteString, ByteString)
- breakSpace, -- :: ByteString -> (ByteString,ByteString)
- dropSpace, -- :: ByteString -> ByteString
- dropSpaceEnd, -- :: ByteString -> ByteString
-
-- ** Breaking into many substrings
split, -- :: Char -> ByteString -> [ByteString]
splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString]
- tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString]
-- ** Breaking into lines and words
lines, -- :: ByteString -> [ByteString]
unlines, -- :: [ByteString] -> ByteString
unwords, -- :: ByteString -> [ByteString]
- lines', -- :: ByteString -> [ByteString]
- unlines', -- :: [ByteString] -> ByteString
- linesCRLF', -- :: ByteString -> [ByteString]
- unlinesCRLF', -- :: [ByteString] -> ByteString
- words', -- :: ByteString -> [ByteString]
- unwords', -- :: ByteString -> [ByteString]
-
- lineIndices, -- :: ByteString -> [Int]
- betweenLines, -- :: ByteString -> ByteString -> ByteString -> Maybe (ByteString)
-
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
- joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString
-
-- ** Searching for substrings
isPrefixOf, -- :: ByteString -> ByteString -> Bool
-- ** Searching by equality
elem, -- :: Char -> ByteString -> Bool
notElem, -- :: Char -> ByteString -> Bool
- filterChar, -- :: Char -> ByteString -> ByteString
- filterNotChar, -- :: Char -> ByteString -> ByteString
-- ** Searching with a predicate
find, -- :: (Char -> Bool) -> ByteString -> Maybe Char
-- * Ordered ByteStrings
sort, -- :: ByteString -> ByteString
- -- * Conversion
- w2c, -- :: Word8 -> Char
- c2w, -- :: Char -> Word8
-
-- * Reading from ByteStrings
readInt, -- :: ByteString -> Maybe Int
-- * I\/O with @ByteString@s
-- ** Standard input and output
-
-#if defined(__GLASGOW_HASKELL__)
getLine, -- :: IO ByteString
-#endif
getContents, -- :: IO ByteString
putStr, -- :: ByteString -> IO ()
putStrLn, -- :: ByteString -> IO ()
+ interact, -- :: (ByteString -> ByteString) -> IO ()
-- ** Files
readFile, -- :: FilePath -> IO ByteString
-- mmapFile, -- :: FilePath -> IO ByteString
-- ** I\/O with Handles
-#if defined(__GLASGOW_HASKELL__)
- getArgs, -- :: IO [ByteString]
hGetLine, -- :: Handle -> IO ByteString
- hGetLines, -- :: Handle -> IO ByteString
hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
-#endif
hGetContents, -- :: Handle -> IO ByteString
hGet, -- :: Handle -> Int -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
#if defined(__GLASGOW_HASKELL__)
unpackList,
#endif
- filter', map'
) where
,length,map,lines,foldl,foldr,unlines
,concat,any,take,drop,splitAt,takeWhile
,dropWhile,span,break,elem,filter,unwords
- ,words,maximum,minimum,all,concatMap,scanl,scanl1
- ,foldl1,foldr1,readFile,writeFile,appendFile,replicate
- ,getContents,getLine,putStr,putStrLn
+ ,words,maximum,minimum,all,concatMap
+ ,scanl,scanl1,scanr,scanr1
+ ,appendFile,readFile,writeFile
+ ,foldl1,foldr1,replicate
+ ,getContents,getLine,putStr,putStrLn,interact
,zip,zipWith,unzip,notElem)
import qualified Data.ByteString as B
,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring
,findSubstrings,copy,group
- ,getContents, putStr, putStrLn
- ,readFile, {-mmapFile,-} writeFile, appendFile
+ ,getLine, getContents, putStr, putStrLn, interact
,hGetContents, hGet, hPut, hPutStr, hPutStrLn
+ ,hGetLine, hGetNonBlocking
,packCString,packCStringLen, packMallocCString
,useAsCString,useAsCStringLen, copyCString,copyCStringLen
#if defined(__GLASGOW_HASKELL__)
- ,getLine, getArgs, hGetLine, hGetLines, hGetNonBlocking
,unpackList
#endif
)
#if defined(__GLASGOW_HASKELL__)
,packAddress, unsafePackAddress
#endif
- ,c2w, w2c, unsafeTail, inlinePerformIO, isSpaceWord8
+ ,c2w, w2c, unsafeTail, isSpaceWord8, inlinePerformIO
)
+import Data.Char ( isSpace )
import qualified Data.List as List (intersperse)
+import System.IO (openFile,hClose,hFileSize,IOMode(..))
+import Control.Exception (bracket)
import Foreign
#if defined(__GLASGOW_HASKELL__)
-- | /O(n)/ Converts a 'ByteString' to a 'String'.
unpack :: ByteString -> [Char]
-unpack = B.unpackWith w2c
+unpack = P.map w2c . B.unpack
{-# INLINE unpack #-}
-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
foldr f = B.foldr (\c a -> f (w2c c) a)
{-# INLINE foldr #-}
+-- | 'foldr\'' is a strict variant of foldr
+foldr' :: (Char -> a -> a) -> a -> ByteString -> a
+foldr' f = B.foldr' (\c a -> f (w2c c) a)
+{-# INLINE foldr' #-}
+
-- | 'foldl1' is a variant of 'foldl' that has no starting value
-- argument, and thus must be applied to non-empty 'ByteStrings'.
foldl1 :: (Char -> Char -> Char) -> ByteString -> Char
foldr1 f ps = w2c (B.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
{-# INLINE foldr1 #-}
+-- | A strict variant of foldr1
+foldr1' :: (Char -> Char -> Char) -> ByteString -> Char
+foldr1' f ps = w2c (B.foldr1' (\x y -> c2w (f (w2c x) (w2c y))) ps)
+{-# INLINE foldr1' #-}
+
-- | Map a function over a 'ByteString' and concatenate the results
concatMap :: (Char -> ByteString) -> ByteString -> ByteString
concatMap f = B.concatMap (f . w2c)
mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c)))
{-# INLINE mapIndexed #-}
+-- | The 'mapAccumL' function behaves like a combination of 'map' and
+-- 'foldl'; it applies a function to each element of a ByteString,
+-- passing an accumulating parameter from left to right, and returning a
+-- final value of this accumulator together with the new list.
+mapAccumL :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
+mapAccumL f = B.mapAccumL (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c))
+
+-- | The 'mapAccumR' function behaves like a combination of 'map' and
+-- 'foldr'; it applies a function to each element of a ByteString,
+-- passing an accumulating parameter from right to left, and returning a
+-- final value of this accumulator together with the new ByteString.
+mapAccumR :: (acc -> Char -> (acc, Char)) -> acc -> ByteString -> (acc, ByteString)
+mapAccumR f = B.mapAccumR (\acc w -> case f acc (w2c w) of (acc', c) -> (acc', c2w c))
+
-- | 'scanl' is similar to 'foldl', but returns a list of successive
-- reduced values from the left:
--
scanl1 :: (Char -> Char -> Char) -> ByteString -> ByteString
scanl1 f = B.scanl1 (\a b -> c2w (f (w2c a) (w2c b)))
+-- | scanr is the right-to-left dual of scanl.
+scanr :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString
+scanr f z = B.scanr (\a b -> c2w (f (w2c a) (w2c b))) (c2w z)
+
+-- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
+scanr1 :: (Char -> Char -> Char) -> ByteString -> ByteString
+scanr1 f = B.scanr1 (\a b -> c2w (f (w2c a) (w2c b)))
+
-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
-- the value of every element. The following holds:
--
breakEnd f = B.breakEnd (f . w2c)
{-# INLINE breakEnd #-}
+{-
-- | 'breakChar' breaks its ByteString argument at the first occurence
-- of the specified Char. It is more efficient than 'break' as it is
-- implemented with @memchr(3)@. I.e.
spanChar :: Char -> ByteString -> (ByteString, ByteString)
spanChar = B.spanByte . c2w
{-# INLINE spanChar #-}
+-}
-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
-- argument, consuming the delimiter. I.e.
{-# INLINE splitWith #-}
-- the inline makes a big difference here.
+{-
-- | Like 'splitWith', except that sequences of adjacent separators are
-- treated as a single separator. eg.
--
tokens :: (Char -> Bool) -> ByteString -> [ByteString]
tokens f = B.tokens (f . w2c)
{-# INLINE tokens #-}
+-}
-- | The 'groupBy' function is the non-overloaded version of 'group'.
groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
groupBy k = B.groupBy (\a b -> k (w2c a) (w2c b))
+{-
-- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a
-- char. Around 4 times faster than the generalised join.
--
joinWithChar :: Char -> ByteString -> ByteString -> ByteString
joinWithChar = B.joinWithByte . c2w
{-# INLINE joinWithChar #-}
+-}
-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
index :: ByteString -> Int -> Char
find f ps = w2c `fmap` B.find (f . w2c) ps
{-# INLINE find #-}
+{-
-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
-- case of filtering a single Char. It is more efficient to use
-- filterChar in this case.
filterNotChar :: Char -> ByteString -> ByteString
filterNotChar c = B.filterNotByte (c2w c)
{-# INLINE filterNotChar #-}
+-}
-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
-- corresponding pairs of Chars. If one input ByteString is short,
unsafeHead = w2c . B.unsafeHead
{-# INLINE unsafeHead #-}
--- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a Char.
--- This omits the bounds check, which means there is an accompanying
--- obligation on the programmer to ensure the bounds are checked in some
--- other way.
-unsafeIndex :: ByteString -> Int -> Char
-unsafeIndex = (w2c .) . B.unsafeIndex
-{-# INLINE unsafeIndex #-}
-
-- ---------------------------------------------------------------------
-- Things that depend on the encoding
+{-# RULES
+ "FPS specialise break -> breakSpace"
+ break isSpace = breakSpace
+ #-}
+
-- | 'breakSpace' returns the pair of ByteStrings when the argument is
-- broken at the first whitespace byte. I.e.
--
| otherwise = do w <- peekByteOff ptr n
if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n
+{-# RULES
+ "FPS specialise dropWhile isSpace -> dropSpace"
+ dropWhile isSpace = dropSpace
+ #-}
+
-- | 'dropSpace' efficiently returns the 'ByteString' argument with
-- white space Chars removed from the front. It is more efficient than
-- calling dropWhile for removing whitespace. I.e.
| otherwise = do w <- peekElemOff ptr n
if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n
+{-
-- | 'dropSpaceEnd' efficiently returns the 'ByteString' argument with
-- white space removed from the end. I.e.
--
| n < 0 = return n
| otherwise = do w <- peekElemOff ptr n
if isSpaceWord8 w then lastnonspace ptr (n-1) else return n
+-}
-- | 'lines' breaks a ByteString up into a list of ByteStrings at
-- newline Chars. The resulting strings do not contain newlines.
where search = elemIndex '\n'
{-# INLINE lines #-}
-{-# Bogus rule, wrong if there's not \n at end of line
-
-"length.lines/count"
- P.length . lines = count '\n'
-
- #-}
-
{-
-- Just as fast, but more complex. Should be much faster, I thought.
lines :: ByteString -> [ByteString]
-- > tokens isSpace = words
--
words :: ByteString -> [ByteString]
-words = B.tokens isSpaceWord8
+words = P.filter (not . B.null) . B.splitWith isSpaceWord8
{-# INLINE words #-}
-- | The 'unwords' function is analogous to the 'unlines' function, on words.
unwords = join (singleton ' ')
{-# INLINE unwords #-}
--- | /O(n)/ Indicies of newlines. Shorthand for
---
--- > elemIndices '\n'
---
-lineIndices :: ByteString -> [Int]
-lineIndices = elemIndices '\n'
-{-# INLINE lineIndices #-}
-
--- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on
--- newline Chars. However, unlike the Prelude functions, 'lines\'' and
--- 'unlines\'' correctly reconstruct lines that are missing terminating
--- newlines characters. I.e.
---
--- > unlines (lines "a\nb\nc") == "a\nb\nc\n"
--- > unlines' (lines' "a\nb\nc") == "a\nb\nc"
---
--- Note that this means:
---
--- > lines "a\nb\nc\n" == ["a","b","c"]
--- > lines' "a\nb\nc\n" == ["a","b","c",""]
---
-lines' :: ByteString -> [ByteString]
-lines' ps = ps `seq` case elemIndex '\n' ps of
- Nothing -> [ps]
- Just n -> take n ps : lines' (drop (n+1) ps)
-
--- | 'linesCRLF\'' behaves like 'lines\'', but breaks on (\\cr?\\lf)
-linesCRLF' :: ByteString -> [ByteString]
-linesCRLF' ps = ps `seq` case elemIndex '\n' ps of
- Nothing -> [ps]
- Just 0 -> empty : linesCRLF' (drop 1 ps)
- Just n -> let k = if ps `unsafeIndex` (n-1) == '\r' then n-1 else n
- in take k ps : linesCRLF' (drop (n+1) ps)
-
--- | 'unlines\'' behaves like 'unlines', except that it also correctly
--- retores lines that do not have terminating newlines (see the
--- description for 'lines\'').
---
-unlines' :: [ByteString] -> ByteString
-unlines' ss = concat $ intersperse_newlines ss
- where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
- intersperse_newlines s = s
- newline = singleton '\n'
-
--- | 'unlines\'' behaves like 'unlines', except that it also correctly
--- retores lines that do not have terminating newlines (see the
--- description for 'lines\''). Uses CRLF instead of LF.
---
-unlinesCRLF' :: [ByteString] -> ByteString
-unlinesCRLF' ss = concat $ intersperse_newlines ss
- where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
- intersperse_newlines s = s
- newline = pack "\r\n"
-
--- | 'words\'' behaves like 'words', with the exception that it produces
--- output on ByteStrings with trailing whitespace that can be
--- correctly inverted by 'unwords'. I.e.
---
--- > words "a b c " == ["a","b","c"]
--- > words' "a b c " == ["a","b","c",""]
---
--- > unwords $ words "a b c " == "a b c"
--- > unwords $ words' "a b c " == "a b c "
---
-words' :: ByteString -> [ByteString]
-words' = B.splitWith isSpaceWord8
-
--- | 'unwords\'' behaves like 'unwords'. It is provided for consistency
--- with the other invertable words and lines functions.
-unwords' :: [ByteString] -> ByteString
-unwords' = unwords
-
--- | 'betweenLines' returns the ByteString between the two lines given,
--- or Nothing if they do not appear. The returned string is the first
--- and shortest string such that the line before it is the given first
--- line, and the line after it is the given second line.
-betweenLines :: ByteString -- ^ First line to look for
- -> ByteString -- ^ Second line to look for
- -> ByteString -- ^ 'ByteString' to look in
- -> Maybe (ByteString)
-
-betweenLines start end ps =
- case P.break (start ==) (lines ps) of
- (_, _:rest@(PS ps1 s1 _:_)) ->
- case P.break (end ==) rest of
- (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1)
- _ -> Nothing
- _ -> Nothing
-
-- ---------------------------------------------------------------------
-- Reading from ByteStrings
end True _ n ps = Just (negate n, ps)
end _ _ n ps = Just (n, ps)
--- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
--- slightly faster for one-shot cases.
-map' :: (Char -> Char) -> ByteString -> ByteString
-map' f = B.map' (c2w . f . w2c)
+-- | Read an entire file strictly into a 'ByteString'. This is far more
+-- efficient than reading the characters into a 'String' and then using
+-- 'pack'. It also may be more efficient than opening the file and
+-- reading it using hGet.
+readFile :: FilePath -> IO ByteString
+readFile f = bracket (openFile f ReadMode) hClose
+ (\h -> hFileSize h >>= hGet h . fromIntegral)
+
+-- | Write a 'ByteString' to a file.
+writeFile :: FilePath -> ByteString -> IO ()
+writeFile f txt = bracket (openFile f WriteMode) hClose
+ (\h -> hPut h txt)
+
+-- | Append a 'ByteString' to a file.
+appendFile :: FilePath -> ByteString -> IO ()
+appendFile f txt = bracket (openFile f AppendMode) hClose
+ (\h -> hPut h txt)
--- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
--- around 2x faster for some one-shot applications.
-filter' :: (Char -> Bool) -> ByteString -> ByteString
-filter' f = B.filter' (f . w2c)
-{-# OPTIONS_GHC -cpp -optc-O1 -fffi -fglasgow-exts -fno-warn-incomplete-patterns #-}
---
--- -optc-O2 breaks with 4.0.4 gcc on debian
+{-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
--
-- Module : ByteString.Lazy
-- Copyright : (c) Don Stewart 2006
singleton, -- :: Word8 -> ByteString
pack, -- :: [Word8] -> ByteString
unpack, -- :: ByteString -> [Word8]
- packWith, -- :: (a -> Word8) -> [a] -> ByteString
- unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
-- * Basic interface
cons, -- :: Word8 -> ByteString -> ByteString
-- ** Accumulating maps
mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
+ mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
-- ** Infinite ByteStrings
inits, -- :: ByteString -> [ByteString]
tails, -- :: ByteString -> [ByteString]
- -- ** Breaking and dropping on specific bytes
- breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
- spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
-
-- ** Breaking into many substrings
split, -- :: Word8 -> ByteString -> [ByteString]
splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
- joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
-- * Predicates
isPrefixOf, -- :: ByteString -> ByteString -> Bool
-- ** Searching by equality
elem, -- :: Word8 -> ByteString -> Bool
notElem, -- :: Word8 -> ByteString -> Bool
- filterByte, -- :: Word8 -> ByteString -> ByteString
- filterNotByte, -- :: Word8 -> ByteString -> ByteString
-- ** Searching with a predicate
find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
-- * Ordered ByteStrings
-- sort, -- :: ByteString -> ByteString
+ copy, -- :: ByteString -> ByteString
+
-- * I\/O with 'ByteString's
-- ** Standard input and output
-- ** I\/O with Handles
hGetContents, -- :: Handle -> IO ByteString
- hGetContentsN, -- :: Int -> Handle -> IO ByteString
hGet, -- :: Handle -> Int -> IO ByteString
- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
-#if defined(__GLASGOW_HASKELL__)
hGetNonBlocking, -- :: Handle -> IO ByteString
- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
-#endif
+-- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
+-- hGetContentsN, -- :: Int -> Handle -> IO ByteString
+-- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
) where
import Data.Word (Word8)
import Data.Int (Int64)
-import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..),hClose)
+import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..)
+ ,hClose,hWaitForInput,hIsEOF)
import System.IO.Unsafe
import Control.Exception (bracket)
+import Foreign.ForeignPtr (withForeignPtr)
+import Foreign.Ptr
+import Foreign.Storable
+
#if defined(__GLASGOW_HASKELL__)
import Data.Generics (Data(..), Typeable(..))
#endif
-- and need to share the cache with other programs.
--
defaultChunkSize :: Int
-defaultChunkSize = 64 * k
+defaultChunkSize = 32 * k - overhead
where k = 1024
+ overhead = 2 * sizeOf (undefined :: Int)
smallChunkSize :: Int
-smallChunkSize = 4 * k
+smallChunkSize = 4 * k - overhead
where k = 1024
+ overhead = 2 * sizeOf (undefined :: Int)
-- defaultChunkSize = 1
------------------------------------------------------------------------
+{-
-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
-- conversion function
packWith :: (a -> Word8) -> [a] -> ByteString
unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
{-# INLINE unpackWith #-}
{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
+-}
-- ---------------------------------------------------------------------
-- Basic interface
-- | /O(1)/ Test whether a ByteString is empty.
null :: ByteString -> Bool
null (LPS []) = True
-null (_) = False -- TODO: guarantee this invariant is maintained
+null (_) = False
{-# INLINE null #-}
-- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
length :: ByteString -> Int64
-length (LPS ss) = L.sum (L.map (fromIntegral.P.length) ss)
+length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
-- avoid the intermediate list?
-- length (LPS ss) = L.foldl lengthF 0 ss
last (LPS xs) = P.last (L.last xs)
{-# INLINE last #-}
--- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
+-- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
init :: ByteString -> ByteString
init (LPS []) = errorEmptyList "init"
init (LPS xs)
-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
reverse :: ByteString -> ByteString
-reverse (LPS xs) = LPS (L.reverse . L.map P.reverse $ xs)
+reverse (LPS ps) = LPS (rev [] ps)
+ where rev a [] = a
+ rev a (x:xs) = rev (P.reverse x:a) xs
+-- note, here is one example where the extra element lazyness is an advantage.
+-- we can reerse the list of chunks strictly but reverse each chunk lazily
+-- so while we may force the whole lot into memory we do not need to copy
+-- each chunk until it is used.
{-# INLINE reverse #-}
-- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
maximum :: ByteString -> Word8
-maximum (LPS []) = errorEmptyList "maximum"
-maximum (LPS xs) = L.maximum (L.map P.maximum xs)
+maximum (LPS []) = errorEmptyList "maximum"
+maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
{-# INLINE maximum #-}
-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
minimum :: ByteString -> Word8
-minimum (LPS []) = errorEmptyList "minimum"
-minimum (LPS xs) = L.minimum (L.map P.minimum xs)
+minimum (LPS []) = errorEmptyList "minimum"
+minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
{-# INLINE minimum #-}
-- | The 'mapAccumL' function behaves like a combination of 'map' and
mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
+mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
+mapAccumR = error "mapAccumR unimplemented"
+
-- | /O(n)/ map Word8 functions, provided with the index at each position
mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
takeWhile f (LPS ps) = LPS (takeWhile' ps)
where takeWhile' [] = []
takeWhile' (x:xs) =
- case P.findIndexOrEnd (not . f) x of
+ case findIndexOrEnd (not . f) x of
0 -> []
n | n < P.length x -> P.take n x : []
| otherwise -> x : takeWhile' xs
dropWhile f (LPS ps) = LPS (dropWhile' ps)
where dropWhile' [] = []
dropWhile' (x:xs) =
- case P.findIndexOrEnd (not . f) x of
+ case findIndexOrEnd (not . f) x of
n | n < P.length x -> P.drop n x : xs
| otherwise -> dropWhile' xs
break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
where break' [] = ([], [])
break' (x:xs) =
- case P.findIndexOrEnd f x of
+ case findIndexOrEnd f x of
0 -> ([], x : xs)
n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
| otherwise -> let (xs', xs'') = break' xs
in (x : xs', xs'')
+--
+-- TODO
+--
+-- Add rules
+--
+
+{-
-- | 'breakByte' breaks its ByteString argument at the first occurence
-- of the specified byte. It is more efficient than 'break' as it is
-- implemented with @memchr(3)@. I.e.
| P.null x'' -> let (xs', xs'') = spanByte' xs
in (x : xs', xs'')
| otherwise -> (x' : [], x'' : xs)
+-}
-- | 'span' @p xs@ breaks the ByteString into two segments. It is
-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
-- | The 'groupBy' function is the non-overloaded version of 'group'.
--
groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
+groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
+{-
groupBy _ (LPS []) = []
groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs
groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
+-}
{-
TODO: check if something like this might be faster
join :: ByteString -> [ByteString] -> ByteString
join s = concat . (L.intersperse s)
--- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
--- with a char.
---
-joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
-joinWithByte c x y = append x (cons c y)
-
-- ---------------------------------------------------------------------
-- Indexing ByteStrings
--
-- But more efficiently than using length on the intermediate list.
count :: Word8 -> ByteString -> Int64
-count w (LPS xs) = L.sum (L.map (fromIntegral . P.count w) xs)
+count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
-- | The 'findIndex' function takes a predicate and a 'ByteString' and
-- returns the index of the first element in the ByteString
filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
{-# INLINE filter #-}
+{-
-- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
-- (==)/, for the common case of filtering a single byte. It is more
-- efficient to use /filterByte/ in this case.
-- filterNotByte is around 2x faster than its filter equivalent.
filterNotByte :: Word8 -> ByteString -> ByteString
filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
+-}
-- ---------------------------------------------------------------------
-- Searching for substrings
| otherwise = LPS xs : tails' (P.unsafeTail x : xs')
-- ---------------------------------------------------------------------
+-- Low level constructors
+
+-- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
+-- This is mainly useful to allow the rest of the data pointed
+-- to by the 'ByteString' to be garbage collected, for example
+-- if a large string has been read in, and only a small part of it
+-- is needed in the rest of the program.
+copy :: ByteString -> ByteString
+copy (LPS lps) = LPS (L.map P.copy lps)
+--TODO, we could coalese small blocks here
+--FIXME: probably not strict enough, if we're doing this to avoid retaining
+-- the parent blocks then we'd better copy strictly.
+
+-- ---------------------------------------------------------------------
-- TODO defrag func that concatenates block together that are below a threshold
-- defrag :: Int -> ByteString -> ByteString
-- Lazy ByteString IO
-- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
--- are read on demand, in @k@-sized chunks.
+-- are read on demand, in at most @k@-sized chunks. It does not block
+-- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
+-- available then they will be returned immediately as a smaller chunk.
hGetContentsN :: Int -> Handle -> IO ByteString
hGetContentsN k h = lazyRead >>= return . LPS
where
- lazyRead = unsafeInterleaveIO $ do
- ps <- P.hGet h k
- case P.length ps of
- 0 -> return []
- n | n < k -> return [ps]
- _ -> do pss <- lazyRead
- return (ps : pss)
+ lazyRead = unsafeInterleaveIO loop
+
+ loop = do
+ ps <- P.hGetNonBlocking h k
+ --TODO: I think this should distinguish EOF from no data available
+ -- the otherlying POSIX call makes this distincion, returning either
+ -- 0 or EAGAIN
+ if P.null ps
+ then do eof <- hIsEOF h
+ if eof then return []
+ else hWaitForInput h (-1)
+ >> loop
+ else do pss <- lazyRead
+ return (ps : pss)
-- | Read @n@ bytes into a 'ByteString', directly from the
-- specified 'Handle', in chunks of size @k@.
readChunks i = do
ps <- P.hGet h (min k i)
case P.length ps of
- 0 -> return []
- m | m == i -> return [ps]
- m -> do pss <- readChunks (i - m)
- return (ps : pss)
+ 0 -> return []
+ m -> do pss <- readChunks (i - m)
+ return (ps : pss)
-#if defined(__GLASGOW_HASKELL__)
-- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
-- waiting for data to become available, instead it returns only whatever data
-- is available. Chunks are read on demand, in @k@-sized chunks.
hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
+#if defined(__GLASGOW_HASKELL__)
hGetNonBlockingN _ _ 0 = return empty
hGetNonBlockingN k h n = readChunks n >>= return . LPS
where
+ STRICT1(readChunks)
readChunks i = do
ps <- P.hGetNonBlocking h (min k i)
case P.length ps of
- 0 -> return []
- m | fromIntegral m < i -> return [ps]
- m -> do pss <- readChunks (i - m)
- return (ps : pss)
+ 0 -> return []
+ m -> do pss <- readChunks (i - m)
+ return (ps : pss)
+#else
+hGetNonBlockingN = hGetN
#endif
-- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
hGet :: Handle -> Int -> IO ByteString
hGet = hGetN defaultChunkSize
-#if defined(__GLASGOW_HASKELL__)
-- | hGetNonBlocking is similar to 'hGet', except that it will never block
-- waiting for data to become available, instead it returns only whatever data
-- is available.
+#if defined(__GLASGOW_HASKELL__)
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetNonBlocking = hGetNonBlockingN defaultChunkSize
+#else
+hGetNonBlocking = hGet
#endif
-
-- | Read an entire file /lazily/ into a 'ByteString'.
readFile :: FilePath -> IO ByteString
readFile f = openBinaryFile f ReadMode >>= hGetContents
| otherwise -> y : filterMap f xs
{-# INLINE filterMap #-}
+
+-- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
+-- of the string if no element is found, rather than Nothing.
+findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
+findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
+ where
+ STRICT2(go)
+ go ptr n | n >= l = return l
+ | otherwise = do w <- peek ptr
+ if k w
+ then return n
+ else go (ptr `plusPtr` 1) (n+1)
+{-# INLINE findIndexOrEnd #-}
-{-# OPTIONS_GHC -cpp -optc-O1 -fno-warn-orphans #-}
---
--- -optc-O2 breaks with 4.0.4 gcc on debian
+{-# OPTIONS_GHC -cpp -fno-warn-orphans #-}
--
-- Module : Data.ByteString.Lazy.Char8
-- Copyright : (c) Don Stewart 2006
inits, -- :: ByteString -> [ByteString]
tails, -- :: ByteString -> [ByteString]
- -- ** Breaking and dropping on specific Chars
- breakChar, -- :: Char -> ByteString -> (ByteString, ByteString)
- spanChar, -- :: Char -> ByteString -> (ByteString, ByteString)
-
-- ** Breaking into many substrings
split, -- :: Char -> ByteString -> [ByteString]
splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString]
-- ** Joining strings
join, -- :: ByteString -> [ByteString] -> ByteString
- joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString
-- * Predicates
isPrefixOf, -- :: ByteString -> ByteString -> Bool
-- ** Searching by equality
elem, -- :: Char -> ByteString -> Bool
notElem, -- :: Char -> ByteString -> Bool
- filterChar, -- :: Char -> ByteString -> ByteString
- filterNotChar, -- :: Char -> ByteString -> ByteString
-- ** Searching with a predicate
find, -- :: (Char -> Bool) -> ByteString -> Maybe Char
-- * Ordered ByteStrings
-- sort, -- :: ByteString -> ByteString
+ copy, -- :: ByteString -> ByteString
+
-- * Reading from ByteStrings
readInt,
-- ** I\/O with Handles
hGetContents, -- :: Handle -> IO ByteString
- hGetContentsN, -- :: Int -> Handle -> IO ByteString
hGet, -- :: Handle -> Int64 -> IO ByteString
- hGetN, -- :: Int -> Handle -> Int64 -> IO ByteString
hPut, -- :: Handle -> ByteString -> IO ()
-#if defined(__GLASGOW_HASKELL__)
hGetNonBlocking, -- :: Handle -> IO ByteString
- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
-#endif
+
+-- hGetN, -- :: Int -> Handle -> Int64 -> IO ByteString
+-- hGetContentsN, -- :: Int -> Handle -> IO ByteString
+-- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
) where
-- Functions transparently exported
import Data.ByteString.Lazy
(ByteString(..)
,empty,null,length,tail,init,append,reverse,transpose
- ,concat,take,drop,splitAt,join,isPrefixOf,group,inits, tails
- ,hGetContentsN, hGetN, hGetContents, hGet, hPut, getContents
-#if defined(__GLASGOW_HASKELL__)
- ,hGetNonBlocking, hGetNonBlockingN
-#endif
- ,putStr, putStrLn
- ,readFile, writeFile, appendFile)
+ ,concat,take,drop,splitAt,join,isPrefixOf,group,inits,tails,copy
+ ,hGetContents, hGet, hPut, getContents
+ ,hGetNonBlocking
+ ,putStr, putStrLn, interact)
-- Functions we need to wrap.
import qualified Data.ByteString.Lazy as L
,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter
,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,foldl1,foldr1
,readFile,writeFile,appendFile,replicate,getContents,getLine,putStr,putStrLn
- ,zip,zipWith,unzip,notElem,repeat,iterate)
+ ,zip,zipWith,unzip,notElem,repeat,iterate,interact)
+
+import System.IO (hClose,openFile,IOMode(..))
+import Control.Exception (bracket)
#define STRICT1(f) f a | a `seq` False = undefined
#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
-- | /O(n)/ Convert a 'String' into a 'ByteString'.
pack :: [Char] -> ByteString
-pack = L.packWith c2w
+pack = L.pack. P.map c2w
-- | /O(n)/ Converts a 'ByteString' to a 'String'.
unpack :: ByteString -> [Char]
-unpack = L.unpackWith w2c
+unpack = P.map w2c . L.unpack
{-# INLINE unpack #-}
-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
span f = L.span (f . w2c)
{-# INLINE span #-}
+{-
-- | 'breakChar' breaks its ByteString argument at the first occurence
-- of the specified Char. It is more efficient than 'break' as it is
-- implemented with @memchr(3)@. I.e.
spanChar :: Char -> ByteString -> (ByteString, ByteString)
spanChar = L.spanByte . c2w
{-# INLINE spanChar #-}
+-}
+
+--
+-- TODO, more rules for breakChar*
+--
-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
-- argument, consuming the delimiter. I.e.
groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
groupBy k = L.groupBy (\a b -> k (w2c a) (w2c b))
--- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a
--- char. Around 4 times faster than the generalised join.
---
-joinWithChar :: Char -> ByteString -> ByteString -> ByteString
-joinWithChar = L.joinWithByte . c2w
-{-# INLINE joinWithChar #-}
-
-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
index :: ByteString -> Int64 -> Char
index = (w2c .) . L.index
find f ps = w2c `fmap` L.find (f . w2c) ps
{-# INLINE find #-}
+{-
-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
-- case of filtering a single Char. It is more efficient to use
-- filterChar in this case.
filterNotChar :: Char -> ByteString -> ByteString
filterNotChar c = L.filterNotByte (c2w c)
{-# INLINE filterNotChar #-}
+-}
-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
-- corresponding pairs of Chars. If one input ByteString is short,
| otherwise = ps:pss
in n' `seq` ps' `seq` Just $! (n', LPS ps')
+
+-- | Read an entire file /lazily/ into a 'ByteString'. Use 'text mode'
+-- on Windows to interpret newlines
+readFile :: FilePath -> IO ByteString
+readFile f = openFile f ReadMode >>= hGetContents
+
+-- | Write a 'ByteString' to a file.
+writeFile :: FilePath -> ByteString -> IO ()
+writeFile f txt = bracket (openFile f WriteMode) hClose
+ (\hdl -> hPut hdl txt)
+
+-- | Append a 'ByteString' to a file.
+appendFile :: FilePath -> ByteString -> IO ()
+appendFile f txt = bracket (openFile f AppendMode) hClose
+ (\hdl -> hPut hdl txt)
#include "fpstring.h"
/* copy a string in reverse */
-void fps_reverse(unsigned char *dest, unsigned char *from, int len) {
- unsigned char *p, *q;
- p = from + len - 1;
- q = dest;
-
- while (p >= from)
+void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
+ p += n-1;
+ while (n-- != 0)
*q++ = *p--;
}
/* duplicate a string, interspersing the character through the elements
of the duplicated string */
-void fps_intersperse(unsigned char *dest, unsigned char *from, int len, char c) {
- unsigned char *p, *q;
- p = from;
- q = dest;
- while (p < from + len - 1) {
- *q++ = *p++;
+void fps_intersperse(unsigned char *q,
+ unsigned char *p,
+ unsigned long n,
+ unsigned char c) {
+
+ while (n > 1) {
+ *q++ = *p++;
*q++ = c;
+ n--;
}
- *q = *p;
+ if (n == 1)
+ *q = *p;
}
/* find maximum char in a packed string */
-unsigned char fps_maximum(unsigned char *p, int len) {
+unsigned char fps_maximum(unsigned char *p, unsigned long len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q > c)
}
/* find minimum char in a packed string */
-unsigned char fps_minimum(unsigned char *p, int len) {
+unsigned char fps_minimum(unsigned char *p, unsigned long len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q < c)
}
/* count the number of occurences of a char in a string */
-int fps_count(unsigned char *p, int len, unsigned char w) {
- int c;
- for (c = 0; len--; ++p)
+unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) {
+ unsigned long c;
+ for (c = 0; len-- != 0; ++p)
if (*p == w)
++c;
return c;
-void fps_reverse(unsigned char *dest, unsigned char *from, int len);
-void fps_intersperse(unsigned char *dest, unsigned char *from, int len, char c);
-unsigned char fps_maximum(unsigned char *p, int len);
-unsigned char fps_minimum(unsigned char *p, int len);
-int fps_count(unsigned char *p, int len, unsigned char w);
+void fps_reverse(unsigned char *dest, unsigned char *from, unsigned long len);
+void fps_intersperse(unsigned char *dest, unsigned char *from, unsigned long len, unsigned char c);
+unsigned char fps_maximum(unsigned char *p, unsigned long len);
+unsigned char fps_minimum(unsigned char *p, unsigned long len);
+unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w);