-{-# OPTIONS_GHC -cpp -fffi #-}
+{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
--
-- Module : Data.ByteString.Char8
-- Copyright : (c) Don Stewart 2006
maximum, -- :: ByteString -> Char
minimum, -- :: ByteString -> Char
mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString
- hash, -- :: ByteString -> Int32
-- * Generating and unfolding ByteStrings
replicate, -- :: Int -> Char -> ByteString
-- ** Breaking and dropping on specific Chars
breakChar, -- :: Char -> ByteString -> (ByteString, ByteString)
+ spanChar, -- :: Char -> ByteString -> (ByteString, ByteString)
breakFirst, -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
breakLast, -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
breakSpace, -- :: ByteString -> Maybe (ByteString,ByteString)
split, -- :: Char -> ByteString -> [ByteString]
splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString]
tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString]
+ group, -- :: ByteString -> [ByteString]
+ groupBy, -- :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
-- ** Breaking into lines and words
lines, -- :: ByteString -> [ByteString]
-- ** Files
readFile, -- :: FilePath -> IO ByteString
+-- mmapFile, -- :: FilePath -> IO ByteString
writeFile, -- :: FilePath -> ByteString -> IO ()
-- ** I\/O with Handles
unsafePackAddress, -- :: Int -> Addr# -> ByteString
#endif
+ -- * Utilities (needed for array fusion)
+#if defined(__GLASGOW_HASKELL__)
+ unpackList,
+#endif
+ noAL, NoAL, loopArr, loopAcc, loopSndAcc,
+ loopU, mapEFL, filterEFL, foldEFL, fuseEFL,
+ filterF, mapF
+
) where
import qualified Prelude as P
import Data.ByteString (ByteString(..)
,empty,null,length,tail,init,append
,inits,tails,elems,reverse,transpose
- ,concat,hash,take,drop,splitAt,join
+ ,concat,take,drop,splitAt,join
,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring
- ,findSubstrings,unsafeTail,copy
+ ,findSubstrings,unsafeTail,copy,group
,getContents, putStr, putStrLn
- ,readFile, writeFile
+ ,readFile, {-mmapFile,-} writeFile
,hGetContents, hGet, hPut
#if defined(__GLASGOW_HASKELL__)
,getLine, getArgs, hGetLine, hGetNonBlocking
,packAddress, unsafePackAddress
+ ,unpackList
#endif
+ ,noAL, NoAL, loopArr, loopAcc, loopSndAcc
+ ,loopU, mapEFL, filterEFL, foldEFL, fuseEFL
,useAsCString, unsafeUseAsCString
)
-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
-- and thus must be applied to non-empty 'ByteString's
foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
-foldr1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
+foldr1 f ps = w2c (B.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
{-# INLINE foldr1 #-}
-- | Map a function over a 'ByteString' and concatenate the results
breakChar = B.breakByte . c2w
{-# INLINE breakChar #-}
+-- | 'spanChar' breaks its ByteString argument at the first
+-- occurrence of a Char other than its argument. It is more efficient
+-- than 'span (==)'
+--
+-- > span (=='c') "abcd" == spanChar 'c' "abcd"
+--
+spanChar :: Char -> ByteString -> (ByteString, ByteString)
+spanChar = B.spanByte . c2w
+{-# INLINE spanChar #-}
+
-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
-- occurrence of @w@. It behaves like 'break', except the delimiter is
-- not returned, and @Nothing@ is returned if the delimiter is not in
tokens f = B.tokens (f . w2c)
{-# INLINE tokens #-}
+-- | The 'groupBy' function is the non-overloaded version of 'group'.
+groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
+groupBy k = B.groupBy (\a b -> k (w2c a) (w2c b))
+
-- | /O(n)/ joinWithChar. An efficient way to join two ByteStrings with a
-- char. Around 4 times faster than the generalised join.
--
-- | count returns the number of times its argument appears in the ByteString
--
-- > count = length . elemIndices
+--
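+-- Also, for input that is empty or ends with a newline,
+--
+-- > count '\n' == length . lines
+--
+-- (when the final line is not newline-terminated, 'lines' yields one
+-- more element than there are newlines, so the equality does not hold).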
--
-- But more efficiently than using length on the intermediate list.
count :: Char -> ByteString -> Int
-- | 'lines' breaks a ByteString up into a list of ByteStrings at
-- newline Chars. The resulting strings do not contain newlines.
+--
lines :: ByteString -> [ByteString]
lines ps
| null ps = []
where search = elemIndex '\n'
{-# INLINE lines #-}
+{-# RULES
+
+"length.lines/count"
+ P.length . lines = count '\n'
+
+ #-}
+
{-
-- Just as fast, but more complex. Should be much faster, I thought.
lines :: ByteString -> [ByteString]
--
words :: ByteString -> [ByteString]
words = B.tokens isSpaceWord8
+{-# INLINE words #-}
-- | The 'unwords' function is analogous to the 'unlines' function, on words.
unwords :: [ByteString] -> ByteString
unwords = join (packChar ' ')
+{-# INLINE unwords #-}
-- | /O(n)/ Indices of newlines. Shorthand for
--
--
lineIndices :: ByteString -> [Int]
lineIndices = elemIndices '\n'
+{-# INLINE lineIndices #-}
-- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on
-- newline Chars. However, unlike the Prelude functions, 'lines\'' and
inlinePerformIO = unsafePerformIO
#endif
+-- Selects white-space characters in the Latin-1 range
-- ordered by frequency
-- Idea from Ketil
isSpaceWord8 :: Word8 -> Bool
0x0C -> True -- FF, \f
0x0D -> True -- CR, \r
0x0B -> True -- VT, \v
+ 0xA0 -> True -- spotted by QC..
_ -> False
{-# INLINE isSpaceWord8 #-}
+-- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
+-- slightly faster for one-shot cases.
+mapF :: (Char -> Char) -> ByteString -> ByteString
+mapF f = B.mapF (c2w . f . w2c)
+
+-- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
+-- around 2x faster for some one-shot applications.
+filterF :: (Char -> Bool) -> ByteString -> ByteString
+filterF f = B.filterF (f . w2c)