Two things. #if defined(__GLASGOW_HASKELL__) on INLINE [n] pragmas (for jhc). And...

[haskell-directory.git] / Data / ByteString / Char8.hs
diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs

index 9f39e61..4baf8e3 100644 (file)
--- a/Data/ByteString/Char8.hs
+++ b/Data/ByteString/Char8.hs
@@ -1,4 +1,4 @@
-{-# OPTIONS_GHC -cpp -fffi #-}
+{-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
  --
  -- Module      : Data.ByteString.Char8
  -- Copyright   : (c) Don Stewart 2006
@@ -79,7 +79,6 @@ module Data.ByteString.Char8 (
          maximum,                -- :: ByteString -> Char
          minimum,                -- :: ByteString -> Char
          mapIndexed,             -- :: (Int -> Char -> Char) -> ByteString -> ByteString
-        hash,                   -- :: ByteString -> Int32
  
          -- * Generating and unfolding ByteStrings
          replicate,              -- :: Int -> Char -> ByteString
@@ -99,6 +98,7 @@ module Data.ByteString.Char8 (
  
          -- ** Breaking and dropping on specific Chars
          breakChar,              -- :: Char -> ByteString -> (ByteString, ByteString)
+        spanChar,           -- :: Char -> ByteString -> (ByteString, ByteString)
          breakFirst,             -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
          breakLast,              -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
          breakSpace,             -- :: ByteString -> Maybe (ByteString,ByteString)
@@ -109,6 +109,8 @@ module Data.ByteString.Char8 (
          split,                  -- :: Char -> ByteString -> [ByteString]
          splitWith,              -- :: (Char -> Bool) -> ByteString -> [ByteString]
          tokens,                 -- :: (Char -> Bool) -> ByteString -> [ByteString]
+        group,                  -- :: ByteString -> [ByteString]
+        groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  
          -- ** Breaking into lines and words
          lines,                  -- :: ByteString -> [ByteString]
@@ -193,6 +195,7 @@ module Data.ByteString.Char8 (
  
          -- ** Files
          readFile,               -- :: FilePath -> IO ByteString
+--      mmapFile,               -- :: FilePath -> IO ByteString
          writeFile,              -- :: FilePath -> ByteString -> IO ()
  
          -- ** I\/O with Handles
@@ -212,6 +215,14 @@ module Data.ByteString.Char8 (
          unsafePackAddress,      -- :: Int -> Addr# -> ByteString
  #endif
  
+        -- * Utilities (needed for array fusion)
+#if defined(__GLASGOW_HASKELL__)
+        unpackList,
+#endif
+        noAL, NoAL, loopArr, loopAcc, loopSndAcc,
+        loopU, mapEFL, filterEFL, foldEFL, fuseEFL,
+        filterF, mapF
+
      ) where
  
  import qualified Prelude as P
@@ -230,17 +241,20 @@ import qualified Data.ByteString as B
  import Data.ByteString (ByteString(..)
                         ,empty,null,length,tail,init,append
                         ,inits,tails,elems,reverse,transpose
-                       ,concat,hash,take,drop,splitAt,join
+                       ,concat,take,drop,splitAt,join
                         ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring
-                       ,findSubstrings,unsafeTail,copy
+                       ,findSubstrings,unsafeTail,copy,group
  
                         ,getContents, putStr, putStrLn
-                       ,readFile, writeFile
+                       ,readFile, {-mmapFile,-} writeFile
                         ,hGetContents, hGet, hPut
  #if defined(__GLASGOW_HASKELL__)
                         ,getLine, getArgs, hGetLine, hGetNonBlocking
                         ,packAddress, unsafePackAddress
+                       ,unpackList
  #endif
+                       ,noAL, NoAL, loopArr, loopAcc, loopSndAcc
+                       ,loopU, mapEFL, filterEFL, foldEFL, fuseEFL
                         ,useAsCString, unsafeUseAsCString
                         )
  
@@ -365,7 +379,7 @@ foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
  -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  -- and thus must be applied to non-empty 'ByteString's
  foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
-foldr1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
+foldr1 f ps = w2c (B.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
  {-# INLINE foldr1 #-}
  
  -- | Map a function over a 'ByteString' and concatenate the results
@@ -486,6 +500,16 @@ breakChar :: Char -> ByteString -> (ByteString, ByteString)
  breakChar = B.breakByte . c2w
  {-# INLINE breakChar #-}
  
+-- | 'spanChar' breaks its ByteString argument at the first
+-- occurence of a Char other than its argument. It is more efficient
+-- than 'span (==)'
+--
+-- > span  (=='c') "abcd" == spanByte 'c' "abcd"
+--
+spanChar :: Char -> ByteString -> (ByteString, ByteString)
+spanChar = B.spanByte . c2w
+{-# INLINE spanChar #-}
+
  -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
  -- occurence of @w@. It behaves like 'break', except the delimiter is
  -- not returned, and @Nothing@ is returned if the delimiter is not in
@@ -557,6 +581,10 @@ tokens :: (Char -> Bool) -> ByteString -> [ByteString]
  tokens f = B.tokens (f . w2c)
  {-# INLINE tokens #-}
  
+-- | The 'groupBy' function is the non-overloaded version of 'group'.
+groupBy :: (Char -> Char -> Bool) -> ByteString -> [ByteString]
+groupBy k = B.groupBy (\a b -> k (w2c a) (w2c b))
+
  -- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a
  -- char. Around 4 times faster than the generalised join.
  --
@@ -608,6 +636,10 @@ findIndices f = B.findIndices (f . w2c)
  -- | count returns the number of times its argument appears in the ByteString
  --
  -- > count = length . elemIndices
+-- 
+-- Also
+--  
+-- > count '\n' == length . lines
  --
  -- But more efficiently than using length on the intermediate list.
  count :: Char -> ByteString -> Int
@@ -785,6 +817,7 @@ lastnonspace ptr n
  
  -- | 'lines' breaks a ByteString up into a list of ByteStrings at
  -- newline Chars. The resulting strings do not contain newlines.
+--
  lines :: ByteString -> [ByteString]
  lines ps
      | null ps = []
@@ -794,6 +827,13 @@ lines ps
      where search = elemIndex '\n'
  {-# INLINE lines #-}
  
+{-# RULES
+
+"length.lines/count" 
+    P.length . lines = count '\n'
+
+  #-}
+
  {-
  -- Just as fast, but more complex. Should be much faster, I thought.
  lines :: ByteString -> [ByteString]
@@ -826,10 +866,12 @@ unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space
  --
  words :: ByteString -> [ByteString]
  words = B.tokens isSpaceWord8
+{-# INLINE words #-}
  
  -- | The 'unwords' function is analogous to the 'unlines' function, on words.
  unwords :: [ByteString] -> ByteString
  unwords = join (packChar ' ')
+{-# INLINE unwords #-}
  
  -- | /O(n)/ Indicies of newlines. Shorthand for 
  --
@@ -837,6 +879,7 @@ unwords = join (packChar ' ')
  --
  lineIndices :: ByteString -> [Int]
  lineIndices = elemIndices '\n'
+{-# INLINE lineIndices #-}
  
  -- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on
  -- newline Chars. However, unlike the Prelude functions, 'lines\'' and
@@ -992,6 +1035,7 @@ inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
  inlinePerformIO = unsafePerformIO
  #endif
  
+-- Selects white-space characters in the Latin-1 range
  -- ordered by frequency
  -- Idea from Ketil
  isSpaceWord8 :: Word8 -> Bool
@@ -1002,6 +1046,16 @@ isSpaceWord8 w = case w of
      0x0C -> True -- FF, \f
      0x0D -> True -- CR, \r
      0x0B -> True -- VT, \v
+    0xA0 -> True -- spotted by QC..
      _    -> False
  {-# INLINE isSpaceWord8 #-}
  
+-- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
+-- slightly faster for one-shot cases.
+mapF :: (Char -> Char) -> ByteString -> ByteString
+mapF f = B.mapF (c2w . f . w2c)
+
+-- | /O(n)/ 'filterF' is a non-fuseable version of filter, that may be
+-- around 2x faster for some one-shot applications.
+filterF :: (Char -> Bool) -> ByteString -> ByteString
+filterF f = B.filterF (f . w2c)