From: Don Stewart Date: Fri, 28 Apr 2006 13:07:18 +0000 (+0000) Subject: Import Data.ByteString from fps 0.5. X-Git-Tag: 2007-09-13~385 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=09521ae6f260453e810280f09d85a44ec3e54089;p=ghc-base.git Import Data.ByteString from fps 0.5. Fast, packed byte vectors, providing a better PackedString. --- diff --git a/Data/ByteString.hs b/Data/ByteString.hs new file mode 100644 index 0000000..7350eb8 --- /dev/null +++ b/Data/ByteString.hs @@ -0,0 +1,1921 @@ +{-# OPTIONS_GHC -cpp -fffi #-} +-- +-- Module : ByteString +-- Copyright : (c) The University of Glasgow 2001, +-- (c) David Roundy 2003-2005, +-- (c) Simon Marlow 2005 +-- (c) Don Stewart 2005-2006 +-- (c) Bjorn Bringert 2006 +-- License : BSD-style +-- +-- Maintainer : dons@cse.unsw.edu.au +-- Stability : experimental +-- Portability : portable, requires ffi and cpp +-- Tested with : GHC 6.4.1 and Hugs March 2005 +-- + +-- +-- | A time and space-efficient implementation of byte vectors using +-- packed Word8 arrays, suitable for high performance use, both in terms +-- of large data quantities, or high speed requirements. Byte vectors +-- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can +-- be passed between C and Haskell with little effort. +-- +-- This module is intended to be imported @qualified@, to avoid name +-- clashes with Prelude functions. eg. +-- +-- > import qualified Data.ByteString as B +-- +-- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use +-- UArray by Simon Marlow. Rewritten to support slices and use +-- ForeignPtr by David Roundy. Polished and extended by Don Stewart. +-- + +module Data.ByteString ( + + -- * The @ByteString@ type + ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + + -- * Introducing and eliminating 'ByteString's + empty, -- :: ByteString + packByte, -- :: Word8 -> ByteString + pack, -- :: [Word8] -> ByteString + unpack, -- :: ByteString -> [Word8] + packWith, -- :: (a -> Word8) -> [a] -> ByteString + unpackWith, -- :: (Word8 -> a) -> ByteString -> [a] + + -- * Basic interface + cons, -- :: Word8 -> ByteString -> ByteString + snoc, -- :: Word8 -> ByteString -> ByteString + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int + head, -- :: ByteString -> Word8 + tail, -- :: ByteString -> ByteString + last, -- :: ByteString -> Word8 + init, -- :: ByteString -> ByteString + append, -- :: ByteString -> ByteString -> ByteString + + -- * Special ByteStrings + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] + elems, -- :: ByteString -> [ByteString] + + -- * Transformating ByteStrings + map, -- :: (Word8 -> Word8) -> ByteString -> ByteString + reverse, -- :: ByteString -> ByteString + intersperse, -- :: Word8 -> ByteString -> ByteString + transpose, -- :: [ByteString] -> [ByteString] + + -- * Reducing 'ByteString's + foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a + foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a + foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 + + -- ** Special folds + concat, -- :: [ByteString] -> ByteString + concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString + any, -- :: (Word8 -> Bool) -> ByteString -> Bool + all, -- :: (Word8 -> Bool) -> ByteString -> Bool + maximum, -- :: ByteString -> Word8 + minimum, -- :: ByteString -> Word8 + mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString + hash, -- :: ByteString -> Int32 + + -- * Generating and unfolding ByteStrings + replicate, -- :: Int -> Word8 -> ByteString + unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString + + -- * Substrings + + -- ** Breaking strings + take, -- :: Int -> ByteString -> ByteString + drop, -- :: Int -> ByteString -> ByteString + splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) + takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString + dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString + break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) + + -- ** Breaking and dropping on specific bytes + breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString) + breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString) + breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString) + + -- ** Breaking into many substrings + split, -- :: Word8 -> ByteString -> [ByteString] + splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] + tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] + + -- ** Joining strings + join, -- :: ByteString -> [ByteString] -> ByteString + joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString + + -- * Indexing ByteStrings + index, -- :: ByteString -> Int -> Word8 + elemIndex, -- :: Word8 -> ByteString -> Maybe Int + elemIndices, -- :: Word8 -> ByteString -> [Int] + elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int + findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int + findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] + count, -- :: Word8 -> ByteString -> Int + + -- * Ordered ByteStrings + sort, -- :: ByteString -> ByteString + + -- * Searching ByteStrings + + -- ** Searching by equality + -- | These functions use memchr(3) to efficiently search the ByteString + + elem, -- :: Word8 -> ByteString -> Bool + notElem, -- :: Word8 -> ByteString -> Bool + filterByte, -- :: Word8 -> ByteString -> ByteString + filterNotByte, -- :: Word8 -> ByteString -> ByteString + + -- ** Searching with a predicate + filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString + find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 + + -- ** Prefixes and suffixes + -- | These functions use memcmp(3) to efficiently compare substrings + isPrefixOf, -- :: ByteString -> ByteString -> Bool + isSuffixOf, -- :: ByteString -> ByteString -> Bool + + -- ** Search for arbitrary substrings + isSubstringOf, -- :: ByteString -> ByteString -> Bool + findSubstring, -- :: ByteString -> ByteString -> Maybe Int + findSubstrings, -- :: ByteString -> ByteString -> [Int] + + -- * Zipping and unzipping ByteStrings + zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] + zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] + unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) + + -- * Unchecked access + unsafeHead, -- :: ByteString -> Word8 + unsafeTail, -- :: ByteString -> ByteString + unsafeIndex, -- :: ByteString -> Int -> Word8 + + -- * Low level introduction and elimination + generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString + create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString + fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString + toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int) + skipIndex, -- :: ByteString -> Int + + -- ** Packing CStrings and pointers + packCString, -- :: CString -> ByteString + packCStringLen, -- :: CString -> ByteString + packMallocCString, -- :: CString -> ByteString + +#if defined(__GLASGOW_HASKELL__) + packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString + packAddress, -- :: Addr# -> ByteString + unsafePackAddress, -- :: Int -> Addr# -> ByteString + unsafeFinalize, -- :: ByteString -> IO () +#endif + + -- ** Using ByteStrings as CStrings + useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a + unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a + unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a + + -- ** Copying ByteStrings + -- | These functions perform memcpy(3) operations + copy, -- :: ByteString -> ByteString + copyCString, -- :: CString -> ByteString + copyCStringLen, -- :: CStringLen -> ByteString + + -- * I\/O with @ByteString@s + + -- ** Standard input and output + +#if defined(__GLASGOW_HASKELL__) + getLine, -- :: IO ByteString +#endif + getContents, -- :: IO ByteString + putStr, -- :: ByteString -> IO () + putStrLn, -- :: ByteString -> IO () + + -- ** Files + readFile, -- :: FilePath -> IO ByteString + writeFile, -- :: FilePath -> ByteString -> IO () + + -- ** I\/O with Handles +#if defined(__GLASGOW_HASKELL__) + getArgs, -- :: IO [ByteString] + hGetLine, -- :: Handle -> IO ByteString + hGetNonBlocking, -- :: Handle -> Int -> IO ByteString +#endif + hGetContents, -- :: Handle -> IO ByteString + hGet, -- :: Handle -> Int -> IO ByteString + hPut, -- :: Handle -> ByteString -> IO () + +#if defined(__GLASGOW_HASKELL__) + -- * Miscellaneous + unpackList, -- eek, otherwise it gets thrown away by the simplifier +#endif + + ) where + +import qualified Prelude as P +import Prelude hiding (reverse,head,tail,last,init,null + ,length,map,lines,foldl,foldr,unlines + ,concat,any,take,drop,splitAt,takeWhile + ,dropWhile,span,break,elem,filter,maximum + ,minimum,all,concatMap,foldl1,foldr1 + ,readFile,writeFile,replicate + ,getContents,getLine,putStr,putStrLn + ,zip,zipWith,unzip,notElem) + +import qualified Data.List as List + +import Data.Char +import Data.Word (Word8) +import Data.Int (Int32) +import Data.Bits (rotateL) +import Data.Maybe (listToMaybe) +import Data.Array (listArray) +import qualified Data.Array as Array ((!)) + +import Control.Exception (bracket) + +import Foreign.C.Types (CSize, CInt) +import Foreign.C.String (CString, CStringLen) +import Foreign.Storable +import Foreign.ForeignPtr +import Foreign.Ptr +import Foreign.Marshal.Array + +import System.IO (stdin,stdout,hClose,hFileSize + ,hGetBuf,hPutBuf,openBinaryFile + ,Handle,IOMode(..)) + +#if defined(__GLASGOW_HASKELL__) + +import System.IO (hGetBufNonBlocking) + +import qualified Foreign.Concurrent as FC (newForeignPtr) + +import Data.Generics (Data(..), Typeable(..)) + +import System.IO.Error (isEOFError) +import Foreign.Marshal (alloca) + +import GHC.Handle +import GHC.Prim +import GHC.Base (build, unsafeChr) +import GHC.Word hiding (Word8) +import GHC.Ptr (Ptr(..)) +import GHC.ST (ST(..)) +import GHC.IOBase + +#else + +import System.IO.Unsafe + +#endif + +-- ----------------------------------------------------------------------------- +-- +-- Useful macros, until we have bang patterns +-- + +#define STRICT1(f) f a | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined + +-- ----------------------------------------------------------------------------- + +-- | A space-efficient representation of a Word8 vector, supporting many +-- efficient operations. A 'ByteString' contains 8-bit characters only. +-- +-- Instances of Eq, Ord, Read, Show, Data, Typeable +-- +data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) + {-# UNPACK #-} !Int + {-# UNPACK #-} !Int + +#if defined(__GLASGOW_HASKELL__) + deriving (Data, Typeable) +#endif + +instance Eq ByteString + where (==) = eq + +instance Ord ByteString + where compare = compareBytes + +instance Show ByteString where + showsPrec p ps r = showsPrec p (unpackWith w2c ps) r + +instance Read ByteString where + readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ] + +{- +instance Arbitrary PackedString where + arbitrary = P.pack `fmap` arbitrary + coarbitrary s = coarbitrary (P.unpack s) +-} + +-- | /O(n)/ Equality on the 'ByteString' type. +eq :: ByteString -> ByteString -> Bool +eq a b = (compareBytes a b) == EQ +{-# INLINE eq #-} + +-- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. +compareBytes :: ByteString -> ByteString -> Ordering +compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings +compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $ + withForeignPtr x1 $ \p1 -> + withForeignPtr x2 $ \p2 -> do + i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2) + return $ case i `compare` 0 of + EQ -> l1 `compare` l2 + x -> x +{-# INLINE compareBytes #-} + +{- +-- +-- About 4x slower over 32M +-- +compareBytes :: ByteString -> ByteString -> Ordering +compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $ + withForeignPtr fp1 $ \p1 -> + withForeignPtr fp2 $ \p2 -> + cmp (p1 `plusPtr` off1) + (p2 `plusPtr` off2) 0 len1 len2 + +cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering +STRICT5(cmp) +cmp p1 p2 n len1 len2 + | n == len1 = if n == len2 then return EQ else return LT + | n == len2 = return GT + | otherwise = do + (a :: Word8) <- peekByteOff p1 n + (b :: Word8) <- peekByteOff p2 n + case a `compare` b of + EQ -> cmp p1 p2 (n+1) len1 len2 + LT -> return LT + GT -> return GT +{-# INLINE compareBytes #-} +-} + +-- ----------------------------------------------------------------------------- +-- Introducing and eliminating 'ByteString's + +-- | /O(1)/ The empty 'ByteString' +empty :: ByteString +empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0 +{-# NOINLINE empty #-} + +-- | /O(1)/ Convert a 'Word8' into a 'ByteString' +packByte :: Word8 -> ByteString +packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do + withForeignPtr fp $ \p -> poke p c + return $ PS fp 0 1 +{-# NOINLINE packByte #-} + +-- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. +-- +-- For applications with large numbers of string literals, pack can be a +-- bottleneck. In such cases, consider using packAddress (GHC only). +pack :: [Word8] -> ByteString + +#if !defined(__GLASGOW_HASKELL__) + +pack str = create (P.length str) $ \p -> go p str + where + go _ [] = return () + go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff + +#else /* hack away */ + +pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) + where + go _ _ [] = return () + go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs + + writeByte p i c = ST $ \s# -> + case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #) + +#endif + +-- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. +unpack :: ByteString -> [Word8] + +#if !defined(__GLASGOW_HASKELL__) + +unpack (PS _ _ 0) = [] +unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> + go (p `plusPtr` s) (l - 1) [] + where + STRICT3(go) + go p 0 acc = peek p >>= \e -> return (e : acc) + go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc) +{-# INLINE unpack #-} + +#else + +unpack ps = build (unpackFoldr ps) +{-# INLINE unpack #-} + +unpackList :: ByteString -> [Word8] +unpackList (PS fp off len) = withPtr fp $ \p -> do + let STRICT3(loop) + loop _ (-1) acc = return acc + loop q n acc = do + a <- peekByteOff q n + loop q (n-1) (a : acc) + loop (p `plusPtr` off) (len-1) [] + +{-# RULES +"unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p + #-} + +unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a +unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do + let STRICT3(loop) + loop _ (-1) acc = return acc + loop q n acc = do + a <- peekByteOff q n + loop q (n-1) (a `f` acc) + loop (p `plusPtr` off) (len-1) ch +{-# INLINE [0] unpackFoldr #-} + +#endif + +------------------------------------------------------------------------ + +-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some +-- conversion function +packWith :: (a -> Word8) -> [a] -> ByteString +packWith k str = create (P.length str) $ \p -> go p str + where + STRICT2(go) + go _ [] = return () + go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff +{-# INLINE packWith #-} +{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} + +-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. +unpackWith :: (Word8 -> a) -> ByteString -> [a] +unpackWith _ (PS _ _ 0) = [] +unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> + go (p `plusPtr` s) (l - 1) [] + where + STRICT3(go) + go p 0 acc = peek p >>= \e -> return (k e : acc) + go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc) +{-# INLINE unpackWith #-} +{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} + +-- --------------------------------------------------------------------- +-- Basic interface + +-- | /O(1)/ Test whether a ByteString is empty. +null :: ByteString -> Bool +null (PS _ _ l) = l == 0 +{-# INLINE null #-} + +-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. +length :: ByteString -> Int +length (PS _ _ l) = l +{-# INLINE length #-} + +-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different +-- complexity, as it requires a memcpy. +cons :: Word8 -> ByteString -> ByteString +cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do + memcpy (p `plusPtr` 1) (f `plusPtr` s) l + poke p c +{-# INLINE cons #-} + +-- | /O(n)/ Append a byte to the end of a 'ByteString' +snoc :: ByteString -> Word8 -> ByteString +snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do + memcpy p (f `plusPtr` s) l + poke (p `plusPtr` l) c +{-# INLINE snoc #-} + +-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. +head :: ByteString -> Word8 +head ps@(PS x s _) + | null ps = errorEmptyList "head" + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s +{-# INLINE head #-} + +-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. +tail :: ByteString -> ByteString +tail (PS p s l) + | l <= 0 = errorEmptyList "tail" + | otherwise = PS p (s+1) (l-1) +{-# INLINE tail #-} + +-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. +last :: ByteString -> Word8 +last ps@(PS x s l) + | null ps = errorEmptyList "last" + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1) +{-# INLINE last #-} + +-- | /O(1)/ Return all the elements of a 'ByteString' except the last one. +init :: ByteString -> ByteString +init (PS p s l) + | l <= 0 = errorEmptyList "init" + | otherwise = PS p s (l-1) +{-# INLINE init #-} + +-- | /O(n)/ Append two ByteStrings +append :: ByteString -> ByteString -> ByteString +append xs ys | null xs = ys + | null ys = xs + | otherwise = concat [xs,ys] +{-# INLINE append #-} + +{- +-- +-- About 30% faster, but allocating in a big chunk isn't good for memory use +-- +append :: ByteString -> ByteString -> ByteString +append xs@(PS ffp s l) ys@(PS fgp t m) + | null xs = ys + | null ys = xs + | otherwise = create len $ \ptr -> + withForeignPtr ffp $ \fp -> + withForeignPtr fgp $ \gp -> do + memcpy ptr (fp `plusPtr` s) l + memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m + where len = length xs + length ys +-} + +-- --------------------------------------------------------------------- +-- Transformations + +-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each +-- element of @xs@ +-- +map :: (Word8 -> Word8) -> ByteString -> ByteString +map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do + new_fp <- mallocByteString len + withForeignPtr new_fp $ \new_p -> do + map_ f (len-1) (p `plusPtr` start) new_p + return (PS new_fp 0 len) +{-# INLINE map #-} + +map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO () +STRICT4(map_) +map_ f n p1 p2 + | n < 0 = return () + | otherwise = do + x <- peekByteOff p1 n + pokeByteOff p2 n (f x) + map_ f (n-1) p1 p2 +{-# INLINE map_ #-} + +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. +reverse :: ByteString -> ByteString +reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> + c_reverse p (f `plusPtr` s) l + +-- reverse = pack . P.reverse . unpack + +-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a +-- 'ByteString' and \`intersperses\' that byte between the elements of +-- the 'ByteString'. It is analogous to the intersperse function on +-- Lists. +intersperse :: Word8 -> ByteString -> ByteString +intersperse c ps@(PS x s l) + | length ps < 2 = ps + | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f -> + c_intersperse p (f `plusPtr` s) l c + +-- intersperse c = pack . List.intersperse c . unpack + +-- | The 'transpose' function transposes the rows and columns of its +-- 'ByteString' argument. +transpose :: [ByteString] -> [ByteString] +transpose ps = P.map pack (List.transpose (P.map unpack ps)) + +-- --------------------------------------------------------------------- +-- Reducing 'ByteString's + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a ByteString, reduces the +-- ByteString using the binary operator, from left to right. +foldl :: (a -> Word8 -> a) -> a -> ByteString -> a +foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> + lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) + where + STRICT3(lgo) + lgo z p q | p == q = return z + | otherwise = do c <- peek p + lgo (f z c) (p `plusPtr` 1) q + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a ByteString, +-- reduces the ByteString using the binary operator, from right to left. +foldr :: (Word8 -> a -> a) -> a -> ByteString -> a +foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> + go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) + where + STRICT2(go) + go p q | p == q = return z + | otherwise = do c <- peek p + ws <- go (p `plusPtr` 1) q + return $ c `k` ws + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'ByteStrings'. +foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldl1 f ps + | null ps = errorEmptyList "foldl1" + | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'ByteString's +foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldr1 f ps + | null ps = errorEmptyList "foldr1" + | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps)) + +-- --------------------------------------------------------------------- +-- Special folds + +-- | /O(n)/ Concatenate a list of ByteStrings. +concat :: [ByteString] -> ByteString +concat [] = empty +concat [ps] = ps +concat xs = inlinePerformIO $ do + let start_size = 1024 + p <- mallocArray start_size + f p 0 1024 xs + + where f ptr len _ [] = do + ptr' <- reallocArray ptr (len+1) + poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work + fp <- newForeignFreePtr ptr' + return $ PS fp 0 len + + f ptr len to_go pss@(PS p s l:pss') + | l <= to_go = do withForeignPtr p $ \pf -> + memcpy (ptr `plusPtr` len) + (pf `plusPtr` s) l + f ptr (len + l) (to_go - l) pss' + + | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l) + ptr' <- reallocArray ptr new_total + f ptr' len (new_total - len) pss + +-- | Map a function over a 'ByteString' and concatenate the results +concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString +concatMap f = foldr (append . f) empty + +-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if +-- any element of the 'ByteString' satisfies the predicate. +any :: (Word8 -> Bool) -> ByteString -> Bool +any _ (PS _ _ 0) = False +any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> + go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) + where + STRICT2(go) + go p q | p == q = return False + | otherwise = do c <- peek p + if f c then return True + else go (p `plusPtr` 1) q + +-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines +-- if all elements of the 'ByteString' satisfy the predicate. +all :: (Word8 -> Bool) -> ByteString -> Bool +all _ (PS _ _ 0) = True +all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> + go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) + where + STRICT2(go) + go p q | p == q = return True -- end of list + | otherwise = do c <- peek p + if f c + then go (p `plusPtr` 1) q + else return False + +-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' +maximum :: ByteString -> Word8 +maximum xs@(PS x s l) + | null xs = errorEmptyList "maximum" + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> + return $ c_maximum (p `plusPtr` s) l + +-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' +minimum :: ByteString -> Word8 +minimum xs@(PS x s l) + | null xs = errorEmptyList "minimum" + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> + return $ c_minimum (p `plusPtr` s) l + +{- +maximum xs@(PS x s l) + | null xs = errorEmptyList "maximum" + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do + w <- peek p + maximum_ (p `plusPtr` s) 0 l w +{-# INLINE maximum #-} + +maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 +STRICT4(maximum_) +maximum_ ptr n m c + | n >= m = return c + | otherwise = do w <- peekByteOff ptr n + maximum_ ptr (n+1) m (if w > c then w else c) + +minimum xs@(PS x s l) + | null xs = errorEmptyList "minimum" + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do + w <- peek p + minimum_ (p `plusPtr` s) 0 l w +{-# INLINE minimum #-} + +minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 +STRICT4(minimum_) +minimum_ ptr n m c + | n >= m = return c + | otherwise = do w <- peekByteOff ptr n + minimum_ ptr (n+1) m (if w < c then w else c) +-} +-- | /O(n)/ map Word8 functions, provided with the index at each position +mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString +mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f -> + go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l) + where + go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () + STRICT4(go) + go n f t p | f == p = return () + | otherwise = do w <- peek f + ((poke t) . k n) w + go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p + +-- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key. +hash :: ByteString -> Int32 +hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> + go (0 :: Int32) (p `plusPtr` s) l + where + go :: Int32 -> Ptr Word8 -> Int -> IO Int32 + STRICT3(go) + go h _ 0 = return h + go h p n = do w <- peek p + go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1) + +-- --------------------------------------------------------------------- +-- Unfolds and replicates + +-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ +-- the value of every element. The following holds: +-- +-- > replicate w c = unfoldr w (\u -> Just (u,u)) c +-- +-- This implemenation uses @memset(3)@ +replicate :: Int -> Word8 -> ByteString +replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return () + +{- +-- About 5x slower +replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w + where + STRICT2(go) + go _ 0 = return w + go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1) +-} + +-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'. +-- 'unfoldrN' builds a ByteString from a seed value. The function takes +-- the element and returns 'Nothing' if it is done producing the +-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a +-- prepending to the ByteString and @b@ is used as the next element in a +-- recursive call. +-- +-- To preven unfoldrN having /O(n^2)/ complexity (as prepending a +-- character to a ByteString is /O(n)/, this unfoldr requires a maximum +-- final size of the ByteString as an argument. 'cons' can then be +-- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has +-- linear complexity. The depth of the recursion is limited to this +-- size, but may be less. For lazy, infinite unfoldr, use +-- 'Data.List.unfoldr' (from 'Data.List'). +-- +-- Examples: +-- +-- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789" +-- +-- The following equation connects the depth-limited unfoldr to the List unfoldr: +-- +-- > unfoldrN n == take n $ List.unfoldr +unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString +unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0 + where + STRICT3(go) + go q c n | n == i = return n -- stop if we reach `i' + | otherwise = case f c of + Nothing -> return n + Just (a,new_c) -> do + poke q a + go (q `plusPtr` 1) new_c (n+1) + +-- --------------------------------------------------------------------- +-- Substrings + +-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. +take :: Int -> ByteString -> ByteString +take n ps@(PS x s l) + | n < 0 = empty + | n >= l = ps + | otherwise = PS x s n +{-# INLINE take #-} + +-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ +-- elements, or @[]@ if @n > 'length' xs@. +drop :: Int -> ByteString -> ByteString +drop n ps@(PS x s l) + | n <= 0 = ps + | n > l = empty + | otherwise = PS x (s+n) (l-n) +{-# INLINE drop #-} + +-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. +splitAt :: Int -> ByteString -> (ByteString, ByteString) +splitAt n ps = (take n ps, drop n ps) +{-# INLINE splitAt #-} + +-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, +-- returns the longest prefix (possibly empty) of @xs@ of elements that +-- satisfy @p@. +takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString +takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps +{-# INLINE takeWhile #-} + +-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. +dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString +dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps +{-# INLINE dropWhile #-} + +-- | 'break' @p@ is equivalent to @'span' ('not' . p)@. +break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) +break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps) +{-# INLINE break #-} + +-- | 'breakByte' breaks its ByteString argument at the first occurence +-- of the specified byte. It is more efficient than 'break' as it is +-- implemented with @memchr(3)@. I.e. +-- +-- > break (=='c') "abcd" == breakByte 'c' "abcd" +-- +breakByte :: Word8 -> ByteString -> (ByteString, ByteString) +breakByte c p = case elemIndex c p of + Nothing -> (p,empty) + Just n -> (take n p, drop n p) +{-# INLINE breakByte #-} + +-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first +-- occurence of @w@. It behaves like 'break', except the delimiter is +-- not returned, and @Nothing@ is returned if the delimiter is not in +-- the ByteString. I.e. +-- +-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc") +-- +-- > breakFirst c xs == +-- > let (x,y) = break (== c) xs +-- > in if null y then Nothing else Just (x, drop 1 y)) +-- +breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString) +breakFirst c p = case elemIndex c p of + Nothing -> Nothing + Just n -> Just (take n p, drop (n+1) p) +{-# INLINE breakFirst #-} + +-- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the +-- ByteString. +-- +-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc") +-- +-- and the following are equivalent: +-- +-- > breakLast 'c' "abcdef" +-- > let (x,y) = break (=='c') (reverse "abcdef") +-- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x) +-- +breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString) +breakLast c p = case elemIndexLast c p of + Nothing -> Nothing + Just n -> Just (take n p, drop (n+1) p) +{-# INLINE breakLast #-} + +-- | 'span' @p xs@ breaks the ByteString into two segments. It is +-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ +span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) +span p ps = break (not . p) ps +{-# INLINE span #-} + +-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. +-- We have +-- +-- > spanEnd (not.isSpace) "x y z" == ("x y ","z") +-- +-- and +-- +-- > spanEnd (not . isSpace) ps +-- > == +-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) +-- +spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) +spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps + +-- | /O(n)/ Splits a 'ByteString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] +-- > splitWith (=='a') [] == [] +-- +splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] + +#if defined(__GLASGOW_HASKELL__) +splitWith _pred (PS _ _ 0) = [] +splitWith pred_ (PS fp off len) = splitWith' pred# off len fp + where pred# c# = pred_ (W8# c#) + + splitWith' pred' off' len' fp' = withPtr fp $ \p -> + splitLoop pred' p 0 off' len' fp' + + splitLoop :: (Word# -> Bool) + -> Ptr Word8 + -> Int -> Int -> Int + -> ForeignPtr Word8 + -> IO [ByteString] + + splitLoop pred' p idx' off' len' fp' + | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined + | idx' >= len' = return [PS fp' off' idx'] + | otherwise = do + w <- peekElemOff p (off'+idx') + if pred' (case w of W8# w# -> w#) + then return (PS fp' off' idx' : + splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp') + else splitLoop pred' p (idx'+1) off' len' fp' +{-# INLINE splitWith #-} + +#else +splitWith _ (PS _ _ 0) = [] +splitWith p ps = splitWith' p ps + where + STRICT2(splitWith') + splitWith' q qs = if null rest then [chunk] + else chunk : splitWith' q (unsafeTail rest) + where (chunk,rest) = break q qs +#endif + +-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] +-- > split 'a' "aXaXaXa" == ["","X","X","X"] +-- > split 'x' "x" == ["",""] +-- +-- and +-- +-- > join [c] . split c == id +-- > split == splitWith . (==) +-- +-- As for all splitting functions in this library, this function does +-- not copy the substrings, it just constructs new 'ByteStrings' that +-- are slices of the original. +-- +split :: Word8 -> ByteString -> [ByteString] +split _ (PS _ _ 0) = [] +split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + let ptr = p `plusPtr` s + + STRICT1(loop) + loop n = do + let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n)) + if q == nullPtr + then return [PS x (s+n) (l-n)] + else do let i = q `minusPtr` ptr + ls <- loop (i+1) + return $! PS x (s+n) (i-n) : ls + loop 0 +{-# INLINE split #-} + +{- +-- slower. but stays inside Haskell. +split _ (PS _ _ 0) = [] +split (W8# w#) (PS fp off len) = splitWith' off len fp + where + splitWith' off' len' fp' = withPtr fp $ \p -> + splitLoop p 0 off' len' fp' + + splitLoop :: Ptr Word8 + -> Int -> Int -> Int + -> ForeignPtr Word8 + -> IO [ByteString] + + STRICT5(splitLoop) + splitLoop p idx' off' len' fp' + | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined + | idx' >= len' = return [PS fp' off' idx'] + | otherwise = do + (W8# x#) <- peekElemOff p (off'+idx') + if word2Int# w# ==# word2Int# x# + then return (PS fp' off' idx' : + splitWith' (off'+idx'+1) (len'-idx'-1) fp') + else splitLoop p (idx'+1) off' len' fp' +-} + +-- | Like 'splitWith', except that sequences of adjacent separators are +-- treated as a single separator. eg. +-- +-- > tokens (=='a') "aabbaca" == ["bb","c"] +-- +tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] +tokens f = P.filter (not.null) . splitWith f + +-- | /O(n)/ The 'join' function takes a 'ByteString' and a list of +-- 'ByteString's and concatenates the list after interspersing the first +-- argument between each element of the list. +join :: ByteString -> [ByteString] -> ByteString +join filler pss = concat (splice pss) + where + splice [] = [] + splice [x] = [x] + splice (x:y:xs) = x:filler:splice (y:xs) + +-- +-- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings +-- with a char. Around 4 times faster than the generalised join. +-- +joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString +joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr -> + withForeignPtr ffp $ \fp -> + withForeignPtr fgp $ \gp -> do + memcpy ptr (fp `plusPtr` s) l + poke (ptr `plusPtr` l) c + memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m + where + len = length f + length g + 1 +{-# INLINE joinWithByte #-} + +-- --------------------------------------------------------------------- +-- Indexing ByteStrings + +-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. +index :: ByteString -> Int -> Word8 +index ps n + | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n + | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n + ++ ", length = " ++ show (length ps) + | otherwise = ps `unsafeIndex` n +{-# INLINE index #-} + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'ByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. +-- This implementation uses memchr(3). +elemIndex :: Word8 -> ByteString -> Maybe Int +elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + let p' = p `plusPtr` s + q = memchr p' c (fromIntegral l) + return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p' +{-# INLINE elemIndex #-} + +-- | /O(n)/ The 'elemIndexLast' function returns the last index of the +-- element in the given 'ByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. The following +-- holds: +-- +-- > elemIndexLast c xs == +-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) +-- +elemIndexLast :: Word8 -> ByteString -> Maybe Int +elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> + go (p `plusPtr` s) (l-1) + where + STRICT2(go) + go p i | i < 0 = return Nothing + | otherwise = do ch' <- peekByteOff p i + if ch == ch' + then return $ Just i + else go p (i-1) +{-# INLINE elemIndexLast #-} + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +-- This implementation uses memchr(3). +elemIndices :: Word8 -> ByteString -> [Int] +elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + let ptr = p `plusPtr` s + + STRICT1(loop) + loop n = do + let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n)) + if q == nullPtr + then return [] + else do let i = q `minusPtr` ptr + ls <- loop (i+1) + return $! i:ls + loop 0 + +{- +-- much slower +elemIndices :: Word8 -> ByteString -> [Int] +elemIndices c ps = loop 0 ps + where STRICT2(loop) + loop _ ps' | null ps' = [] + loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps') + | otherwise = loop (n+1) (unsafeTail ps') +-} + +-- | count returns the number of times its argument appears in the ByteString +-- +-- > count = length . elemIndices +-- +-- But more efficiently than using length on the intermediate list. +count :: Word8 -> ByteString -> Int +count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> + go (p `plusPtr` s) (fromIntegral m) 0 + where + go :: Ptr Word8 -> CSize -> Int -> IO Int + STRICT3(go) + go p l i = do + let q = memchr p w l + if q == nullPtr + then return i + else do let k = fromIntegral $ q `minusPtr` p + go (q `plusPtr` 1) (l-k-1) (i+1) +{-# INLINE count #-} + +-- | The 'findIndex' function takes a predicate and a 'ByteString' and +-- returns the index of the first element in the ByteString +-- satisfying the predicate. +findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int +findIndex = (listToMaybe .) . findIndices + +-- | The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +findIndices :: (Word8 -> Bool) -> ByteString -> [Int] +findIndices p ps = loop 0 ps + where + STRICT2(loop) + loop _ qs | null qs = [] + loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) + | otherwise = loop (n+1) (unsafeTail qs) + +-- --------------------------------------------------------------------- +-- Searching ByteStrings + +-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. +elem :: Word8 -> ByteString -> Bool +elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True +{-# INLINE elem #-} + +-- | /O(n)/ 'notElem' is the inverse of 'elem' +notElem :: Word8 -> ByteString -> Bool +notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False +{-# INLINE notElem #-} + +-- +-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common +-- case of filtering a single byte. It is more efficient to use +-- /filterByte/ in this case. +-- +-- > filterByte == filter . (==) +-- +-- filterByte is around 10x faster, and uses much less space, than its +-- filter equivalent +filterByte :: Word8 -> ByteString -> ByteString +filterByte w ps = replicate (count w ps) w + +{- +-- slower than the replicate version + +filterByte ch ps@(PS x s l) + | null ps = ps + | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do + t <- go (f `plusPtr` s) p l + return (t `minusPtr` p) -- actual length + where + STRICT3(go) + go _ t 0 = return t + go f t e = do w <- peek f + if w == ch + then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1) + else go (f `plusPtr` 1) t (e-1) +-} + +-- +-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common +-- case of filtering a single byte out of a list. It is more efficient +-- to use /filterNotByte/ in this case. +-- +-- > filterNotByte == filter . (/=) +-- +-- filterNotByte is around 3x faster, and uses much less space, than its +-- filter equivalent +filterNotByte :: Word8 -> ByteString -> ByteString +filterNotByte ch ps@(PS x s l) + | null ps = ps + | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do + t <- go (f `plusPtr` s) p l + return (t `minusPtr` p) -- actual length + where + STRICT3(go) + go _ t 0 = return t + go f t e = do w <- peek f + if w /= ch + then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1) + else go (f `plusPtr` 1) t (e-1) + +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. +filter :: (Word8 -> Bool) -> ByteString -> ByteString +filter k ps@(PS x s l) + | null ps = ps + | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do + t <- go (f `plusPtr` s) p l + return (t `minusPtr` p) -- actual length + where + STRICT3(go) + go _ t 0 = return t + go f t e = do w <- peek f + if k w + then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1) + else go (f `plusPtr` 1) t (e - 1) + +-- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps + +-- | /O(n)/ The 'find' function takes a predicate and a ByteString, +-- and returns the first element in matching the predicate, or 'Nothing' +-- if there is no such element. +find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 +find p ps = case filter p ps of + q | null q -> Nothing + | otherwise -> Just (unsafeHead q) + +-- --------------------------------------------------------------------- +-- Searching for substrings + +-- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' +-- iff the first is a prefix of the second. +isPrefixOf :: ByteString -> ByteString -> Bool +isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2) + | l1 == 0 = True + | l2 < l1 = False + | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> + withForeignPtr x2 $ \p2 -> do + i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1 + return (i == 0) + +-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' +-- iff the first is a suffix of the second. +-- +-- The following holds: +-- +-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y +-- +-- However, the real implemenation uses memcmp to compare the end of the +-- string only, with no reverse required.. +isSuffixOf :: ByteString -> ByteString -> Bool +isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2) + | l1 == 0 = True + | l2 < l1 = False + | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> + withForeignPtr x2 $ \p2 -> do + i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1 + return (i == 0) + +-- | Check whether one string is a substring of another. @isSubstringOf +-- p s@ is equivalent to @not (null (findSubstrings p s))@. +isSubstringOf :: ByteString -- ^ String to search for. + -> ByteString -- ^ String to search in. + -> Bool +isSubstringOf p s = not $ P.null $ findSubstrings p s + +-- | Get the first index of a substring in another string, +-- or 'Nothing' if the string is not found. +-- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@. +findSubstring :: ByteString -- ^ String to search for. + -> ByteString -- ^ String to seach in. + -> Maybe Int +findSubstring = (listToMaybe .) . findSubstrings + +-- | Find the indexes of all (possibly overlapping) occurances of a +-- substring in a string. This function uses the Knuth-Morris-Pratt +-- string matching algorithm. +findSubstrings :: ByteString -- ^ String to search for. + -> ByteString -- ^ String to seach in. + -> [Int] + +findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0 + where + patc x = pat `unsafeIndex` x + strc x = str `unsafeIndex` x + + -- maybe we should make kmpNext a UArray before using it in search? + kmpNext = listArray (0,m) (-1:kmpNextL pat (-1)) + kmpNextL p _ | null p = [] + kmpNextL p j = let j' = next (unsafeHead p) j + 1 + ps = unsafeTail p + x = if not (null ps) && unsafeHead ps == patc j' + then kmpNext Array.! j' else j' + in x:kmpNextL ps j' + search i j = match ++ rest -- i: position in string, j: position in pattern + where match = if j == m then [(i - j)] else [] + rest = if i == n then [] else search (i+1) (next (strc i) j + 1) + next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j) + | otherwise = j + +-- --------------------------------------------------------------------- +-- Zipping + +-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of +-- corresponding pairs of bytes. If one input ByteString is short, +-- excess elements of the longer ByteString are discarded. This is +-- equivalent to a pair of 'unpack' operations. +zip :: ByteString -> ByteString -> [(Word8,Word8)] +zip ps qs + | null ps || null qs = [] + | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs) + +-- | 'zipWith' generalises 'zip' by zipping with the function given as +-- the first argument, instead of a tupling function. For example, +-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of +-- corresponding sums. +zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] +zipWith f ps qs + | null ps || null qs = [] + | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs) + +-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of +-- ByteStrings. Note that this performs two 'pack' operations. +unzip :: [(Word8,Word8)] -> (ByteString,ByteString) +unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) +{-# INLINE unzip #-} + +-- --------------------------------------------------------------------- +-- Special lists + +-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. +inits :: ByteString -> [ByteString] +inits (PS x s l) = [PS x s n | n <- [0..l]] + +-- | /O(n)/ Return all final segments of the given 'ByteString', longest first. +tails :: ByteString -> [ByteString] +tails p | null p = [empty] + | otherwise = p : tails (unsafeTail p) + +-- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]] + +-- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each. +elems :: ByteString -> [ByteString] +elems (PS _ _ 0) = [] +elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1))) +{-# INLINE elems #-} + +-- --------------------------------------------------------------------- +-- ** Ordered 'ByteString's + +-- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3). +sort :: ByteString -> ByteString +sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do + memcpy p (f `plusPtr` s) l + c_qsort p l -- inplace + +-- sort = pack . List.sort . unpack + +-- --------------------------------------------------------------------- +-- +-- Extensions to the basic interface +-- + +-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the +-- check for the empty case, so there is an obligation on the programmer +-- to provide a proof that the ByteString is non-empty. +unsafeHead :: ByteString -> Word8 +unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s +{-# INLINE unsafeHead #-} + +-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the +-- check for the empty case. As with 'unsafeHead', the programmer must +-- provide a separate proof that the ByteString is non-empty. +unsafeTail :: ByteString -> ByteString +unsafeTail (PS ps s l) = PS ps (s+1) (l-1) +{-# INLINE unsafeTail #-} + +-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8' +-- This omits the bounds check, which means there is an accompanying +-- obligation on the programmer to ensure the bounds are checked in some +-- other way. +unsafeIndex :: ByteString -> Int -> Word8 +unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i) +{-# INLINE unsafeIndex #-} + +-- --------------------------------------------------------------------- +-- Low level constructors + +#if defined(__GLASGOW_HASKELL__) +-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an +-- Addr\# (an arbitrary machine address assumed to point outside the +-- garbage-collected heap) into a @ByteString@. A much faster way to +-- create an Addr\# is with an unboxed string literal, than to pack a +-- boxed string. A unboxed string literal is compiled to a static @char +-- []@ by GHC. Establishing the length of the string requires a call to +-- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as +-- is the case with "string"# literals in GHC). Use 'unsafePackAddress' +-- if you know the length of the string statically. +-- +-- An example: +-- +-- > literalFS = packAddress "literal"# +-- +packAddress :: Addr# -> ByteString +packAddress addr# = inlinePerformIO $ do + p <- newForeignPtr_ cstr + return $ PS p 0 (fromIntegral $ c_strlen cstr) + where + cstr = Ptr addr# +{-# INLINE packAddress #-} + +-- | /O(1)/ 'unsafePackAddress' provides constant-time construction of +-- 'ByteStrings' -- which is ideal for string literals. It packs a +-- null-terminated sequence of bytes into a 'ByteString', given a raw +-- 'Addr\#' to the string, and the length of the string. Make sure the +-- length is correct, otherwise use the safer 'packAddress' (where the +-- length will be calculated once at runtime). +unsafePackAddress :: Int -> Addr# -> ByteString +unsafePackAddress len addr# = inlinePerformIO $ do + p <- newForeignPtr_ cstr + return $ PS p 0 len + where cstr = Ptr addr# + +#endif + +-- | /O(1)/ Build a ByteString from a ForeignPtr +fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString +fromForeignPtr fp l = PS fp 0 l + +-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString +toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) +toForeignPtr (PS ps s l) = (ps, s, l) + +-- | /O(1)/ 'skipIndex' returns the internal skipped index of the +-- current 'ByteString' from any larger string it was created from, as +-- an 'Int'. +skipIndex :: ByteString -> Int +skipIndex (PS _ s _) = s +{-# INLINE skipIndex #-} + +-- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/ +-- finalizer associated to it. The ByteString length is calculated using +-- /strlen(3)/, and thus the complexity is a /O(n)/. +packCString :: CString -> ByteString +packCString cstr = inlinePerformIO $ do + fp <- newForeignPtr_ (castPtr cstr) + return $ PS fp 0 (fromIntegral $ c_strlen cstr) + +-- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will +-- have /no/ finalizer associated with it. This operation has /O(1)/ +-- complexity as we already know the final size, so no /strlen(3)/ is +-- required. +packCStringLen :: CStringLen -> ByteString +packCStringLen (ptr,len) = inlinePerformIO $ do + fp <- newForeignPtr_ (castPtr ptr) + return $ PS fp 0 (fromIntegral len) + +-- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will +-- have a @free(3)@ finalizer associated to it. +packMallocCString :: CString -> ByteString +packMallocCString cstr = inlinePerformIO $ do + fp <- newForeignFreePtr (castPtr cstr) + return $ PS fp 0 (fromIntegral $ c_strlen cstr) + +#if defined(__GLASGOW_HASKELL__) +-- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a +-- length, and an IO action representing a finalizer. This function is +-- not available on Hugs. +-- +packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString +packCStringFinalizer p l f = do + fp <- FC.newForeignPtr p f + return $ PS fp 0 l + +-- | Explicitly run the finaliser associated with a 'ByteString'. +-- Further references to this value may generate invalid memory +-- references. This operation is unsafe, as there may be other +-- 'ByteStrings' referring to the same underlying pages. If you use +-- this, you need to have a proof of some kind that all 'ByteString's +-- ever generated from the underlying byte array are no longer live. +unsafeFinalize :: ByteString -> IO () +unsafeFinalize (PS p _ _) = finalizeForeignPtr p + +#endif + +-- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@. +-- The @CString@ should not be freed afterwards. This is a memcpy(3). +useAsCString :: ByteString -> (CString -> IO a) -> IO a +useAsCString (PS ps s l) = bracket alloc (c_free.castPtr) + where + alloc = withForeignPtr ps $ \p -> do + buf <- c_malloc (fromIntegral l+1) + memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l) + poke (buf `plusPtr` l) (0::Word8) + return $ castPtr buf + +-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@. +-- Warning: modifying the @CString@ will affect the @ByteString@. +-- Why is this function unsafe? It relies on the null byte at the end of +-- the ByteString to be there. This is /not/ the case if your ByteString +-- has been spliced from a larger string (i.e. with take or drop). +-- Unless you can guarantee the null byte, you should use the safe +-- version, which will copy the string first. +-- +unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a +unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s) + +-- | /O(n)/ Make a copy of the 'ByteString' with its own storage. +-- This is mainly useful to allow the rest of the data pointed +-- to by the 'ByteString' to be garbage collected, for example +-- if a large string has been read in, and only a small part of it +-- is needed in the rest of the program. +copy :: ByteString -> ByteString +copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l + +-- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the +-- CString is going to be deallocated from C land. +copyCString :: CString -> ByteString +copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr)) + +-- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known. +copyCStringLen :: CStringLen -> ByteString +copyCStringLen (cstr, len) = inlinePerformIO $ do + fp <- mallocForeignPtrArray (len+1) + withForeignPtr fp $ \p -> do + memcpy p (castPtr cstr) len + poke (p `plusPtr` len) (0 :: Word8) + return $! PS fp 0 len + +-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@. +-- Warning: modifying the @CStringLen@ will affect the @ByteString@. +-- This is analogous to unsafeUseAsCString, and comes with the same +-- safety requirements. +-- +unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a +unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l) + +-- | Given the maximum size needed and a function to make the contents +-- of a ByteString, generate makes the 'ByteString'. The generating +-- function is required to return the actual final size (<= the maximum +-- size), and the resulting byte array is realloced to this size. The +-- string is padded at the end with a null byte. +-- +-- generate is the main mechanism for creating custom, efficient +-- ByteString functions, using Haskell or C functions to fill the space. +-- +generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString +generate i f = do + p <- mallocArray i + i' <- f p + p' <- reallocArray p (i'+1) + poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work + fp <- newForeignFreePtr p' + return $ PS fp 0 i' + +-- --------------------------------------------------------------------- +-- line IO + +#if defined(__GLASGOW_HASKELL__) + +-- | getLine, read a line from stdin. +getLine :: IO ByteString +getLine = hGetLine stdin + +-- | hGetLine. read a ByteString from a handle +hGetLine :: Handle -> IO ByteString +hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do + case haBufferMode handle_ of + NoBuffering -> error "no buffering" + _other -> hGetLineBuffered handle_ + + where + hGetLineBuffered handle_ = do + let ref = haBuffer handle_ + buf <- readIORef ref + hGetLineBufferedLoop handle_ ref buf 0 [] + + hGetLineBufferedLoop handle_ ref + buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss = + len `seq` do + off <- findEOL r w raw + let new_len = len + off - r + xs <- mkPS raw r off + + -- if eol == True, then off is the offset of the '\n' + -- otherwise off == w and the buffer is now empty. + if off /= w + then do if (w == off + 1) + then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 } + else writeIORef ref buf{ bufRPtr = off + 1 } + mkBigPS new_len (xs:xss) + else do + maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_) + buf{ bufWPtr=0, bufRPtr=0 } + case maybe_buf of + -- Nothing indicates we caught an EOF, and we may have a + -- partial line to return. + Nothing -> do + writeIORef ref buf{ bufRPtr=0, bufWPtr=0 } + if new_len > 0 + then mkBigPS new_len (xs:xss) + else ioe_EOF + Just new_buf -> + hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss) + + -- find the end-of-line character, if there is one + findEOL r w raw + | r == w = return w + | otherwise = do + (c,r') <- readCharFromBuffer raw r + if c == '\n' + then return r -- NB. not r': don't include the '\n' + else findEOL r' w raw + + maybeFillReadBuffer fd is_line is_stream buf = catch + (do buf' <- fillReadBuffer fd is_line is_stream buf + return (Just buf')) + (\e -> if isEOFError e then return Nothing else ioError e) + +-- TODO, rewrite to use normal memcpy +mkPS :: RawBuffer -> Int -> Int -> IO ByteString +mkPS buf start end = do + let len = end - start + fp <- mallocByteString (len `quot` 8) + withForeignPtr fp $ \p -> do + memcpy_ptr_baoff p buf start (fromIntegral len) + return (PS fp 0 len) + +mkBigPS :: Int -> [ByteString] -> IO ByteString +mkBigPS _ [ps] = return ps +mkBigPS _ pss = return $! concat (P.reverse pss) + +#endif + +-- --------------------------------------------------------------------- +-- Block IO + +-- | Outputs a 'ByteString' to the specified 'Handle'. +hPut :: Handle -> ByteString -> IO () +hPut _ (PS _ _ 0) = return () +hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l +hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l + +-- | Write a ByteString to stdout +putStr :: ByteString -> IO () +putStr = hPut stdout + +-- | Write a ByteString to stdout, appending a newline byte +putStrLn :: ByteString -> IO () +putStrLn ps = hPut stdout ps >> hPut stdout nl + where nl = packByte 0x0a + +-- | Read a 'ByteString' directly from the specified 'Handle'. This +-- is far more efficient than reading the characters into a 'String' +-- and then using 'pack'. +hGet :: Handle -> Int -> IO ByteString +hGet _ 0 = return empty +hGet h i = do fp <- mallocByteString i + l <- withForeignPtr fp $ \p-> hGetBuf h p i + return $ PS fp 0 l + +#if defined(__GLASGOW_HASKELL__) +-- | hGetNonBlocking is identical to 'hGet', except that it will never block +-- waiting for data to become available, instead it returns only whatever data +-- is available. +hGetNonBlocking :: Handle -> Int -> IO ByteString +hGetNonBlocking _ 0 = return empty +hGetNonBlocking h i = do + fp <- mallocByteString i + l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i + return $ PS fp 0 l +#endif + +-- | Read entire handle contents into a 'ByteString'. +-- +-- As with 'hGet', the string representation in the file is assumed to +-- be ISO-8859-1. +-- +hGetContents :: Handle -> IO ByteString +hGetContents h = do + let start_size = 1024 + p <- mallocArray start_size + i <- hGetBuf h p start_size + if i < start_size + then do p' <- reallocArray p i + fp <- newForeignFreePtr p' + return $ PS fp 0 i + else f p start_size + where + f p s = do + let s' = 2 * s + p' <- reallocArray p s' + i <- hGetBuf h (p' `plusPtr` s) s + if i < s + then do let i' = s + i + p'' <- reallocArray p' i' + fp <- newForeignFreePtr p'' + return $ PS fp 0 i' + else f p' s' + +-- | getContents. Equivalent to hGetContents stdin +getContents :: IO ByteString +getContents = hGetContents stdin + +-- | Read an entire file directly into a 'ByteString'. This is far more +-- efficient than reading the characters into a 'String' and then using +-- 'pack'. It also may be more efficient than opening the file and +-- reading it using hGet. +readFile :: FilePath -> IO ByteString +readFile f = do + h <- openBinaryFile f ReadMode + l <- hFileSize h + s <- hGet h $ fromIntegral l + hClose h + return s + +-- | Write a 'ByteString' to a file. +writeFile :: FilePath -> ByteString -> IO () +writeFile f ps = do + h <- openBinaryFile f WriteMode + hPut h ps + hClose h + +#if defined(__GLASGOW_HASKELL__) +-- +-- | A ByteString equivalent for getArgs. More efficient for large argument lists +-- +getArgs :: IO [ByteString] +getArgs = + alloca $ \ p_argc -> + alloca $ \ p_argv -> do + getProgArgv p_argc p_argv + p <- fromIntegral `fmap` peek p_argc + argv <- peek p_argv + P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1) +#endif + +-- --------------------------------------------------------------------- +-- Internal utilities + +-- Unsafe conversion between 'Word8' and 'Char'. These are nops, and +-- silently truncate to 8 bits Chars > '\255'. They are provided as +-- convenience for ByteString construction. +w2c :: Word8 -> Char +#if !defined(__GLASGOW_HASKELL__) +w2c = chr . fromIntegral +#else +w2c = unsafeChr . fromIntegral +#endif +{-# INLINE w2c #-} + +c2w :: Char -> Word8 +c2w = fromIntegral . ord +{-# INLINE c2w #-} + +-- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way +-- is padded with a null byte. +mallocByteString :: Int -> IO (ForeignPtr Word8) +mallocByteString l = do + fp <- mallocForeignPtrArray (l+1) + withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8) + return fp + +-- | A way of creating ForeignPtrs outside the IO monad. The @Int@ +-- argument gives the final size of the ByteString. Unlike 'generate' +-- the ByteString is no reallocated if the final size is less than the +-- estimated size. +create :: Int -> (Ptr Word8 -> IO ()) -> ByteString +create l write_ptr = inlinePerformIO $ do + fp <- mallocByteString (l+1) + withForeignPtr fp $ \p -> write_ptr p + return $ PS fp 0 l +{-# INLINE create #-} + +-- | Perform an operation with a temporary ByteString +withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b +withPtr fp io = inlinePerformIO (withForeignPtr fp io) +{-# INLINE withPtr #-} + +-- Common up near identical calls to `error' to reduce the number +-- constant strings created when compiled: +errorEmptyList :: String -> a +errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString") +{-# INLINE errorEmptyList #-} + +-- 'findIndexOrEnd' is a variant of findIndex, that returns the length +-- of the string if no element is found, rather than Nothing. +findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int +STRICT2(findIndexOrEnd) +findIndexOrEnd f ps + | null ps = 0 + | f (unsafeHead ps) = 0 + | otherwise = 1 + findIndexOrEnd f (unsafeTail ps) +{-# INLINE findIndexOrEnd #-} + +-- Find from the end of the string using predicate +findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int +STRICT2(findFromEndUntil) +findFromEndUntil f ps@(PS x s l) = + if null ps then 0 + else if f (last ps) then l + else findFromEndUntil f (PS x s (l-1)) + +-- Just like inlinePerformIO, but we inline it. Big performance gains as +-- it exposes lots of things to further inlining +-- +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +#if defined(__GLASGOW_HASKELL__) +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r +#else +inlinePerformIO = unsafePerformIO +#endif + +{-# INLINE newForeignFreePtr #-} +newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8) +#if defined(__GLASGOW_HASKELL__) +newForeignFreePtr p = FC.newForeignPtr p (c_free p) +#else +newForeignFreePtr p = newForeignPtr c_free_finalizer p +#endif + +-- --------------------------------------------------------------------- +-- +-- Standard C functions +-- + +foreign import ccall unsafe "string.h strlen" c_strlen + :: CString -> CInt + +foreign import ccall unsafe "stdlib.h malloc" c_malloc + :: CInt -> IO (Ptr Word8) + +foreign import ccall unsafe "static stdlib.h free" c_free + :: Ptr Word8 -> IO () + +#if !defined(__GLASGOW_HASKELL__) +foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer + :: FunPtr (Ptr Word8 -> IO ()) +#endif + +foreign import ccall unsafe "string.h memset" memset + :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) + +foreign import ccall unsafe "string.h memchr" memchr + :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8 + +foreign import ccall unsafe "string.h memcmp" memcmp + :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int + +foreign import ccall unsafe "string.h memcpy" memcpy + :: Ptr Word8 -> Ptr Word8 -> Int -> IO () + +-- --------------------------------------------------------------------- +-- +-- Uses our C code +-- + +foreign import ccall unsafe "static fpstring.h reverse" c_reverse + :: Ptr Word8 -> Ptr Word8 -> Int -> IO () + +foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse + :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO () + +foreign import ccall unsafe "static fpstring.h maximum" c_maximum + :: Ptr Word8 -> Int -> Word8 + +foreign import ccall unsafe "static fpstring.h minimum" c_minimum + :: Ptr Word8 -> Int -> Word8 + +foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort + :: Ptr Word8 -> Int -> IO () + +-- --------------------------------------------------------------------- +-- Internal GHC Haskell magic + +#if defined(__GLASGOW_HASKELL__) +foreign import ccall unsafe "RtsAPI.h getProgArgv" + getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO () + +foreign import ccall unsafe "__hscore_memcpy_src_off" + memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ()) +#endif diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs new file mode 100644 index 0000000..9f39e61 --- /dev/null +++ b/Data/ByteString/Char8.hs @@ -0,0 +1,1007 @@ +{-# OPTIONS_GHC -cpp -fffi #-} +-- +-- Module : Data.ByteString.Char8 +-- Copyright : (c) Don Stewart 2006 +-- License : BSD-style +-- +-- Maintainer : dons@cse.unsw.edu.au +-- Stability : experimental +-- Portability : portable (tested with GHC>=6.4.1 and Hugs 2005) +-- + +-- +-- | Manipulate ByteStrings using Char operations. All Chars will be +-- truncated to 8 bits. It can be expected that these functions will run +-- at identical speeds to their Word8 equivalents in @Data.ByteString@. +-- +-- More specifically these byte strings are taken to be in the +-- subset of Unicode covered by code points 0-255. This covers +-- Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls. +-- +-- See: +-- +-- * +-- +-- * +-- +-- * +-- +-- This module is intended to be imported @qualified@, to avoid name +-- clashes with Prelude functions. eg. +-- +-- > import qualified Data.ByteString.Char8 as B +-- + +module Data.ByteString.Char8 ( + + -- * The @ByteString@ type + ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable + + -- * Introducing and eliminating 'ByteString's + empty, -- :: ByteString + packChar, -- :: Char -> ByteString + pack, -- :: String -> ByteString + unpack, -- :: ByteString -> String + + -- * Basic interface + cons, -- :: Char -> ByteString -> ByteString + snoc, -- :: Char -> ByteString -> ByteString + null, -- :: ByteString -> Bool + length, -- :: ByteString -> Int + head, -- :: ByteString -> Char + tail, -- :: ByteString -> ByteString + last, -- :: ByteString -> Char + init, -- :: ByteString -> ByteString + append, -- :: ByteString -> ByteString -> ByteString + + -- * Special ByteStrings + inits, -- :: ByteString -> [ByteString] + tails, -- :: ByteString -> [ByteString] + elems, -- :: ByteString -> [ByteString] + + -- * Transformating ByteStrings + map, -- :: (Char -> Char) -> ByteString -> ByteString + reverse, -- :: ByteString -> ByteString + intersperse, -- :: Char -> ByteString -> ByteString + transpose, -- :: [ByteString] -> [ByteString] + + -- * Reducing 'ByteString's + foldl, -- :: (a -> Char -> a) -> a -> ByteString -> a + foldr, -- :: (Char -> a -> a) -> a -> ByteString -> a + foldl1, -- :: (Char -> Char -> Char) -> ByteString -> Char + foldr1, -- :: (Char -> Char -> Char) -> ByteString -> Char + + -- ** Special folds + concat, -- :: [ByteString] -> ByteString + concatMap, -- :: (Char -> ByteString) -> ByteString -> ByteString + any, -- :: (Char -> Bool) -> ByteString -> Bool + all, -- :: (Char -> Bool) -> ByteString -> Bool + maximum, -- :: ByteString -> Char + minimum, -- :: ByteString -> Char + mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString + hash, -- :: ByteString -> Int32 + + -- * Generating and unfolding ByteStrings + replicate, -- :: Int -> Char -> ByteString + unfoldrN, -- :: (Char -> Maybe (Char, Char)) -> Char -> ByteString + + -- * Substrings + + -- ** Breaking strings + take, -- :: Int -> ByteString -> ByteString + drop, -- :: Int -> ByteString -> ByteString + splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) + takeWhile, -- :: (Char -> Bool) -> ByteString -> ByteString + dropWhile, -- :: (Char -> Bool) -> ByteString -> ByteString + break, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + span, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + spanEnd, -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) + + -- ** Breaking and dropping on specific Chars + breakChar, -- :: Char -> ByteString -> (ByteString, ByteString) + breakFirst, -- :: Char -> ByteString -> Maybe (ByteString,ByteString) + breakLast, -- :: Char -> ByteString -> Maybe (ByteString,ByteString) + breakSpace, -- :: ByteString -> Maybe (ByteString,ByteString) + dropSpace, -- :: ByteString -> ByteString + dropSpaceEnd, -- :: ByteString -> ByteString + + -- ** Breaking into many substrings + split, -- :: Char -> ByteString -> [ByteString] + splitWith, -- :: (Char -> Bool) -> ByteString -> [ByteString] + tokens, -- :: (Char -> Bool) -> ByteString -> [ByteString] + + -- ** Breaking into lines and words + lines, -- :: ByteString -> [ByteString] + words, -- :: ByteString -> [ByteString] + unlines, -- :: [ByteString] -> ByteString + unwords, -- :: ByteString -> [ByteString] + + lines', -- :: ByteString -> [ByteString] + unlines', -- :: [ByteString] -> ByteString + linesCRLF', -- :: ByteString -> [ByteString] + unlinesCRLF', -- :: [ByteString] -> ByteString + words', -- :: ByteString -> [ByteString] + unwords', -- :: ByteString -> [ByteString] + + lineIndices, -- :: ByteString -> [Int] + betweenLines, -- :: ByteString -> ByteString -> ByteString -> Maybe (ByteString) + + -- ** Joining strings + join, -- :: ByteString -> [ByteString] -> ByteString + joinWithChar, -- :: Char -> ByteString -> ByteString -> ByteString + + -- * Indexing ByteStrings + index, -- :: ByteString -> Int -> Char + elemIndex, -- :: Char -> ByteString -> Maybe Int + elemIndexLast, -- :: Char -> ByteString -> Maybe Int + elemIndices, -- :: Char -> ByteString -> [Int] + findIndex, -- :: (Char -> Bool) -> ByteString -> Maybe Int + findIndices, -- :: (Char -> Bool) -> ByteString -> [Int] + count, -- :: Char -> ByteString -> Int + + -- * Ordered ByteStrings + sort, -- :: ByteString -> ByteString + + -- * Searching ByteStrings + + -- ** Searching by equality + elem, -- :: Char -> ByteString -> Bool + notElem, -- :: Char -> ByteString -> Bool + filterChar, -- :: Char -> ByteString -> ByteString + filterNotChar, -- :: Char -> ByteString -> ByteString + + -- ** Searching with a predicate + filter, -- :: (Char -> Bool) -> ByteString -> ByteString + find, -- :: (Char -> Bool) -> ByteString -> Maybe Char + + -- ** Searching for substrings + isPrefixOf, -- :: ByteString -> ByteString -> Bool + isSuffixOf, -- :: ByteString -> ByteString -> Bool + isSubstringOf, -- :: ByteString -> ByteString -> Bool + findSubstring, -- :: ByteString -> ByteString -> Maybe Int + findSubstrings, -- :: ByteString -> ByteString -> [Int] + + -- * Zipping and unzipping ByteString + zip, -- :: ByteString -> ByteString -> [(Char,Char)] + zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c] + unzip, -- :: [(Char,Char)] -> (ByteString,ByteString) + + -- * Unchecked access + unsafeHead, -- :: ByteString -> Char + unsafeTail, -- :: ByteString -> ByteString + unsafeIndex, -- :: ByteString -> Int -> Char + w2c, -- :: Word8 -> Char + c2w, -- :: Char -> Word8 + + -- * Reading from ByteStrings + readInt, -- :: ByteString -> Maybe Int + unsafeReadInt, -- :: ByteString -> Maybe Int + + -- * Copying ByteStrings + copy, -- :: ByteString -> ByteString + + -- * I\/O with @ByteString@s + + -- ** Standard input and output + +#if defined(__GLASGOW_HASKELL__) + getLine, -- :: IO ByteString +#endif + getContents, -- :: IO ByteString + putStr, -- :: ByteString -> IO () + putStrLn, -- :: ByteString -> IO () + + -- ** Files + readFile, -- :: FilePath -> IO ByteString + writeFile, -- :: FilePath -> ByteString -> IO () + + -- ** I\/O with Handles +#if defined(__GLASGOW_HASKELL__) + getArgs, -- :: IO [ByteString] + hGetLine, -- :: Handle -> IO ByteString + hGetNonBlocking, -- :: Handle -> Int -> IO ByteString +#endif + hGetContents, -- :: Handle -> IO ByteString + hGet, -- :: Handle -> Int -> IO ByteString + hPut, -- :: Handle -> ByteString -> IO () + +#if defined(__GLASGOW_HASKELL__) + -- * Low level construction + -- | For constructors from foreign language types see /Data.ByteString/ + packAddress, -- :: Addr# -> ByteString + unsafePackAddress, -- :: Int -> Addr# -> ByteString +#endif + + ) where + +import qualified Prelude as P +import Prelude hiding (reverse,head,tail,last,init,null + ,length,map,lines,foldl,foldr,unlines + ,concat,any,take,drop,splitAt,takeWhile + ,dropWhile,span,break,elem,filter,unwords + ,words,maximum,minimum,all,concatMap + ,foldl1,foldr1,readFile,writeFile,replicate + ,getContents,getLine,putStr,putStrLn + ,zip,zipWith,unzip,notElem) + +import qualified Data.ByteString as B + +-- Listy functions transparently exported +import Data.ByteString (ByteString(..) + ,empty,null,length,tail,init,append + ,inits,tails,elems,reverse,transpose + ,concat,hash,take,drop,splitAt,join + ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring + ,findSubstrings,unsafeTail,copy + + ,getContents, putStr, putStrLn + ,readFile, writeFile + ,hGetContents, hGet, hPut +#if defined(__GLASGOW_HASKELL__) + ,getLine, getArgs, hGetLine, hGetNonBlocking + ,packAddress, unsafePackAddress +#endif + ,useAsCString, unsafeUseAsCString + ) + +import Data.Char + +import qualified Data.List as List (intersperse) + +import Foreign +import Foreign.C.Types (CLong) +import Foreign.Marshal.Utils (with) + +#if defined(__GLASGOW_HASKELL__) +import GHC.Base (Char(..),unsafeChr,unpackCString#,unsafeCoerce#) +import GHC.IOBase (IO(..),stToIO) +import GHC.Prim (Addr#,writeWord8OffAddr#,realWorld#,plusAddr#) +import GHC.Ptr (Ptr(..)) +import GHC.ST (ST(..)) +#endif + +#define STRICT1(f) f a | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined + +------------------------------------------------------------------------ + +-- | /O(1)/ Convert a 'Char' into a 'ByteString' +packChar :: Char -> ByteString +packChar = B.packByte . c2w +{-# INLINE packChar #-} + +-- | /O(n)/ Convert a 'String' into a 'ByteString' +-- +-- For applications with large numbers of string literals, pack can be a +-- bottleneck. In such cases, consider using packAddress (GHC only). +pack :: String -> ByteString +#if !defined(__GLASGOW_HASKELL__) + +pack str = B.create (P.length str) $ \p -> go p str + where go _ [] = return () + go p (x:xs) = poke p (c2w x) >> go (p `plusPtr` 1) xs + +#else /* hack away */ + +pack str = B.create (P.length str) $ \(Ptr p) -> stToIO (go p str) + where + go :: Addr# -> [Char] -> ST a () + go _ [] = return () + go p (C# c:cs) = writeByte p (unsafeCoerce# c) >> go (p `plusAddr#` 1#) cs + + writeByte p c = ST $ \s# -> + case writeWord8OffAddr# p 0# c s# of s2# -> (# s2#, () #) + {-# INLINE writeByte #-} + +{-# RULES +"pack/packAddress" forall s# . + pack (unpackCString# s#) = B.packAddress s# + #-} + +#endif + +{-# INLINE pack #-} + +-- | /O(n)/ Converts a 'ByteString' to a 'String'. +unpack :: ByteString -> [Char] +unpack = B.unpackWith w2c +{-# INLINE unpack #-} + +-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different +-- complexity, as it requires a memcpy. +cons :: Char -> ByteString -> ByteString +cons = B.cons . c2w +{-# INLINE cons #-} + +-- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to +-- 'cons', this function performs a memcpy. +snoc :: ByteString -> Char -> ByteString +snoc p = B.snoc p . c2w +{-# INLINE snoc #-} + +-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. +head :: ByteString -> Char +head = w2c . B.head +{-# INLINE head #-} + +-- | /O(1)/ Extract the last element of a packed string, which must be non-empty. +last :: ByteString -> Char +last = w2c . B.last +{-# INLINE last #-} + +-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@ +map :: (Char -> Char) -> ByteString -> ByteString +map f = B.map (c2w . f . w2c) +{-# INLINE map #-} + +-- | /O(n)/ The 'intersperse' function takes a Char and a 'ByteString' +-- and \`intersperses\' that Char between the elements of the +-- 'ByteString'. It is analogous to the intersperse function on Lists. +intersperse :: Char -> ByteString -> ByteString +intersperse = B.intersperse . c2w +{-# INLINE intersperse #-} + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a ByteString, reduces the +-- ByteString using the binary operator, from left to right. +foldl :: (a -> Char -> a) -> a -> ByteString -> a +foldl f = B.foldl (\a c -> f a (w2c c)) +{-# INLINE foldl #-} + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a packed string, +-- reduces the packed string using the binary operator, from right to left. +foldr :: (Char -> a -> a) -> a -> ByteString -> a +foldr f = B.foldr (\c a -> f (w2c c) a) +{-# INLINE foldr #-} + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'ByteStrings'. +foldl1 :: (Char -> Char -> Char) -> ByteString -> Char +foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldl1 #-} + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'ByteString's +foldr1 :: (Char -> Char -> Char) -> ByteString -> Char +foldr1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps) +{-# INLINE foldr1 #-} + +-- | Map a function over a 'ByteString' and concatenate the results +concatMap :: (Char -> ByteString) -> ByteString -> ByteString +concatMap f = B.concatMap (f . w2c) +{-# INLINE concatMap #-} + +-- | Applied to a predicate and a ByteString, 'any' determines if +-- any element of the 'ByteString' satisfies the predicate. +any :: (Char -> Bool) -> ByteString -> Bool +any f = B.any (f . w2c) +{-# INLINE any #-} + +-- | Applied to a predicate and a 'ByteString', 'all' determines if +-- all elements of the 'ByteString' satisfy the predicate. +all :: (Char -> Bool) -> ByteString -> Bool +all f = B.all (f . w2c) +{-# INLINE all #-} + +-- | 'maximum' returns the maximum value from a 'ByteString' +maximum :: ByteString -> Char +maximum = w2c . B.maximum +{-# INLINE maximum #-} + +-- | 'minimum' returns the minimum value from a 'ByteString' +minimum :: ByteString -> Char +minimum = w2c . B.minimum +{-# INLINE minimum #-} + +-- | /O(n)/ map Char functions, provided with the index at each position +mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString +mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c))) +{-# INLINE mapIndexed #-} + +-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ +-- the value of every element. The following holds: +-- +-- > replicate w c = unfoldr w (\u -> Just (u,u)) c +-- +-- This implemenation uses @memset(3)@ +replicate :: Int -> Char -> ByteString +replicate w = B.replicate w . c2w +{-# INLINE replicate #-} + +-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'. +-- 'unfoldrN' builds a ByteString from a seed value. The function takes +-- the element and returns 'Nothing' if it is done producing the +-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a +-- prepending to the ByteString and @b@ is used as the next element in a +-- recursive call. +-- +-- To preven unfoldrN having /O(n^2)/ complexity (as prepending a +-- character to a ByteString is /O(n)/, this unfoldr requires a maximum +-- final size of the ByteString as an argument. 'cons' can then be +-- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has +-- linear complexity. The depth of the recursion is limited to this +-- size, but may be less. For lazy, infinite unfoldr, use +-- 'Data.List.unfoldr' (from 'Data.List'). +-- +-- Examples: +-- +-- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789" +-- +-- The following equation connects the depth-limited unfoldr to the List unfoldr: +-- +-- > unfoldrN n == take n $ List.unfoldr +-- +unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString +unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f . w2c) (c2w w) + where k (i,j) = (c2w i, c2w j) -- (c2w *** c2w) +{-# INLINE unfoldrN #-} + +-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, +-- returns the longest prefix (possibly empty) of @xs@ of elements that +-- satisfy @p@. +takeWhile :: (Char -> Bool) -> ByteString -> ByteString +takeWhile f = B.takeWhile (f . w2c) +{-# INLINE takeWhile #-} + +-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. +dropWhile :: (Char -> Bool) -> ByteString -> ByteString +dropWhile f = B.dropWhile (f . w2c) +{-# INLINE dropWhile #-} + +-- | 'break' @p@ is equivalent to @'span' ('not' . p)@. +break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +break f = B.break (f . w2c) +{-# INLINE break #-} + +-- | 'span' @p xs@ breaks the ByteString into two segments. It is +-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ +span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +span f = B.span (f . w2c) +{-# INLINE span #-} + +-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. +-- We have +-- +-- > spanEnd (not.isSpace) "x y z" == ("x y ","z") +-- +-- and +-- +-- > spanEnd (not . isSpace) ps +-- > == +-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) +-- +spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) +spanEnd f = B.spanEnd (f . w2c) +{-# INLINE spanEnd #-} + +-- | 'breakChar' breaks its ByteString argument at the first occurence +-- of the specified Char. It is more efficient than 'break' as it is +-- implemented with @memchr(3)@. I.e. +-- +-- > break (=='c') "abcd" == breakChar 'c' "abcd" +-- +breakChar :: Char -> ByteString -> (ByteString, ByteString) +breakChar = B.breakByte . c2w +{-# INLINE breakChar #-} + +-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first +-- occurence of @w@. It behaves like 'break', except the delimiter is +-- not returned, and @Nothing@ is returned if the delimiter is not in +-- the ByteString. I.e. +-- +-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc") +-- +-- > breakFirst c xs == +-- > let (x,y) = break (== c) xs +-- > in if null y then Nothing else Just (x, drop 1 y)) +-- +breakFirst :: Char -> ByteString -> Maybe (ByteString,ByteString) +breakFirst = B.breakFirst . c2w +{-# INLINE breakFirst #-} + +-- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the +-- ByteString. +-- +-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc") +-- +-- and the following are equivalent: +-- +-- > breakLast 'c' "abcdef" +-- > let (x,y) = break (=='c') (reverse "abcdef") +-- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x) +-- +breakLast :: Char -> ByteString -> Maybe (ByteString,ByteString) +breakLast = B.breakLast . c2w +{-# INLINE breakLast #-} + +-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] +-- > split 'a' "aXaXaXa" == ["","X","X","X"] +-- > split 'x' "x" == ["",""] +-- +-- and +-- +-- > join [c] . split c == id +-- > split == splitWith . (==) +-- +-- As for all splitting functions in this library, this function does +-- not copy the substrings, it just constructs new 'ByteStrings' that +-- are slices of the original. +-- +split :: Char -> ByteString -> [ByteString] +split = B.split . c2w +{-# INLINE split #-} + +-- | /O(n)/ Splits a 'ByteString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] +-- +splitWith :: (Char -> Bool) -> ByteString -> [ByteString] +splitWith f = B.splitWith (f . w2c) +{-# INLINE splitWith #-} +-- the inline makes a big difference here. + +-- | Like 'splitWith', except that sequences of adjacent separators are +-- treated as a single separator. eg. +-- +-- > tokens (=='a') "aabbaca" == ["bb","c"] +-- +tokens :: (Char -> Bool) -> ByteString -> [ByteString] +tokens f = B.tokens (f . w2c) +{-# INLINE tokens #-} + +-- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a +-- char. Around 4 times faster than the generalised join. +-- +joinWithChar :: Char -> ByteString -> ByteString -> ByteString +joinWithChar = B.joinWithByte . c2w +{-# INLINE joinWithChar #-} + +-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. +index :: ByteString -> Int -> Char +index = (w2c .) . B.index +{-# INLINE index #-} + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'ByteString' which is equal (by memchr) to the +-- query element, or 'Nothing' if there is no such element. +elemIndex :: Char -> ByteString -> Maybe Int +elemIndex = B.elemIndex . c2w +{-# INLINE elemIndex #-} + +-- | /O(n)/ The 'elemIndexLast' function returns the last index of the +-- element in the given 'ByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. The following +-- holds: +-- +-- > elemIndexLast c xs == +-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) +-- +elemIndexLast :: Char -> ByteString -> Maybe Int +elemIndexLast = B.elemIndexLast . c2w +{-# INLINE elemIndexLast #-} + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +elemIndices :: Char -> ByteString -> [Int] +elemIndices = B.elemIndices . c2w +{-# INLINE elemIndices #-} + +-- | The 'findIndex' function takes a predicate and a 'ByteString' and +-- returns the index of the first element in the ByteString satisfying the predicate. +findIndex :: (Char -> Bool) -> ByteString -> Maybe Int +findIndex f = B.findIndex (f . w2c) +{-# INLINE findIndex #-} + +-- | The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +findIndices :: (Char -> Bool) -> ByteString -> [Int] +findIndices f = B.findIndices (f . w2c) + +-- | count returns the number of times its argument appears in the ByteString +-- +-- > count = length . elemIndices +-- +-- But more efficiently than using length on the intermediate list. +count :: Char -> ByteString -> Int +count c = B.count (c2w c) + +-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This +-- implementation uses @memchr(3)@. +elem :: Char -> ByteString -> Bool +elem c = B.elem (c2w c) +{-# INLINE elem #-} + +-- | /O(n)/ 'notElem' is the inverse of 'elem' +notElem :: Char -> ByteString -> Bool +notElem c = B.notElem (c2w c) +{-# INLINE notElem #-} + +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. +filter :: (Char -> Bool) -> ByteString -> ByteString +filter f = B.filter (f . w2c) +{-# INLINE filter #-} + +-- | /O(n)/ The 'find' function takes a predicate and a ByteString, +-- and returns the first element in matching the predicate, or 'Nothing' +-- if there is no such element. +find :: (Char -> Bool) -> ByteString -> Maybe Char +find f ps = w2c `fmap` B.find (f . w2c) ps +{-# INLINE find #-} + +-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common +-- case of filtering a single Char. It is more efficient to use +-- filterChar in this case. +-- +-- > filterChar == filter . (==) +-- +-- filterChar is around 10x faster, and uses much less space, than its +-- filter equivalent +-- +filterChar :: Char -> ByteString -> ByteString +filterChar c = B.filterByte (c2w c) +{-# INLINE filterChar #-} + +-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common +-- case of filtering a single Char out of a list. It is more efficient +-- to use /filterNotChar/ in this case. +-- +-- > filterNotChar == filter . (/=) +-- +-- filterNotChar is around 3x faster, and uses much less space, than its +-- filter equivalent +-- +filterNotChar :: Char -> ByteString -> ByteString +filterNotChar c = B.filterNotByte (c2w c) +{-# INLINE filterNotChar #-} + +-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of +-- corresponding pairs of Chars. If one input ByteString is short, +-- excess elements of the longer ByteString are discarded. This is +-- equivalent to a pair of 'unpack' operations, and so space +-- usage may be large for multi-megabyte ByteStrings +zip :: ByteString -> ByteString -> [(Char,Char)] +zip ps qs + | B.null ps || B.null qs = [] + | otherwise = (unsafeHead ps, unsafeHead qs) : zip (B.unsafeTail ps) (B.unsafeTail qs) + +-- | 'zipWith' generalises 'zip' by zipping with the function given as +-- the first argument, instead of a tupling function. For example, +-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list +-- of corresponding sums. +zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a] +zipWith f = B.zipWith ((. w2c) . f . w2c) + +-- | 'unzip' transforms a list of pairs of Chars into a pair of +-- ByteStrings. Note that this performs two 'pack' operations. +unzip :: [(Char,Char)] -> (ByteString,ByteString) +unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) +{-# INLINE unzip #-} + +-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits +-- the check for the empty case, which is good for performance, but +-- there is an obligation on the programmer to provide a proof that the +-- ByteString is non-empty. +unsafeHead :: ByteString -> Char +unsafeHead = w2c . B.unsafeHead +{-# INLINE unsafeHead #-} + +-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a Char. +-- This omits the bounds check, which means there is an accompanying +-- obligation on the programmer to ensure the bounds are checked in some +-- other way. +unsafeIndex :: ByteString -> Int -> Char +unsafeIndex = (w2c .) . B.unsafeIndex +{-# INLINE unsafeIndex #-} + +-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. +w2c :: Word8 -> Char +#if !defined(__GLASGOW_HASKELL__) +w2c = chr . fromIntegral +#else +w2c = unsafeChr . fromIntegral +#endif +{-# INLINE w2c #-} + +-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and +-- silently truncates to 8 bits Chars > '\255'. It is provided as +-- convenience for ByteString construction. +c2w :: Char -> Word8 +c2w = fromIntegral . ord +{-# INLINE c2w #-} + +-- --------------------------------------------------------------------- +-- Things that depend on the encoding + +-- | 'breakSpace' returns the pair of ByteStrings when the argument is +-- broken at the first whitespace byte. I.e. +-- +-- > break isSpace == breakSpace +-- +breakSpace :: ByteString -> (ByteString,ByteString) +breakSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + i <- firstspace (p `plusPtr` s) 0 l + return $ case () of {_ + | i == 0 -> (empty, PS x s l) + | i == l -> (PS x s l, empty) + | otherwise -> (PS x s i, PS x (s+i) (l-i)) + } +{-# INLINE breakSpace #-} + +firstspace :: Ptr Word8 -> Int -> Int -> IO Int +STRICT3(firstspace) +firstspace ptr n m + | n >= m = return n + | otherwise = do w <- peekByteOff ptr n + if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n + +-- | 'dropSpace' efficiently returns the 'ByteString' argument with +-- white space Chars removed from the front. It is more efficient than +-- calling dropWhile for removing whitespace. I.e. +-- +-- > dropWhile isSpace == dropSpace +-- +dropSpace :: ByteString -> ByteString +dropSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + i <- firstnonspace (p `plusPtr` s) 0 l + return $ if i == l then empty else PS x (s+i) (l-i) +{-# INLINE dropSpace #-} + +firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int +STRICT3(firstnonspace) +firstnonspace ptr n m + | n >= m = return n + | otherwise = do w <- peekElemOff ptr n + if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n + +-- | 'dropSpaceEnd' efficiently returns the 'ByteString' argument with +-- white space removed from the end. I.e. +-- +-- > reverse . (dropWhile isSpace) . reverse == dropSpaceEnd +-- +-- but it is more efficient than using multiple reverses. +-- +dropSpaceEnd :: ByteString -> ByteString +dropSpaceEnd (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + i <- lastnonspace (p `plusPtr` s) (l-1) + return $ if i == (-1) then empty else PS x s (i+1) +{-# INLINE dropSpaceEnd #-} + +lastnonspace :: Ptr Word8 -> Int -> IO Int +STRICT2(lastnonspace) +lastnonspace ptr n + | n < 0 = return n + | otherwise = do w <- peekElemOff ptr n + if isSpaceWord8 w then lastnonspace ptr (n-1) else return n + +-- | 'lines' breaks a ByteString up into a list of ByteStrings at +-- newline Chars. The resulting strings do not contain newlines. +lines :: ByteString -> [ByteString] +lines ps + | null ps = [] + | otherwise = case search ps of + Nothing -> [ps] + Just n -> take n ps : lines (drop (n+1) ps) + where search = elemIndex '\n' +{-# INLINE lines #-} + +{- +-- Just as fast, but more complex. Should be much faster, I thought. +lines :: ByteString -> [ByteString] +lines (PS _ _ 0) = [] +lines (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do + let ptr = p `plusPtr` s + + STRICT1(loop) + loop n = do + let q = memchr (ptr `plusPtr` n) 0x0a (fromIntegral (l-n)) + if q == nullPtr + then return [PS x (s+n) (l-n)] + else do let i = q `minusPtr` ptr + ls <- loop (i+1) + return $! PS x (s+n) (i-n) : ls + loop 0 +-} + +-- | 'unlines' is an inverse operation to 'lines'. It joins lines, +-- after appending a terminating newline to each. +unlines :: [ByteString] -> ByteString +unlines [] = empty +unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space + where nl = packChar '\n' + +-- | 'words' breaks a ByteString up into a list of words, which +-- were delimited by Chars representing white space. And +-- +-- > tokens isSpace = words +-- +words :: ByteString -> [ByteString] +words = B.tokens isSpaceWord8 + +-- | The 'unwords' function is analogous to the 'unlines' function, on words. +unwords :: [ByteString] -> ByteString +unwords = join (packChar ' ') + +-- | /O(n)/ Indicies of newlines. Shorthand for +-- +-- > elemIndices '\n' +-- +lineIndices :: ByteString -> [Int] +lineIndices = elemIndices '\n' + +-- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on +-- newline Chars. However, unlike the Prelude functions, 'lines\'' and +-- 'unlines\'' correctly reconstruct lines that are missing terminating +-- newlines characters. I.e. +-- +-- > unlines (lines "a\nb\nc") == "a\nb\nc\n" +-- > unlines' (lines' "a\nb\nc") == "a\nb\nc" +-- +-- Note that this means: +-- +-- > lines "a\nb\nc\n" == ["a","b","c"] +-- > lines' "a\nb\nc\n" == ["a","b","c",""] +-- +lines' :: ByteString -> [ByteString] +lines' ps = ps `seq` case elemIndex '\n' ps of + Nothing -> [ps] + Just n -> take n ps : lines' (drop (n+1) ps) + +-- | 'linesCRLF\'' behaves like 'lines\'', but breaks on (\\cr?\\lf) +linesCRLF' :: ByteString -> [ByteString] +linesCRLF' ps = ps `seq` case elemIndex '\n' ps of + Nothing -> [ps] + Just 0 -> empty : linesCRLF' (drop 1 ps) + Just n -> let k = if ps `unsafeIndex` (n-1) == '\r' then n-1 else n + in take k ps : linesCRLF' (drop (n+1) ps) + +-- | 'unlines\'' behaves like 'unlines', except that it also correctly +-- retores lines that do not have terminating newlines (see the +-- description for 'lines\''). +-- +unlines' :: [ByteString] -> ByteString +unlines' ss = concat $ intersperse_newlines ss + where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s) + intersperse_newlines s = s + newline = packChar '\n' + +-- | 'unlines\'' behaves like 'unlines', except that it also correctly +-- retores lines that do not have terminating newlines (see the +-- description for 'lines\''). Uses CRLF instead of LF. +-- +unlinesCRLF' :: [ByteString] -> ByteString +unlinesCRLF' ss = concat $ intersperse_newlines ss + where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s) + intersperse_newlines s = s + newline = pack "\r\n" + +-- | 'words\'' behaves like 'words', with the exception that it produces +-- output on ByteStrings with trailing whitespace that can be +-- correctly inverted by 'unwords'. I.e. +-- +-- > words "a b c " == ["a","b","c"] +-- > words' "a b c " == ["a","b","c",""] +-- +-- > unwords $ words "a b c " == "a b c" +-- > unwords $ words' "a b c " == "a b c " +-- +words' :: ByteString -> [ByteString] +words' = B.splitWith isSpaceWord8 + +-- | 'unwords\'' behaves like 'unwords'. It is provided for consistency +-- with the other invertable words and lines functions. +unwords' :: [ByteString] -> ByteString +unwords' = unwords + +-- | 'betweenLines' returns the ByteString between the two lines given, +-- or Nothing if they do not appear. The returned string is the first +-- and shortest string such that the line before it is the given first +-- line, and the line after it is the given second line. +betweenLines :: ByteString -- ^ First line to look for + -> ByteString -- ^ Second line to look for + -> ByteString -- ^ 'ByteString' to look in + -> Maybe (ByteString) + +betweenLines start end ps = + case P.break (start ==) (lines ps) of + (_, _:rest@(PS ps1 s1 _:_)) -> + case P.break (end ==) rest of + (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1) + _ -> Nothing + _ -> Nothing + +-- --------------------------------------------------------------------- +-- Reading from ByteStrings + +-- | readInt skips any whitespace at the beginning of its argument, and +-- reads an Int from the beginning of the ByteString. If there is no +-- integer at the beginning of the string, it returns Nothing, otherwise +-- it just returns the int read, and the rest of the string. +readInt :: ByteString -> Maybe (Int, ByteString) +readInt p@(PS x s l) = inlinePerformIO $ useAsCString p $ \cstr -> + with (castPtr cstr) $ \endpp -> do + val <- c_strtol (castPtr cstr) endpp 0 + skipped <- (`minusPtr` cstr) `fmap` peek endpp + return $ if skipped == 0 + then Nothing + else Just (fromIntegral val, PS x (s+skipped) (l-skipped)) + +-- | unsafeReadInt is like readInt, but requires a null terminated +-- ByteString. It avoids a copy if this is the case. It returns the Int +-- read, if any, and the rest of the string. +unsafeReadInt :: ByteString -> Maybe (Int, ByteString) +unsafeReadInt p@(PS x s l) = inlinePerformIO $ unsafeUseAsCString p $ \cstr -> + with (castPtr cstr) $ \endpp -> do + val <- c_strtol (castPtr cstr) endpp 0 + skipped <- (`minusPtr` cstr) `fmap` peek endpp + return $ if skipped == 0 + then Nothing + else Just (fromIntegral val, PS x (s+skipped) (l-skipped)) + +foreign import ccall unsafe "stdlib.h strtol" c_strtol + :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong + +{- +-- +-- not quite there yet +-- +readInt :: ByteString -> Maybe (Int, ByteString) +readInt = go 0 + where + STRICT2(go) + go i ps + | B.null ps = Nothing + | x == '-' = neg 0 xs + | otherwise = pos (parse x) xs + where (x, xs) = (ps `unsafeIndex` 0, unsafeTail ps) + + STRICT2(neg) + neg n qs | isSpace x = return $ Just ((i-n),xs) + | otherwise = neg (parse x + (10 * n)) xs + where (x, xs) = (qs `unsafeIndex` 0, unsafeTail qs) + + STRICT2(pos) + pos n qs | isSpace x = go (i+n) xs + | otherwise = pos (parse x + (10 * n)) xs + where (x, xs) = (qs `unsafeIndexWord8` 0, unsafeTail qs) + + parse w = fromIntegral (w - 48) :: Int + {-# INLINE parse #-} +-} + +-- --------------------------------------------------------------------- +-- Internals + +-- Just like inlinePerformIO, but we inline it. Big performance gains as +-- it exposes lots of things to further inlining +-- +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +#if defined(__GLASGOW_HASKELL__) +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r +#else +inlinePerformIO = unsafePerformIO +#endif + +-- ordered by frequency +-- Idea from Ketil +isSpaceWord8 :: Word8 -> Bool +isSpaceWord8 w = case w of + 0x20 -> True -- SPACE + 0x0A -> True -- LF, \n + 0x09 -> True -- HT, \t + 0x0C -> True -- FF, \f + 0x0D -> True -- CR, \r + 0x0B -> True -- VT, \v + _ -> False +{-# INLINE isSpaceWord8 #-} + diff --git a/Makefile b/Makefile index db0aa2f..d00bffe 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ ALL_DIRS = \ Control/Monad \ Control/Monad/ST \ Data \ + Data/ByteString \ Data/Generics \ Data/Array \ Data/Array/IO \ diff --git a/base.cabal b/base.cabal index 4402bef..4aead21 100644 --- a/base.cabal +++ b/base.cabal @@ -38,6 +38,8 @@ exposed-modules: Data.Array.Unboxed, Data.Bits, Data.Bool, + Data.ByteString, + Data.ByteString.Char8, Data.Char, Data.Complex, Data.Dynamic, diff --git a/cbits/fpstring.c b/cbits/fpstring.c new file mode 100644 index 0000000..3d32083 --- /dev/null +++ b/cbits/fpstring.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2003 David Roundy + * Copyright (c) 2005-2006 Don Stewart + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the authors or the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +/* copy a string in reverse */ +void reverse(unsigned char *dest, unsigned char *from, int len) +{ + unsigned char *p, *q; + p = from + len - 1; + q = dest; + + while (p >= from) + *q++ = *p--; +} + +/* compare bytes ascii-wise */ +static int cmp(const void *p, const void *q) { + return (*(unsigned char *)p - *(unsigned char *)q); +} + +/* quicksort wrapper */ +void my_qsort(unsigned char *base, size_t size) +{ + qsort(base, size, sizeof(char), cmp); +} + +/* duplicate a string, interspersing the character through the elements + of the duplicated string */ +void intersperse(unsigned char *dest, unsigned char *from, int len, char c) +{ + unsigned char *p, *q; + p = from; + q = dest; + while (p < from + len - 1) { + *q++ = *p++; + *q++ = c; + } + *q = *p; +} + +/* find maximum char in a packed string */ +unsigned char maximum(unsigned char *p, int len) +{ + unsigned char *q, c = *p; + for (q = p; q < p + len; q++) + if (*q > c) + c = *q; + return c; +} + +/* find minimum char in a packed string */ +unsigned char minimum(unsigned char *p, int len) +{ + unsigned char *q, c = *p; + for (q = p; q < p + len; q++) + if (*q < c) + c = *q; + return c; +} diff --git a/include/fpstring.h b/include/fpstring.h new file mode 100644 index 0000000..baab811 --- /dev/null +++ b/include/fpstring.h @@ -0,0 +1,9 @@ +#include + +char *my_mmap(int len, int fd); + +void reverse(unsigned char *dest, unsigned char *from, int len); +void my_qsort(unsigned char *base, size_t size); +void intersperse(unsigned char *dest, unsigned char *from, int len, char c); +unsigned char maximum(unsigned char *p, int len); +unsigned char minimum(unsigned char *p, int len); diff --git a/package.conf.in b/package.conf.in index 78a74c3..98d6713 100644 --- a/package.conf.in +++ b/package.conf.in @@ -36,6 +36,8 @@ exposed-modules: Data.Array.Unboxed, Data.Bits, Data.Bool, + Data.ByteString, + Data.ByteString.Char8, Data.Char, Data.Complex, Data.Dynamic,