From: Don Stewart <dons@cse.unsw.edu.au>
Date: Fri, 28 Apr 2006 13:07:18 +0000 (+0000)
Subject: Import Data.ByteString from fps 0.5.
X-Git-Tag: 2007-09-13~385
X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=09521ae6f260453e810280f09d85a44ec3e54089;p=ghc-base.git

Import Data.ByteString from fps 0.5.
Fast, packed byte vectors, providing a better PackedString.
---

diff --git a/Data/ByteString.hs b/Data/ByteString.hs
new file mode 100644
index 0000000..7350eb8
--- /dev/null
+++ b/Data/ByteString.hs
@@ -0,0 +1,1921 @@
+{-# OPTIONS_GHC -cpp -fffi #-}
+--
+-- Module      : ByteString
+-- Copyright   : (c) The University of Glasgow 2001,
+--               (c) David Roundy 2003-2005,
+--               (c) Simon Marlow 2005
+--               (c) Don Stewart 2005-2006
+--               (c) Bjorn Bringert 2006
+-- License     : BSD-style
+--
+-- Maintainer  : dons@cse.unsw.edu.au
+-- Stability   : experimental
+-- Portability : portable, requires ffi and cpp
+-- Tested with : GHC 6.4.1 and Hugs March 2005
+-- 
+
+--
+-- | A time and space-efficient implementation of byte vectors using
+-- packed Word8 arrays, suitable for high performance use, both in terms
+-- of large data quantities, or high speed requirements. Byte vectors
+-- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
+-- be passed between C and Haskell with little effort.
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with Prelude functions.  eg.
+--
+-- > import qualified Data.ByteString as B
+--
+-- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
+-- UArray by Simon Marlow. Rewritten to support slices and use
+-- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
+--
+
+module Data.ByteString (
+
+        -- * The @ByteString@ type
+        ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
+
+        -- * Introducing and eliminating 'ByteString's
+        empty,                  -- :: ByteString
+        packByte,               -- :: Word8   -> ByteString
+        pack,                   -- :: [Word8] -> ByteString
+        unpack,                 -- :: ByteString -> [Word8]
+        packWith,               -- :: (a -> Word8) -> [a] -> ByteString
+        unpackWith,             -- :: (Word8 -> a) -> ByteString -> [a]
+
+        -- * Basic interface
+        cons,                   -- :: Word8 -> ByteString -> ByteString
+        snoc,                   -- :: Word8 -> ByteString -> ByteString
+        null,                   -- :: ByteString -> Bool
+        length,                 -- :: ByteString -> Int
+        head,                   -- :: ByteString -> Word8
+        tail,                   -- :: ByteString -> ByteString
+        last,                   -- :: ByteString -> Word8
+        init,                   -- :: ByteString -> ByteString
+        append,                 -- :: ByteString -> ByteString -> ByteString
+
+        -- * Special ByteStrings
+        inits,                  -- :: ByteString -> [ByteString]
+        tails,                  -- :: ByteString -> [ByteString]
+        elems,                  -- :: ByteString -> [ByteString]
+
+        -- * Transformating ByteStrings
+        map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
+        reverse,                -- :: ByteString -> ByteString
+        intersperse,            -- :: Word8 -> ByteString -> ByteString
+        transpose,              -- :: [ByteString] -> [ByteString]
+
+        -- * Reducing 'ByteString's
+        foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
+        foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
+        foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+        foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+
+        -- ** Special folds
+        concat,                 -- :: [ByteString] -> ByteString
+        concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
+        any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
+        all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
+        maximum,                -- :: ByteString -> Word8
+        minimum,                -- :: ByteString -> Word8
+        mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
+        hash,                   -- :: ByteString -> Int32
+
+        -- * Generating and unfolding ByteStrings
+        replicate,              -- :: Int -> Word8 -> ByteString
+        unfoldrN,               -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
+
+        -- * Substrings
+
+        -- ** Breaking strings
+        take,                   -- :: Int -> ByteString -> ByteString
+        drop,                   -- :: Int -> ByteString -> ByteString
+        splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
+        takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
+        dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
+        break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+        span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+        spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+
+        -- ** Breaking and dropping on specific bytes
+        breakByte,              -- :: Word8 -> ByteString -> (ByteString, ByteString)
+        breakFirst,             -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+        breakLast,              -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+
+        -- ** Breaking into many substrings
+        split,                  -- :: Word8 -> ByteString -> [ByteString]
+        splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
+        tokens,                 -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
+
+        -- ** Joining strings
+        join,                   -- :: ByteString -> [ByteString] -> ByteString
+        joinWithByte,           -- :: Word8 -> ByteString -> ByteString -> ByteString
+
+        -- * Indexing ByteStrings
+        index,                  -- :: ByteString -> Int -> Word8
+        elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
+        elemIndices,            -- :: Word8 -> ByteString -> [Int]
+        elemIndexLast,          -- :: Word8 -> ByteString -> Maybe Int
+        findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
+        findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
+        count,                  -- :: Word8 -> ByteString -> Int
+
+        -- * Ordered ByteStrings
+        sort,                   -- :: ByteString -> ByteString
+
+        -- * Searching ByteStrings
+
+        -- ** Searching by equality
+        -- | These functions use memchr(3) to efficiently search the ByteString
+
+        elem,                   -- :: Word8 -> ByteString -> Bool
+        notElem,                -- :: Word8 -> ByteString -> Bool
+        filterByte,             -- :: Word8 -> ByteString -> ByteString
+        filterNotByte,          -- :: Word8 -> ByteString -> ByteString
+
+        -- ** Searching with a predicate
+        filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
+        find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
+
+        -- ** Prefixes and suffixes
+        -- | These functions use memcmp(3) to efficiently compare substrings
+        isPrefixOf,             -- :: ByteString -> ByteString -> Bool
+        isSuffixOf,             -- :: ByteString -> ByteString -> Bool
+
+        -- ** Search for arbitrary substrings
+        isSubstringOf,          -- :: ByteString -> ByteString -> Bool
+        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
+        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
+
+        -- * Zipping and unzipping ByteStrings
+        zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
+        zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
+        unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
+
+        -- * Unchecked access
+        unsafeHead,             -- :: ByteString -> Word8
+        unsafeTail,             -- :: ByteString -> ByteString
+        unsafeIndex,            -- :: ByteString -> Int -> Word8
+
+        -- * Low level introduction and elimination
+        generate,               -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
+        create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
+        fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
+        toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
+        skipIndex,              -- :: ByteString -> Int
+
+        -- ** Packing CStrings and pointers
+        packCString,            -- :: CString -> ByteString
+        packCStringLen,         -- :: CString -> ByteString
+        packMallocCString,      -- :: CString -> ByteString
+
+#if defined(__GLASGOW_HASKELL__)
+        packCStringFinalizer,   -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
+        packAddress,            -- :: Addr# -> ByteString
+        unsafePackAddress,      -- :: Int -> Addr# -> ByteString
+        unsafeFinalize,         -- :: ByteString -> IO ()
+#endif
+
+        -- ** Using ByteStrings as CStrings
+        useAsCString,           -- :: ByteString -> (CString -> IO a) -> IO a
+        unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
+        unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
+
+        -- ** Copying ByteStrings
+        -- | These functions perform memcpy(3) operations
+        copy,                   -- :: ByteString -> ByteString
+        copyCString,            -- :: CString -> ByteString
+        copyCStringLen,         -- :: CStringLen -> ByteString
+
+        -- * I\/O with @ByteString@s
+
+        -- ** Standard input and output
+
+#if defined(__GLASGOW_HASKELL__)
+        getLine,                -- :: IO ByteString
+#endif
+        getContents,            -- :: IO ByteString
+        putStr,                 -- :: ByteString -> IO ()
+        putStrLn,               -- :: ByteString -> IO ()
+
+        -- ** Files
+        readFile,               -- :: FilePath -> IO ByteString
+        writeFile,              -- :: FilePath -> ByteString -> IO ()
+
+        -- ** I\/O with Handles
+#if defined(__GLASGOW_HASKELL__)
+        getArgs,                -- :: IO [ByteString]
+        hGetLine,               -- :: Handle -> IO ByteString
+        hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
+#endif
+        hGetContents,           -- :: Handle -> IO ByteString
+        hGet,                   -- :: Handle -> Int -> IO ByteString
+        hPut,                   -- :: Handle -> ByteString -> IO ()
+
+#if defined(__GLASGOW_HASKELL__)
+        -- * Miscellaneous
+        unpackList, -- eek, otherwise it gets thrown away by the simplifier
+#endif
+
+  ) where
+
+import qualified Prelude as P
+import Prelude hiding           (reverse,head,tail,last,init,null
+                                ,length,map,lines,foldl,foldr,unlines
+                                ,concat,any,take,drop,splitAt,takeWhile
+                                ,dropWhile,span,break,elem,filter,maximum
+                                ,minimum,all,concatMap,foldl1,foldr1
+                                ,readFile,writeFile,replicate
+                                ,getContents,getLine,putStr,putStrLn
+                                ,zip,zipWith,unzip,notElem)
+
+import qualified Data.List as List
+
+import Data.Char
+import Data.Word                (Word8)
+import Data.Int                 (Int32)
+import Data.Bits                (rotateL)
+import Data.Maybe               (listToMaybe)
+import Data.Array               (listArray)
+import qualified Data.Array as Array ((!))
+
+import Control.Exception        (bracket)
+
+import Foreign.C.Types          (CSize, CInt)
+import Foreign.C.String         (CString, CStringLen)
+import Foreign.Storable
+import Foreign.ForeignPtr
+import Foreign.Ptr
+import Foreign.Marshal.Array
+
+import System.IO                (stdin,stdout,hClose,hFileSize
+                                ,hGetBuf,hPutBuf,openBinaryFile
+                                ,Handle,IOMode(..))
+
+#if defined(__GLASGOW_HASKELL__)
+
+import System.IO                (hGetBufNonBlocking)
+
+import qualified Foreign.Concurrent as FC (newForeignPtr)
+
+import Data.Generics            (Data(..), Typeable(..))
+
+import System.IO.Error          (isEOFError)
+import Foreign.Marshal          (alloca)
+
+import GHC.Handle
+import GHC.Prim
+import GHC.Base                 (build, unsafeChr)
+import GHC.Word hiding (Word8)
+import GHC.Ptr                  (Ptr(..))
+import GHC.ST                   (ST(..))
+import GHC.IOBase
+
+#else
+
+import System.IO.Unsafe
+
+#endif
+
+-- -----------------------------------------------------------------------------
+--
+-- Useful macros, until we have bang patterns
+--
+
+#define STRICT1(f) f a | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
+#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
+
+-- -----------------------------------------------------------------------------
+
+-- | A space-efficient representation of a Word8 vector, supporting many
+-- efficient operations.  A 'ByteString' contains 8-bit characters only.
+--
+-- Instances of Eq, Ord, Read, Show, Data, Typeable
+--
+data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
+                     {-# UNPACK #-} !Int
+                     {-# UNPACK #-} !Int
+
+#if defined(__GLASGOW_HASKELL__)
+    deriving (Data, Typeable)
+#endif
+
+instance Eq  ByteString
+    where (==)    = eq
+
+instance Ord ByteString
+    where compare = compareBytes
+
+instance Show ByteString where
+    showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
+
+instance Read ByteString where
+    readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
+
+{-
+instance Arbitrary PackedString where
+    arbitrary = P.pack `fmap` arbitrary
+    coarbitrary s = coarbitrary (P.unpack s)
+-}
+
+-- | /O(n)/ Equality on the 'ByteString' type.
+eq :: ByteString -> ByteString -> Bool
+eq a b = (compareBytes a b) == EQ
+{-# INLINE eq #-}
+
+-- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. 
+compareBytes :: ByteString -> ByteString -> Ordering
+compareBytes (PS _ _ 0) (PS _ _ 0)       = EQ    -- short cut for empty strings
+compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
+    withForeignPtr x1 $ \p1 ->
+    withForeignPtr x2 $ \p2 -> do
+        i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
+        return $ case i `compare` 0 of
+                    EQ  -> l1 `compare` l2
+                    x   -> x
+{-# INLINE compareBytes #-}
+
+{-
+--
+-- About 4x slower over 32M
+--
+compareBytes :: ByteString -> ByteString -> Ordering
+compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
+    withForeignPtr fp1 $ \p1 ->
+        withForeignPtr fp2 $ \p2 ->
+            cmp (p1 `plusPtr` off1)
+                (p2 `plusPtr` off2) 0 len1 len2
+
+cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
+STRICT5(cmp)
+cmp p1 p2 n len1 len2
+      | n == len1 = if n == len2 then return EQ else return LT
+      | n == len2 = return GT
+      | otherwise = do
+          (a :: Word8) <- peekByteOff p1 n
+          (b :: Word8) <- peekByteOff p2 n
+          case a `compare` b of
+                EQ -> cmp p1 p2 (n+1) len1 len2
+                LT -> return LT
+                GT -> return GT
+{-# INLINE compareBytes #-}
+-}
+
+-- -----------------------------------------------------------------------------
+-- Introducing and eliminating 'ByteString's
+
+-- | /O(1)/ The empty 'ByteString'
+empty :: ByteString
+empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
+{-# NOINLINE empty #-}
+
+-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
+packByte :: Word8 -> ByteString
+packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
+    withForeignPtr fp $ \p -> poke p c
+    return $ PS fp 0 1
+{-# NOINLINE packByte #-}
+
+-- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 
+--
+-- For applications with large numbers of string literals, pack can be a
+-- bottleneck. In such cases, consider using packAddress (GHC only).
+pack :: [Word8] -> ByteString
+
+#if !defined(__GLASGOW_HASKELL__)
+
+pack str = create (P.length str) $ \p -> go p str
+    where
+        go _ []     = return ()
+        go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
+
+#else /* hack away */
+
+pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
+    where
+        go _ _ []        = return ()
+        go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
+
+        writeByte p i c = ST $ \s# ->
+            case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
+
+#endif
+
+-- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
+unpack :: ByteString -> [Word8]
+
+#if !defined(__GLASGOW_HASKELL__)
+
+unpack (PS _  _ 0) = []
+unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
+        go (p `plusPtr` s) (l - 1) []
+    where
+        STRICT3(go)
+        go p 0 acc = peek p          >>= \e -> return (e : acc)
+        go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
+{-# INLINE unpack #-}
+
+#else
+
+unpack ps = build (unpackFoldr ps)
+{-# INLINE unpack #-}
+
+unpackList :: ByteString -> [Word8]
+unpackList (PS fp off len) = withPtr fp $ \p -> do
+    let STRICT3(loop)
+        loop _ (-1) acc = return acc
+        loop q n acc = do
+           a <- peekByteOff q n
+           loop q (n-1) (a : acc)
+    loop (p `plusPtr` off) (len-1) []
+
+{-# RULES
+"unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
+ #-}
+
+unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
+unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
+    let STRICT3(loop)
+        loop _ (-1) acc = return acc
+        loop q n    acc = do
+           a <- peekByteOff q n
+           loop q (n-1) (a `f` acc)
+    loop (p `plusPtr` off) (len-1) ch
+{-# INLINE [0] unpackFoldr #-}
+
+#endif
+
+------------------------------------------------------------------------
+
+-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
+-- conversion function
+packWith :: (a -> Word8) -> [a] -> ByteString
+packWith k str = create (P.length str) $ \p -> go p str
+    where
+        STRICT2(go)
+        go _ []     = return ()
+        go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
+{-# INLINE packWith #-}
+{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
+
+-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
+unpackWith :: (Word8 -> a) -> ByteString -> [a]
+unpackWith _ (PS _  _ 0) = []
+unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
+        go (p `plusPtr` s) (l - 1) []
+    where
+        STRICT3(go)
+        go p 0 acc = peek p          >>= \e -> return (k e : acc)
+        go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
+{-# INLINE unpackWith #-}
+{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
+
+-- ---------------------------------------------------------------------
+-- Basic interface
+
+-- | /O(1)/ Test whether a ByteString is empty.
+null :: ByteString -> Bool
+null (PS _ _ l) = l == 0
+{-# INLINE null #-}
+
+-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
+length :: ByteString -> Int
+length (PS _ _ l) = l
+{-# INLINE length #-}
+
+-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
+-- complexity, as it requires a memcpy.
+cons :: Word8 -> ByteString -> ByteString
+cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
+        memcpy (p `plusPtr` 1) (f `plusPtr` s) l
+        poke p c
+{-# INLINE cons #-}
+
+-- | /O(n)/ Append a byte to the end of a 'ByteString'
+snoc :: ByteString -> Word8 -> ByteString
+snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
+        memcpy p (f `plusPtr` s) l
+        poke (p `plusPtr` l) c
+{-# INLINE snoc #-}
+
+-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
+head :: ByteString -> Word8
+head ps@(PS x s _)
+    | null ps   = errorEmptyList "head"
+    | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
+{-# INLINE head #-}
+
+-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
+tail :: ByteString -> ByteString
+tail (PS p s l)
+    | l <= 0    = errorEmptyList "tail"
+    | otherwise = PS p (s+1) (l-1)
+{-# INLINE tail #-}
+
+-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
+last :: ByteString -> Word8
+last ps@(PS x s l)
+    | null ps   = errorEmptyList "last"
+    | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
+{-# INLINE last #-}
+
+-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
+init :: ByteString -> ByteString
+init (PS p s l)
+    | l <= 0    = errorEmptyList "init"
+    | otherwise = PS p s (l-1)
+{-# INLINE init #-}
+
+-- | /O(n)/ Append two ByteStrings
+append :: ByteString -> ByteString -> ByteString
+append xs ys | null xs   = ys
+             | null ys   = xs
+             | otherwise = concat [xs,ys]
+{-# INLINE append #-}
+
+{-
+--
+-- About 30% faster, but allocating in a big chunk isn't good for memory use
+--
+append :: ByteString -> ByteString -> ByteString
+append xs@(PS ffp s l) ys@(PS fgp t m)
+    | null xs   = ys
+    | null ys   = xs
+    | otherwise = create len $ \ptr ->
+        withForeignPtr ffp $ \fp ->
+        withForeignPtr fgp $ \gp -> do
+            memcpy ptr               (fp `plusPtr` s) l
+            memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
+        where len = length xs + length ys
+-}
+
+-- ---------------------------------------------------------------------
+-- Transformations
+
+-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
+-- element of @xs@
+--
+map :: (Word8 -> Word8) -> ByteString -> ByteString
+map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
+    new_fp <- mallocByteString len
+    withForeignPtr new_fp $ \new_p -> do
+        map_ f (len-1) (p `plusPtr` start) new_p
+        return (PS new_fp 0 len)
+{-# INLINE map #-}
+
+map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
+STRICT4(map_)
+map_ f n p1 p2
+   | n < 0 = return ()
+   | otherwise = do
+        x <- peekByteOff p1 n
+        pokeByteOff p2 n (f x)
+        map_ f (n-1) p1 p2
+{-# INLINE map_ #-}
+
+-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
+reverse :: ByteString -> ByteString
+reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
+        c_reverse p (f `plusPtr` s) l
+
+-- reverse = pack . P.reverse . unpack
+
+-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
+-- 'ByteString' and \`intersperses\' that byte between the elements of
+-- the 'ByteString'.  It is analogous to the intersperse function on
+-- Lists.
+intersperse :: Word8 -> ByteString -> ByteString
+intersperse c ps@(PS x s l)
+    | length ps < 2  = ps
+    | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
+        c_intersperse p (f `plusPtr` s) l c
+
+-- intersperse c = pack . List.intersperse c . unpack
+
+-- | The 'transpose' function transposes the rows and columns of its
+-- 'ByteString' argument.
+transpose :: [ByteString] -> [ByteString]
+transpose ps = P.map pack (List.transpose (P.map unpack ps))
+
+-- ---------------------------------------------------------------------
+-- Reducing 'ByteString's
+
+-- | 'foldl', applied to a binary operator, a starting value (typically
+-- the left-identity of the operator), and a ByteString, reduces the
+-- ByteString using the binary operator, from left to right.
+foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
+foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+        lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+    where
+        STRICT3(lgo)
+        lgo z p q | p == q    = return z
+                  | otherwise = do c <- peek p
+                                   lgo (f z c) (p `plusPtr` 1) q
+
+-- | 'foldr', applied to a binary operator, a starting value
+-- (typically the right-identity of the operator), and a ByteString,
+-- reduces the ByteString using the binary operator, from right to left.
+foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
+foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+        go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+    where
+        STRICT2(go)
+        go p q | p == q    = return z
+               | otherwise = do c  <- peek p
+                                ws <- go (p `plusPtr` 1) q
+                                return $ c `k` ws
+
+-- | 'foldl1' is a variant of 'foldl' that has no starting value
+-- argument, and thus must be applied to non-empty 'ByteStrings'.
+foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+foldl1 f ps
+    | null ps   = errorEmptyList "foldl1"
+    | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
+
+-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
+-- and thus must be applied to non-empty 'ByteString's
+foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
+foldr1 f ps
+    | null ps        = errorEmptyList "foldr1"
+    | otherwise      = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
+
+-- ---------------------------------------------------------------------
+-- Special folds
+
+-- | /O(n)/ Concatenate a list of ByteStrings.
+concat :: [ByteString] -> ByteString
+concat []     = empty
+concat [ps]   = ps
+concat xs     = inlinePerformIO $ do
+    let start_size = 1024
+    p <- mallocArray start_size
+    f p 0 1024 xs
+
+    where f ptr len _ [] = do
+                ptr' <- reallocArray ptr (len+1)
+                poke (ptr' `plusPtr` len) (0::Word8)    -- XXX so CStrings work
+                fp   <- newForeignFreePtr ptr'
+                return $ PS fp 0 len
+
+          f ptr len to_go pss@(PS p s l:pss')
+           | l <= to_go = do withForeignPtr p $ \pf ->
+                                 memcpy (ptr `plusPtr` len)
+                                          (pf `plusPtr` s) l
+                             f ptr (len + l) (to_go - l) pss'
+
+           | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
+                            ptr' <- reallocArray ptr new_total
+                            f ptr' len (new_total - len) pss
+
+-- | Map a function over a 'ByteString' and concatenate the results
+concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
+concatMap f = foldr (append . f) empty
+
+-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
+-- any element of the 'ByteString' satisfies the predicate.
+any :: (Word8 -> Bool) -> ByteString -> Bool
+any _ (PS _ _ 0) = False
+any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+        go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+    where
+        STRICT2(go)
+        go p q | p == q    = return False
+               | otherwise = do c <- peek p
+                                if f c then return True
+                                       else go (p `plusPtr` 1) q
+
+-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
+-- if all elements of the 'ByteString' satisfy the predicate.
+all :: (Word8 -> Bool) -> ByteString -> Bool
+all _ (PS _ _ 0) = True
+all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
+        go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
+    where
+        STRICT2(go)
+        go p q | p == q     = return True  -- end of list
+               | otherwise  = do c <- peek p
+                                 if f c
+                                    then go (p `plusPtr` 1) q
+                                    else return False
+
+-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
+maximum :: ByteString -> Word8
+maximum xs@(PS x s l)
+    | null xs   = errorEmptyList "maximum"
+    | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
+                    return $ c_maximum (p `plusPtr` s) l
+
+-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
+minimum :: ByteString -> Word8
+minimum xs@(PS x s l)
+    | null xs   = errorEmptyList "minimum"
+    | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
+                    return $ c_minimum (p `plusPtr` s) l
+
+{-
+maximum xs@(PS x s l)
+    | null xs   = errorEmptyList "maximum"
+    | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
+                        w <- peek p
+                        maximum_ (p `plusPtr` s) 0 l w
+{-# INLINE maximum #-}
+
+maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
+STRICT4(maximum_)
+maximum_ ptr n m c
+    | n >= m    = return c
+    | otherwise = do w <- peekByteOff ptr n
+                     maximum_ ptr (n+1) m (if w > c then w else c)
+
+minimum xs@(PS x s l)
+    | null xs   = errorEmptyList "minimum"
+    | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
+                        w <- peek p
+                        minimum_ (p `plusPtr` s) 0 l w
+{-# INLINE minimum #-}
+
+minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
+STRICT4(minimum_)
+minimum_ ptr n m c
+    | n >= m    = return c
+    | otherwise = do w <- peekByteOff ptr n
+                     minimum_ ptr (n+1) m (if w < c then w else c)
+-}
+-- | /O(n)/ map Word8 functions, provided with the index at each position
+mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
+mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
+    go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
+  where
+    go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
+    STRICT4(go)
+    go n f t p | f == p    = return ()
+               | otherwise = do w <- peek f
+                                ((poke t) . k n) w
+                                go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
+
+-- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key.
+hash :: ByteString -> Int32
+hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
+    go (0 :: Int32) (p `plusPtr` s) l
+  where
+    go :: Int32 -> Ptr Word8 -> Int -> IO Int32
+    STRICT3(go)
+    go h _ 0 = return h
+    go h p n = do w <- peek p
+                  go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1)
+
+-- ---------------------------------------------------------------------
+-- Unfolds and replicates
+
+-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
+-- the value of every element. The following holds:
+--
+-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
+--
+-- This implemenation uses @memset(3)@
+replicate :: Int -> Word8 -> ByteString
+replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
+
+{-
+-- About 5x slower
+replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
+    where
+        STRICT2(go)
+        go _   0 = return w
+        go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
+-}
+
+-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
+-- 'unfoldrN' builds a ByteString from a seed value.  The function takes
+-- the element and returns 'Nothing' if it is done producing the
+-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
+-- prepending to the ByteString and @b@ is used as the next element in a
+-- recursive call.
+--
+-- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
+-- character to a ByteString is /O(n)/, this unfoldr requires a maximum
+-- final size of the ByteString as an argument. 'cons' can then be
+-- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
+-- linear complexity. The depth of the recursion is limited to this
+-- size, but may be less. For lazy, infinite unfoldr, use
+-- 'Data.List.unfoldr' (from 'Data.List').
+--
+-- Examples:
+--
+-- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
+--
+-- The following equation connects the depth-limited unfoldr to the List unfoldr:
+--
+-- > unfoldrN n == take n $ List.unfoldr
+unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
+unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
+    where
+        STRICT3(go)
+        go q c n | n == i    = return n      -- stop if we reach `i'
+                 | otherwise = case f c of
+                                   Nothing        -> return n
+                                   Just (a,new_c) -> do
+                                        poke q a
+                                        go (q `plusPtr` 1) new_c (n+1)
+
+-- ---------------------------------------------------------------------
+-- Substrings
+
+-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
+-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
+take :: Int -> ByteString -> ByteString
+take n ps@(PS x s l)
+    | n < 0     = empty
+    | n >= l    = ps
+    | otherwise = PS x s n
+{-# INLINE take #-}
+
+-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
+-- elements, or @[]@ if @n > 'length' xs@.
+drop  :: Int -> ByteString -> ByteString
+drop n ps@(PS x s l)
+    | n <= 0    = ps
+    | n >  l    = empty
+    | otherwise = PS x (s+n) (l-n)
+{-# INLINE drop #-}
+
+-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
+splitAt :: Int -> ByteString -> (ByteString, ByteString)
+splitAt  n ps  = (take n ps, drop n ps)
+{-# INLINE splitAt #-}
+
+-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
+-- returns the longest prefix (possibly empty) of @xs@ of elements that
+-- satisfy @p@.
+takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
+takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
+{-# INLINE takeWhile #-}
+
+-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
+dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
+dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
+{-# INLINE dropWhile #-}
+
+-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
+break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
+{-# INLINE break #-}
+
+-- | 'breakByte' breaks its ByteString argument at the first occurence
+-- of the specified byte. It is more efficient than 'break' as it is
+-- implemented with @memchr(3)@. I.e.
+-- 
+-- > break (=='c') "abcd" == breakByte 'c' "abcd"
+--
+breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
+breakByte c p = case elemIndex c p of
+    Nothing -> (p,empty)
+    Just n  -> (take n p, drop n p)
+{-# INLINE breakByte #-}
+
+-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
+-- occurence of @w@. It behaves like 'break', except the delimiter is
+-- not returned, and @Nothing@ is returned if the delimiter is not in
+-- the ByteString. I.e.
+--
+-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
+--
+-- > breakFirst c xs ==
+-- > let (x,y) = break (== c) xs 
+-- > in if null y then Nothing else Just (x, drop 1 y))
+--
+breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+breakFirst c p = case elemIndex c p of
+   Nothing -> Nothing
+   Just n -> Just (take n p, drop (n+1) p)
+{-# INLINE breakFirst #-}
+
+-- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
+-- ByteString.
+--
+-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
+--
+-- and the following are equivalent:
+--
+-- > breakLast 'c' "abcdef"
+-- > let (x,y) = break (=='c') (reverse "abcdef") 
+-- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
+--
+breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
+breakLast c p = case elemIndexLast c p of
+    Nothing -> Nothing
+    Just n -> Just (take n p, drop (n+1) p)
+{-# INLINE breakLast #-}
+
+-- | 'span' @p xs@ breaks the ByteString into two segments. It is
+-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
+span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+span  p ps = break (not . p) ps
+{-# INLINE span #-}
+
+-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
+-- We have
+--
+-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- >    == 
+-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 
+--
+spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
+
+-- | /O(n)/ Splits a 'ByteString' into components delimited by
+-- separators, where the predicate returns True for a separator element.
+-- The resulting components do not contain the separators.  Two adjacent
+-- separators result in an empty component in the output.  eg.
+--
+-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
+-- > splitWith (=='a') []        == []
+--
+splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
+
+#if defined(__GLASGOW_HASKELL__)
+splitWith _pred (PS _  _   0) = []
+splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
+  where pred# c# = pred_ (W8# c#)
+
+        splitWith' pred' off' len' fp' = withPtr fp $ \p ->
+            splitLoop pred' p 0 off' len' fp'
+
+        splitLoop :: (Word# -> Bool)
+                  -> Ptr Word8
+                  -> Int -> Int -> Int
+                  -> ForeignPtr Word8
+                  -> IO [ByteString]
+
+        splitLoop pred' p idx' off' len' fp'
+            | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
+            | idx' >= len'  = return [PS fp' off' idx']
+            | otherwise = do
+                w <- peekElemOff p (off'+idx')
+                if pred' (case w of W8# w# -> w#)
+                   then return (PS fp' off' idx' :
+                              splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
+                   else splitLoop pred' p (idx'+1) off' len' fp'
+{-# INLINE splitWith #-}
+
+#else
+splitWith _ (PS _ _ 0) = []
+splitWith p ps = splitWith' p ps
+    where
+        STRICT2(splitWith')
+        splitWith' q qs = if null rest then [chunk]
+                                       else chunk : splitWith' q (unsafeTail rest)
+            where (chunk,rest) = break q qs
+#endif
+
+-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
+-- argument, consuming the delimiter. I.e.
+--
+-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
+-- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
+-- > split 'x'  "x"          == ["",""]
+-- 
+-- and
+--
+-- > join [c] . split c == id
+-- > split == splitWith . (==)
+-- 
+-- As for all splitting functions in this library, this function does
+-- not copy the substrings, it just constructs new 'ByteStrings' that
+-- are slices of the original.
+--
+split :: Word8 -> ByteString -> [ByteString]
+split _ (PS _ _ 0) = []
+split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+    let ptr = p `plusPtr` s
+
+        STRICT1(loop)
+        loop n = do
+            let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
+            if q == nullPtr
+                then return [PS x (s+n) (l-n)]
+                else do let i = q `minusPtr` ptr
+                        ls <- loop (i+1)
+                        return $! PS x (s+n) (i-n) : ls
+    loop 0
+{-# INLINE split #-}
+
+{-
+-- slower. but stays inside Haskell.
+split _ (PS _  _   0) = []
+split (W8# w#) (PS fp off len) = splitWith' off len fp
+    where
+        splitWith' off' len' fp' = withPtr fp $ \p ->
+            splitLoop p 0 off' len' fp'
+
+        splitLoop :: Ptr Word8
+                  -> Int -> Int -> Int
+                  -> ForeignPtr Word8
+                  -> IO [ByteString]
+
+        STRICT5(splitLoop)
+        splitLoop p idx' off' len' fp'
+            | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
+            | idx' >= len'  = return [PS fp' off' idx']
+            | otherwise = do
+                (W8# x#) <- peekElemOff p (off'+idx')
+                if word2Int# w# ==# word2Int# x#
+                   then return (PS fp' off' idx' :
+                              splitWith' (off'+idx'+1) (len'-idx'-1) fp')
+                   else splitLoop p (idx'+1) off' len' fp'
+-}
+
+-- | Like 'splitWith', except that sequences of adjacent separators are
+-- treated as a single separator. eg.
+-- 
+-- > tokens (=='a') "aabbaca" == ["bb","c"]
+--
+tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
+tokens f = P.filter (not.null) . splitWith f
+
+-- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
+-- 'ByteString's and concatenates the list after interspersing the first
+-- argument between each element of the list.
+join :: ByteString -> [ByteString] -> ByteString
+join filler pss = concat (splice pss)
+    where
+        splice []  = []
+        splice [x] = [x]
+        splice (x:y:xs) = x:filler:splice (y:xs)
+
+--
+-- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
+-- with a char. Around 4 times faster than the generalised join.
+--
+joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
+joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
+    withForeignPtr ffp $ \fp ->
+    withForeignPtr fgp $ \gp -> do
+        memcpy ptr (fp `plusPtr` s) l
+        poke (ptr `plusPtr` l) c
+        memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
+    where
+      len = length f + length g + 1
+{-# INLINE joinWithByte #-}
+
+-- ---------------------------------------------------------------------
+-- Indexing ByteStrings
+
+-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
+index :: ByteString -> Int -> Word8
+index ps n
+    | n < 0          = error $ "ByteString.indexWord8: negative index: " ++ show n
+    | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
+                                ++ ", length = " ++ show (length ps)
+    | otherwise      = ps `unsafeIndex` n
+{-# INLINE index #-}
+
+-- | /O(n)/ The 'elemIndex' function returns the index of the first
+-- element in the given 'ByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element. 
+-- This implementation uses memchr(3).
+elemIndex :: Word8 -> ByteString -> Maybe Int
+elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+    let p' = p `plusPtr` s
+        q  = memchr p' c (fromIntegral l)
+    return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
+{-# INLINE elemIndex #-}
+
+-- | /O(n)/ The 'elemIndexLast' function returns the last index of the
+-- element in the given 'ByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element. The following
+-- holds:
+--
+-- > elemIndexLast c xs == 
+-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
+--
+elemIndexLast :: Word8 -> ByteString -> Maybe Int
+elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
+    go (p `plusPtr` s) (l-1)
+  where
+    STRICT2(go)
+    go p i | i < 0     = return Nothing
+           | otherwise = do ch' <- peekByteOff p i
+                            if ch == ch'
+                                then return $ Just i
+                                else go p (i-1)
+{-# INLINE elemIndexLast #-}
+
+-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
+-- the indices of all elements equal to the query element, in ascending order.
+-- This implementation uses memchr(3).
+elemIndices :: Word8 -> ByteString -> [Int]
+elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+    let ptr = p `plusPtr` s
+
+        STRICT1(loop)
+        loop n = do
+                let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
+                if q == nullPtr
+                    then return []
+                    else do let i = q `minusPtr` ptr
+                            ls <- loop (i+1)
+                            return $! i:ls
+    loop 0
+
+{-
+-- much slower
+elemIndices :: Word8 -> ByteString -> [Int]
+elemIndices c ps = loop 0 ps
+   where STRICT2(loop)
+         loop _ ps' | null ps'            = []
+         loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
+                    | otherwise           = loop (n+1) (unsafeTail ps')
+-}
+
+-- | count returns the number of times its argument appears in the ByteString
+--
+-- > count = length . elemIndices
+--
+-- But more efficiently than using length on the intermediate list.
+count :: Word8 -> ByteString -> Int
+count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
+     go (p `plusPtr` s) (fromIntegral m) 0
+    where
+        go :: Ptr Word8 -> CSize -> Int -> IO Int
+        STRICT3(go)
+        go p l i = do
+            let q = memchr p w l
+            if q == nullPtr
+                then return i
+                else do let k = fromIntegral $ q `minusPtr` p
+                        go (q `plusPtr` 1) (l-k-1) (i+1)
+{-# INLINE count #-}
+
+-- | The 'findIndex' function takes a predicate and a 'ByteString' and
+-- returns the index of the first element in the ByteString
+-- satisfying the predicate.
+findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
+findIndex = (listToMaybe .) . findIndices
+
+-- | The 'findIndices' function extends 'findIndex', by returning the
+-- indices of all elements satisfying the predicate, in ascending order.
+findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
+findIndices p ps = loop 0 ps
+   where
+     STRICT2(loop)
+     loop _ qs | null qs           = []
+     loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
+               | otherwise         =     loop (n+1) (unsafeTail qs)
+
+-- ---------------------------------------------------------------------
+-- Searching ByteStrings
+
+-- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
+elem :: Word8 -> ByteString -> Bool
+elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
+{-# INLINE elem #-}
+
+-- | /O(n)/ 'notElem' is the inverse of 'elem'
+notElem :: Word8 -> ByteString -> Bool
+notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
+{-# INLINE notElem #-}
+
+--
+-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
+-- case of filtering a single byte. It is more efficient to use
+-- /filterByte/ in this case.
+--
+-- > filterByte == filter . (==)
+--
+-- filterByte is around 10x faster, and uses much less space, than its
+-- filter equivalent
+filterByte :: Word8 -> ByteString -> ByteString
+filterByte w ps = replicate (count w ps) w
+
+{-
+-- slower than the replicate version
+
+filterByte ch ps@(PS x s l)
+    | null ps   = ps
+    | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+        t <- go (f `plusPtr` s) p l
+        return (t `minusPtr` p) -- actual length
+    where
+        STRICT3(go)
+        go _ t 0 = return t
+        go f t e = do w <- peek f
+                      if w == ch
+                        then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
+                        else             go (f `plusPtr` 1) t               (e-1)
+-}
+
+--
+-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
+-- case of filtering a single byte out of a list. It is more efficient
+-- to use /filterNotByte/ in this case.
+--
+-- > filterNotByte == filter . (/=)
+--
+-- filterNotByte is around 3x faster, and uses much less space, than its
+-- filter equivalent
+filterNotByte :: Word8 -> ByteString -> ByteString
+filterNotByte ch ps@(PS x s l)
+    | null ps   = ps
+    | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+        t <- go (f `plusPtr` s) p l
+        return (t `minusPtr` p) -- actual length
+    where
+        STRICT3(go)
+        go _ t 0 = return t
+        go f t e = do w <- peek f
+                      if w /= ch
+                        then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
+                        else             go (f `plusPtr` 1) t               (e-1)
+
+-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
+-- returns a ByteString containing those characters that satisfy the
+-- predicate.
+filter :: (Word8 -> Bool) -> ByteString -> ByteString
+filter k ps@(PS x s l)
+    | null ps   = ps
+    | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
+        t <- go (f `plusPtr` s) p l
+        return (t `minusPtr` p) -- actual length
+    where
+        STRICT3(go)
+        go _ t 0 = return t
+        go f t e = do w <- peek f
+                      if k w
+                        then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
+                        else             go (f `plusPtr` 1) t               (e - 1)
+
+-- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
+
+-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
+-- and returns the first element in matching the predicate, or 'Nothing'
+-- if there is no such element.
+find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
+find p ps = case filter p ps of
+    q | null q -> Nothing
+      | otherwise -> Just (unsafeHead q)
+
+-- ---------------------------------------------------------------------
+-- Searching for substrings
+
+-- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
+-- iff the first is a prefix of the second.
+isPrefixOf :: ByteString -> ByteString -> Bool
+isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
+    | l1 == 0   = True
+    | l2 < l1   = False
+    | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
+        withForeignPtr x2 $ \p2 -> do
+            i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
+            return (i == 0)
+
+-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
+-- iff the first is a suffix of the second.
+-- 
+-- The following holds:
+--
+-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
+--
+-- However, the real implemenation uses memcmp to compare the end of the
+-- string only, with no reverse required..
+isSuffixOf :: ByteString -> ByteString -> Bool
+isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
+    | l1 == 0   = True
+    | l2 < l1   = False
+    | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
+        withForeignPtr x2 $ \p2 -> do
+            i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
+            return (i == 0)
+
+-- | Check whether one string is a substring of another. @isSubstringOf
+-- p s@ is equivalent to @not (null (findSubstrings p s))@.
+isSubstringOf :: ByteString -- ^ String to search for.
+              -> ByteString -- ^ String to search in.
+              -> Bool
+isSubstringOf p s = not $ P.null $ findSubstrings p s
+
+-- | Get the first index of a substring in another string,
+--   or 'Nothing' if the string is not found.
+--   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
+findSubstring :: ByteString -- ^ String to search for.
+              -> ByteString -- ^ String to seach in.
+              -> Maybe Int
+findSubstring = (listToMaybe .) . findSubstrings
+
+-- | Find the indexes of all (possibly overlapping) occurances of a
+-- substring in a string.  This function uses the Knuth-Morris-Pratt
+-- string matching algorithm.
+findSubstrings :: ByteString -- ^ String to search for.
+               -> ByteString -- ^ String to seach in.
+               -> [Int]
+
+findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
+  where
+      patc x = pat `unsafeIndex` x
+      strc x = str `unsafeIndex` x
+
+      -- maybe we should make kmpNext a UArray before using it in search?
+      kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
+      kmpNextL p _ | null p = []
+      kmpNextL p j = let j' = next (unsafeHead p) j + 1
+                         ps = unsafeTail p
+                         x = if not (null ps) && unsafeHead ps == patc j'
+                                then kmpNext Array.! j' else j'
+                        in x:kmpNextL ps j'
+      search i j = match ++ rest -- i: position in string, j: position in pattern
+        where match = if j == m then [(i - j)] else []
+              rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
+      next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
+               | otherwise = j
+
+-- ---------------------------------------------------------------------
+-- Zipping
+
+-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
+-- corresponding pairs of bytes. If one input ByteString is short,
+-- excess elements of the longer ByteString are discarded. This is
+-- equivalent to a pair of 'unpack' operations.
+zip :: ByteString -> ByteString -> [(Word8,Word8)]
+zip ps qs
+    | null ps || null qs = []
+    | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
+
+-- | 'zipWith' generalises 'zip' by zipping with the function given as
+-- the first argument, instead of a tupling function.  For example,
+-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
+-- corresponding sums.
+zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
+zipWith f ps qs
+    | null ps || null qs = []
+    | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
+
+-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
+-- ByteStrings. Note that this performs two 'pack' operations.
+unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
+unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
+{-# INLINE unzip #-}
+
+-- ---------------------------------------------------------------------
+-- Special lists
+
+-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
+inits :: ByteString -> [ByteString]
+inits (PS x s l) = [PS x s n | n <- [0..l]]
+
+-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
+tails :: ByteString -> [ByteString]
+tails p | null p    = [empty]
+        | otherwise = p : tails (unsafeTail p)
+
+-- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
+
+-- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
+elems :: ByteString -> [ByteString]
+elems (PS _ _ 0) = []
+elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
+{-# INLINE elems #-}
+
+-- ---------------------------------------------------------------------
+-- ** Ordered 'ByteString's
+
+-- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
+sort :: ByteString -> ByteString
+sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
+        memcpy p (f `plusPtr` s) l
+        c_qsort p l -- inplace
+
+-- sort = pack . List.sort . unpack
+
+-- ---------------------------------------------------------------------
+--
+-- Extensions to the basic interface
+--
+
+-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
+-- check for the empty case, so there is an obligation on the programmer
+-- to provide a proof that the ByteString is non-empty.
+unsafeHead :: ByteString -> Word8
+unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
+{-# INLINE unsafeHead #-}
+
+-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
+-- check for the empty case. As with 'unsafeHead', the programmer must
+-- provide a separate proof that the ByteString is non-empty.
+unsafeTail :: ByteString -> ByteString
+unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
+{-# INLINE unsafeTail #-}
+
+-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
+-- This omits the bounds check, which means there is an accompanying
+-- obligation on the programmer to ensure the bounds are checked in some
+-- other way.
+unsafeIndex :: ByteString -> Int -> Word8
+unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
+{-# INLINE unsafeIndex #-}
+
+-- ---------------------------------------------------------------------
+-- Low level constructors
+
+#if defined(__GLASGOW_HASKELL__)
+-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
+-- Addr\# (an arbitrary machine address assumed to point outside the
+-- garbage-collected heap) into a @ByteString@. A much faster way to
+-- create an Addr\# is with an unboxed string literal, than to pack a
+-- boxed string. A unboxed string literal is compiled to a static @char
+-- []@ by GHC. Establishing the length of the string requires a call to
+-- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
+-- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
+-- if you know the length of the string statically.
+--
+-- An example:
+--
+-- > literalFS = packAddress "literal"#
+--
+packAddress :: Addr# -> ByteString
+packAddress addr# = inlinePerformIO $ do
+    p <- newForeignPtr_ cstr
+    return $ PS p 0 (fromIntegral $ c_strlen cstr)
+  where
+    cstr = Ptr addr#
+{-# INLINE packAddress #-}
+
+-- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
+-- 'ByteStrings' -- which is ideal for string literals. It packs a
+-- null-terminated sequence of bytes into a 'ByteString', given a raw
+-- 'Addr\#' to the string, and the length of the string. Make sure the
+-- length is correct, otherwise use the safer 'packAddress' (where the
+-- length will be calculated once at runtime).
+unsafePackAddress :: Int -> Addr# -> ByteString
+unsafePackAddress len addr# = inlinePerformIO $ do
+    p <- newForeignPtr_ cstr
+    return $ PS p 0 len
+    where cstr = Ptr addr#
+
+#endif
+
+-- | /O(1)/ Build a ByteString from a ForeignPtr
+fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
+fromForeignPtr fp l = PS fp 0 l
+
+-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
+toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
+toForeignPtr (PS ps s l) = (ps, s, l)
+
+-- | /O(1)/ 'skipIndex' returns the internal skipped index of the
+-- current 'ByteString' from any larger string it was created from, as
+-- an 'Int'.
+skipIndex :: ByteString -> Int
+skipIndex (PS _ s _) = s
+{-# INLINE skipIndex #-}
+
+-- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
+-- finalizer associated to it. The ByteString length is calculated using
+-- /strlen(3)/, and thus the complexity is a /O(n)/.
+packCString :: CString -> ByteString
+packCString cstr = inlinePerformIO $ do
+    fp <- newForeignPtr_ (castPtr cstr)
+    return $ PS fp 0 (fromIntegral $ c_strlen cstr)
+
+-- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
+-- have /no/ finalizer associated with it. This operation has /O(1)/
+-- complexity as we already know the final size, so no /strlen(3)/ is
+-- required.
+packCStringLen :: CStringLen -> ByteString
+packCStringLen (ptr,len) = inlinePerformIO $ do
+    fp <- newForeignPtr_ (castPtr ptr)
+    return $ PS fp 0 (fromIntegral len)
+
+-- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
+-- have a @free(3)@ finalizer associated to it.
+packMallocCString :: CString -> ByteString
+packMallocCString cstr = inlinePerformIO $ do
+    fp <- newForeignFreePtr (castPtr cstr)
+    return $ PS fp 0 (fromIntegral $ c_strlen cstr)
+
+#if defined(__GLASGOW_HASKELL__)
+-- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
+-- length, and an IO action representing a finalizer. This function is
+-- not available on Hugs.
+--
+packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
+packCStringFinalizer p l f = do
+    fp <- FC.newForeignPtr p f
+    return $ PS fp 0 l
+
+-- | Explicitly run the finaliser associated with a 'ByteString'.
+-- Further references to this value may generate invalid memory
+-- references. This operation is unsafe, as there may be other
+-- 'ByteStrings' referring to the same underlying pages. If you use
+-- this, you need to have a proof of some kind that all 'ByteString's
+-- ever generated from the underlying byte array are no longer live.
+unsafeFinalize :: ByteString -> IO ()
+unsafeFinalize (PS p _ _) = finalizeForeignPtr p
+
+#endif
+
+-- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
+--   The @CString@ should not be freed afterwards. This is a memcpy(3).
+useAsCString :: ByteString -> (CString -> IO a) -> IO a
+useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
+    where
+      alloc = withForeignPtr ps $ \p -> do
+                buf <- c_malloc (fromIntegral l+1)
+                memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
+                poke (buf `plusPtr` l) (0::Word8)
+                return $ castPtr buf
+
+-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
+-- Warning: modifying the @CString@ will affect the @ByteString@.
+-- Why is this function unsafe? It relies on the null byte at the end of
+-- the ByteString to be there. This is /not/ the case if your ByteString
+-- has been spliced from a larger string (i.e. with take or drop).
+-- Unless you can guarantee the null byte, you should use the safe
+-- version, which will copy the string first.
+--
+unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
+unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
+
+-- | /O(n)/ Make a copy of the 'ByteString' with its own storage. 
+--   This is mainly useful to allow the rest of the data pointed
+--   to by the 'ByteString' to be garbage collected, for example
+--   if a large string has been read in, and only a small part of it 
+--   is needed in the rest of the program.
+copy :: ByteString -> ByteString
+copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
+
+-- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
+-- CString is going to be deallocated from C land.
+copyCString :: CString -> ByteString
+copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
+
+-- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
+copyCStringLen :: CStringLen -> ByteString
+copyCStringLen (cstr, len) = inlinePerformIO $ do
+    fp <- mallocForeignPtrArray (len+1)
+    withForeignPtr fp $ \p -> do
+        memcpy p (castPtr cstr) len
+        poke (p `plusPtr` len) (0 :: Word8)
+    return $! PS fp 0 len
+
+-- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
+-- Warning: modifying the @CStringLen@ will affect the @ByteString@.
+-- This is analogous to unsafeUseAsCString, and comes with the same
+-- safety requirements.
+--
+unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
+unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
+
+-- | Given the maximum size needed and a function to make the contents
+-- of a ByteString, generate makes the 'ByteString'. The generating
+-- function is required to return the actual final size (<= the maximum
+-- size), and the resulting byte array is realloced to this size.  The
+-- string is padded at the end with a null byte.
+--
+-- generate is the main mechanism for creating custom, efficient
+-- ByteString functions, using Haskell or C functions to fill the space.
+--
+generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
+generate i f = do
+    p <- mallocArray i
+    i' <- f p
+    p' <- reallocArray p (i'+1)
+    poke (p' `plusPtr` i') (0::Word8)    -- XXX so CStrings work
+    fp <- newForeignFreePtr p'
+    return $ PS fp 0 i'
+
+-- ---------------------------------------------------------------------
+-- line IO
+
+#if defined(__GLASGOW_HASKELL__)
+
+-- | getLine, read a line from stdin.
+getLine :: IO ByteString
+getLine = hGetLine stdin
+
+-- | hGetLine. read a ByteString from a handle
+hGetLine :: Handle -> IO ByteString
+hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
+    case haBufferMode handle_ of
+       NoBuffering -> error "no buffering"
+       _other      -> hGetLineBuffered handle_
+
+ where
+    hGetLineBuffered handle_ = do
+        let ref = haBuffer handle_
+        buf <- readIORef ref
+        hGetLineBufferedLoop handle_ ref buf 0 []
+
+    hGetLineBufferedLoop handle_ ref
+            buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
+        len `seq` do
+        off <- findEOL r w raw
+        let new_len = len + off - r
+        xs <- mkPS raw r off
+
+      -- if eol == True, then off is the offset of the '\n'
+      -- otherwise off == w and the buffer is now empty.
+        if off /= w
+            then do if (w == off + 1)
+                            then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
+                            else writeIORef ref buf{ bufRPtr = off + 1 }
+                    mkBigPS new_len (xs:xss)
+            else do
+                 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
+                                    buf{ bufWPtr=0, bufRPtr=0 }
+                 case maybe_buf of
+                    -- Nothing indicates we caught an EOF, and we may have a
+                    -- partial line to return.
+                    Nothing -> do
+                         writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
+                         if new_len > 0
+                            then mkBigPS new_len (xs:xss)
+                            else ioe_EOF
+                    Just new_buf ->
+                         hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
+
+    -- find the end-of-line character, if there is one
+    findEOL r w raw
+        | r == w = return w
+        | otherwise =  do
+            (c,r') <- readCharFromBuffer raw r
+            if c == '\n'
+                then return r -- NB. not r': don't include the '\n'
+                else findEOL r' w raw
+
+    maybeFillReadBuffer fd is_line is_stream buf = catch
+        (do buf' <- fillReadBuffer fd is_line is_stream buf
+            return (Just buf'))
+        (\e -> if isEOFError e then return Nothing else ioError e)
+
+-- TODO, rewrite to use normal memcpy
+mkPS :: RawBuffer -> Int -> Int -> IO ByteString
+mkPS buf start end = do
+    let len = end - start
+    fp <- mallocByteString (len `quot` 8)
+    withForeignPtr fp $ \p -> do
+        memcpy_ptr_baoff p buf start (fromIntegral len)
+        return (PS fp 0 len)
+
+mkBigPS :: Int -> [ByteString] -> IO ByteString
+mkBigPS _ [ps] = return ps
+mkBigPS _ pss = return $! concat (P.reverse pss)
+
+#endif
+
+-- ---------------------------------------------------------------------
+-- Block IO
+
+-- | Outputs a 'ByteString' to the specified 'Handle'.
+hPut :: Handle -> ByteString -> IO ()
+hPut _ (PS _ _ 0)  = return ()
+hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
+hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
+
+-- | Write a ByteString to stdout
+putStr :: ByteString -> IO ()
+putStr = hPut stdout
+
+-- | Write a ByteString to stdout, appending a newline byte
+putStrLn :: ByteString -> IO ()
+putStrLn ps = hPut stdout ps >> hPut stdout nl
+    where nl = packByte 0x0a
+
+-- | Read a 'ByteString' directly from the specified 'Handle'.  This
+-- is far more efficient than reading the characters into a 'String'
+-- and then using 'pack'.
+hGet :: Handle -> Int -> IO ByteString
+hGet _ 0 = return empty
+hGet h i = do fp <- mallocByteString i
+              l  <- withForeignPtr fp $ \p-> hGetBuf h p i
+              return $ PS fp 0 l
+
+#if defined(__GLASGOW_HASKELL__)
+-- | hGetNonBlocking is identical to 'hGet', except that it will never block
+-- waiting for data to become available, instead it returns only whatever data
+-- is available.
+hGetNonBlocking :: Handle -> Int -> IO ByteString
+hGetNonBlocking _ 0 = return empty
+hGetNonBlocking h i = do
+    fp <- mallocByteString i
+    l  <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
+    return $ PS fp 0 l
+#endif
+
+-- | Read entire handle contents into a 'ByteString'.
+--
+-- As with 'hGet', the string representation in the file is assumed to
+-- be ISO-8859-1.
+--
+hGetContents :: Handle -> IO ByteString
+hGetContents h = do
+    let start_size = 1024
+    p <- mallocArray start_size
+    i <- hGetBuf h p start_size
+    if i < start_size
+        then do p' <- reallocArray p i
+                fp <- newForeignFreePtr p'
+                return $ PS fp 0 i
+        else f p start_size
+    where
+        f p s = do
+        let s' = 2 * s
+        p' <- reallocArray p s'
+        i  <- hGetBuf h (p' `plusPtr` s) s
+        if i < s
+            then do let i' = s + i
+                    p'' <- reallocArray p' i'
+                    fp  <- newForeignFreePtr p''
+                    return $ PS fp 0 i'
+            else f p' s'
+
+-- | getContents. Equivalent to hGetContents stdin
+getContents :: IO ByteString
+getContents = hGetContents stdin
+
+-- | Read an entire file directly into a 'ByteString'.  This is far more
+-- efficient than reading the characters into a 'String' and then using
+-- 'pack'.  It also may be more efficient than opening the file and
+-- reading it using hGet.
+readFile :: FilePath -> IO ByteString
+readFile f = do
+    h <- openBinaryFile f ReadMode
+    l <- hFileSize h
+    s <- hGet h $ fromIntegral l
+    hClose h
+    return s
+
+-- | Write a 'ByteString' to a file.
+writeFile :: FilePath -> ByteString -> IO ()
+writeFile f ps = do
+    h <- openBinaryFile f WriteMode
+    hPut h ps
+    hClose h
+
+#if defined(__GLASGOW_HASKELL__)
+--
+-- | A ByteString equivalent for getArgs. More efficient for large argument lists
+--
+getArgs :: IO [ByteString]
+getArgs =
+  alloca $ \ p_argc ->
+  alloca $ \ p_argv -> do
+    getProgArgv p_argc p_argv
+    p    <- fromIntegral `fmap` peek p_argc
+    argv <- peek p_argv
+    P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
+#endif
+
+-- ---------------------------------------------------------------------
+-- Internal utilities
+
+-- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
+-- silently truncate to 8 bits Chars > '\255'. They are provided as
+-- convenience for ByteString construction.
+w2c :: Word8 -> Char
+#if !defined(__GLASGOW_HASKELL__)
+w2c = chr . fromIntegral
+#else
+w2c = unsafeChr . fromIntegral
+#endif
+{-# INLINE w2c #-}
+
+c2w :: Char -> Word8
+c2w = fromIntegral . ord
+{-# INLINE c2w #-}
+
+-- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
+-- is padded with a null byte.
+mallocByteString :: Int -> IO (ForeignPtr Word8)
+mallocByteString l = do
+    fp <- mallocForeignPtrArray (l+1)
+    withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
+    return fp
+
+-- | A way of creating ForeignPtrs outside the IO monad. The @Int@
+-- argument gives the final size of the ByteString. Unlike 'generate'
+-- the ByteString is no reallocated if the final size is less than the
+-- estimated size.
+create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
+create l write_ptr = inlinePerformIO $ do
+    fp <- mallocByteString (l+1)
+    withForeignPtr fp $ \p -> write_ptr p
+    return $ PS fp 0 l
+{-# INLINE create #-}
+
+-- | Perform an operation with a temporary ByteString
+withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
+withPtr fp io = inlinePerformIO (withForeignPtr fp io)
+{-# INLINE withPtr #-}
+
+-- Common up near identical calls to `error' to reduce the number
+-- constant strings created when compiled:
+errorEmptyList :: String -> a
+errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
+{-# INLINE errorEmptyList #-}
+
+-- 'findIndexOrEnd' is a variant of findIndex, that returns the length
+-- of the string if no element is found, rather than Nothing.
+findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
+STRICT2(findIndexOrEnd)
+findIndexOrEnd f ps
+    | null ps           = 0
+    | f (unsafeHead ps) = 0
+    | otherwise         = 1 + findIndexOrEnd f (unsafeTail ps)
+{-# INLINE findIndexOrEnd #-}
+
+-- Find from the end of the string using predicate
+findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
+STRICT2(findFromEndUntil)
+findFromEndUntil f ps@(PS x s l) =
+    if null ps then 0
+    else if f (last ps) then l
+         else findFromEndUntil f (PS x s (l-1))
+
+-- Just like inlinePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining
+--
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif
+
+{-# INLINE newForeignFreePtr #-}
+newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
+#if defined(__GLASGOW_HASKELL__)
+newForeignFreePtr p = FC.newForeignPtr p (c_free p)
+#else
+newForeignFreePtr p = newForeignPtr c_free_finalizer p
+#endif
+
+-- ---------------------------------------------------------------------
+-- 
+-- Standard C functions
+--
+
+foreign import ccall unsafe "string.h strlen" c_strlen
+    :: CString -> CInt
+
+foreign import ccall unsafe "stdlib.h malloc" c_malloc
+    :: CInt -> IO (Ptr Word8)
+
+foreign import ccall unsafe "static stdlib.h free" c_free
+    :: Ptr Word8 -> IO ()
+
+#if !defined(__GLASGOW_HASKELL__)
+foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
+    :: FunPtr (Ptr Word8 -> IO ())
+#endif
+
+foreign import ccall unsafe "string.h memset" memset
+    :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
+
+foreign import ccall unsafe "string.h memchr" memchr
+    :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
+
+foreign import ccall unsafe "string.h memcmp" memcmp
+    :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
+
+foreign import ccall unsafe "string.h memcpy" memcpy
+    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+
+-- ---------------------------------------------------------------------
+--
+-- Uses our C code
+--
+
+foreign import ccall unsafe "static fpstring.h reverse" c_reverse
+    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+
+foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
+    :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
+
+foreign import ccall unsafe "static fpstring.h maximum" c_maximum
+    :: Ptr Word8 -> Int -> Word8
+
+foreign import ccall unsafe "static fpstring.h minimum" c_minimum
+    :: Ptr Word8 -> Int -> Word8
+
+foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
+    :: Ptr Word8 -> Int -> IO ()
+
+-- ---------------------------------------------------------------------
+-- Internal GHC Haskell magic
+
+#if defined(__GLASGOW_HASKELL__)
+foreign import ccall unsafe "RtsAPI.h getProgArgv"
+    getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
+
+foreign import ccall unsafe "__hscore_memcpy_src_off"
+   memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
+#endif
diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs
new file mode 100644
index 0000000..9f39e61
--- /dev/null
+++ b/Data/ByteString/Char8.hs
@@ -0,0 +1,1007 @@
+{-# OPTIONS_GHC -cpp -fffi #-}
+--
+-- Module      : Data.ByteString.Char8
+-- Copyright   : (c) Don Stewart 2006
+-- License     : BSD-style
+--
+-- Maintainer  : dons@cse.unsw.edu.au
+-- Stability   : experimental
+-- Portability : portable (tested with GHC>=6.4.1 and Hugs 2005)
+-- 
+
+--
+-- | Manipulate ByteStrings using Char operations. All Chars will be
+-- truncated to 8 bits. It can be expected that these functions will run
+-- at identical speeds to their Word8 equivalents in @Data.ByteString@.
+--
+-- More specifically these byte strings are taken to be in the
+-- subset of Unicode covered by code points 0-255. This covers
+-- Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls.
+-- 
+-- See: 
+--
+--  * <http://www.unicode.org/charts/>
+--
+--  * <http://www.unicode.org/charts/PDF/U0000.pdf>
+--
+--  * <http://www.unicode.org/charts/PDF/U0080.pdf>
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with Prelude functions.  eg.
+--
+-- > import qualified Data.ByteString.Char8 as B
+--
+
+module Data.ByteString.Char8 (
+
+        -- * The @ByteString@ type
+        ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
+
+        -- * Introducing and eliminating 'ByteString's
+        empty,                  -- :: ByteString
+        packChar,               -- :: Char   -> ByteString
+        pack,                   -- :: String -> ByteString
+        unpack,                 -- :: ByteString -> String
+
+        -- * Basic interface
+        cons,                   -- :: Char -> ByteString -> ByteString
+        snoc,                   -- :: Char -> ByteString -> ByteString
+        null,                   -- :: ByteString -> Bool
+        length,                 -- :: ByteString -> Int
+        head,                   -- :: ByteString -> Char
+        tail,                   -- :: ByteString -> ByteString
+        last,                   -- :: ByteString -> Char
+        init,                   -- :: ByteString -> ByteString
+        append,                 -- :: ByteString -> ByteString -> ByteString
+
+        -- * Special ByteStrings
+        inits,                  -- :: ByteString -> [ByteString]
+        tails,                  -- :: ByteString -> [ByteString]
+        elems,                  -- :: ByteString -> [ByteString]
+
+        -- * Transformating ByteStrings
+        map,                    -- :: (Char -> Char) -> ByteString -> ByteString
+        reverse,                -- :: ByteString -> ByteString
+        intersperse,            -- :: Char -> ByteString -> ByteString
+        transpose,              -- :: [ByteString] -> [ByteString]
+
+        -- * Reducing 'ByteString's
+        foldl,                  -- :: (a -> Char -> a) -> a -> ByteString -> a
+        foldr,                  -- :: (Char -> a -> a) -> a -> ByteString -> a
+        foldl1,                 -- :: (Char -> Char -> Char) -> ByteString -> Char
+        foldr1,                 -- :: (Char -> Char -> Char) -> ByteString -> Char
+
+        -- ** Special folds
+        concat,                 -- :: [ByteString] -> ByteString
+        concatMap,              -- :: (Char -> ByteString) -> ByteString -> ByteString
+        any,                    -- :: (Char -> Bool) -> ByteString -> Bool
+        all,                    -- :: (Char -> Bool) -> ByteString -> Bool
+        maximum,                -- :: ByteString -> Char
+        minimum,                -- :: ByteString -> Char
+        mapIndexed,             -- :: (Int -> Char -> Char) -> ByteString -> ByteString
+        hash,                   -- :: ByteString -> Int32
+
+        -- * Generating and unfolding ByteStrings
+        replicate,              -- :: Int -> Char -> ByteString
+        unfoldrN,               -- :: (Char -> Maybe (Char, Char)) -> Char -> ByteString
+
+        -- * Substrings
+
+        -- ** Breaking strings
+        take,                   -- :: Int -> ByteString -> ByteString
+        drop,                   -- :: Int -> ByteString -> ByteString
+        splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
+        takeWhile,              -- :: (Char -> Bool) -> ByteString -> ByteString
+        dropWhile,              -- :: (Char -> Bool) -> ByteString -> ByteString
+        break,                  -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+        span,                   -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+        spanEnd,                -- :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+
+        -- ** Breaking and dropping on specific Chars
+        breakChar,              -- :: Char -> ByteString -> (ByteString, ByteString)
+        breakFirst,             -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
+        breakLast,              -- :: Char -> ByteString -> Maybe (ByteString,ByteString)
+        breakSpace,             -- :: ByteString -> Maybe (ByteString,ByteString)
+        dropSpace,              -- :: ByteString -> ByteString
+        dropSpaceEnd,           -- :: ByteString -> ByteString
+
+        -- ** Breaking into many substrings
+        split,                  -- :: Char -> ByteString -> [ByteString]
+        splitWith,              -- :: (Char -> Bool) -> ByteString -> [ByteString]
+        tokens,                 -- :: (Char -> Bool) -> ByteString -> [ByteString]
+
+        -- ** Breaking into lines and words
+        lines,                  -- :: ByteString -> [ByteString]
+        words,                  -- :: ByteString -> [ByteString]
+        unlines,                -- :: [ByteString] -> ByteString
+        unwords,                -- :: ByteString -> [ByteString]
+
+        lines',                 -- :: ByteString -> [ByteString]
+        unlines',               -- :: [ByteString] -> ByteString
+        linesCRLF',             -- :: ByteString -> [ByteString]
+        unlinesCRLF',           -- :: [ByteString] -> ByteString
+        words',                 -- :: ByteString -> [ByteString]
+        unwords',               -- :: ByteString -> [ByteString]
+
+        lineIndices,            -- :: ByteString -> [Int]
+        betweenLines,           -- :: ByteString -> ByteString -> ByteString -> Maybe (ByteString)
+
+        -- ** Joining strings
+        join,                   -- :: ByteString -> [ByteString] -> ByteString
+        joinWithChar,           -- :: Char -> ByteString -> ByteString -> ByteString
+
+        -- * Indexing ByteStrings
+        index,                  -- :: ByteString -> Int -> Char
+        elemIndex,              -- :: Char -> ByteString -> Maybe Int
+        elemIndexLast,          -- :: Char -> ByteString -> Maybe Int
+        elemIndices,            -- :: Char -> ByteString -> [Int]
+        findIndex,              -- :: (Char -> Bool) -> ByteString -> Maybe Int
+        findIndices,            -- :: (Char -> Bool) -> ByteString -> [Int]
+        count,                  -- :: Char -> ByteString -> Int
+
+        -- * Ordered ByteStrings
+        sort,                   -- :: ByteString -> ByteString
+
+        -- * Searching ByteStrings
+
+        -- ** Searching by equality
+        elem,                   -- :: Char -> ByteString -> Bool
+        notElem,                -- :: Char -> ByteString -> Bool
+        filterChar,             -- :: Char -> ByteString -> ByteString
+        filterNotChar,          -- :: Char -> ByteString -> ByteString
+
+        -- ** Searching with a predicate
+        filter,                 -- :: (Char -> Bool) -> ByteString -> ByteString
+        find,                   -- :: (Char -> Bool) -> ByteString -> Maybe Char
+
+        -- ** Searching for substrings
+        isPrefixOf,             -- :: ByteString -> ByteString -> Bool
+        isSuffixOf,             -- :: ByteString -> ByteString -> Bool
+        isSubstringOf,          -- :: ByteString -> ByteString -> Bool
+        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
+        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
+
+        -- * Zipping and unzipping ByteString
+        zip,                    -- :: ByteString -> ByteString -> [(Char,Char)]
+        zipWith,                -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c]
+        unzip,                  -- :: [(Char,Char)] -> (ByteString,ByteString)
+
+        -- * Unchecked access
+        unsafeHead,             -- :: ByteString -> Char
+        unsafeTail,             -- :: ByteString -> ByteString
+        unsafeIndex,            -- :: ByteString -> Int -> Char
+        w2c,                    -- :: Word8 -> Char
+        c2w,                    -- :: Char  -> Word8
+
+        -- * Reading from ByteStrings
+        readInt,                -- :: ByteString -> Maybe Int
+        unsafeReadInt,          -- :: ByteString -> Maybe Int
+
+        -- * Copying ByteStrings
+        copy,                   -- :: ByteString -> ByteString
+
+        -- * I\/O with @ByteString@s
+
+        -- ** Standard input and output
+
+#if defined(__GLASGOW_HASKELL__)
+        getLine,                -- :: IO ByteString
+#endif
+        getContents,            -- :: IO ByteString
+        putStr,                 -- :: ByteString -> IO ()
+        putStrLn,               -- :: ByteString -> IO ()
+
+        -- ** Files
+        readFile,               -- :: FilePath -> IO ByteString
+        writeFile,              -- :: FilePath -> ByteString -> IO ()
+
+        -- ** I\/O with Handles
+#if defined(__GLASGOW_HASKELL__)
+        getArgs,                -- :: IO [ByteString]
+        hGetLine,               -- :: Handle -> IO ByteString
+        hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
+#endif
+        hGetContents,           -- :: Handle -> IO ByteString
+        hGet,                   -- :: Handle -> Int -> IO ByteString
+        hPut,                   -- :: Handle -> ByteString -> IO ()
+
+#if defined(__GLASGOW_HASKELL__)
+        -- * Low level construction
+        -- | For constructors from foreign language types see /Data.ByteString/
+        packAddress,            -- :: Addr# -> ByteString
+        unsafePackAddress,      -- :: Int -> Addr# -> ByteString
+#endif
+
+    ) where
+
+import qualified Prelude as P
+import Prelude hiding           (reverse,head,tail,last,init,null
+                                ,length,map,lines,foldl,foldr,unlines
+                                ,concat,any,take,drop,splitAt,takeWhile
+                                ,dropWhile,span,break,elem,filter,unwords
+                                ,words,maximum,minimum,all,concatMap
+                                ,foldl1,foldr1,readFile,writeFile,replicate
+                                ,getContents,getLine,putStr,putStrLn
+                                ,zip,zipWith,unzip,notElem)
+
+import qualified Data.ByteString as B
+
+-- Listy functions transparently exported
+import Data.ByteString (ByteString(..)
+                       ,empty,null,length,tail,init,append
+                       ,inits,tails,elems,reverse,transpose
+                       ,concat,hash,take,drop,splitAt,join
+                       ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring
+                       ,findSubstrings,unsafeTail,copy
+
+                       ,getContents, putStr, putStrLn
+                       ,readFile, writeFile
+                       ,hGetContents, hGet, hPut
+#if defined(__GLASGOW_HASKELL__)
+                       ,getLine, getArgs, hGetLine, hGetNonBlocking
+                       ,packAddress, unsafePackAddress
+#endif
+                       ,useAsCString, unsafeUseAsCString
+                       )
+
+import Data.Char
+
+import qualified Data.List as List (intersperse)
+
+import Foreign
+import Foreign.C.Types          (CLong)
+import Foreign.Marshal.Utils    (with)
+
+#if defined(__GLASGOW_HASKELL__)
+import GHC.Base                 (Char(..),unsafeChr,unpackCString#,unsafeCoerce#)
+import GHC.IOBase               (IO(..),stToIO)
+import GHC.Prim                 (Addr#,writeWord8OffAddr#,realWorld#,plusAddr#)
+import GHC.Ptr                  (Ptr(..))
+import GHC.ST                   (ST(..))
+#endif
+
+#define STRICT1(f) f a | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+
+------------------------------------------------------------------------
+
+-- | /O(1)/ Convert a 'Char' into a 'ByteString'
+packChar :: Char -> ByteString
+packChar = B.packByte . c2w
+{-# INLINE packChar #-}
+
+-- | /O(n)/ Convert a 'String' into a 'ByteString'
+--
+-- For applications with large numbers of string literals, pack can be a
+-- bottleneck. In such cases, consider using packAddress (GHC only).
+pack :: String -> ByteString
+#if !defined(__GLASGOW_HASKELL__)
+
+pack str = B.create (P.length str) $ \p -> go p str
+    where go _ []     = return ()
+          go p (x:xs) = poke p (c2w x) >> go (p `plusPtr` 1) xs
+
+#else /* hack away */
+
+pack str = B.create (P.length str) $ \(Ptr p) -> stToIO (go p str)
+  where
+    go :: Addr# -> [Char] -> ST a ()
+    go _ []        = return ()
+    go p (C# c:cs) = writeByte p (unsafeCoerce# c) >> go (p `plusAddr#` 1#) cs
+
+    writeByte p c = ST $ \s# ->
+        case writeWord8OffAddr# p 0# c s# of s2# -> (# s2#, () #)
+    {-# INLINE writeByte #-}
+
+{-# RULES
+"pack/packAddress" forall s# .
+                   pack (unpackCString# s#) = B.packAddress s#
+ #-}
+
+#endif
+
+{-# INLINE pack #-}
+
+-- | /O(n)/ Converts a 'ByteString' to a 'String'.
+unpack :: ByteString -> [Char]
+unpack = B.unpackWith w2c
+{-# INLINE unpack #-}
+
+-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
+-- complexity, as it requires a memcpy.
+cons :: Char -> ByteString -> ByteString
+cons = B.cons . c2w
+{-# INLINE cons #-}
+
+-- | /O(n)/ Append a Char to the end of a 'ByteString'. Similar to
+-- 'cons', this function performs a memcpy.
+snoc :: ByteString -> Char -> ByteString
+snoc p = B.snoc p . c2w
+{-# INLINE snoc #-}
+
+-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
+head :: ByteString -> Char
+head = w2c . B.head
+{-# INLINE head #-}
+
+-- | /O(1)/ Extract the last element of a packed string, which must be non-empty.
+last :: ByteString -> Char
+last = w2c . B.last
+{-# INLINE last #-}
+
+-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each element of @xs@
+map :: (Char -> Char) -> ByteString -> ByteString
+map f = B.map (c2w . f . w2c)
+{-# INLINE map #-}
+
+-- | /O(n)/ The 'intersperse' function takes a Char and a 'ByteString'
+-- and \`intersperses\' that Char between the elements of the
+-- 'ByteString'.  It is analogous to the intersperse function on Lists.
+intersperse :: Char -> ByteString -> ByteString
+intersperse = B.intersperse . c2w
+{-# INLINE intersperse #-}
+
+-- | 'foldl', applied to a binary operator, a starting value (typically
+-- the left-identity of the operator), and a ByteString, reduces the
+-- ByteString using the binary operator, from left to right.
+foldl :: (a -> Char -> a) -> a -> ByteString -> a
+foldl f = B.foldl (\a c -> f a (w2c c))
+{-# INLINE foldl #-}
+
+-- | 'foldr', applied to a binary operator, a starting value
+-- (typically the right-identity of the operator), and a packed string,
+-- reduces the packed string using the binary operator, from right to left.
+foldr :: (Char -> a -> a) -> a -> ByteString -> a
+foldr f = B.foldr (\c a -> f (w2c c) a)
+{-# INLINE foldr #-}
+
+-- | 'foldl1' is a variant of 'foldl' that has no starting value
+-- argument, and thus must be applied to non-empty 'ByteStrings'.
+foldl1 :: (Char -> Char -> Char) -> ByteString -> Char
+foldl1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
+{-# INLINE foldl1 #-}
+
+-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
+-- and thus must be applied to non-empty 'ByteString's
+foldr1 :: (Char -> Char -> Char) -> ByteString -> Char
+foldr1 f ps = w2c (B.foldl1 (\x y -> c2w (f (w2c x) (w2c y))) ps)
+{-# INLINE foldr1 #-}
+
+-- | Map a function over a 'ByteString' and concatenate the results
+concatMap :: (Char -> ByteString) -> ByteString -> ByteString
+concatMap f = B.concatMap (f . w2c)
+{-# INLINE concatMap #-}
+
+-- | Applied to a predicate and a ByteString, 'any' determines if
+-- any element of the 'ByteString' satisfies the predicate.
+any :: (Char -> Bool) -> ByteString -> Bool
+any f = B.any (f . w2c)
+{-# INLINE any #-}
+
+-- | Applied to a predicate and a 'ByteString', 'all' determines if
+-- all elements of the 'ByteString' satisfy the predicate.
+all :: (Char -> Bool) -> ByteString -> Bool
+all f = B.all (f . w2c)
+{-# INLINE all #-}
+
+-- | 'maximum' returns the maximum value from a 'ByteString'
+maximum :: ByteString -> Char
+maximum = w2c . B.maximum
+{-# INLINE maximum #-}
+
+-- | 'minimum' returns the minimum value from a 'ByteString'
+minimum :: ByteString -> Char
+minimum = w2c . B.minimum
+{-# INLINE minimum #-}
+
+-- | /O(n)/ map Char functions, provided with the index at each position
+mapIndexed :: (Int -> Char -> Char) -> ByteString -> ByteString
+mapIndexed f = B.mapIndexed (\i c -> c2w (f i (w2c c)))
+{-# INLINE mapIndexed #-}
+
+-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
+-- the value of every element. The following holds:
+--
+-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
+--
+-- This implemenation uses @memset(3)@
+replicate :: Int -> Char -> ByteString
+replicate w = B.replicate w . c2w
+{-# INLINE replicate #-}
+
+-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
+-- 'unfoldrN' builds a ByteString from a seed value.  The function takes
+-- the element and returns 'Nothing' if it is done producing the
+-- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
+-- prepending to the ByteString and @b@ is used as the next element in a
+-- recursive call.
+--
+-- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
+-- character to a ByteString is /O(n)/, this unfoldr requires a maximum
+-- final size of the ByteString as an argument. 'cons' can then be
+-- implemented in /O(1)/ (i.e.  a 'poke'), and the unfoldr itself has
+-- linear complexity. The depth of the recursion is limited to this
+-- size, but may be less. For lazy, infinite unfoldr, use
+-- 'Data.List.unfoldr' (from 'Data.List').
+--
+-- Examples:
+--
+-- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
+--
+-- The following equation connects the depth-limited unfoldr to the List unfoldr:
+--
+-- > unfoldrN n == take n $ List.unfoldr
+--
+unfoldrN :: Int -> (Char -> Maybe (Char, Char)) -> Char -> ByteString
+unfoldrN n f w = B.unfoldrN n ((k `fmap`) . f . w2c) (c2w w)
+    where k (i,j) = (c2w i, c2w j) -- (c2w *** c2w)
+{-# INLINE unfoldrN #-}
+
+-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
+-- returns the longest prefix (possibly empty) of @xs@ of elements that
+-- satisfy @p@.
+takeWhile :: (Char -> Bool) -> ByteString -> ByteString
+takeWhile f = B.takeWhile (f . w2c)
+{-# INLINE takeWhile #-}
+
+-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
+dropWhile :: (Char -> Bool) -> ByteString -> ByteString
+dropWhile f = B.dropWhile (f . w2c)
+{-# INLINE dropWhile #-}
+
+-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
+break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+break f = B.break (f . w2c)
+{-# INLINE break #-}
+
+-- | 'span' @p xs@ breaks the ByteString into two segments. It is
+-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
+span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+span f = B.span (f . w2c)
+{-# INLINE span #-}
+
+-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
+-- We have
+--
+-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- >    == 
+-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 
+--
+spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd f = B.spanEnd (f . w2c)
+{-# INLINE spanEnd #-}
+
+-- | 'breakChar' breaks its ByteString argument at the first occurence
+-- of the specified Char. It is more efficient than 'break' as it is
+-- implemented with @memchr(3)@. I.e.
+-- 
+-- > break (=='c') "abcd" == breakChar 'c' "abcd"
+--
+breakChar :: Char -> ByteString -> (ByteString, ByteString)
+breakChar = B.breakByte . c2w
+{-# INLINE breakChar #-}
+
+-- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
+-- occurence of @w@. It behaves like 'break', except the delimiter is
+-- not returned, and @Nothing@ is returned if the delimiter is not in
+-- the ByteString. I.e.
+--
+-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
+--
+-- > breakFirst c xs ==
+-- > let (x,y) = break (== c) xs 
+-- > in if null y then Nothing else Just (x, drop 1 y))
+--
+breakFirst :: Char -> ByteString -> Maybe (ByteString,ByteString)
+breakFirst = B.breakFirst . c2w
+{-# INLINE breakFirst #-}
+
+-- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
+-- ByteString.
+--
+-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
+--
+-- and the following are equivalent:
+--
+-- > breakLast 'c' "abcdef"
+-- > let (x,y) = break (=='c') (reverse "abcdef") 
+-- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
+--
+breakLast :: Char -> ByteString -> Maybe (ByteString,ByteString)
+breakLast = B.breakLast . c2w
+{-# INLINE breakLast #-}
+
+-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
+-- argument, consuming the delimiter. I.e.
+--
+-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
+-- > split 'a'  "aXaXaXa"    == ["","X","X","X"]
+-- > split 'x'  "x"          == ["",""]
+-- 
+-- and
+--
+-- > join [c] . split c == id
+-- > split == splitWith . (==)
+-- 
+-- As for all splitting functions in this library, this function does
+-- not copy the substrings, it just constructs new 'ByteStrings' that
+-- are slices of the original.
+--
+split :: Char -> ByteString -> [ByteString]
+split = B.split . c2w
+{-# INLINE split #-}
+
+-- | /O(n)/ Splits a 'ByteString' into components delimited by
+-- separators, where the predicate returns True for a separator element.
+-- The resulting components do not contain the separators.  Two adjacent
+-- separators result in an empty component in the output.  eg.
+--
+-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
+--
+splitWith :: (Char -> Bool) -> ByteString -> [ByteString]
+splitWith f = B.splitWith (f . w2c)
+{-# INLINE splitWith #-}
+-- the inline makes a big difference here.
+
+-- | Like 'splitWith', except that sequences of adjacent separators are
+-- treated as a single separator. eg.
+-- 
+-- > tokens (=='a') "aabbaca" == ["bb","c"]
+--
+tokens :: (Char -> Bool) -> ByteString -> [ByteString]
+tokens f = B.tokens (f . w2c)
+{-# INLINE tokens #-}
+
+-- | /O(n)/ joinWithChar. An efficient way to join to two ByteStrings with a
+-- char. Around 4 times faster than the generalised join.
+--
+joinWithChar :: Char -> ByteString -> ByteString -> ByteString
+joinWithChar = B.joinWithByte . c2w
+{-# INLINE joinWithChar #-}
+
+-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
+index :: ByteString -> Int -> Char
+index = (w2c .) . B.index
+{-# INLINE index #-}
+
+-- | /O(n)/ The 'elemIndex' function returns the index of the first
+-- element in the given 'ByteString' which is equal (by memchr) to the
+-- query element, or 'Nothing' if there is no such element.
+elemIndex :: Char -> ByteString -> Maybe Int
+elemIndex = B.elemIndex . c2w
+{-# INLINE elemIndex #-}
+
+-- | /O(n)/ The 'elemIndexLast' function returns the last index of the
+-- element in the given 'ByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element. The following
+-- holds:
+--
+-- > elemIndexLast c xs == 
+-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
+--
+elemIndexLast :: Char -> ByteString -> Maybe Int
+elemIndexLast = B.elemIndexLast . c2w
+{-# INLINE elemIndexLast #-}
+
+-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
+-- the indices of all elements equal to the query element, in ascending order.
+elemIndices :: Char -> ByteString -> [Int]
+elemIndices = B.elemIndices . c2w
+{-# INLINE elemIndices #-}
+
+-- | The 'findIndex' function takes a predicate and a 'ByteString' and
+-- returns the index of the first element in the ByteString satisfying the predicate.
+findIndex :: (Char -> Bool) -> ByteString -> Maybe Int
+findIndex f = B.findIndex (f . w2c)
+{-# INLINE findIndex #-}
+
+-- | The 'findIndices' function extends 'findIndex', by returning the
+-- indices of all elements satisfying the predicate, in ascending order.
+findIndices :: (Char -> Bool) -> ByteString -> [Int]
+findIndices f = B.findIndices (f . w2c)
+
+-- | count returns the number of times its argument appears in the ByteString
+--
+-- > count = length . elemIndices
+--
+-- But more efficiently than using length on the intermediate list.
+count :: Char -> ByteString -> Int
+count c = B.count (c2w c)
+
+-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. This
+-- implementation uses @memchr(3)@.
+elem :: Char -> ByteString -> Bool
+elem    c = B.elem (c2w c)
+{-# INLINE elem #-}
+
+-- | /O(n)/ 'notElem' is the inverse of 'elem'
+notElem :: Char -> ByteString -> Bool
+notElem c = B.notElem (c2w c)
+{-# INLINE notElem #-}
+
+-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
+-- returns a ByteString containing those characters that satisfy the
+-- predicate.
+filter :: (Char -> Bool) -> ByteString -> ByteString
+filter f = B.filter (f . w2c)
+{-# INLINE filter #-}
+
+-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
+-- and returns the first element in matching the predicate, or 'Nothing'
+-- if there is no such element.
+find :: (Char -> Bool) -> ByteString -> Maybe Char
+find f ps = w2c `fmap` B.find (f . w2c) ps
+{-# INLINE find #-}
+
+-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
+-- case of filtering a single Char. It is more efficient to use
+-- filterChar in this case.
+--
+-- > filterChar == filter . (==)
+--
+-- filterChar is around 10x faster, and uses much less space, than its
+-- filter equivalent
+--
+filterChar :: Char -> ByteString -> ByteString
+filterChar c = B.filterByte (c2w c)
+{-# INLINE filterChar #-}
+
+-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
+-- case of filtering a single Char out of a list. It is more efficient
+-- to use /filterNotChar/ in this case.
+--
+-- > filterNotChar == filter . (/=)
+--
+-- filterNotChar is around 3x faster, and uses much less space, than its
+-- filter equivalent
+--
+filterNotChar :: Char -> ByteString -> ByteString
+filterNotChar c = B.filterNotByte (c2w c)
+{-# INLINE filterNotChar #-}
+
+-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
+-- corresponding pairs of Chars. If one input ByteString is short,
+-- excess elements of the longer ByteString are discarded. This is
+-- equivalent to a pair of 'unpack' operations, and so space
+-- usage may be large for multi-megabyte ByteStrings
+zip :: ByteString -> ByteString -> [(Char,Char)]
+zip ps qs
+    | B.null ps || B.null qs = []
+    | otherwise = (unsafeHead ps, unsafeHead qs) : zip (B.unsafeTail ps) (B.unsafeTail qs)
+
+-- | 'zipWith' generalises 'zip' by zipping with the function given as
+-- the first argument, instead of a tupling function.  For example,
+-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list
+-- of corresponding sums.
+zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
+zipWith f = B.zipWith ((. w2c) . f . w2c)
+
+-- | 'unzip' transforms a list of pairs of Chars into a pair of
+-- ByteStrings. Note that this performs two 'pack' operations.
+unzip :: [(Char,Char)] -> (ByteString,ByteString)
+unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
+{-# INLINE unzip #-}
+
+-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits
+-- the check for the empty case, which is good for performance, but
+-- there is an obligation on the programmer to provide a proof that the
+-- ByteString is non-empty.
+unsafeHead :: ByteString -> Char
+unsafeHead  = w2c . B.unsafeHead
+{-# INLINE unsafeHead #-}
+
+-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a Char.
+-- This omits the bounds check, which means there is an accompanying
+-- obligation on the programmer to ensure the bounds are checked in some
+-- other way.
+unsafeIndex :: ByteString -> Int -> Char
+unsafeIndex = (w2c .) . B.unsafeIndex
+{-# INLINE unsafeIndex #-}
+
+-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
+w2c :: Word8 -> Char
+#if !defined(__GLASGOW_HASKELL__)
+w2c = chr . fromIntegral
+#else
+w2c = unsafeChr . fromIntegral
+#endif
+{-# INLINE w2c #-}
+
+-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
+-- silently truncates to 8 bits Chars > '\255'. It is provided as
+-- convenience for ByteString construction.
+c2w :: Char -> Word8
+c2w = fromIntegral . ord
+{-# INLINE c2w #-}
+
+-- ---------------------------------------------------------------------
+-- Things that depend on the encoding
+
+-- | 'breakSpace' returns the pair of ByteStrings when the argument is
+-- broken at the first whitespace byte. I.e.
+-- 
+-- > break isSpace == breakSpace
+--
+breakSpace :: ByteString -> (ByteString,ByteString)
+breakSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+    i <- firstspace (p `plusPtr` s) 0 l
+    return $ case () of {_
+        | i == 0    -> (empty, PS x s l)
+        | i == l    -> (PS x s l, empty)
+        | otherwise -> (PS x s i, PS x (s+i) (l-i))
+    }
+{-# INLINE breakSpace #-}
+
+firstspace :: Ptr Word8 -> Int -> Int -> IO Int
+STRICT3(firstspace)
+firstspace ptr n m
+    | n >= m    = return n
+    | otherwise = do w <- peekByteOff ptr n
+                     if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n
+
+-- | 'dropSpace' efficiently returns the 'ByteString' argument with
+-- white space Chars removed from the front. It is more efficient than
+-- calling dropWhile for removing whitespace. I.e.
+-- 
+-- > dropWhile isSpace == dropSpace
+--
+dropSpace :: ByteString -> ByteString
+dropSpace (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+    i <- firstnonspace (p `plusPtr` s) 0 l
+    return $ if i == l then empty else PS x (s+i) (l-i)
+{-# INLINE dropSpace #-}
+
+firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int
+STRICT3(firstnonspace)
+firstnonspace ptr n m
+    | n >= m    = return n
+    | otherwise = do w <- peekElemOff ptr n
+                     if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n
+
+-- | 'dropSpaceEnd' efficiently returns the 'ByteString' argument with
+-- white space removed from the end. I.e.
+-- 
+-- > reverse . (dropWhile isSpace) . reverse == dropSpaceEnd
+--
+-- but it is more efficient than using multiple reverses.
+--
+dropSpaceEnd :: ByteString -> ByteString
+dropSpaceEnd (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+    i <- lastnonspace (p `plusPtr` s) (l-1)
+    return $ if i == (-1) then empty else PS x s (i+1)
+{-# INLINE dropSpaceEnd #-}
+
+lastnonspace :: Ptr Word8 -> Int -> IO Int
+STRICT2(lastnonspace)
+lastnonspace ptr n
+    | n < 0     = return n
+    | otherwise = do w <- peekElemOff ptr n
+                     if isSpaceWord8 w then lastnonspace ptr (n-1) else return n
+
+-- | 'lines' breaks a ByteString up into a list of ByteStrings at
+-- newline Chars. The resulting strings do not contain newlines.
+lines :: ByteString -> [ByteString]
+lines ps
+    | null ps = []
+    | otherwise = case search ps of
+             Nothing -> [ps]
+             Just n  -> take n ps : lines (drop (n+1) ps)
+    where search = elemIndex '\n'
+{-# INLINE lines #-}
+
+{-
+-- Just as fast, but more complex. Should be much faster, I thought.
+lines :: ByteString -> [ByteString]
+lines (PS _ _ 0) = []
+lines (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
+        let ptr = p `plusPtr` s
+
+            STRICT1(loop)
+            loop n = do
+                let q = memchr (ptr `plusPtr` n) 0x0a (fromIntegral (l-n))
+                if q == nullPtr
+                    then return [PS x (s+n) (l-n)]
+                    else do let i = q `minusPtr` ptr
+                            ls <- loop (i+1)
+                            return $! PS x (s+n) (i-n) : ls
+        loop 0
+-}
+
+-- | 'unlines' is an inverse operation to 'lines'.  It joins lines,
+-- after appending a terminating newline to each.
+unlines :: [ByteString] -> ByteString
+unlines [] = empty
+unlines ss = (concat $ List.intersperse nl ss) `append` nl -- half as much space
+    where nl = packChar '\n'
+
+-- | 'words' breaks a ByteString up into a list of words, which
+-- were delimited by Chars representing white space. And
+--
+-- > tokens isSpace = words
+--
+words :: ByteString -> [ByteString]
+words = B.tokens isSpaceWord8
+
+-- | The 'unwords' function is analogous to the 'unlines' function, on words.
+unwords :: [ByteString] -> ByteString
+unwords = join (packChar ' ')
+
+-- | /O(n)/ Indicies of newlines. Shorthand for 
+--
+-- > elemIndices '\n'
+--
+lineIndices :: ByteString -> [Int]
+lineIndices = elemIndices '\n'
+
+-- | 'lines\'' behaves like 'lines', in that it breaks a ByteString on
+-- newline Chars. However, unlike the Prelude functions, 'lines\'' and
+-- 'unlines\'' correctly reconstruct lines that are missing terminating
+-- newlines characters. I.e.
+--
+-- > unlines  (lines "a\nb\nc")  == "a\nb\nc\n"
+-- > unlines' (lines' "a\nb\nc") == "a\nb\nc"
+--
+-- Note that this means:
+--
+-- > lines  "a\nb\nc\n" == ["a","b","c"]
+-- > lines' "a\nb\nc\n" == ["a","b","c",""]
+--
+lines' :: ByteString -> [ByteString]
+lines' ps = ps `seq` case elemIndex '\n' ps of
+     Nothing -> [ps]
+     Just n -> take n ps : lines' (drop (n+1) ps)
+
+-- | 'linesCRLF\'' behaves like 'lines\'', but breaks on (\\cr?\\lf)
+linesCRLF' :: ByteString -> [ByteString]
+linesCRLF' ps = ps `seq` case elemIndex '\n' ps of
+     Nothing -> [ps]
+     Just 0  -> empty : linesCRLF' (drop 1 ps)
+     Just n  -> let k = if ps `unsafeIndex` (n-1) == '\r' then n-1 else n
+                in take k ps : linesCRLF' (drop (n+1) ps)
+
+-- | 'unlines\'' behaves like 'unlines', except that it also correctly
+-- retores lines that do not have terminating newlines (see the
+-- description for 'lines\'').
+--
+unlines' :: [ByteString] -> ByteString
+unlines' ss = concat $ intersperse_newlines ss
+    where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
+          intersperse_newlines s = s
+          newline = packChar '\n'
+
+-- | 'unlines\'' behaves like 'unlines', except that it also correctly
+-- retores lines that do not have terminating newlines (see the
+-- description for 'lines\''). Uses CRLF instead of LF.
+--
+unlinesCRLF' :: [ByteString] -> ByteString
+unlinesCRLF' ss = concat $ intersperse_newlines ss
+    where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
+          intersperse_newlines s = s
+          newline = pack "\r\n"
+
+-- | 'words\'' behaves like 'words', with the exception that it produces
+-- output on ByteStrings with trailing whitespace that can be
+-- correctly inverted by 'unwords'. I.e.
+--
+-- > words  "a b c " == ["a","b","c"]
+-- > words' "a b c " == ["a","b","c",""]
+--
+-- > unwords $ words  "a b c " == "a b c"
+-- > unwords $ words' "a b c " == "a b c "
+--
+words' :: ByteString -> [ByteString]
+words' = B.splitWith isSpaceWord8
+
+-- | 'unwords\'' behaves like 'unwords'. It is provided for consistency
+-- with the other invertable words and lines functions.
+unwords' :: [ByteString] -> ByteString
+unwords' = unwords
+
+-- | 'betweenLines' returns the ByteString between the two lines given,
+-- or Nothing if they do not appear.  The returned string is the first
+-- and shortest string such that the line before it is the given first
+-- line, and the line after it is the given second line.
+betweenLines :: ByteString -- ^ First line to look for
+             -> ByteString -- ^ Second line to look for
+             -> ByteString -- ^ 'ByteString' to look in
+             -> Maybe (ByteString)
+
+betweenLines start end ps =
+    case P.break (start ==) (lines ps) of
+        (_, _:rest@(PS ps1 s1 _:_)) ->
+            case P.break (end ==) rest of
+                (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1)
+                _ -> Nothing
+        _ -> Nothing
+
+-- ---------------------------------------------------------------------
+-- Reading from ByteStrings
+
+-- | readInt skips any whitespace at the beginning of its argument, and
+-- reads an Int from the beginning of the ByteString.  If there is no
+-- integer at the beginning of the string, it returns Nothing, otherwise
+-- it just returns the int read, and the rest of the string.
+readInt :: ByteString -> Maybe (Int, ByteString)
+readInt p@(PS x s l) = inlinePerformIO $ useAsCString p $ \cstr ->
+    with (castPtr cstr) $ \endpp -> do
+        val     <- c_strtol (castPtr cstr) endpp 0
+        skipped <- (`minusPtr` cstr) `fmap` peek endpp
+        return $ if skipped == 0
+                 then Nothing
+                 else Just (fromIntegral val, PS x (s+skipped) (l-skipped))
+
+-- | unsafeReadInt is like readInt, but requires a null terminated
+-- ByteString. It avoids a copy if this is the case. It returns the Int
+-- read, if any, and the rest of the string.
+unsafeReadInt :: ByteString -> Maybe (Int, ByteString)
+unsafeReadInt p@(PS x s l) = inlinePerformIO $ unsafeUseAsCString p $ \cstr ->
+    with (castPtr cstr) $ \endpp -> do
+        val     <- c_strtol (castPtr cstr) endpp 0
+        skipped <- (`minusPtr` cstr) `fmap` peek endpp
+        return $ if skipped == 0
+                 then Nothing
+                 else Just (fromIntegral val, PS x (s+skipped) (l-skipped))
+
+foreign import ccall unsafe "stdlib.h strtol" c_strtol
+    :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong
+
+{-
+--
+-- not quite there yet
+--
+readInt :: ByteString -> Maybe (Int, ByteString)
+readInt = go 0
+    where
+        STRICT2(go)
+        go i ps
+            | B.null ps = Nothing
+            | x == '-'  = neg 0 xs
+            | otherwise = pos (parse x) xs
+            where (x, xs) = (ps `unsafeIndex` 0, unsafeTail ps)
+
+        STRICT2(neg)
+        neg n qs | isSpace x   = return $ Just ((i-n),xs)
+                 | otherwise   = neg (parse x + (10 * n)) xs
+                 where (x, xs) = (qs `unsafeIndex` 0, unsafeTail qs)
+
+        STRICT2(pos)
+        pos n qs | isSpace x = go (i+n) xs
+                 | otherwise = pos (parse x + (10 * n)) xs
+                 where (x, xs) = (qs `unsafeIndexWord8` 0, unsafeTail qs)
+
+        parse w = fromIntegral (w - 48) :: Int
+        {-# INLINE parse #-}
+-}
+
+-- ---------------------------------------------------------------------
+-- Internals
+
+-- Just like inlinePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining
+--
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif
+
+-- ordered by frequency
+-- Idea from Ketil
+isSpaceWord8 :: Word8 -> Bool
+isSpaceWord8 w = case w of
+    0x20 -> True -- SPACE
+    0x0A -> True -- LF, \n
+    0x09 -> True -- HT, \t
+    0x0C -> True -- FF, \f
+    0x0D -> True -- CR, \r
+    0x0B -> True -- VT, \v
+    _    -> False
+{-# INLINE isSpaceWord8 #-}
+
diff --git a/Makefile b/Makefile
index db0aa2f..d00bffe 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@ ALL_DIRS = \
 	Control/Monad \
 	Control/Monad/ST \
 	Data \
+	Data/ByteString \
 	Data/Generics \
 	Data/Array \
 	Data/Array/IO \
diff --git a/base.cabal b/base.cabal
index 4402bef..4aead21 100644
--- a/base.cabal
+++ b/base.cabal
@@ -38,6 +38,8 @@ exposed-modules:
 	Data.Array.Unboxed,
 	Data.Bits,
 	Data.Bool,
+	Data.ByteString,
+	Data.ByteString.Char8,
 	Data.Char,
 	Data.Complex,
 	Data.Dynamic,
diff --git a/cbits/fpstring.c b/cbits/fpstring.c
new file mode 100644
index 0000000..3d32083
--- /dev/null
+++ b/cbits/fpstring.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2003 David Roundy
+ * Copyright (c) 2005-2006 Don Stewart
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the authors or the names of any contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+
+/* copy a string in reverse */
+void reverse(unsigned char *dest, unsigned char *from, int len)
+{
+    unsigned char *p, *q;
+    p = from + len - 1;
+    q = dest;
+
+    while (p >= from)
+        *q++ = *p--;
+}
+
+/* compare bytes ascii-wise */
+static int cmp(const void *p, const void *q) {
+    return (*(unsigned char *)p - *(unsigned char *)q);
+}
+
+/* quicksort wrapper */
+void my_qsort(unsigned char *base, size_t size)
+{
+    qsort(base, size, sizeof(char), cmp);
+}
+
+/* duplicate a string, interspersing the character through the elements
+   of the duplicated string */
+void intersperse(unsigned char *dest, unsigned char *from, int len, char c)
+{
+    unsigned char *p, *q;
+    p = from;
+    q = dest;
+    while (p < from + len - 1) {
+        *q++ = *p++; 
+        *q++ = c;
+    }
+    *q = *p;
+}
+
+/* find maximum char in a packed string */
+unsigned char maximum(unsigned char *p, int len)
+{
+    unsigned char *q, c = *p;
+    for (q = p; q < p + len; q++)
+        if (*q > c)
+            c = *q;
+    return c;
+}
+
+/* find minimum char in a packed string */
+unsigned char minimum(unsigned char *p, int len)
+{
+    unsigned char *q, c = *p;
+    for (q = p; q < p + len; q++)
+        if (*q < c)
+            c = *q;
+    return c;
+}
diff --git a/include/fpstring.h b/include/fpstring.h
new file mode 100644
index 0000000..baab811
--- /dev/null
+++ b/include/fpstring.h
@@ -0,0 +1,9 @@
+#include <HsFFI.h>
+
+char *my_mmap(int len, int fd);
+
+void reverse(unsigned char *dest, unsigned char *from, int len);
+void my_qsort(unsigned char *base, size_t size);
+void intersperse(unsigned char *dest, unsigned char *from, int len, char c);
+unsigned char maximum(unsigned char *p, int len);
+unsigned char minimum(unsigned char *p, int len);
diff --git a/package.conf.in b/package.conf.in
index 78a74c3..98d6713 100644
--- a/package.conf.in
+++ b/package.conf.in
@@ -36,6 +36,8 @@ exposed-modules:
 	Data.Array.Unboxed,
 	Data.Bits,
 	Data.Bool,
+	Data.ByteString,
+	Data.ByteString.Char8,
 	Data.Char,
 	Data.Complex,
 	Data.Dynamic,