From 39387cbf531ac4de18994726d30650b391fdae65 Mon Sep 17 00:00:00 2001 From: Don Stewart Date: Sat, 29 Apr 2006 04:07:33 +0000 Subject: [PATCH] Merge in Data.ByteString head. Fixes ByteString+cbits in hugs --- Data/ByteString.hs | 161 ++++++++++++++++++++++++++++++++++------------ Data/ByteString/Char8.hs | 6 +- cbits/fpstring.c | 28 ++++---- include/fpstring.h | 5 +- 4 files changed, 141 insertions(+), 59 deletions(-) diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 7350eb8..61ed887 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -80,7 +80,6 @@ module Data.ByteString ( maximum, -- :: ByteString -> Word8 minimum, -- :: ByteString -> Word8 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString - hash, -- :: ByteString -> Int32 -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Word8 -> ByteString @@ -202,6 +201,7 @@ module Data.ByteString ( -- ** Files readFile, -- :: FilePath -> IO ByteString writeFile, -- :: FilePath -> ByteString -> IO () +-- mmapFile, -- :: FilePath -> IO ByteString -- ** I\/O with Handles #if defined(__GLASGOW_HASKELL__) @@ -234,50 +234,50 @@ import qualified Data.List as List import Data.Char import Data.Word (Word8) -import Data.Int (Int32) -import Data.Bits (rotateL) import Data.Maybe (listToMaybe) import Data.Array (listArray) import qualified Data.Array as Array ((!)) import Control.Exception (bracket) -import Foreign.C.Types (CSize, CInt) import Foreign.C.String (CString, CStringLen) -import Foreign.Storable +import Foreign.C.Types (CSize, CInt) import Foreign.ForeignPtr -import Foreign.Ptr import Foreign.Marshal.Array +import Foreign.Ptr +import Foreign.Storable (Storable(..)) import System.IO (stdin,stdout,hClose,hFileSize ,hGetBuf,hPutBuf,openBinaryFile ,Handle,IOMode(..)) -#if defined(__GLASGOW_HASKELL__) - -import System.IO (hGetBufNonBlocking) +#if !defined(__GLASGOW_HASKELL__) +import System.IO.Unsafe +#endif -import qualified Foreign.Concurrent as FC (newForeignPtr) +#if defined(__GLASGOW_HASKELL__) import Data.Generics (Data(..), Typeable(..)) +import System.IO (hGetBufNonBlocking) import System.IO.Error (isEOFError) + import Foreign.Marshal (alloca) +import qualified Foreign.Concurrent as FC (newForeignPtr) import GHC.Handle -import GHC.Prim +import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#) import GHC.Base (build, unsafeChr) import GHC.Word hiding (Word8) import GHC.Ptr (Ptr(..)) import GHC.ST (ST(..)) import GHC.IOBase -#else - -import System.IO.Unsafe - #endif +-- CFILES stuff is Hugs only +{-# CFILES cbits/fpstring.c #-} + -- ----------------------------------------------------------------------------- -- -- Useful macros, until we have bang patterns @@ -582,7 +582,9 @@ reverse :: ByteString -> ByteString reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> c_reverse p (f `plusPtr` s) l --- reverse = pack . P.reverse . unpack +{- +reverse = pack . P.reverse . unpack +-} -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a -- 'ByteString' and \`intersperses\' that byte between the elements of @@ -594,7 +596,9 @@ intersperse c ps@(PS x s l) | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f -> c_intersperse p (f `plusPtr` s) l c --- intersperse c = pack . List.intersperse c . unpack +{- +intersperse c = pack . List.intersperse c . unpack +-} -- | The 'transpose' function transposes the rows and columns of its -- 'ByteString' argument. @@ -708,6 +712,7 @@ maximum xs@(PS x s l) | null xs = errorEmptyList "maximum" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> return $ c_maximum (p `plusPtr` s) l +{-# INLINE maximum #-} -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' minimum :: ByteString -> Word8 @@ -715,6 +720,7 @@ minimum xs@(PS x s l) | null xs = errorEmptyList "minimum" | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> return $ c_minimum (p `plusPtr` s) l +{-# INLINE minimum #-} {- maximum xs@(PS x s l) @@ -722,7 +728,6 @@ maximum xs@(PS x s l) | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do w <- peek p maximum_ (p `plusPtr` s) 0 l w -{-# INLINE maximum #-} maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 STRICT4(maximum_) @@ -736,7 +741,6 @@ minimum xs@(PS x s l) | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do w <- peek p minimum_ (p `plusPtr` s) 0 l w -{-# INLINE minimum #-} minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 STRICT4(minimum_) @@ -745,6 +749,7 @@ minimum_ ptr n m c | otherwise = do w <- peekByteOff ptr n minimum_ ptr (n+1) m (if w < c then w else c) -} + -- | /O(n)/ map Word8 functions, provided with the index at each position mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f -> @@ -757,17 +762,6 @@ mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f -> ((poke t) . k n) w go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p --- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key. -hash :: ByteString -> Int32 -hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> - go (0 :: Int32) (p `plusPtr` s) l - where - go :: Int32 -> Ptr Word8 -> Int -> IO Int32 - STRICT3(go) - go h _ 0 = return h - go h p n = do w <- peek p - go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1) - -- --------------------------------------------------------------------- -- Unfolds and replicates @@ -1143,6 +1137,14 @@ elemIndices c ps = loop 0 ps -- But more efficiently than using length on the intermediate list. count :: Word8 -> ByteString -> Int count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> + return $ c_count (p `plusPtr` s) (fromIntegral m) w +{-# INLINE count #-} + +{- +-- +-- around 30% slower +-- +count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> go (p `plusPtr` s) (fromIntegral m) 0 where go :: Ptr Word8 -> CSize -> Int -> IO Int @@ -1153,7 +1155,7 @@ count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> then return i else do let k = fromIntegral $ q `minusPtr` p go (q `plusPtr` 1) (l-k-1) (i+1) -{-# INLINE count #-} +-} -- | The 'findIndex' function takes a predicate and a 'ByteString' and -- returns the index of the first element in the ByteString @@ -1392,7 +1394,9 @@ sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do memcpy p (f `plusPtr` s) l c_qsort p l -- inplace --- sort = pack . List.sort . unpack +{- +sort = pack . List.sort . unpack +-} -- --------------------------------------------------------------------- -- @@ -1726,15 +1730,15 @@ hGetContents h = do else f p start_size where f p s = do - let s' = 2 * s - p' <- reallocArray p s' - i <- hGetBuf h (p' `plusPtr` s) s - if i < s - then do let i' = s + i - p'' <- reallocArray p' i' - fp <- newForeignFreePtr p'' - return $ PS fp 0 i' - else f p' s' + let s' = 2 * s + p' <- reallocArray p s' + i <- hGetBuf h (p' `plusPtr` s) s + if i < s + then do let i' = s + i + p'' <- reallocArray p' i' + fp <- newForeignFreePtr p'' + return $ PS fp 0 i' + else f p' s' -- | getContents. Equivalent to hGetContents stdin getContents :: IO ByteString @@ -1759,6 +1763,62 @@ writeFile f ps = do hPut h ps hClose h +{- +-- +-- Disable until we can move it into a portable .hsc file +-- + +-- | Like readFile, this reads an entire file directly into a +-- 'ByteString', but it is even more efficient. It involves directly +-- mapping the file to memory. This has the advantage that the contents +-- of the file never need to be copied. Also, under memory pressure the +-- page may simply be discarded, while in the case of readFile it would +-- need to be written to swap. If you read many small files, mmapFile +-- will be less memory-efficient than readFile, since each mmapFile +-- takes up a separate page of memory. Also, you can run into bus +-- errors if the file is modified. As with 'readFile', the string +-- representation in the file is assumed to be ISO-8859-1. +-- +-- On systems without mmap, this is the same as a readFile. +-- +mmapFile :: FilePath -> IO ByteString +mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l + +mmap :: FilePath -> IO (ForeignPtr Word8, Int) +mmap f = do + h <- openBinaryFile f ReadMode + l <- fromIntegral `fmap` hFileSize h + -- Don't bother mmaping small files because each mmapped file takes up + -- at least one full VM block. + if l < mmap_limit + then do thefp <- mallocByteString l + withForeignPtr thefp $ \p-> hGetBuf h p l + hClose h + return (thefp, l) + else do + -- unix only :( + fd <- fromIntegral `fmap` handleToFd h + p <- my_mmap l fd + fp <- if p == nullPtr + then do thefp <- mallocByteString l + withForeignPtr thefp $ \p' -> hGetBuf h p' l + return thefp + else do + -- The munmap leads to crashes on OpenBSD. + -- maybe there's a use after unmap in there somewhere? +#if !defined(__OpenBSD__) + let unmap = c_munmap p l >> return () +#else + let unmap = return () +#endif + fp <- FC.newForeignPtr p unmap + return fp + c_close fd + hClose h + return (fp, l) + where mmap_limit = 16*1024 +-} + #if defined(__GLASGOW_HASKELL__) -- -- | A ByteString equivalent for getArgs. More efficient for large argument lists @@ -1906,10 +1966,29 @@ foreign import ccall unsafe "static fpstring.h maximum" c_maximum foreign import ccall unsafe "static fpstring.h minimum" c_minimum :: Ptr Word8 -> Int -> Word8 +foreign import ccall unsafe "static fpstring.h count" c_count + :: Ptr Word8 -> Int -> Word8 -> Int + foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort :: Ptr Word8 -> Int -> IO () -- --------------------------------------------------------------------- +-- MMap + +{- +foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap + :: Int -> Int -> IO (Ptr Word8) + +foreign import ccall unsafe "static unistd.h close" c_close + :: Int -> IO Int + +# if !defined(__OpenBSD__) +foreign import ccall unsafe "static sys/mman.h munmap" c_munmap + :: Ptr Word8 -> Int -> IO Int +# endif +-} + +-- --------------------------------------------------------------------- -- Internal GHC Haskell magic #if defined(__GLASGOW_HASKELL__) diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index 9f39e61..dd94d0a 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -79,7 +79,6 @@ module Data.ByteString.Char8 ( maximum, -- :: ByteString -> Char minimum, -- :: ByteString -> Char mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString - hash, -- :: ByteString -> Int32 -- * Generating and unfolding ByteStrings replicate, -- :: Int -> Char -> ByteString @@ -193,6 +192,7 @@ module Data.ByteString.Char8 ( -- ** Files readFile, -- :: FilePath -> IO ByteString +-- mmapFile, -- :: FilePath -> IO ByteString writeFile, -- :: FilePath -> ByteString -> IO () -- ** I\/O with Handles @@ -230,12 +230,12 @@ import qualified Data.ByteString as B import Data.ByteString (ByteString(..) ,empty,null,length,tail,init,append ,inits,tails,elems,reverse,transpose - ,concat,hash,take,drop,splitAt,join + ,concat,take,drop,splitAt,join ,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring ,findSubstrings,unsafeTail,copy ,getContents, putStr, putStrLn - ,readFile, writeFile + ,readFile, {-mmapFile,-} writeFile ,hGetContents, hGet, hPut #if defined(__GLASGOW_HASKELL__) ,getLine, getArgs, hGetLine, hGetNonBlocking diff --git a/cbits/fpstring.c b/cbits/fpstring.c index 3d32083..3fabcc8 100644 --- a/cbits/fpstring.c +++ b/cbits/fpstring.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2003 David Roundy - * Copyright (c) 2005-2006 Don Stewart + * Copyright (c) 2005-6 Don Stewart * * All rights reserved. * @@ -29,11 +29,10 @@ * SUCH DAMAGE. */ -#include +#include "fpstring.h" /* copy a string in reverse */ -void reverse(unsigned char *dest, unsigned char *from, int len) -{ +void reverse(unsigned char *dest, unsigned char *from, int len) { unsigned char *p, *q; p = from + len - 1; q = dest; @@ -48,15 +47,13 @@ static int cmp(const void *p, const void *q) { } /* quicksort wrapper */ -void my_qsort(unsigned char *base, size_t size) -{ +void my_qsort(unsigned char *base, size_t size) { qsort(base, size, sizeof(char), cmp); } /* duplicate a string, interspersing the character through the elements of the duplicated string */ -void intersperse(unsigned char *dest, unsigned char *from, int len, char c) -{ +void intersperse(unsigned char *dest, unsigned char *from, int len, char c) { unsigned char *p, *q; p = from; q = dest; @@ -68,8 +65,7 @@ void intersperse(unsigned char *dest, unsigned char *from, int len, char c) } /* find maximum char in a packed string */ -unsigned char maximum(unsigned char *p, int len) -{ +unsigned char maximum(unsigned char *p, int len) { unsigned char *q, c = *p; for (q = p; q < p + len; q++) if (*q > c) @@ -78,11 +74,19 @@ unsigned char maximum(unsigned char *p, int len) } /* find minimum char in a packed string */ -unsigned char minimum(unsigned char *p, int len) -{ +unsigned char minimum(unsigned char *p, int len) { unsigned char *q, c = *p; for (q = p; q < p + len; q++) if (*q < c) c = *q; return c; } + +/* count the number of occurences of a char in a string */ +int count(unsigned char *p, int len, unsigned char w) { + int c; + for (c = 0; len--; ++p) + if (*p == w) + ++c; + return c; +} diff --git a/include/fpstring.h b/include/fpstring.h index baab811..18e633f 100644 --- a/include/fpstring.h +++ b/include/fpstring.h @@ -1,9 +1,8 @@ -#include - -char *my_mmap(int len, int fd); +#include void reverse(unsigned char *dest, unsigned char *from, int len); void my_qsort(unsigned char *base, size_t size); void intersperse(unsigned char *dest, unsigned char *from, int len, char c); unsigned char maximum(unsigned char *p, int len); unsigned char minimum(unsigned char *p, int len); +int count(unsigned char *p, int len, unsigned char w); -- 1.7.10.4