maximum, -- :: ByteString -> Word8
minimum, -- :: ByteString -> Word8
mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
- hash, -- :: ByteString -> Int32
-- * Generating and unfolding ByteStrings
replicate, -- :: Int -> Word8 -> ByteString
-- ** Files
readFile, -- :: FilePath -> IO ByteString
writeFile, -- :: FilePath -> ByteString -> IO ()
+-- mmapFile, -- :: FilePath -> IO ByteString
-- ** I\/O with Handles
#if defined(__GLASGOW_HASKELL__)
import Data.Char
import Data.Word (Word8)
-import Data.Int (Int32)
-import Data.Bits (rotateL)
import Data.Maybe (listToMaybe)
import Data.Array (listArray)
import qualified Data.Array as Array ((!))
import Control.Exception (bracket)
-import Foreign.C.Types (CSize, CInt)
import Foreign.C.String (CString, CStringLen)
-import Foreign.Storable
+import Foreign.C.Types (CSize, CInt)
import Foreign.ForeignPtr
-import Foreign.Ptr
import Foreign.Marshal.Array
+import Foreign.Ptr
+import Foreign.Storable (Storable(..))
import System.IO (stdin,stdout,hClose,hFileSize
,hGetBuf,hPutBuf,openBinaryFile
,Handle,IOMode(..))
-#if defined(__GLASGOW_HASKELL__)
-
-import System.IO (hGetBufNonBlocking)
+#if !defined(__GLASGOW_HASKELL__)
+import System.IO.Unsafe
+#endif
-import qualified Foreign.Concurrent as FC (newForeignPtr)
+#if defined(__GLASGOW_HASKELL__)
import Data.Generics (Data(..), Typeable(..))
+import System.IO (hGetBufNonBlocking)
import System.IO.Error (isEOFError)
+
import Foreign.Marshal (alloca)
+import qualified Foreign.Concurrent as FC (newForeignPtr)
import GHC.Handle
-import GHC.Prim
+import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
import GHC.Base (build, unsafeChr)
import GHC.Word hiding (Word8)
import GHC.Ptr (Ptr(..))
import GHC.ST (ST(..))
import GHC.IOBase
-#else
-
-import System.IO.Unsafe
-
#endif
+-- CFILES stuff is Hugs only
+{-# CFILES cbits/fpstring.c #-}
+
-- -----------------------------------------------------------------------------
--
-- Useful macros, until we have bang patterns
reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
c_reverse p (f `plusPtr` s) l
--- reverse = pack . P.reverse . unpack
+{-
+reverse = pack . P.reverse . unpack
+-}
-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
-- 'ByteString' and \`intersperses\' that byte between the elements of
| otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
c_intersperse p (f `plusPtr` s) l c
--- intersperse c = pack . List.intersperse c . unpack
+{-
+intersperse c = pack . List.intersperse c . unpack
+-}
-- | The 'transpose' function transposes the rows and columns of its
-- 'ByteString' argument.
| null xs = errorEmptyList "maximum"
| otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
return $ c_maximum (p `plusPtr` s) l
+{-# INLINE maximum #-}
-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
minimum :: ByteString -> Word8
| null xs = errorEmptyList "minimum"
| otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
return $ c_minimum (p `plusPtr` s) l
+{-# INLINE minimum #-}
{-
maximum xs@(PS x s l)
| otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
w <- peek p
maximum_ (p `plusPtr` s) 0 l w
-{-# INLINE maximum #-}
maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
STRICT4(maximum_)
| otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
w <- peek p
minimum_ (p `plusPtr` s) 0 l w
-{-# INLINE minimum #-}
minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
STRICT4(minimum_)
| otherwise = do w <- peekByteOff ptr n
minimum_ ptr (n+1) m (if w < c then w else c)
-}
+
-- | /O(n)/ map Word8 functions, provided with the index at each position
mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
((poke t) . k n) w
go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
--- | /O(n)/ Hash a ByteString into an 'Int32' value, suitable for use as a key.
-hash :: ByteString -> Int32
-hash (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
- go (0 :: Int32) (p `plusPtr` s) l
- where
- go :: Int32 -> Ptr Word8 -> Int -> IO Int32
- STRICT3(go)
- go h _ 0 = return h
- go h p n = do w <- peek p
- go (fromIntegral w + rotateL h 8) (p `plusPtr` 1) (n-1)
-
-- ---------------------------------------------------------------------
-- Unfolds and replicates
-- But more efficiently than using length on the intermediate list.
count :: Word8 -> ByteString -> Int
count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
+ return $ c_count (p `plusPtr` s) (fromIntegral m) w
+{-# INLINE count #-}
+
+{-
+--
+-- around 30% slower
+--
+count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
go (p `plusPtr` s) (fromIntegral m) 0
where
go :: Ptr Word8 -> CSize -> Int -> IO Int
then return i
else do let k = fromIntegral $ q `minusPtr` p
go (q `plusPtr` 1) (l-k-1) (i+1)
-{-# INLINE count #-}
+-}
-- | The 'findIndex' function takes a predicate and a 'ByteString' and
-- returns the index of the first element in the ByteString
memcpy p (f `plusPtr` s) l
c_qsort p l -- inplace
--- sort = pack . List.sort . unpack
+{-
+sort = pack . List.sort . unpack
+-}
-- ---------------------------------------------------------------------
--
else f p start_size
where
f p s = do
- let s' = 2 * s
- p' <- reallocArray p s'
- i <- hGetBuf h (p' `plusPtr` s) s
- if i < s
- then do let i' = s + i
- p'' <- reallocArray p' i'
- fp <- newForeignFreePtr p''
- return $ PS fp 0 i'
- else f p' s'
+ let s' = 2 * s
+ p' <- reallocArray p s'
+ i <- hGetBuf h (p' `plusPtr` s) s
+ if i < s
+ then do let i' = s + i
+ p'' <- reallocArray p' i'
+ fp <- newForeignFreePtr p''
+ return $ PS fp 0 i'
+ else f p' s'
-- | getContents. Equivalent to hGetContents stdin
getContents :: IO ByteString
hPut h ps
hClose h
+{-
+--
+-- Disable until we can move it into a portable .hsc file
+--
+
+-- | Like readFile, this reads an entire file directly into a
+-- 'ByteString', but it is even more efficient. It involves directly
+-- mapping the file to memory. This has the advantage that the contents
+-- of the file never need to be copied. Also, under memory pressure the
+-- page may simply be discarded, while in the case of readFile it would
+-- need to be written to swap. If you read many small files, mmapFile
+-- will be less memory-efficient than readFile, since each mmapFile
+-- takes up a separate page of memory. Also, you can run into bus
+-- errors if the file is modified. As with 'readFile', the string
+-- representation in the file is assumed to be ISO-8859-1.
+--
+-- On systems without mmap, this is the same as a readFile.
+--
+mmapFile :: FilePath -> IO ByteString
+mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
+
+mmap :: FilePath -> IO (ForeignPtr Word8, Int)
+mmap f = do
+ h <- openBinaryFile f ReadMode
+ l <- fromIntegral `fmap` hFileSize h
+ -- Don't bother mmaping small files because each mmapped file takes up
+ -- at least one full VM block.
+ if l < mmap_limit
+ then do thefp <- mallocByteString l
+ withForeignPtr thefp $ \p-> hGetBuf h p l
+ hClose h
+ return (thefp, l)
+ else do
+ -- unix only :(
+ fd <- fromIntegral `fmap` handleToFd h
+ p <- my_mmap l fd
+ fp <- if p == nullPtr
+ then do thefp <- mallocByteString l
+ withForeignPtr thefp $ \p' -> hGetBuf h p' l
+ return thefp
+ else do
+ -- The munmap leads to crashes on OpenBSD.
+ -- maybe there's a use after unmap in there somewhere?
+#if !defined(__OpenBSD__)
+ let unmap = c_munmap p l >> return ()
+#else
+ let unmap = return ()
+#endif
+ fp <- FC.newForeignPtr p unmap
+ return fp
+ c_close fd
+ hClose h
+ return (fp, l)
+ where mmap_limit = 16*1024
+-}
+
#if defined(__GLASGOW_HASKELL__)
--
-- | A ByteString equivalent for getArgs. More efficient for large argument lists
foreign import ccall unsafe "static fpstring.h minimum" c_minimum
:: Ptr Word8 -> Int -> Word8
+foreign import ccall unsafe "static fpstring.h count" c_count
+ :: Ptr Word8 -> Int -> Word8 -> Int
+
foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
:: Ptr Word8 -> Int -> IO ()
-- ---------------------------------------------------------------------
+-- MMap
+
+{-
+foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
+ :: Int -> Int -> IO (Ptr Word8)
+
+foreign import ccall unsafe "static unistd.h close" c_close
+ :: Int -> IO Int
+
+# if !defined(__OpenBSD__)
+foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
+ :: Ptr Word8 -> Int -> IO Int
+# endif
+-}
+
+-- ---------------------------------------------------------------------
-- Internal GHC Haskell magic
#if defined(__GLASGOW_HASKELL__)
maximum, -- :: ByteString -> Char
minimum, -- :: ByteString -> Char
mapIndexed, -- :: (Int -> Char -> Char) -> ByteString -> ByteString
- hash, -- :: ByteString -> Int32
-- * Generating and unfolding ByteStrings
replicate, -- :: Int -> Char -> ByteString
-- ** Files
readFile, -- :: FilePath -> IO ByteString
+-- mmapFile, -- :: FilePath -> IO ByteString
writeFile, -- :: FilePath -> ByteString -> IO ()
-- ** I\/O with Handles
import Data.ByteString (ByteString(..)
,empty,null,length,tail,init,append
,inits,tails,elems,reverse,transpose
- ,concat,hash,take,drop,splitAt,join
+ ,concat,take,drop,splitAt,join
,sort,isPrefixOf,isSuffixOf,isSubstringOf,findSubstring
,findSubstrings,unsafeTail,copy
,getContents, putStr, putStrLn
- ,readFile, writeFile
+ ,readFile, {-mmapFile,-} writeFile
,hGetContents, hGet, hPut
#if defined(__GLASGOW_HASKELL__)
,getLine, getArgs, hGetLine, hGetNonBlocking
/*
* Copyright (c) 2003 David Roundy
- * Copyright (c) 2005-2006 Don Stewart
+ * Copyright (c) 2005-6 Don Stewart
*
* All rights reserved.
*
* SUCH DAMAGE.
*/
-#include <stdlib.h>
+#include "fpstring.h"
/* copy a string in reverse */
-void reverse(unsigned char *dest, unsigned char *from, int len)
-{
+void reverse(unsigned char *dest, unsigned char *from, int len) {
unsigned char *p, *q;
p = from + len - 1;
q = dest;
}
/* quicksort wrapper */
-void my_qsort(unsigned char *base, size_t size)
-{
+void my_qsort(unsigned char *base, size_t size) {
qsort(base, size, sizeof(char), cmp);
}
/* duplicate a string, interspersing the character through the elements
of the duplicated string */
-void intersperse(unsigned char *dest, unsigned char *from, int len, char c)
-{
+void intersperse(unsigned char *dest, unsigned char *from, int len, char c) {
unsigned char *p, *q;
p = from;
q = dest;
}
/* find maximum char in a packed string */
-unsigned char maximum(unsigned char *p, int len)
-{
+unsigned char maximum(unsigned char *p, int len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q > c)
}
/* find minimum char in a packed string */
-unsigned char minimum(unsigned char *p, int len)
-{
+unsigned char minimum(unsigned char *p, int len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q < c)
c = *q;
return c;
}
+
+/* count the number of occurences of a char in a string */
+int count(unsigned char *p, int len, unsigned char w) {
+ int c;
+ for (c = 0; len--; ++p)
+ if (*p == w)
+ ++c;
+ return c;
+}