Buffers for scanning string input stored in external arrays.
\begin{code}
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
module StringBuffer
(
StringBuffer(..),
lexemeToFastString,
-- * Parsing integers
- parseInteger,
+ parseUnsignedInteger,
) where
#include "HsVersions.h"
import Encoding
-import FastString ( FastString,mkFastString,mkFastStringBytes )
+import FastString hiding ( buf )
+import FastTypes
+import FastFunctions
import Foreign
import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose
, Handle, hTell )
-import GHC.Ptr ( Ptr(..) )
import GHC.Exts
-import GHC.IOBase ( IO(..) )
-import GHC.Base ( unsafeChr )
-#if __GLASGOW_HASKELL__ >= 601
+#if !defined(__GLASGOW_HASKELL__) || __GLASGOW_HASKELL__ >= 601
import System.IO ( openBinaryFile )
#else
import IOExts ( openFileEx, IOModeEx(..) )
#endif
-#if __GLASGOW_HASKELL__ < 601
+#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601
openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
#endif
hClose h
if (r /= size)
then ioError (userError "short read of file")
- else do
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- -- sentinels for UTF-8 decoding
- return (StringBuffer buf size 0)
+ else newUTF8StringBuffer buf ptr size
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock handle wanted
withForeignPtr buf $ \ptr ->
do r <- if size == 0 then return 0 else hGetBuf handle ptr size
if r /= size
- then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle))
- else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- return (StringBuffer buf size 0)
+ then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+ else newUTF8StringBuffer buf ptr size
+
+-- | Build a 'StringBuffer' over @size@ bytes of UTF-8 data held in @buf@.
+-- @ptr@ is the raw pointer to the start of that data; the three bytes
+-- following the data are overwritten with zeros. If the first character of
+-- the buffer is a byte-order mark, the returned buffer starts after it.
+newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
+newUTF8StringBuffer buf ptr size = do
+  -- sentinels for UTF-8 decoding
+  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+  return $ case nextChar whole of
+    -- Skip the byte-order mark if there is one (see #1744).
+    -- This is better than treating #FEFF as whitespace, because that
+    -- would mess up layout. We don't have a concept of zero-width
+    -- whitespace in Haskell: all whitespace codepoints have a width of
+    -- one column.
+    ('\xfeff', afterBom) -> afterBom
+    _                    -> whole
+  where
+    -- The buffer positioned at offset 0, before any BOM handling.
+    whole = StringBuffer buf size 0
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers sb1 sb2
utf8DecodeString (ptr `plusPtr` cur) bytes
lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
-lexemeToFastString _ 0 = mkFastString ""
+lexemeToFastString _ 0 = nilFS
lexemeToFastString (StringBuffer buf _ cur) len =
inlinePerformIO $
withForeignPtr buf $ \ptr ->
-- -----------------------------------------------------------------------------
-- Parsing integer strings in various bases
-
+{-
byteOff :: StringBuffer -> Int -> Char
byteOff (StringBuffer buf _ cur) i =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
- w <- peek (ptr `plusPtr` (cur+i))
- return (unsafeChr (fromIntegral (w::Word8)))
+-- return $! cBox (indexWord8OffFastPtrAsFastChar
+-- (pUnbox ptr) (iUnbox (cur+i)))
+--or
+-- w <- peek (ptr `plusPtr` (cur+i))
+-- return (unsafeChr (fromIntegral (w::Word8)))
+-}
+-- | Parse an unsigned integer from the bytes of a 'StringBuffer'.
+--
+-- Reads @len@ bytes starting at the buffer's current offset, converts each
+-- byte to a 'Char', maps it through @char_to_int@ to obtain a digit value,
+-- and accumulates the digits in base @radix@, most significant digit first.
+-- A @len@ of zero (or less) yields 0.
+--
+-- XXX assumes ASCII digits only: the input is read one byte at a time, so
+-- every digit must occupy exactly one byte.
+parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
+parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
+  = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
+    -- NOTE: in implementations where the indexing needs a slow
+    -- unsafePerformIO, this is less (not more) efficient than using the
+    -- IO monad explicitly here.
+    --
+    -- Index relative to the pointer that is passed in, rather than
+    -- silently capturing the enclosing 'ptr' and ignoring the argument.
+    byteOff p i = cBox (indexWord8OffFastPtrAsFastChar
+                          (pUnbox p) (iUnbox (cur+i)))
+    -- Terminate with '>=' rather than '==' so that a negative length
+    -- stops immediately instead of walking past the end of the buffer.
+    go i x | i >= len  = x
+           | otherwise = case byteOff ptr i of
+               char -> go (i+1) (x * radix + toInteger (char_to_int char))
+  in go 0 0
--- | XXX assumes ASCII digits only
-parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
-parseInteger buf len radix to_int
- = go 0 0
- where go i x | i == len = x
- | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
-
--- -----------------------------------------------------------------------------
--- under the carpet
-
--- Just like unsafePerformIO, but we inline it.
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-
-#if __GLASGOW_HASKELL__ < 600
-mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a)
-mallocForeignPtrArray = doMalloc undefined
- where
- doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b)
- doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy)
-
-mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
-mallocForeignPtrBytes n = do
- r <- mallocBytes n
- newForeignPtr r (finalizerFree r)
-
-foreign import ccall unsafe "stdlib.h free"
- finalizerFree :: Ptr a -> IO ()
-#endif
\end{code}