X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Futils%2FStringBuffer.lhs;h=cbf7d618b25ea8c51cba424de8329c884494b878;hb=947c866a3e655c61ed0d5f3dd1f10e4ecb4c2848;hp=e02a3ba0d5e2a1cc35d3ff459f5705b183968c6b;hpb=01ecefa4b97106fec5c139c5514e5d56e59ecbaf;p=ghc-hetmet.git diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs index e02a3ba..cbf7d61 100644 --- a/compiler/utils/StringBuffer.lhs +++ b/compiler/utils/StringBuffer.lhs @@ -6,6 +6,13 @@ Buffers for scanning string input stored in external arrays. \begin{code} +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + module StringBuffer ( StringBuffer(..), @@ -39,23 +46,23 @@ module StringBuffer #include "HsVersions.h" import Encoding -import FastString ( FastString,mkFastString,mkFastStringBytes ) +import FastString hiding ( buf ) +import FastTypes +import FastFunctions import Foreign import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose , Handle, hTell ) import GHC.Exts -import GHC.IOBase ( IO(..) ) -import GHC.Base ( unsafeChr ) -#if __GLASGOW_HASKELL__ >= 601 +#if !defined(__GLASGOW_HASKELL__) || __GLASGOW_HASKELL__ >= 601 import System.IO ( openBinaryFile ) #else import IOExts ( openFileEx, IOModeEx(..) ) #endif -#if __GLASGOW_HASKELL__ < 601 +#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601 openBinaryFile fp mode = openFileEx fp (BinaryMode mode) #endif @@ -99,10 +106,7 @@ hGetStringBuffer fname = do hClose h if (r /= size) then ioError (userError "short read of file") - else do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) + else newUTF8StringBuffer buf ptr size hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer hGetStringBufferBlock handle wanted @@ -114,8 +118,21 @@ hGetStringBufferBlock handle wanted do r <- if size == 0 then return 0 else hGetBuf handle ptr size if r /= size then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) - else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - return (StringBuffer buf size 0) + else newUTF8StringBuffer buf ptr size + +newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer +newUTF8StringBuffer buf ptr size = do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + let + sb0 = StringBuffer buf size 0 + (first_char, sb1) = nextChar sb0 + -- skip the byte-order mark if there is one (see #1744) + -- This is better than treating #FEFF as whitespace, + -- because that would mess up layout. We don't have a concept + -- of zero-width whitespace in Haskell: all whitespace codepoints + -- have a width of one column. + return (if first_char == '\xfeff' then sb1 else sb0) appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer appendStringBuffers sb1 sb2 @@ -191,7 +208,7 @@ lexemeToString (StringBuffer buf _ cur) bytes = utf8DecodeString (ptr `plusPtr` cur) bytes lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString -lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString _ 0 = nilFS lexemeToFastString (StringBuffer buf _ cur) len = inlinePerformIO $ withForeignPtr buf $ \ptr -> @@ -199,28 +216,28 @@ lexemeToFastString (StringBuffer buf _ cur) len = -- ----------------------------------------------------------------------------- -- Parsing integer strings in various bases - +{- byteOff :: StringBuffer -> Int -> Char byteOff (StringBuffer buf _ cur) i = inlinePerformIO $ withForeignPtr buf $ \ptr -> do - w <- peek (ptr `plusPtr` (cur+i)) - return (unsafeChr (fromIntegral (w::Word8))) - +-- return $! cBox (indexWord8OffFastPtrAsFastChar +-- (pUnbox ptr) (iUnbox (cur+i))) +--or +-- w <- peek (ptr `plusPtr` (cur+i)) +-- return (unsafeChr (fromIntegral (w::Word8))) +-} -- | XXX assumes ASCII digits only (by using byteOff) parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer -parseUnsignedInteger buf len radix char_to_int - = go 0 0 - where +parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let + --LOL, in implementations where the indexing needs slow unsafePerformIO, + --this is less (not more) efficient than using the IO monad explicitly + --here. + byteOff p i = cBox (indexWord8OffFastPtrAsFastChar + (pUnbox ptr) (iUnbox (cur+i))) go i x | i == len = x - | otherwise = go (i+1) - (x * radix + toInteger (char_to_int (byteOff buf i))) - --- ----------------------------------------------------------------------------- --- under the carpet - --- Just like unsafePerformIO, but we inline it. -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + | otherwise = case byteOff ptr i of + char -> go (i+1) (x * radix + toInteger (char_to_int char)) + in go 0 0 \end{code}