X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Futils%2FStringBuffer.lhs;h=92a937b74f1aed2f4fefabb4088974c4c639cfa8;hb=0ad143e178a035e54601ff0793e403c5fd5ac890;hp=28a7f6728d7ee565ddb82e8dec93d19f8e2f0ce3;hpb=4a1aca1033549f95cbdb62cbc0aac331610c91ea;p=ghc-hetmet.git diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs index 28a7f67..92a937b 100644 --- a/compiler/utils/StringBuffer.lhs +++ b/compiler/utils/StringBuffer.lhs @@ -6,6 +6,13 @@ Buffers for scanning string input stored in external arrays. \begin{code} +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + module StringBuffer ( StringBuffer(..), @@ -45,7 +52,6 @@ import Foreign import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose , Handle, hTell ) -import GHC.Ptr ( Ptr(..) ) import GHC.Exts import GHC.IOBase ( IO(..) ) import GHC.Base ( unsafeChr ) @@ -100,10 +106,7 @@ hGetStringBuffer fname = do hClose h if (r /= size) then ioError (userError "short read of file") - else do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) + else newUTF8StringBuffer buf ptr size hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer hGetStringBufferBlock handle wanted @@ -114,9 +117,22 @@ hGetStringBufferBlock handle wanted withForeignPtr buf $ \ptr -> do r <- if size == 0 then return 0 else hGetBuf handle ptr size if r /= size - then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle)) - else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - return (StringBuffer buf size 0) + then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) + else newUTF8StringBuffer buf ptr size + +newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer +newUTF8StringBuffer buf ptr size = do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + let + sb0 = StringBuffer buf size 0 + (first_char, sb1) = nextChar sb0 + -- skip the byte-order mark if there is one (see #1744) + -- This is better than treating #FEFF as whitespace, + -- because that would mess up layout. We don't have a concept + -- of zero-width whitespace in Haskell: all whitespace codepoints + -- have a width of one column. + return (if first_char == '\xfeff' then sb1 else sb0) appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer appendStringBuffers sb1 sb2 @@ -224,19 +240,4 @@ parseUnsignedInteger buf len radix char_to_int inlinePerformIO :: IO a -> a inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -#if __GLASGOW_HASKELL__ < 600 -mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a) -mallocForeignPtrArray = doMalloc undefined - where - doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b) - doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy) - -mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) -mallocForeignPtrBytes n = do - r <- mallocBytes n - newForeignPtr r (finalizerFree r) - -foreign import ccall unsafe "stdlib.h free" - finalizerFree :: Ptr a -> IO () -#endif \end{code}