X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Futils%2FStringBuffer.lhs;h=a89c0d2ba98dca11c15ff5019acfc5c34aeaa248;hb=e761a777f2440ca1b8d8b40848cc5aa30d889ff6;hp=e52e7e78da9c7792ff9a9b79b747c5ab27888700;hpb=0065d5ab628975892cea1ec7303f968c3338cbe1;p=ghc-hetmet.git diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs index e52e7e7..a89c0d2 100644 --- a/compiler/utils/StringBuffer.lhs +++ b/compiler/utils/StringBuffer.lhs @@ -1,7 +1,7 @@ % +% (c) The University of Glasgow 2006 % (c) The University of Glasgow, 1997-2006 % -\section{String buffers} Buffers for scanning string input stored in external arrays. @@ -9,56 +9,47 @@ Buffers for scanning string input stored in external arrays. module StringBuffer ( StringBuffer(..), - -- non-abstract for vs\/HaskellService + -- non-abstract for vs\/HaskellService - -- * Creation\/destruction + -- * Creation\/destruction hGetStringBuffer, hGetStringBufferBlock, appendStringBuffers, - stringToStringBuffer, + stringToStringBuffer, - -- * Inspection - nextChar, - currentChar, - prevChar, - atEnd, + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, - -- * Moving and comparison - stepOn, - offsetBytes, - byteDiff, + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, -- * Conversion lexemeToString, lexemeToFastString, - -- * Parsing integers - parseInteger, + -- * Parsing integers + parseUnsignedInteger, ) where #include "HsVersions.h" import Encoding -import FastString ( FastString,mkFastString,mkFastStringBytes ) +import FastString hiding ( buf ) +import FastTypes +import FastFunctions import Foreign -import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose +import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose , Handle, hTell ) -import GHC.Ptr ( Ptr(..) ) import GHC.Exts -import GHC.IOBase ( IO(..) ) -import GHC.Base ( unsafeChr ) -#if __GLASGOW_HASKELL__ >= 601 -import System.IO ( openBinaryFile ) -#else -import IOExts ( openFileEx, IOModeEx(..) ) -#endif - -#if __GLASGOW_HASKELL__ < 601 -openBinaryFile fp mode = openFileEx fp (BinaryMode mode) -#endif +import System.IO ( openBinaryFile ) -- ----------------------------------------------------------------------------- -- The StringBuffer type @@ -73,18 +64,18 @@ openBinaryFile fp mode = openFileEx fp (BinaryMode mode) data StringBuffer = StringBuffer { buf :: {-# UNPACK #-} !(ForeignPtr Word8), - len :: {-# UNPACK #-} !Int, -- length - cur :: {-# UNPACK #-} !Int -- current pos + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos } - -- The buffer is assumed to be UTF-8 encoded, and furthermore - -- we add three '\0' bytes to the end as sentinels so that the - -- decoder doesn't have to check for overflow at every single byte - -- of a multibyte sequence. + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. instance Show StringBuffer where - showsPrec _ s = showString "" + showsPrec _ s = showString "" -- ----------------------------------------------------------------------------- -- Creation / Destruction @@ -99,11 +90,8 @@ hGetStringBuffer fname = do r <- if size == 0 then return 0 else hGetBuf h ptr size hClose h if (r /= size) - then ioError (userError "short read of file") - else do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) + then ioError (userError "short read of file") + else newUTF8StringBuffer buf ptr size hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer hGetStringBufferBlock handle wanted @@ -114,9 +102,22 @@ hGetStringBufferBlock handle wanted withForeignPtr buf $ \ptr -> do r <- if size == 0 then return 0 else hGetBuf handle ptr size if r /= size - then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle)) - else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - return (StringBuffer buf size 0) + then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) + else newUTF8StringBuffer buf ptr size + +newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer +newUTF8StringBuffer buf ptr size = do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + let + sb0 = StringBuffer buf size 0 + (first_char, sb1) = nextChar sb0 + -- skip the byte-order mark if there is one (see #1744) + -- This is better than treating #FEFF as whitespace, + -- because that would mess up layout. We don't have a concept + -- of zero-width whitespace in Haskell: all whitespace codepoints + -- have a width of one column. + return (if first_char == '\xfeff' then sb1 else sb0) appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer appendStringBuffers sb1 sb2 @@ -138,7 +139,7 @@ stringToStringBuffer str = do withForeignPtr buf $ \ptr -> do utf8EncodeString ptr str pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding + -- sentinels for UTF-8 decoding return (StringBuffer buf size 0) -- ----------------------------------------------------------------------------- @@ -150,17 +151,17 @@ nextChar :: StringBuffer -> (Char,StringBuffer) nextChar (StringBuffer buf len (I# cur#)) = inlinePerformIO $ do withForeignPtr buf $ \(Ptr a#) -> do - case utf8DecodeChar# (a# `plusAddr#` cur#) of - (# c#, b# #) -> - let cur' = I# (b# `minusAddr#` a#) in - return (C# c#, StringBuffer buf len cur') + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') currentChar :: StringBuffer -> Char currentChar = fst . nextChar prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer buf len 0) deflt = deflt -prevChar (StringBuffer buf len cur) deflt = +prevChar (StringBuffer _ _ 0) deflt = deflt +prevChar (StringBuffer buf _ cur) _ = inlinePerformIO $ do withForeignPtr buf $ \p -> do p' <- utf8PrevChar (p `plusPtr` cur) @@ -187,12 +188,12 @@ atEnd (StringBuffer _ l c) = l == c lexemeToString :: StringBuffer -> Int {-bytes-} -> String lexemeToString _ 0 = "" lexemeToString (StringBuffer buf _ cur) bytes = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> + inlinePerformIO $ + withForeignPtr buf $ \ptr -> utf8DecodeString (ptr `plusPtr` cur) bytes lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString -lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString _ 0 = nilFS lexemeToFastString (StringBuffer buf _ cur) len = inlinePerformIO $ withForeignPtr buf $ \ptr -> @@ -200,41 +201,28 @@ lexemeToFastString (StringBuffer buf _ cur) len = -- ----------------------------------------------------------------------------- -- Parsing integer strings in various bases - +{- byteOff :: StringBuffer -> Int -> Char -byteOff (StringBuffer buf _ cur) i = +byteOff (StringBuffer buf _ cur) i = inlinePerformIO $ withForeignPtr buf $ \ptr -> do - w <- peek (ptr `plusPtr` (cur+i)) - return (unsafeChr (fromIntegral (w::Word8))) +-- return $! cBox (indexWord8OffFastPtrAsFastChar +-- (pUnbox ptr) (iUnbox (cur+i))) +--or +-- w <- peek (ptr `plusPtr` (cur+i)) +-- return (unsafeChr (fromIntegral (w::Word8))) +-} +-- | XXX assumes ASCII digits only (by using byteOff) +parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let + --LOL, in implementations where the indexing needs slow unsafePerformIO, + --this is less (not more) efficient than using the IO monad explicitly + --here. + ptr' = pUnbox ptr + byteOff i = cBox (indexWord8OffFastPtrAsFastChar ptr' (iUnbox (cur + i))) + go i x | i == len = x + | otherwise = case byteOff i of + char -> go (i + 1) (x * radix + toInteger (char_to_int char)) + in go 0 0 --- | XXX assumes ASCII digits only -parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer -parseInteger buf len radix to_int - = go 0 0 - where go i x | i == len = x - | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i))) - --- ----------------------------------------------------------------------------- --- under the carpet - --- Just like unsafePerformIO, but we inline it. -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r - -#if __GLASGOW_HASKELL__ < 600 -mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a) -mallocForeignPtrArray = doMalloc undefined - where - doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b) - doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy) - -mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) -mallocForeignPtrBytes n = do - r <- mallocBytes n - newForeignPtr r (finalizerFree r) - -foreign import ccall unsafe "stdlib.h free" - finalizerFree :: Ptr a -> IO () -#endif \end{code}