X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Futils%2FStringBuffer.lhs;h=5d1bfa60861781e17bf8528ad1bb7e33bdeceebd;hp=69caf0ee120eec15ab8cb4aec24ac1b9bf4cc703;hb=5289f5d85610f71625a439747a09384876655eb5;hpb=9f589efb688f22cb0637f9c1164dd9027b4378a3 diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs index 69caf0e..5d1bfa6 100644 --- a/compiler/utils/StringBuffer.lhs +++ b/compiler/utils/StringBuffer.lhs @@ -6,59 +6,55 @@ Buffers for scanning string input stored in external arrays. \begin{code} +{-# LANGUAGE BangPatterns #-} +{-# OPTIONS_GHC -O -funbox-strict-fields #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected + module StringBuffer ( StringBuffer(..), - -- non-abstract for vs\/HaskellService + -- non-abstract for vs\/HaskellService - -- * Creation\/destruction + -- * Creation\/destruction hGetStringBuffer, hGetStringBufferBlock, appendStringBuffers, - stringToStringBuffer, + stringToStringBuffer, - -- * Inspection - nextChar, - currentChar, - prevChar, - atEnd, + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, - -- * Moving and comparison - stepOn, - offsetBytes, - byteDiff, + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, -- * Conversion lexemeToString, lexemeToFastString, - -- * Parsing integers - parseUnsignedInteger, + -- * Parsing integers + parseUnsignedInteger, ) where #include "HsVersions.h" import Encoding -import FastString ( FastString,mkFastString,mkFastStringBytes ) +import FastString hiding ( buf ) +import FastTypes +import FastFunctions import Foreign -import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose +import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose , Handle, hTell ) -import GHC.Ptr ( Ptr(..) ) import GHC.Exts -import GHC.IOBase ( IO(..) ) -import GHC.Base ( unsafeChr ) - -#if __GLASGOW_HASKELL__ >= 601 -import System.IO ( openBinaryFile ) -#else -import IOExts ( openFileEx, IOModeEx(..) ) -#endif -#if __GLASGOW_HASKELL__ < 601 -openBinaryFile fp mode = openFileEx fp (BinaryMode mode) -#endif +import System.IO ( openBinaryFile ) -- ----------------------------------------------------------------------------- -- The StringBuffer type @@ -73,18 +69,18 @@ openBinaryFile fp mode = openFileEx fp (BinaryMode mode) data StringBuffer = StringBuffer { buf :: {-# UNPACK #-} !(ForeignPtr Word8), - len :: {-# UNPACK #-} !Int, -- length - cur :: {-# UNPACK #-} !Int -- current pos + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos } - -- The buffer is assumed to be UTF-8 encoded, and furthermore - -- we add three '\0' bytes to the end as sentinels so that the - -- decoder doesn't have to check for overflow at every single byte - -- of a multibyte sequence. + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. instance Show StringBuffer where - showsPrec _ s = showString "" + showsPrec _ s = showString "" -- ----------------------------------------------------------------------------- -- Creation / Destruction @@ -99,11 +95,8 @@ hGetStringBuffer fname = do r <- if size == 0 then return 0 else hGetBuf h ptr size hClose h if (r /= size) - then ioError (userError "short read of file") - else do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) + then ioError (userError "short read of file") + else newUTF8StringBuffer buf ptr size hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer hGetStringBufferBlock handle wanted @@ -114,9 +107,22 @@ hGetStringBufferBlock handle wanted withForeignPtr buf $ \ptr -> do r <- if size == 0 then return 0 else hGetBuf handle ptr size if r /= size - then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle)) - else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - return (StringBuffer buf size 0) + then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) + else newUTF8StringBuffer buf ptr size + +newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer +newUTF8StringBuffer buf ptr size = do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + let + sb0 = StringBuffer buf size 0 + (first_char, sb1) = nextChar sb0 + -- skip the byte-order mark if there is one (see #1744) + -- This is better than treating #FEFF as whitespace, + -- because that would mess up layout. We don't have a concept + -- of zero-width whitespace in Haskell: all whitespace codepoints + -- have a width of one column. + return (if first_char == '\xfeff' then sb1 else sb0) appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer appendStringBuffers sb1 sb2 @@ -124,21 +130,24 @@ appendStringBuffers sb1 sb2 withForeignPtr newBuf $ \ptr -> withForeignPtr (buf sb1) $ \sb1Ptr -> withForeignPtr (buf sb2) $ \sb2Ptr -> - do copyArray (sb1Ptr `advancePtr` cur sb1) ptr (calcLen sb1) - copyArray (sb2Ptr `advancePtr` cur sb2) (ptr `advancePtr` cur sb1) (calcLen sb2) + do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len + copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len pokeArray (ptr `advancePtr` size) [0,0,0] return (StringBuffer newBuf size 0) - where calcLen sb = len sb - cur sb - size = calcLen sb1 + calcLen sb2 - -stringToStringBuffer :: String -> IO StringBuffer -stringToStringBuffer str = do + where sb1_len = calcLen sb1 + sb2_len = calcLen sb2 + calcLen sb = len sb - cur sb + size = sb1_len + sb2_len + +stringToStringBuffer :: String -> StringBuffer +stringToStringBuffer str = + unsafePerformIO $ do let size = utf8EncodedLength str buf <- mallocForeignPtrArray (size+3) withForeignPtr buf $ \ptr -> do utf8EncodeString ptr str pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding + -- sentinels for UTF-8 decoding return (StringBuffer buf size 0) -- ----------------------------------------------------------------------------- @@ -150,17 +159,17 @@ nextChar :: StringBuffer -> (Char,StringBuffer) nextChar (StringBuffer buf len (I# cur#)) = inlinePerformIO $ do withForeignPtr buf $ \(Ptr a#) -> do - case utf8DecodeChar# (a# `plusAddr#` cur#) of - (# c#, b# #) -> - let cur' = I# (b# `minusAddr#` a#) in - return (C# c#, StringBuffer buf len cur') + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') currentChar :: StringBuffer -> Char currentChar = fst . nextChar prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer buf len 0) deflt = deflt -prevChar (StringBuffer buf len cur) deflt = +prevChar (StringBuffer _ _ 0) deflt = deflt +prevChar (StringBuffer buf _ cur) _ = inlinePerformIO $ do withForeignPtr buf $ \p -> do p' <- utf8PrevChar (p `plusPtr` cur) @@ -187,12 +196,12 @@ atEnd (StringBuffer _ l c) = l == c lexemeToString :: StringBuffer -> Int {-bytes-} -> String lexemeToString _ 0 = "" lexemeToString (StringBuffer buf _ cur) bytes = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> + inlinePerformIO $ + withForeignPtr buf $ \ptr -> utf8DecodeString (ptr `plusPtr` cur) bytes lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString -lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString _ 0 = nilFS lexemeToFastString (StringBuffer buf _ cur) len = inlinePerformIO $ withForeignPtr buf $ \ptr -> @@ -200,28 +209,28 @@ lexemeToFastString (StringBuffer buf _ cur) len = -- ----------------------------------------------------------------------------- -- Parsing integer strings in various bases - +{- byteOff :: StringBuffer -> Int -> Char -byteOff (StringBuffer buf _ cur) i = +byteOff (StringBuffer buf _ cur) i = inlinePerformIO $ withForeignPtr buf $ \ptr -> do - w <- peek (ptr `plusPtr` (cur+i)) - return (unsafeChr (fromIntegral (w::Word8))) - +-- return $! cBox (indexWord8OffFastPtrAsFastChar +-- (pUnbox ptr) (iUnbox (cur+i))) +--or +-- w <- peek (ptr `plusPtr` (cur+i)) +-- return (unsafeChr (fromIntegral (w::Word8))) +-} -- | XXX assumes ASCII digits only (by using byteOff) parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer -parseUnsignedInteger buf len radix char_to_int - = go 0 0 - where +parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let + --LOL, in implementations where the indexing needs slow unsafePerformIO, + --this is less (not more) efficient than using the IO monad explicitly + --here. + !ptr' = pUnbox ptr + byteOff i = cBox (indexWord8OffFastPtrAsFastChar ptr' (iUnbox (cur + i))) go i x | i == len = x - | otherwise = go (i+1) - (x * radix + toInteger (char_to_int (byteOff buf i))) - --- ----------------------------------------------------------------------------- --- under the carpet - --- Just like unsafePerformIO, but we inline it. -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + | otherwise = case byteOff i of + char -> go (i + 1) (x * radix + toInteger (char_to_int char)) + in go 0 0 \end{code}