X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FStringBuffer.lhs;h=e2eed889f20055a9da940ef187f9ac2c259b88ba;hb=9d7da331989abcd1844e9d03b8d1e4163796fa85;hp=e53dbc89ce2565073d9c78f024bfae3369943f81;hpb=853e20a3eb86137cdb8accf69c6caa9db83a3d34;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index e53dbc8..e2eed88 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -6,27 +6,32 @@ Buffers for scanning string input stored in external arrays. \begin{code} +{-# OPTIONS_GHC -O #-} +-- always optimise this module, it's critical + module StringBuffer ( StringBuffer(..), -- non-abstract for vs\/HaskellService -- * Creation\/destruction - hGetStringBuffer, -- :: FilePath -> IO StringBuffer - stringToStringBuffer, -- :: String -> IO StringBuffer + hGetStringBuffer, + stringToStringBuffer, - -- * Lookup - currentChar, -- :: StringBuffer -> Char - prevChar, -- :: StringBuffer -> Char -> Char - lookAhead, -- :: StringBuffer -> Int -> Char - atEnd, -- :: StringBuffer -> Bool + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, - -- * Moving - stepOn, stepOnBy, + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, -- * Conversion - lexemeToString, -- :: StringBuffer -> Int -> String - lexemeToFastString, -- :: StringBuffer -> Int -> FastString + lexemeToString, + lexemeToFastString, -- * Parsing integers parseInteger, @@ -34,22 +39,19 @@ module StringBuffer #include "HsVersions.h" -import FastString -import Panic +import Encoding +import FastString (FastString,mkFastString,mkFastStringBytes) import GLAEXTS import Foreign -#if __GLASGOW_HASKELL__ < 503 -import PrelIOBase -import PrelHandle -#else -import GHC.IOBase -import GHC.IO ( slurpFile ) -#endif +import GHC.IOBase ( IO(..) ) +import GHC.Base ( unsafeChr ) + +import System.IO ( hGetBuf ) -import IO ( openFile, hFileSize, IOMode(ReadMode), +import IO ( hFileSize, IOMode(ReadMode), hClose ) #if __GLASGOW_HASKELL__ >= 601 import System.IO ( openBinaryFile ) @@ -57,37 +59,35 @@ import System.IO ( openBinaryFile ) import IOExts ( openFileEx, IOModeEx(..) ) #endif -#if __GLASGOW_HASKELL__ < 503 -import IArray ( listArray ) -import ArrayBase ( UArray(..) ) -import MutableArray -import IOExts ( hGetBufBA ) -#else -import Data.Array.IArray ( listArray ) -import Data.Array.MArray ( unsafeFreeze, newArray_ ) -import Data.Array.Base ( UArray(..) ) -import Data.Array.IO ( IOArray, hGetArray ) -#endif - -import Char ( ord ) - #if __GLASGOW_HASKELL__ < 601 openBinaryFile fp mode = openFileEx fp (BinaryMode mode) #endif + -- ----------------------------------------------------------------------------- -- The StringBuffer type --- A StringBuffer is a ByteArray# with a pointer into it. We also cache --- the length of the ByteArray# for speed. - +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- data StringBuffer - = StringBuffer - ByteArray# - Int# -- length - Int# -- current pos + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. instance Show StringBuffer where - showsPrec _ s = showString "" + showsPrec _ s = showString "" -- ----------------------------------------------------------------------------- -- Creation / Destruction @@ -95,97 +95,108 @@ instance Show StringBuffer where hGetStringBuffer :: FilePath -> IO StringBuffer hGetStringBuffer fname = do h <- openBinaryFile fname ReadMode - size <- hFileSize h - let size_i@(I# sz#) = fromIntegral size -#if __GLASGOW_HASKELL__ < 503 - arr <- stToIO (newCharArray (0,size_i-1)) - r <- hGetBufBA h arr size_i -#else - arr <- newArray_ (0,size_i-1) - r <- if size_i == 0 then return 0 else hGetArray h arr size_i -#endif - hClose h - if (r /= size_i) + size_i <- hFileSize h + let size = fromIntegral size_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) then ioError (userError "short read of file") else do -#if __GLASGOW_HASKELL__ < 503 - frozen <- stToIO (unsafeFreezeByteArray arr) - case frozen of - ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) -#else - frozen <- unsafeFreeze arr - case frozen of - UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) -#endif + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) -#if __GLASGOW_HASKELL__ >= 502 +stringToStringBuffer :: String -> IO StringBuffer stringToStringBuffer str = do - let size@(I# sz#) = length str - arr = listArray (0,size-1) (map (fromIntegral.ord) str) - :: UArray Int Word8 - case arr of - UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) -#else -stringToStringBuffer = panic "stringToStringBuffer: not implemented" -#endif + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) -- ----------------------------------------------------------------------------- --- Lookup - -currentChar :: StringBuffer -> Char -currentChar (StringBuffer arr# l# current#) = - ASSERT(current# <# l#) - C# (indexCharArray# arr# current#) +-- Grab a character + +-- Getting our fingers dirty a little here, but this is performance-critical +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') + +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer _ _ 0#) deflt = deflt -prevChar s deflt = lookAhead s (-1) - -lookAhead :: StringBuffer -> Int -> Char -lookAhead (StringBuffer arr# l# c#) (I# i#) = - ASSERT(off <# l# && off >=# 0#) - C# (indexCharArray# arr# off) - where - off = c# +# i# +prevChar (StringBuffer buf len 0) deflt = deflt +prevChar (StringBuffer buf len cur) deflt = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) -- ----------------------------------------------------------------------------- -- Moving stepOn :: StringBuffer -> StringBuffer -stepOn s = stepOnBy 1 s +stepOn s = snd (nextChar s) + +offsetBytes :: Int -> StringBuffer -> StringBuffer +offsetBytes i s = s { cur = cur s + i } -stepOnBy :: Int -> StringBuffer -> StringBuffer -stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#) +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 atEnd :: StringBuffer -> Bool -atEnd (StringBuffer _ l# c#) = l# ==# c# +atEnd (StringBuffer _ l c) = l == c -- ----------------------------------------------------------------------------- -- Conversion -lexemeToString :: StringBuffer -> Int -> String +lexemeToString :: StringBuffer -> Int {-bytes-} -> String lexemeToString _ 0 = "" -lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current# - where - end = current# +# len# +lexemeToString (StringBuffer buf _ cur) bytes = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + utf8DecodeString (ptr `plusPtr` cur) bytes - unpack nh - | nh >=# end = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharArray# arr# nh - -lexemeToFastString :: StringBuffer -> Int -> FastString +lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString lexemeToFastString _ 0 = mkFastString "" -lexemeToFastString (StringBuffer fo _ current#) (I# len) = - mkFastSubStringBA# fo current# len +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len -- ----------------------------------------------------------------------------- -- Parsing integer strings in various bases +byteOff :: StringBuffer -> Int -> Char +byteOff (StringBuffer buf _ cur) i = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + w <- peek (ptr `plusPtr` (cur+i)) + return (unsafeChr (fromIntegral (w::Word8))) + +-- | XXX assumes ASCII digits only parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer parseInteger buf len radix to_int = go 0 0 where go i x | i == len = x - | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i))) + | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i))) + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + \end{code}