X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FStringBuffer.lhs;h=e2eed889f20055a9da940ef187f9ac2c259b88ba;hb=9d7da331989abcd1844e9d03b8d1e4163796fa85;hp=67b565e83bc9e6d6386fde1973aef3da067d4b4f;hpb=6e35e88570cbbcb483988af62837ce9cf9170441;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index 67b565e..e2eed88 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -1,379 +1,202 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997 +% (c) The University of Glasgow, 1997-2003 % \section{String buffers} Buffers for scanning string input stored in external arrays. \begin{code} -{-# OPTIONS -fno-prune-tydecls #-} +{-# OPTIONS_GHC -O #-} +-- always optimise this module, it's critical + module StringBuffer ( - StringBuffer, - - -- creation - hGetStringBuffer, -- :: FilePath -> IO StringBuffer - - -- Lookup - currentChar, -- :: StringBuffer -> Char - currentChar#, -- :: StringBuffer -> Char# - indexSBuffer, -- :: StringBuffer -> Int -> Char - indexSBuffer#, -- :: StringBuffer -> Int# -> Char# - -- relative lookup, i.e, currentChar = lookAhead 0 - lookAhead, -- :: StringBuffer -> Int -> Char - lookAhead#, -- :: StringBuffer -> Int# -> Char# - - -- moving the end point of the current lexeme. - setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer - incLexeme, -- :: StringBuffer -> StringBuffer - decLexeme, -- :: StringBuffer -> StringBuffer - - -- move the start and end lexeme pointer on by x units. - stepOn, -- :: StringBuffer -> StringBuffer - stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - stepOverLexeme, -- :: StringBuffer -> StringBuffer - scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer) - expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer - -- at or beyond end of buffer? - bufferExhausted, -- :: StringBuffer -> Bool - emptyLexeme, -- :: StringBuffer -> Bool - - -- matching - prefixMatch, -- :: StringBuffer -> String -> Bool - untilEndOfString#, -- :: StringBuffer -> Int# - untilEndOfChar#, -- :: StringBuffer -> Int# - untilChar#, -- :: StringBuffer -> Char# -> Int# - - -- conversion - lexemeToString, -- :: StringBuffer -> String - lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int - lexemeToFastString, -- :: StringBuffer -> FastString - lexemeToBuffer, -- :: StringBuffer -> StringBuffer - - FastString, - ByteArray + StringBuffer(..), + -- non-abstract for vs\/HaskellService + + -- * Creation\/destruction + hGetStringBuffer, + stringToStringBuffer, + + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, + + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, + + -- * Conversion + lexemeToString, + lexemeToFastString, + + -- * Parsing integers + parseInteger, ) where #include "HsVersions.h" -import GlaExts -import Addr ( Addr(..) ) -import Foreign -import ST +import Encoding +import FastString (FastString,mkFastString,mkFastStringBytes) -#if __GLASGOW_HASKELL__ >= 303 -import IO ( slurpFile ) -#else -import IO ( openFile, hFileSize, hClose, IOMode(..) ) -#endif +import GLAEXTS -#if __GLASGOW_HASKELL__ < 301 -import IOBase ( IOError(..), IOErrorType(..) ) -import IOHandle ( readHandle, writeHandle, filePtr ) -import PackBase ( unpackCStringBA ) -#else -# if __GLASGOW_HASKELL__ <= 302 -import PrelIOBase ( IOError(..), IOErrorType(..) ) -import PrelHandle ( readHandle, writeHandle, filePtr ) -# endif -import PrelPack ( unpackCStringBA ) -#endif +import Foreign -import PrimPacked -import FastString -import Char (isDigit) -\end{code} +import GHC.IOBase ( IO(..) ) +import GHC.Base ( unsafeChr ) -\begin{code} -data StringBuffer - = StringBuffer - Addr# - Int# -- length - Int# -- lexeme start - Int# -- current pos -\end{code} +import System.IO ( hGetBuf ) -\begin{code} -instance Text StringBuffer where - showsPrec _ s = showString "" -\end{code} - -\begin{code} -hGetStringBuffer :: FilePath -> IO StringBuffer -hGetStringBuffer fname = -#if __GLASGOW_HASKELL__ >= 303 - slurpFile fname >>= \ (a , read) -> - let (A# a#) = a - (I# read#) = read - in - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' a (I# (read# -# 1#)) >>= \ () -> - return (StringBuffer a# read# 0# 0#) +import IO ( hFileSize, IOMode(ReadMode), + hClose ) +#if __GLASGOW_HASKELL__ >= 601 +import System.IO ( openBinaryFile ) #else - openFile fname ReadMode >>= \ hndl -> - hFileSize hndl >>= \ len@(J# _ _ d#) -> - let len_i = fromInteger len in - -- Allocate an array for system call to store its bytes into. - -- ToDo: make it robust --- trace (show ((len_i::Int)+1)) $ - _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) -> - if addr2Int# a# ==# 0# then - fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes")) - else - readHandle hndl >>= \ hndl_ -> - writeHandle hndl hndl_ >> - let ptr = filePtr hndl_ in -#if __GLASGOW_HASKELL__ <= 302 - _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) -> -#else - _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) -> +import IOExts ( openFileEx, IOModeEx(..) ) #endif - hClose hndl >> - if read# ==# 0# then -- EOF or some other error - fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname)) - else - -- Add a sentinel NUL - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' arr (I# (read# -# 1#)) >>= \ () -> - return (StringBuffer a# read# 0# 0#) +#if __GLASGOW_HASKELL__ < 601 +openBinaryFile fp mode = openFileEx fp (BinaryMode mode) #endif -unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer -unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# = - unsafePerformIO ( - _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () -> - return s - ) -\end{code} - -Lookup - -\begin{code} -currentChar :: StringBuffer -> Char -currentChar sb = case currentChar# sb of c -> C# c - -lookAhead :: StringBuffer -> Int -> Char -lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c - -indexSBuffer :: StringBuffer -> Int -> Char -indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c - -currentChar# :: StringBuffer -> Char# -indexSBuffer# :: StringBuffer -> Int# -> Char# -lookAhead# :: StringBuffer -> Int# -> Char# -currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# -indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# - - -- relative lookup, i.e, currentChar = lookAhead 0 -lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#) - -\end{code} - - moving the start point of the current lexeme. - -\begin{code} - -- moving the end point of the current lexeme. -setCurrentPos# :: StringBuffer -> Int# -> StringBuffer -setCurrentPos# (StringBuffer fo l# s# c#) i# = - StringBuffer fo l# s# (c# +# i#) +-- ----------------------------------------------------------------------------- +-- The StringBuffer type --- augmenting the current lexeme by one. -incLexeme :: StringBuffer -> StringBuffer -incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#) - -decLexeme :: StringBuffer -> StringBuffer -decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#) - -\end{code} +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- +data StringBuffer + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. + +instance Show StringBuffer where + showsPrec _ s = showString "" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction --- move the start and end point of the buffer on by --- x units. +hGetStringBuffer :: FilePath -> IO StringBuffer +hGetStringBuffer fname = do + h <- openBinaryFile fname ReadMode + size_i <- hFileSize h + let size = fromIntegral size_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) + then ioError (userError "short read of file") + else do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +stringToStringBuffer :: String -> IO StringBuffer +stringToStringBuffer str = do + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +-- ----------------------------------------------------------------------------- +-- Grab a character + +-- Getting our fingers dirty a little here, but this is performance-critical +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') + +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar + +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer buf len 0) deflt = deflt +prevChar (StringBuffer buf len cur) deflt = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) + +-- ----------------------------------------------------------------------------- +-- Moving -\begin{code} stepOn :: StringBuffer -> StringBuffer -stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same. - -stepOnBy# :: StringBuffer -> Int# -> StringBuffer -stepOnBy# (StringBuffer fo# l# s# c#) i# = - case s# +# i# of - new_s# -> StringBuffer fo# l# new_s# new_s# - --- jump to pos. -stepOnTo# :: StringBuffer -> Int# -> StringBuffer -stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s# - -stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer - -stepOnUntil pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> StringBuffer fo l# c# c# - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> loop (c# +# 1#) - -stepOverLexeme :: StringBuffer -> StringBuffer -stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c# - -expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer -expandWhile pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - - -scanNumLit :: Int -> StringBuffer -> (Int,StringBuffer) -scanNumLit (I# acc#) (StringBuffer fo l# s# c#) = - loop acc# c# - where - loop acc# c# = - case indexCharOffAddr# fo c# of - ch# | isDigit (C# ch#) -> loop (acc# *# 10# +# (ord# ch# -# ord# '0'#)) (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> (I# acc#, StringBuffer fo l# l# l#) -- EOB, return immediately. - | otherwise -> (I# acc#,StringBuffer fo l# s# c#) - - -expandUntilMatch :: StringBuffer -> String -> StringBuffer -expandUntilMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = StringBuffer fo l# s# c# - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = loop (c# +# 1#) str - -\end{code} - -\begin{code} - -- at or beyond end of buffer? -bufferExhausted :: StringBuffer -> Bool -bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l# - -emptyLexeme :: StringBuffer -> Bool -emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c# - - -- matching -prefixMatch :: StringBuffer -> String -> Maybe StringBuffer -prefixMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = Nothing - -untilEndOfString# :: StringBuffer -> StringBuffer -untilEndOfString# (StringBuffer fo l# s# c#) = - loop c# - where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\"'# -> - case getch# (c# -# 1#) of - '\\'# -> - -- looks like an escaped something or other to me, - -- better count the number of "\\"s that are immediately - -- preceeding to decide if the " is escaped. - let - odd_slashes flg i# = - case getch# i# of - '\\'# -> odd_slashes (not flg) (i# -# 1#) - _ -> flg - in - if odd_slashes True (c# -# 2#) then - -- odd number, " is ecaped. - loop (c# +# 1#) - else -- a real end of string delimiter after all. - StringBuffer fo l# s# c# - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - - -untilEndOfChar# :: StringBuffer -> StringBuffer -untilEndOfChar# (StringBuffer fo l# s# c#) = - loop c# - where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\''# -> - case getch# (c# -# 1#) of - '\\'# -> - case getch# (c# -# 2#) of - '\\'# -> -- end of char - StringBuffer fo l# s# c# - _ -> loop (c# +# 1#) -- false alarm - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - -untilChar# :: StringBuffer -> Char# -> StringBuffer -untilChar# (StringBuffer fo l# s# c#) x# = - loop c# - where - loop c# - | indexCharOffAddr# fo c# `eqChar#` x# - = StringBuffer fo l# s# c# - | otherwise - = loop (c# +# 1#) - - -- conversion -lexemeToString :: StringBuffer -> String -lexemeToString (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - "" - else - unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))) - -lexemeToByteArray :: StringBuffer -> _ByteArray Int -lexemeToByteArray (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - error "lexemeToByteArray" - else - copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) - -lexemeToFastString :: StringBuffer -> FastString -lexemeToFastString (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - mkFastCharString2 (A# fo) (I# 0#) - else - mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) - -{- - Create a StringBuffer from the current lexeme, and add a sentinel - at the end. Know What You're Doing before taking this function - into use.. --} -lexemeToBuffer :: StringBuffer -> StringBuffer -lexemeToBuffer (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - StringBuffer fo 0# start_pos# current# -- an error, really. - else - unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#) - (current# -# 1#) - '\NUL'# +stepOn s = snd (nextChar s) + +offsetBytes :: Int -> StringBuffer -> StringBuffer +offsetBytes i s = s { cur = cur s + i } + +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 + +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l c) = l == c + +-- ----------------------------------------------------------------------------- +-- Conversion + +lexemeToString :: StringBuffer -> Int {-bytes-} -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer buf _ cur) bytes = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + utf8DecodeString (ptr `plusPtr` cur) bytes + +lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString +lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len + +-- ----------------------------------------------------------------------------- +-- Parsing integer strings in various bases + +byteOff :: StringBuffer -> Int -> Char +byteOff (StringBuffer buf _ cur) i = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + w <- peek (ptr `plusPtr` (cur+i)) + return (unsafeChr (fromIntegral (w::Word8))) + +-- | XXX assumes ASCII digits only +parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseInteger buf len radix to_int + = go 0 0 + where go i x | i == len = x + | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i))) + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r \end{code}