X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FStringBuffer.lhs;h=f84311b52a58c4d93fd52b54501de5b44ef65228;hb=a237946da277f10bd3d223e5926d118044d24194;hp=e2f5654b56e104a99d14cfbaa66a1e5bc8914172;hpb=dcfe9a1fac635cf60b47cf6c7e06caad552010ec;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index e2f5654..f84311b 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -1,25 +1,18 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997 +% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 % \section{String buffers} Buffers for scanning string input stored in external arrays. \begin{code} - {-# OPTIONS -fno-prune-tydecls #-} --- Don't really understand this! --- ERROR: Can't see the data constructor(s) for _ccall_/_casm_ argument; --- type: ForeignObj(try compiling with -fno-prune-tydecls ..) - - module StringBuffer ( StringBuffer, -- creation hGetStringBuffer, -- :: FilePath -> IO StringBuffer - freeStringBuffer, -- :: StringBuffer -> IO () -- Lookup currentChar, -- :: StringBuffer -> Char @@ -30,6 +23,10 @@ module StringBuffer lookAhead, -- :: StringBuffer -> Int -> Char lookAhead#, -- :: StringBuffer -> Int# -> Char# + -- offsets + currentIndex#, -- :: StringBuffer -> Int# + lexemeIndex, -- :: StringBuffer -> Int# + -- moving the end point of the current lexeme. setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer incLexeme, -- :: StringBuffer -> StringBuffer @@ -40,9 +37,13 @@ module StringBuffer stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer + stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer stepOverLexeme, -- :: StringBuffer -> StringBuffer scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer) - expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer + squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer + mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer + expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer + expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer -- at or beyond end of buffer? bufferExhausted, -- :: StringBuffer -> Bool @@ -51,8 +52,6 @@ module StringBuffer -- matching prefixMatch, -- :: StringBuffer -> String -> Bool untilEndOfString#, -- :: StringBuffer -> Int# - untilEndOfChar#, -- :: StringBuffer -> Int# - untilChar#, -- :: StringBuffer -> Char# -> Int# -- conversion lexemeToString, -- :: StringBuffer -> String @@ -67,21 +66,48 @@ module StringBuffer #include "HsVersions.h" import GlaExts -import Addr ( Addr(..) ) +import PrelAddr ( Addr(..) ) import Foreign import ST +import Char ( chr ) + +-- urk! +#include "../lib/std/cbits/stgerror.h" + +#if __GLASGOW_HASKELL__ >= 303 +import IO ( openFile +#if __GLASGOW_HASKELL__ < 407 + , slurpFile -- comes from PrelHandle or IOExts now +#endif + ) +import PrelIOBase +import PrelHandle +import Addr +#else import IO ( openFile, hFileSize, hClose, IOMode(..) ) +import Addr +#endif #if __GLASGOW_HASKELL__ < 301 -import IOBase ( IOError(..), IOErrorType(..) ) +import IOBase ( Handle, IOError(..), IOErrorType(..), + constructErrorAndFail ) import IOHandle ( readHandle, writeHandle, filePtr ) import PackBase ( unpackCStringBA ) #else -import PrelIOBase ( IOError(..), IOErrorType(..) ) +# if __GLASGOW_HASKELL__ <= 302 +import PrelIOBase ( Handle, IOError(..), IOErrorType(..), + constructErrorAndFail ) import PrelHandle ( readHandle, writeHandle, filePtr ) +# endif import PrelPack ( unpackCStringBA ) #endif +#if __GLASGOW_HASKELL__ < 402 +import Util ( bracket ) +#else +import Exception ( bracket ) +#endif + import PrimPacked import FastString import Char (isDigit) @@ -91,22 +117,33 @@ import Char (isDigit) data StringBuffer = StringBuffer Addr# --- ForeignObj# -- the data Int# -- length Int# -- lexeme start Int# -- current pos \end{code} \begin{code} -instance Text StringBuffer where +instance Show StringBuffer where showsPrec _ s = showString "" \end{code} \begin{code} -hGetStringBuffer :: FilePath -> IO StringBuffer -hGetStringBuffer fname = +hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer +hGetStringBuffer expand_tabs fname = do + (a, read) <- if expand_tabs + then slurpFileExpandTabs fname + else slurpFile fname + + let (A# a#) = a; (I# read#) = read + + -- add sentinel '\NUL' + _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#)) + return (StringBuffer a# read# 0# 0#) + +#if __GLASGOW_HASKELL__ < 303 +slurpFile fname = openFile fname ReadMode >>= \ hndl -> - hFileSize hndl >>= \ len@(J# _ _ d#) -> + hFileSize hndl >>= \ len -> let len_i = fromInteger len in -- Allocate an array for system call to store its bytes into. -- ToDo: make it robust @@ -115,24 +152,20 @@ hGetStringBuffer fname = if addr2Int# a# ==# 0# then fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes")) else - --- _casm_ `` %r=NULL; '' >>= \ free_p -> --- makeForeignObj arr free_p >>= \ fo@(_ForeignObj fo#) -> - readHandle hndl >>= \ hndl_ -> - writeHandle hndl hndl_ >> + readHandle hndl >>= \ hndl_ -> + writeHandle hndl hndl_ >> let ptr = filePtr hndl_ in - _ccall_ fread arr (1::Int) len_i ptr >>= \ (I# read#) -> +#if __GLASGOW_HASKELL__ <= 302 + _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) -> +#else + _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) -> +#endif hClose hndl >> if read# ==# 0# then -- EOF or some other error fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname)) else - -- Add a sentinel NUL - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' arr (I# (read# -# 1#)) >>= \ () -> - return (StringBuffer a# read# 0# 0#) - -freeStringBuffer :: StringBuffer -> IO () -freeStringBuffer (StringBuffer a# _ _ _) = - _casm_ `` free((char *)%0); '' (A# a#) + return (arr, I# read#) +#endif unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# = @@ -140,31 +173,165 @@ unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# = _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () -> return s ) +\end{code} +----------------------------------------------------------------------------- +This very disturbing bit of code is used for expanding the tabs in a +file before we start parsing it. Expanding the tabs early makes the +lexer a lot simpler: we only have to record the beginning of the line +in order to be able to calculate the column offset of the current +token. + +We guess the size of the buffer required as 20% extra for +expanded tabs, and enlarge it if necessary. + +\begin{code} +#if __GLASGOW_HASKELL__ < 303 +mayBlock fo thing = thing + +writeCharOffAddr :: Addr -> Int -> Char -> IO () +writeCharOffAddr addr off c + = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c +#endif + +getErrType :: IO Int +#if __GLASGOW_HASKELL__ < 303 +getErrType = _casm_ ``%r = ghc_errtype;'' +#else +getErrType = _ccall_ getErrType__ +#endif + +slurpFileExpandTabs :: FilePath -> IO (Addr,Int) +slurpFileExpandTabs fname = do + bracket (openFile fname ReadMode) (hClose) + (\ handle -> + do sz <- hFileSize handle + if sz > toInteger (maxBound::Int) + then IOERROR (userError "slurpFile: file too big") + else do + let sz_i = fromInteger sz + sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs + chunk <- allocMem sz_i' + trySlurp handle sz_i' chunk + ) + +trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int) +trySlurp handle sz_i chunk = +#if __GLASGOW_HASKELL__ == 303 + wantReadableHandle "hGetChar" handle >>= \ handle_ -> + let fo = haFO__ handle_ in +#elif __GLASGOW_HASKELL__ > 303 + wantReadableHandle "hGetChar" handle $ \ handle_ -> + let fo = haFO__ handle_ in +#else + readHandle handle >>= \ handle_ -> + let fo = filePtr handle_ in +#endif + let + (I# chunk_sz) = sz_i + + tAB_SIZE = 8# + + slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int) + slurpFile c off chunk chunk_sz max_off = slurp c off + where + + slurp :: Int# -> Int# -> IO (Addr, Int) + slurp c off | off >=# max_off = do + let new_sz = chunk_sz *# 2# + chunk' <- reAllocMem chunk (I# new_sz) + slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#)) + slurp c off = do + intc <- mayBlock fo (_ccall_ fileGetc fo) + if intc == ((-1)::Int) + then do errtype <- getErrType + if errtype == (ERR_EOF :: Int) + then return (chunk, I# off) + else constructErrorAndFail "slurpFile" + else case chr intc of + '\t' -> tabIt c off + ch -> do writeCharOffAddr chunk (I# off) ch + let c' | ch == '\n' = 0# + | otherwise = c +# 1# + slurp c' (off +# 1#) + + tabIt :: Int# -> Int# -> IO (Addr, Int) + -- can't run out of buffer in here, because we reserved an + -- extra tAB_SIZE bytes at the end earlier. + tabIt c off = do + writeCharOffAddr chunk (I# off) ' ' + let c' = c +# 1# + off' = off +# 1# + if c' `remInt#` tAB_SIZE ==# 0# + then slurp c' off' + else tabIt c' off' + in do + + -- allow space for a full tab at the end of the buffer + -- (that's what the max_off thing is for), + -- and add 1 to allow room for the final sentinel \NUL at + -- the end of the file. + (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#)) +#if __GLASGOW_HASKELL__ < 404 + writeHandle handle handle_ +#endif + if rc < (0::Int) + then constructErrorAndFail "slurpFile" + else return (chunk', rc+1 {-room for sentinel-}) + + +reAllocMem :: Addr -> Int -> IO Addr +reAllocMem ptr sz = do + chunk <- _ccall_ realloc ptr sz + if chunk == nullAddr +#if __GLASGOW_HASKELL__ >= 400 + then fail "reAllocMem" +#else + then fail (userError "reAllocMem") +#endif + else return chunk + +allocMem :: Int -> IO Addr +allocMem sz = do +#if __GLASGOW_HASKELL__ < 303 + chunk <- _ccall_ malloc sz + if chunk == nullAddr + then fail (userError "allocMem") + else return chunk +#else + chunk <- _ccall_ allocMemory__ sz + if chunk == nullAddr + then constructErrorAndFail "allocMem" + else return chunk +#endif \end{code} Lookup \begin{code} -currentChar# :: StringBuffer -> Char# -currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# - currentChar :: StringBuffer -> Char currentChar sb = case currentChar# sb of c -> C# c -indexSBuffer# :: StringBuffer -> Int# -> Char# -indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# +lookAhead :: StringBuffer -> Int -> Char +lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c indexSBuffer :: StringBuffer -> Int -> Char indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c - -- relative lookup, i.e, currentChar = lookAhead 0 +currentChar# :: StringBuffer -> Char# +indexSBuffer# :: StringBuffer -> Int# -> Char# lookAhead# :: StringBuffer -> Int# -> Char# +currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# +indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# + + -- relative lookup, i.e, currentChar = lookAhead 0 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#) -lookAhead :: StringBuffer -> Int -> Char -lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c +currentIndex# :: StringBuffer -> Int# +currentIndex# (StringBuffer fo# _ _ c#) = c# +lexemeIndex :: StringBuffer -> Int# +lexemeIndex (StringBuffer fo# _ c# _) = c# \end{code} moving the start point of the current lexeme. @@ -200,7 +367,15 @@ stepOnBy# (StringBuffer fo# l# s# c#) i# = stepOnTo# :: StringBuffer -> Int# -> StringBuffer stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s# +squeezeLexeme :: StringBuffer -> Int# -> StringBuffer +squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c# + +mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer +mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#) + = StringBuffer fo l s# c# + stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer + stepOnUntil pred (StringBuffer fo l# s# c#) = loop c# where @@ -223,28 +398,38 @@ expandWhile pred (StringBuffer fo l# s# c#) = | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. | otherwise -> StringBuffer fo l# s# c# +expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer +expandWhile# pred (StringBuffer fo l# s# c#) = + loop c# + where + loop c# = + case indexCharOffAddr# fo c# of + ch# | pred ch# -> loop (c# +# 1#) + | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately. + | otherwise -> StringBuffer fo l# s# c# -scanNumLit :: Int -> StringBuffer -> (Int,StringBuffer) -scanNumLit (I# acc#) (StringBuffer fo l# s# c#) = - loop acc# c# +scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer) +scanNumLit acc (StringBuffer fo l# s# c#) = + loop acc c# where - loop acc# c# = + loop acc c# = case indexCharOffAddr# fo c# of - ch# | isDigit (C# ch#) -> loop (acc# *# 10# +# (ord# ch# -# ord# '0'#)) (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> (I# acc#, StringBuffer fo l# l# l#) -- EOB, return immediately. - | otherwise -> (I# acc#,StringBuffer fo l# s# c#) + ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#) + | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately. + | otherwise -> (acc,StringBuffer fo l# s# c#) -expandUntilMatch :: StringBuffer -> String -> StringBuffer +expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer expandUntilMatch (StringBuffer fo l# s# c#) str = loop c# str where - loop c# [] = StringBuffer fo l# s# c# + loop c# [] = Just (StringBuffer fo l# s# c#) loop c# ((C# x#):xs) = - if indexCharOffAddr# fo c# `eqChar#` x# then - loop (c# +# 1#) xs - else - loop (c# +# 1#) str + case indexCharOffAddr# fo c# of + ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing + | ch# `eqChar#` x# -> loop (c# +# 1#) xs + | otherwise -> loop (c# +# 1#) str + \end{code} \begin{code} @@ -261,27 +446,29 @@ prefixMatch (StringBuffer fo l# s# c#) str = loop c# str where loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) = - if indexCharOffAddr# fo c# `eqChar#` x# then - loop (c# +# 1#) xs - else - Nothing + loop c# ((C# x#):xs) + | indexCharOffAddr# fo c# `eqChar#` x# + = loop (c# +# 1#) xs + | otherwise + = Nothing untilEndOfString# :: StringBuffer -> StringBuffer untilEndOfString# (StringBuffer fo l# s# c#) = loop c# where + getch# i# = indexCharOffAddr# fo i# + loop c# = - case indexCharOffAddr# fo c# of + case getch# c# of '\"'# -> - case indexCharOffAddr# fo (c# -# 1#) of + case getch# (c# -# 1#) of '\\'# -> -- looks like an escaped something or other to me, -- better count the number of "\\"s that are immediately -- preceeding to decide if the " is escaped. let odd_slashes flg i# = - case indexCharOffAddr# fo i# of + case getch# i# of '\\'# -> odd_slashes (not flg) (i# -# 1#) _ -> flg in @@ -299,36 +486,15 @@ untilEndOfString# (StringBuffer fo l# s# c#) = _ -> loop (c# +# 1#) -untilEndOfChar# :: StringBuffer -> StringBuffer -untilEndOfChar# (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - '\''# -> - case indexCharOffAddr# fo (c# -# 1#) of - '\\'# -> - case indexCharOffAddr# fo (c# -# 2#) of - '\\'# -> -- end of char - StringBuffer fo l# s# c# - _ -> loop (c# +# 1#) -- false alarm - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - -untilChar# :: StringBuffer -> Char# -> StringBuffer -untilChar# (StringBuffer fo l# s# c#) x# = +stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer +stepOnUntilChar# (StringBuffer fo l# s# c#) x# = loop c# where - loop c# = - if indexCharOffAddr# fo c# `eqChar#` x# then - StringBuffer fo l# s# c# - else - loop (c# +# 1#) + loop c# + | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x# + = StringBuffer fo l# c# c# + | otherwise + = loop (c# +# 1#) -- conversion lexemeToString :: StringBuffer -> String @@ -338,7 +504,7 @@ lexemeToString (StringBuffer fo _ start_pos# current#) = else unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))) -lexemeToByteArray :: StringBuffer -> _ByteArray Int +lexemeToByteArray :: StringBuffer -> ByteArray Int lexemeToByteArray (StringBuffer fo _ start_pos# current#) = if start_pos# ==# current# then error "lexemeToByteArray"