X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FStringBuffer.lhs;h=7c61b5b147716b9d364dffd9e4b93ad0bb8115d6;hb=29da2cf3011c292bc4261601aff85afb13e24d54;hp=b51fd9d50f191541cd13d681788c16616c7bef4e;hpb=e137a978183b0c2edd827accd5ccc32e1851ea9d;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index b51fd9d..7c61b5b 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -1,509 +1,177 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 +% (c) The University of Glasgow, 1997-2003 % \section{String buffers} Buffers for scanning string input stored in external arrays. \begin{code} - -{-# OPTIONS -optc-DNON_POSIX_SOURCE #-} - module StringBuffer ( - StringBuffer, + StringBuffer(..), + -- non-abstract for vs/HaskellService - -- creation/destruction + -- * Creation/destruction hGetStringBuffer, -- :: FilePath -> IO StringBuffer stringToStringBuffer, -- :: String -> IO StringBuffer - freeStringBuffer, -- :: StringBuffer -> IO () - - -- Lookup - currentChar, -- :: StringBuffer -> Char - currentChar#, -- :: StringBuffer -> Char# - indexSBuffer, -- :: StringBuffer -> Int -> Char - indexSBuffer#, -- :: StringBuffer -> Int# -> Char# - -- relative lookup, i.e, currentChar = lookAhead 0 - lookAhead, -- :: StringBuffer -> Int -> Char - lookAhead#, -- :: StringBuffer -> Int# -> Char# - - -- offsets - currentIndex#, -- :: StringBuffer -> Int# - lexemeIndex, -- :: StringBuffer -> Int# - - -- moving the end point of the current lexeme. - setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer - incLexeme, -- :: StringBuffer -> StringBuffer - decLexeme, -- :: StringBuffer -> StringBuffer - - -- move the start and end lexeme pointer on by x units. - stepOn, -- :: StringBuffer -> StringBuffer - stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer - stepOverLexeme, -- :: StringBuffer -> StringBuffer - scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer) - squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer - mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer - expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer - expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer - -- at or beyond end of buffer? - bufferExhausted, -- :: StringBuffer -> Bool - emptyLexeme, -- :: StringBuffer -> Bool - - -- matching - prefixMatch, -- :: StringBuffer -> String -> Bool - untilEndOfString#, -- :: StringBuffer -> Int# - - -- conversion - lexemeToString, -- :: StringBuffer -> String - lexemeToFastString, -- :: StringBuffer -> FastString - ) where -#include "HsVersions.h" + -- * Lookup + currentChar, -- :: StringBuffer -> Char + prevChar, -- :: StringBuffer -> Char -> Char + lookAhead, -- :: StringBuffer -> Int -> Char + atEnd, -- :: StringBuffer -> Bool + -- * Moving + stepOn, stepOnBy, -#if __GLASGOW_HASKELL__ < 411 -import PrelAddr ( Addr(..) ) -import Panic ( panic ) -#else -import Addr ( Addr(..) ) -#if __GLASGOW_HASKELL__ < 503 -import Ptr ( Ptr(..) ) -#else -import GHC.Ptr ( Ptr(..) ) -#endif -#endif + -- * Conversion + lexemeToString, -- :: StringBuffer -> Int -> String + lexemeToFastString, -- :: StringBuffer -> Int -> FastString + ) where -#if __GLASGOW_HASKELL__ < 501 -import Char ( chr ) -#elif __GLASGOW_HASKELL__ < 503 -import PrelIO ( hGetcBuffered ) -#else -import GHC.IO ( hGetcBuffered ) -#endif +#include "HsVersions.h" -import PrimPacked import FastString +import Panic -import GlaExts -import Foreign -import IO ( openFile, isEOFError ) -import Addr -import Exception ( bracket ) +import GLAEXTS -import CString ( unpackCStringBA ) +import Foreign #if __GLASGOW_HASKELL__ < 503 import PrelIOBase import PrelHandle #else import GHC.IOBase -import GHC.Handle +import GHC.IO ( slurpFile ) #endif -import Char ( isDigit ) -\end{code} +import IO ( openFile, hFileSize, IOMode(ReadMode) ) +#if __GLASGOW_HASKELL__ >= 601 +import System.IO ( openBinaryFile ) +#else +import IOExts ( openFileEx, IOModeEx(..) ) +#endif + +#if __GLASGOW_HASKELL__ < 503 +import IArray ( listArray ) +import ArrayBase ( UArray(..) ) +import MutableArray +import IOExts ( hGetBufBA ) +#else +import Data.Array.IArray ( listArray ) +import Data.Array.MArray ( unsafeFreeze, newArray_ ) +import Data.Array.Base ( UArray(..) ) +import Data.Array.IO ( IOArray, hGetArray ) +#endif + +import Char ( ord ) + +#if __GLASGOW_HASKELL__ < 601 +openBinaryFile fp mode = openFileEx fp (BinaryMode mode) +#endif +-- ----------------------------------------------------------------------------- +-- The StringBuffer type + +-- A StringBuffer is a ByteArray# with a pointer into it. We also cache +-- the length of the ByteArray# for speed. -\begin{code} data StringBuffer = StringBuffer - Addr# + ByteArray# Int# -- length - Int# -- lexeme start Int# -- current pos -\end{code} -\begin{code} instance Show StringBuffer where - showsPrec _ s = showString "" -\end{code} + showsPrec _ s = showString "" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction -\begin{code} hGetStringBuffer :: FilePath -> IO StringBuffer hGetStringBuffer fname = do - (a, read) <- slurpFileExpandTabs fname - - -- urk! slurpFile gives us a buffer that doesn't have room for - -- the sentinel. Assume it has a final newline for now, and overwrite - -- that with the sentinel. slurpFileExpandTabs (below) leaves room - -- for the sentinel. - let (A# a#) = a; - (I# read#) = read; - end# = read# -# 1# - - -- add sentinel '\NUL' - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#) - return (StringBuffer a# end# 0# 0#) - -unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer -unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# = - unsafePerformIO ( - _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () -> - return s - ) -\end{code} - ------------------------------------------------------------------------------ --- Turn a String into a StringBuffer - -\begin{code} -stringToStringBuffer :: String -> IO StringBuffer -freeStringBuffer :: StringBuffer -> IO () - -#if __GLASGOW_HASKELL__ >= 411 -stringToStringBuffer str = - do let sz@(I# sz#) = length str - (Ptr a#) <- mallocBytes (sz+1) - fill_in str (A# a#) - writeCharOffAddr (A# a#) sz '\0' -- sentinel - return (StringBuffer a# sz# 0# 0#) - where - fill_in [] _ = return () - fill_in (c:cs) a = do - writeCharOffAddr a 0 c - fill_in cs (a `plusAddr` 1) - -freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#) -#else -stringToStringBuffer = panic "stringToStringBuffer: not implemented" -freeStringBuffer sb = return () -#endif - -\end{code} - ------------------------------------------------------------------------------ -This very disturbing bit of code is used for expanding the tabs in a -file before we start parsing it. Expanding the tabs early makes the -lexer a lot simpler: we only have to record the beginning of the line -in order to be able to calculate the column offset of the current -token. - -We guess the size of the buffer required as 20% extra for -expanded tabs, and enlarge it if necessary. - -\begin{code} -getErrType :: IO Int -getErrType = _ccall_ getErrType__ - -slurpFileExpandTabs :: FilePath -> IO (Addr,Int) -slurpFileExpandTabs fname = do - bracket (openFile fname ReadMode) (hClose) - (\ handle -> - do sz <- hFileSize handle - if sz > toInteger (maxBound::Int) - then ioError (userError "slurpFile: file too big") - else do - let sz_i = fromInteger sz - if sz_i == 0 - -- empty file: just allocate a buffer containing '\0' - then do chunk <- allocMem 1 - writeCharOffAddr chunk 0 '\0' - return (chunk, 0) - else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs - chunk <- allocMem sz_i' - trySlurp handle sz_i' chunk - ) - -trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int) -trySlurp handle sz_i chunk = -#if __GLASGOW_HASKELL__ < 501 - wantReadableHandle "hGetChar" handle $ \ handle_ -> - let fo = haFO__ handle_ in + h <- openBinaryFile fname ReadMode + size <- hFileSize h + let size_i@(I# sz#) = fromIntegral size +#if __GLASGOW_HASKELL__ < 503 + arr <- stToIO (newCharArray (0,size_i-1)) + r <- hGetBufBA h arr size_i #else - wantReadableHandle "hGetChar" handle $ - \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } -> + arr <- newArray_ (0,size_i-1) + r <- hGetArray h arr size_i #endif - let - (I# chunk_sz) = sz_i - - tAB_SIZE = 8# - - slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int) - slurpFile c off chunk chunk_sz max_off = slurp c off - where - - slurp :: Int# -> Int# -> IO (Addr, Int) - slurp c off | off >=# max_off = do - let new_sz = chunk_sz *# 2# - chunk' <- reAllocMem chunk (I# new_sz) - slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#)) - slurp c off = do -#if __GLASGOW_HASKELL__ < 501 - intc <- mayBlock fo (_ccall_ fileGetc fo) - if intc == ((-1)::Int) - then do errtype <- getErrType - if errtype == (19{-ERR_EOF-} :: Int) - then return (chunk, I# off) - else constructErrorAndFail "slurpFile" - else case chr intc of -#else - buf <- readIORef ref - ch <- (if not (bufferEmpty buf) - then hGetcBuffered fd ref buf - else do -#if __GLASGOW_HASKELL__ >= 503 - new_buf <- fillReadBuffer fd True False buf + if (r /= size_i) + then ioError (userError "short read of file") + else do +#if __GLASGOW_HASKELL__ < 503 + frozen <- stToIO (unsafeFreezeByteArray arr) + case frozen of + ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #else - new_buf <- fillReadBuffer fd True buf -#endif - hGetcBuffered fd ref new_buf) - `catch` \e -> if isEOFError e - then return '\xFFFF' - else ioError e - case ch of - '\xFFFF' -> return (chunk, I# off) + frozen <- unsafeFreeze arr + case frozen of + UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #endif - '\t' -> tabIt c off - ch -> do writeCharOffAddr chunk (I# off) ch - let c' | ch == '\n' = 0# - | otherwise = c +# 1# - slurp c' (off +# 1#) - - tabIt :: Int# -> Int# -> IO (Addr, Int) - -- can't run out of buffer in here, because we reserved an - -- extra tAB_SIZE bytes at the end earlier. - tabIt c off = do - writeCharOffAddr chunk (I# off) ' ' - let c' = c +# 1# - off' = off +# 1# - if c' `remInt#` tAB_SIZE ==# 0# - then slurp c' off' - else tabIt c' off' - in do - - -- allow space for a full tab at the end of the buffer - -- (that's what the max_off thing is for), - -- and add 1 to allow room for the final sentinel \NUL at - -- the end of the file. - (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#)) - return (chunk', rc+1 {- room for sentinel -}) - - -reAllocMem :: Addr -> Int -> IO Addr -reAllocMem ptr sz = do - chunk <- _ccall_ realloc ptr sz - if chunk == nullAddr - then fail "reAllocMem" - else return chunk - -allocMem :: Int -> IO Addr -allocMem sz = do - chunk <- _ccall_ malloc sz - if chunk == nullAddr -#if __GLASGOW_HASKELL__ < 501 - then constructErrorAndFail "allocMem" + +#if __GLASGOW_HASKELL__ >= 502 +stringToStringBuffer str = do + let size@(I# sz#) = length str + arr = listArray (0,size-1) (map (fromIntegral.ord) str) + :: UArray Int Word8 + case arr of + UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #else - then ioException (IOError Nothing ResourceExhausted "malloc" - "out of memory" Nothing) +stringToStringBuffer = panic "stringToStringBuffer: not implemented" #endif - else return chunk -\end{code} -Lookup +-- ----------------------------------------------------------------------------- +-- Lookup -\begin{code} currentChar :: StringBuffer -> Char -currentChar sb = case currentChar# sb of c -> C# c +currentChar (StringBuffer arr# l# current#) = + ASSERT(current# <# l#) + C# (indexCharArray# arr# current#) -lookAhead :: StringBuffer -> Int -> Char -lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c - -indexSBuffer :: StringBuffer -> Int -> Char -indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c - -currentChar# :: StringBuffer -> Char# -indexSBuffer# :: StringBuffer -> Int# -> Char# -lookAhead# :: StringBuffer -> Int# -> Char# -currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# -indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# - - -- relative lookup, i.e, currentChar = lookAhead 0 -lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#) - -currentIndex# :: StringBuffer -> Int# -currentIndex# (StringBuffer fo# _ _ c#) = c# +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer _ _ 0#) deflt = deflt +prevChar s deflt = lookAhead s (-1) -lexemeIndex :: StringBuffer -> Int# -lexemeIndex (StringBuffer fo# _ c# _) = c# -\end{code} - - moving the start point of the current lexeme. - -\begin{code} - -- moving the end point of the current lexeme. -setCurrentPos# :: StringBuffer -> Int# -> StringBuffer -setCurrentPos# (StringBuffer fo l# s# c#) i# = - StringBuffer fo l# s# (c# +# i#) +lookAhead :: StringBuffer -> Int -> Char +lookAhead (StringBuffer arr# l# c#) (I# i#) = + ASSERT(off <# l# && off >=# 0#) + C# (indexCharArray# arr# off) + where + off = c# +# i# --- augmenting the current lexeme by one. -incLexeme :: StringBuffer -> StringBuffer -incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#) +-- ----------------------------------------------------------------------------- +-- Moving -decLexeme :: StringBuffer -> StringBuffer -decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#) +stepOn :: StringBuffer -> StringBuffer +stepOn s = stepOnBy 1 s -\end{code} +stepOnBy :: Int -> StringBuffer -> StringBuffer +stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#) --- move the start and end point of the buffer on by --- x units. +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l# c#) = l# ==# c# -\begin{code} -stepOn :: StringBuffer -> StringBuffer -stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same. - -stepOnBy# :: StringBuffer -> Int# -> StringBuffer -stepOnBy# (StringBuffer fo# l# s# c#) i# = - case s# +# i# of - new_s# -> StringBuffer fo# l# new_s# new_s# - --- jump to pos. -stepOnTo# :: StringBuffer -> Int# -> StringBuffer -stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s# - -squeezeLexeme :: StringBuffer -> Int# -> StringBuffer -squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c# - -mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer -mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#) - = StringBuffer fo l s# c# - -stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer - -stepOnUntil pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> StringBuffer fo l# c# c# - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> loop (c# +# 1#) - -stepOverLexeme :: StringBuffer -> StringBuffer -stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c# - -expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer -expandWhile pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - -expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer -expandWhile# pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred ch# -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - -scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer) -scanNumLit acc (StringBuffer fo l# s# c#) = - loop acc c# - where - loop acc c# = - case indexCharOffAddr# fo c# of - ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately. - | otherwise -> (acc,StringBuffer fo l# s# c#) - - -expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer -expandUntilMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) = - case indexCharOffAddr# fo c# of - ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing - | ch# `eqChar#` x# -> loop (c# +# 1#) xs - | otherwise -> loop (c# +# 1#) str - -\end{code} +-- ----------------------------------------------------------------------------- +-- Conversion -\begin{code} - -- at or beyond end of buffer? -bufferExhausted :: StringBuffer -> Bool -bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l# - -emptyLexeme :: StringBuffer -> Bool -emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c# - - -- matching -prefixMatch :: StringBuffer -> String -> Maybe StringBuffer -prefixMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = Nothing - -untilEndOfString# :: StringBuffer -> StringBuffer -untilEndOfString# (StringBuffer fo l# s# c#) = - loop c# - where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\"'# -> - case getch# (c# -# 1#) of - '\\'# -> - -- looks like an escaped something or other to me, - -- better count the number of "\\"s that are immediately - -- preceeding to decide if the " is escaped. - let - odd_slashes flg i# = - case getch# i# of - '\\'# -> odd_slashes (not flg) (i# -# 1#) - _ -> flg - in - if odd_slashes True (c# -# 2#) then - -- odd number, " is ecaped. - loop (c# +# 1#) - else -- a real end of string delimiter after all. - StringBuffer fo l# s# c# - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - - -stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer -stepOnUntilChar# (StringBuffer fo l# s# c#) x# = - loop c# +lexemeToString :: StringBuffer -> Int -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current# where - loop c# - | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x# - = StringBuffer fo l# c# c# - | otherwise - = loop (c# +# 1#) - - -- conversion -lexemeToString :: StringBuffer -> String -lexemeToString (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - "" - else - unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))) - -lexemeToFastString :: StringBuffer -> FastString -lexemeToFastString (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - mkFastString "" - else - mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) + end = current# +# len# + + unpack nh + | nh >=# end = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharArray# arr# nh + +lexemeToFastString :: StringBuffer -> Int -> FastString +lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString (StringBuffer fo _ current#) (I# len) = + mkFastSubStringBA# fo current# len \end{code}