X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FStringBuffer.lhs;h=61a0321d718aa39f1e8307a0eaa393efc4263934;hb=423d477bfecd490de1449c59325c8776f91d7aac;hp=4eca5d8cbcc2c033cdcacaa12199d11821dffec3;hpb=225db1321a337b948a0ade821db81df6e79f1355;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index 4eca5d8..61a0321 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -1,379 +1,189 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997 +% (c) The University of Glasgow, 1997-2003 % \section{String buffers} Buffers for scanning string input stored in external arrays. \begin{code} -{-# OPTIONS -fno-prune-tydecls #-} module StringBuffer ( - StringBuffer, - - -- creation - hGetStringBuffer, -- :: FilePath -> IO StringBuffer - - -- Lookup - currentChar, -- :: StringBuffer -> Char - currentChar#, -- :: StringBuffer -> Char# - indexSBuffer, -- :: StringBuffer -> Int -> Char - indexSBuffer#, -- :: StringBuffer -> Int# -> Char# - -- relative lookup, i.e, currentChar = lookAhead 0 - lookAhead, -- :: StringBuffer -> Int -> Char - lookAhead#, -- :: StringBuffer -> Int# -> Char# - - -- moving the end point of the current lexeme. - setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer - incLexeme, -- :: StringBuffer -> StringBuffer - decLexeme, -- :: StringBuffer -> StringBuffer - - -- move the start and end lexeme pointer on by x units. - stepOn, -- :: StringBuffer -> StringBuffer - stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - stepOverLexeme, -- :: StringBuffer -> StringBuffer - scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer) - expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer - -- at or beyond end of buffer? - bufferExhausted, -- :: StringBuffer -> Bool - emptyLexeme, -- :: StringBuffer -> Bool - - -- matching - prefixMatch, -- :: StringBuffer -> String -> Bool - untilEndOfString#, -- :: StringBuffer -> Int# - untilEndOfChar#, -- :: StringBuffer -> Int# - untilChar#, -- :: StringBuffer -> Char# -> Int# - - -- conversion - lexemeToString, -- :: StringBuffer -> String - lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int - lexemeToFastString, -- :: StringBuffer -> FastString - lexemeToBuffer, -- :: StringBuffer -> StringBuffer - - FastString, - ByteArray + StringBuffer(..), + -- non-abstract for vs/HaskellService + + -- * Creation/destruction + hGetStringBuffer, -- :: FilePath -> IO StringBuffer + stringToStringBuffer, -- :: String -> IO StringBuffer + + -- * Lookup + currentChar, -- :: StringBuffer -> Char + prevChar, -- :: StringBuffer -> Char -> Char + lookAhead, -- :: StringBuffer -> Int -> Char + atEnd, -- :: StringBuffer -> Bool + + -- * Moving + stepOn, stepOnBy, + + -- * Conversion + lexemeToString, -- :: StringBuffer -> Int -> String + lexemeToFastString, -- :: StringBuffer -> Int -> FastString + + -- * Parsing integers + parseInteger, ) where #include "HsVersions.h" -import GlaExts -import Addr ( Addr(..) ) +import FastString +import Panic + +import GLAEXTS + import Foreign -import ST -#if __GLASGOW_HASKELL__ >= 303 -import IO ( slurpFile ) +#if __GLASGOW_HASKELL__ < 503 +import PrelIOBase +import PrelHandle #else -import IO ( openFile, hFileSize, hClose, IOMode(..) ) +import GHC.IOBase +import GHC.IO ( slurpFile ) #endif -#if __GLASGOW_HASKELL__ < 301 -import IOBase ( IOError(..), IOErrorType(..) ) -import IOHandle ( readHandle, writeHandle, filePtr ) -import PackBase ( unpackCStringBA ) +import IO ( openFile, hFileSize, IOMode(ReadMode) ) +#if __GLASGOW_HASKELL__ >= 601 +import System.IO ( openBinaryFile ) #else -# if __GLASGOW_HASKELL__ <= 302 -import PrelIOBase ( IOError(..), IOErrorType(..) ) -import PrelHandle ( readHandle, writeHandle ) -# endif -import PrelPack ( unpackCStringBA ) +import IOExts ( openFileEx, IOModeEx(..) ) #endif -import PrimPacked -import FastString -import Char (isDigit) -\end{code} +#if __GLASGOW_HASKELL__ < 503 +import IArray ( listArray ) +import ArrayBase ( UArray(..) ) +import MutableArray +import IOExts ( hGetBufBA ) +#else +import Data.Array.IArray ( listArray ) +import Data.Array.MArray ( unsafeFreeze, newArray_ ) +import Data.Array.Base ( UArray(..) ) +import Data.Array.IO ( IOArray, hGetArray ) +#endif + +import Char ( ord ) + +#if __GLASGOW_HASKELL__ < 601 +openBinaryFile fp mode = openFileEx fp (BinaryMode mode) +#endif +-- ----------------------------------------------------------------------------- +-- The StringBuffer type + +-- A StringBuffer is a ByteArray# with a pointer into it. We also cache +-- the length of the ByteArray# for speed. -\begin{code} data StringBuffer = StringBuffer - Addr# + ByteArray# Int# -- length - Int# -- lexeme start Int# -- current pos -\end{code} -\begin{code} -instance Text StringBuffer where - showsPrec _ s = showString "" -\end{code} +instance Show StringBuffer where + showsPrec _ s = showString "" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction -\begin{code} hGetStringBuffer :: FilePath -> IO StringBuffer -hGetStringBuffer fname = -#if __GLASGOW_HASKELL__ >= 303 - slurpFile fname >>= \ (a , read) -> - let (A# a#) = a - (I# read#) = read - in - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' a (I# (read# -# 1#)) >>= \ () -> - return (StringBuffer a# read# 0# 0#) +hGetStringBuffer fname = do + h <- openBinaryFile fname ReadMode + size <- hFileSize h + let size_i@(I# sz#) = fromIntegral size +#if __GLASGOW_HASKELL__ < 503 + arr <- stToIO (newCharArray (0,size_i-1)) + r <- hGetBufBA h arr size_i #else - openFile fname ReadMode >>= \ hndl -> - hFileSize hndl >>= \ len@(J# _ _ d#) -> - let len_i = fromInteger len in - -- Allocate an array for system call to store its bytes into. - -- ToDo: make it robust --- trace (show ((len_i::Int)+1)) $ - _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) -> - if addr2Int# a# ==# 0# then - fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes")) - else - readHandle hndl >>= \ hndl_ -> - writeHandle hndl hndl_ >> - let ptr = filePtr hndl_ in -#if __GLASGOW_HASKELL__ <= 302 - _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) -> + arr <- newArray_ (0,size_i-1) + r <- if size_i == 0 then return 0 else hGetArray h arr size_i +#endif + if (r /= size_i) + then ioError (userError "short read of file") + else do +#if __GLASGOW_HASKELL__ < 503 + frozen <- stToIO (unsafeFreezeByteArray arr) + case frozen of + ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #else - _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) -> + frozen <- unsafeFreeze arr + case frozen of + UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #endif - hClose hndl >> - if read# ==# 0# then -- EOF or some other error - fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname)) - else - -- Add a sentinel NUL - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' arr (I# (read# -# 1#)) >>= \ () -> - return (StringBuffer a# read# 0# 0#) +#if __GLASGOW_HASKELL__ >= 502 +stringToStringBuffer str = do + let size@(I# sz#) = length str + arr = listArray (0,size-1) (map (fromIntegral.ord) str) + :: UArray Int Word8 + case arr of + UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) +#else +stringToStringBuffer = panic "stringToStringBuffer: not implemented" #endif -unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer -unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# = - unsafePerformIO ( - _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () -> - return s - ) -\end{code} - -Lookup +-- ----------------------------------------------------------------------------- +-- Lookup -\begin{code} currentChar :: StringBuffer -> Char -currentChar sb = case currentChar# sb of c -> C# c - -lookAhead :: StringBuffer -> Int -> Char -lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c - -indexSBuffer :: StringBuffer -> Int -> Char -indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c - -currentChar# :: StringBuffer -> Char# -indexSBuffer# :: StringBuffer -> Int# -> Char# -lookAhead# :: StringBuffer -> Int# -> Char# -currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# -indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# - - -- relative lookup, i.e, currentChar = lookAhead 0 -lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#) - -\end{code} +currentChar (StringBuffer arr# l# current#) = + ASSERT(current# <# l#) + C# (indexCharArray# arr# current#) - moving the start point of the current lexeme. +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer _ _ 0#) deflt = deflt +prevChar s deflt = lookAhead s (-1) -\begin{code} - -- moving the end point of the current lexeme. -setCurrentPos# :: StringBuffer -> Int# -> StringBuffer -setCurrentPos# (StringBuffer fo l# s# c#) i# = - StringBuffer fo l# s# (c# +# i#) +lookAhead :: StringBuffer -> Int -> Char +lookAhead (StringBuffer arr# l# c#) (I# i#) = + ASSERT(off <# l# && off >=# 0#) + C# (indexCharArray# arr# off) + where + off = c# +# i# --- augmenting the current lexeme by one. -incLexeme :: StringBuffer -> StringBuffer -incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#) +-- ----------------------------------------------------------------------------- +-- Moving -decLexeme :: StringBuffer -> StringBuffer -decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#) +stepOn :: StringBuffer -> StringBuffer +stepOn s = stepOnBy 1 s -\end{code} +stepOnBy :: Int -> StringBuffer -> StringBuffer +stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#) --- move the start and end point of the buffer on by --- x units. +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l# c#) = l# ==# c# -\begin{code} -stepOn :: StringBuffer -> StringBuffer -stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same. - -stepOnBy# :: StringBuffer -> Int# -> StringBuffer -stepOnBy# (StringBuffer fo# l# s# c#) i# = - case s# +# i# of - new_s# -> StringBuffer fo# l# new_s# new_s# - --- jump to pos. -stepOnTo# :: StringBuffer -> Int# -> StringBuffer -stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s# - -stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer - -stepOnUntil pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> StringBuffer fo l# c# c# - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> loop (c# +# 1#) - -stepOverLexeme :: StringBuffer -> StringBuffer -stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c# - -expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer -expandWhile pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - - -scanNumLit :: Int -> StringBuffer -> (Int,StringBuffer) -scanNumLit (I# acc#) (StringBuffer fo l# s# c#) = - loop acc# c# - where - loop acc# c# = - case indexCharOffAddr# fo c# of - ch# | isDigit (C# ch#) -> loop (acc# *# 10# +# (ord# ch# -# ord# '0'#)) (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> (I# acc#, StringBuffer fo l# l# l#) -- EOB, return immediately. - | otherwise -> (I# acc#,StringBuffer fo l# s# c#) - - -expandUntilMatch :: StringBuffer -> String -> StringBuffer -expandUntilMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = StringBuffer fo l# s# c# - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = loop (c# +# 1#) str - -\end{code} +-- ----------------------------------------------------------------------------- +-- Conversion -\begin{code} - -- at or beyond end of buffer? -bufferExhausted :: StringBuffer -> Bool -bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l# - -emptyLexeme :: StringBuffer -> Bool -emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c# - - -- matching -prefixMatch :: StringBuffer -> String -> Maybe StringBuffer -prefixMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = Nothing - -untilEndOfString# :: StringBuffer -> StringBuffer -untilEndOfString# (StringBuffer fo l# s# c#) = - loop c# - where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\"'# -> - case getch# (c# -# 1#) of - '\\'# -> - -- looks like an escaped something or other to me, - -- better count the number of "\\"s that are immediately - -- preceeding to decide if the " is escaped. - let - odd_slashes flg i# = - case getch# i# of - '\\'# -> odd_slashes (not flg) (i# -# 1#) - _ -> flg - in - if odd_slashes True (c# -# 2#) then - -- odd number, " is ecaped. - loop (c# +# 1#) - else -- a real end of string delimiter after all. - StringBuffer fo l# s# c# - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - - -untilEndOfChar# :: StringBuffer -> StringBuffer -untilEndOfChar# (StringBuffer fo l# s# c#) = - loop c# +lexemeToString :: StringBuffer -> Int -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current# where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\''# -> - case getch# (c# -# 1#) of - '\\'# -> - case getch# (c# -# 2#) of - '\\'# -> -- end of char - StringBuffer fo l# s# c# - _ -> loop (c# +# 1#) -- false alarm - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - -untilChar# :: StringBuffer -> Char# -> StringBuffer -untilChar# (StringBuffer fo l# s# c#) x# = - loop c# - where - loop c# - | indexCharOffAddr# fo c# `eqChar#` x# - = StringBuffer fo l# s# c# - | otherwise - = loop (c# +# 1#) - - -- conversion -lexemeToString :: StringBuffer -> String -lexemeToString (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - "" - else - unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))) - -lexemeToByteArray :: StringBuffer -> _ByteArray Int -lexemeToByteArray (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - error "lexemeToByteArray" - else - copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) - -lexemeToFastString :: StringBuffer -> FastString -lexemeToFastString (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - mkFastCharString2 (A# fo) (I# 0#) - else - mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) - -{- - Create a StringBuffer from the current lexeme, and add a sentinel - at the end. Know What You're Doing before taking this function - into use.. --} -lexemeToBuffer :: StringBuffer -> StringBuffer -lexemeToBuffer (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - StringBuffer fo 0# start_pos# current# -- an error, really. - else - unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#) - (current# -# 1#) - '\NUL'# - + end = current# +# len# + + unpack nh + | nh >=# end = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharArray# arr# nh + +lexemeToFastString :: StringBuffer -> Int -> FastString +lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString (StringBuffer fo _ current#) (I# len) = + mkFastSubStringBA# fo current# len + +-- ----------------------------------------------------------------------------- +-- Parsing integer strings in various bases + +parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseInteger buf len radix to_int + = go 0 0 + where go i x | i == len = x + | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i))) \end{code}