%
-% (c) The GRASP/AQUA Project, Glasgow University, 1997
+% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
%
\section{String buffers}
Buffers for scanning string input stored in external arrays.
\begin{code}
-#include "HsVersions.h"
+
+{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
module StringBuffer
(
StringBuffer,
- -- creation
- hGetStringBuffer, -- :: FilePath -> IO StringBuffer
- freeStringBuffer, -- :: StringBuffer -> IO ()
+ -- creation/destruction
+ hGetStringBuffer, -- :: Bool -> FilePath -> IO StringBuffer
+ stringToStringBuffer, -- :: String -> IO StringBuffer
+ freeStringBuffer, -- :: StringBuffer -> IO ()
-- Lookup
currentChar, -- :: StringBuffer -> Char
lookAhead, -- :: StringBuffer -> Int -> Char
lookAhead#, -- :: StringBuffer -> Int# -> Char#
+ -- offsets
+ currentIndex#, -- :: StringBuffer -> Int#
+ lexemeIndex, -- :: StringBuffer -> Int#
+
-- moving the end point of the current lexeme.
setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
incLexeme, -- :: StringBuffer -> StringBuffer
stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
+ stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
stepOverLexeme, -- :: StringBuffer -> StringBuffer
scanNumLit, -- :: Integer -> StringBuffer -> (Integer, StringBuffer)
- expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
+ squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
+ mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
+ expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
+ expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
expandUntilMatch, -- :: StringBuffer -> String -> Maybe StringBuffer
-- at or beyond end of buffer?
bufferExhausted, -- :: StringBuffer -> Bool
-- matching
prefixMatch, -- :: StringBuffer -> String -> Bool
untilEndOfString#, -- :: StringBuffer -> StringBuffer
- untilEndOfChar#, -- :: StringBuffer -> Int#
- untilChar#, -- :: StringBuffer -> Char# -> Int#
-- conversion
lexemeToString, -- :: StringBuffer -> String
lexemeToBuffer, -- :: StringBuffer -> StringBuffer
FastString,
- _ByteArray
+ ByteArray
) where
-#if __GLASGOW_HASKELL__ <= 200
-import PreludeGlaST
-import PreludeGlaMisc
-import HandleHack
-import Ubiq
-#else
+#include "HsVersions.h"
+
import GlaExts
+#if __GLASGOW_HASKELL__ < 411
+import PrelAddr ( Addr(..) )
+#else
+import Addr ( Addr(..) )
+#endif
import Foreign
-import IOBase
-import IOHandle
-import ST
-import STBase
-import Char (isDigit)
-# if __GLASGOW_HASKELL__ == 202
-import PrelBase ( Char(..) )
-# endif
+import Char ( chr )
+import Panic ( panic )
+
+import IO ( openFile )
+import IOExts ( slurpFile )
+import PrelIOBase
+import PrelHandle
+import Addr
+#if __GLASGOW_HASKELL__ >= 411
+import Ptr ( Ptr(..) )
+#endif
+
+import PrelPack ( unpackCStringBA )
+
+#if __GLASGOW_HASKELL__ >= 501
+import PrelIO ( hGetcBuffered )
#endif
+
+import Exception ( bracket )
import PrimPacked
import FastString
-
+import Char ( isDigit )
\end{code}
\begin{code}
data StringBuffer -- a view onto a block of characters held outside the Haskell heap
= StringBuffer
Addr# -- address of the character data (read with indexCharOffAddr#)
--- ForeignObj# -- the data
Int# -- length
Int# -- lexeme start
Int# -- current pos
\end{code}
\begin{code}
-instance Text StringBuffer where
+instance Show StringBuffer where
showsPrec _ s = showString "" -- every StringBuffer is shown as the empty string
\end{code}
\begin{code}
-hGetStringBuffer :: FilePath -> IO StringBuffer
-hGetStringBuffer fname =
--- trace ("Renamer: opening " ++ fname) $
- openFile fname ReadMode >>= \ hndl ->
- hFileSize hndl >>= \ len@(J# _ _ d#) ->
- let len_i = fromInteger len in
- -- Allocate an array for system call to store its bytes into.
- -- ToDo: make it robust
--- trace (show ((len_i::Int)+1)) $
- (_casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int)) `CCALL_THEN` \ arr@(A# a#) ->
- if addr2Int# a# ==# 0# then
- failWith MkIOError(hndl,UserError,("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
- else
-
--- _casm_ `` %r=NULL; '' `thenPrimIO` \ free_p ->
--- makeForeignObj arr free_p `thenPrimIO` \ fo@(_ForeignObj fo#) ->
- _readHandle hndl >>= \ hndl_ ->
- _writeHandle hndl hndl_ >>
- let ptr = _filePtr hndl_ in
- _ccall_ fread arr (1::Int) len_i ptr `CCALL_THEN` \ (I# read#) ->
--- trace ("DEBUG: opened " ++ fname ++ show (I# read#)) $
- hClose hndl >>
- if read# ==# 0# then -- EOF or other error
- failWith MkIOError(hndl,UserError,"hGetStringBuffer: EOF reached or some other error")
- else
- -- Add a sentinel NUL
- _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' arr (I# (read# -# 1#)) `CCALL_THEN` \ () ->
- return (StringBuffer a# read# 0# 0#)
+hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer -- read fname into a NUL-terminated buffer; the Bool selects tab expansion
+hGetStringBuffer expand_tabs fname = do
+ (a, read) <- if expand_tabs
+ then slurpFileExpandTabs fname
+#if __GLASGOW_HASKELL__ < 411
+ else slurpFile fname
+#else
+ else do
+ (Ptr a#, read) <- slurpFile fname
+ return (A# a#, read)
+#endif
-freeStringBuffer :: StringBuffer -> IO ()
-freeStringBuffer (StringBuffer a# _ _ _) =
- _casm_ `` free((char *)%0); '' (A# a#) `CCALL_THEN` \ () ->
- return ()
+ -- urk! slurpFile gives us a buffer that doesn't have room for
+ -- the sentinel. Assume the file ends in a newline for now, and overwrite
+ -- that newline with the sentinel. slurpFileExpandTabs (below) already
+ -- counts one extra byte for the sentinel in the size it returns.
+ let (A# a#) = a;
+ (I# read#) = read;
+ end# = read# -# 1# -- index of the sentinel; NOTE(review): this is -1 if slurpFile reports 0 bytes (empty file), TODO confirm
+
+ -- add sentinel '\NUL' at index end#, which also becomes the buffer length
+ _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
+ return (StringBuffer a# end# 0# 0#)
unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer -- overwrite, in place, the byte at absolute offset i# with ch#
unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
- unsafePerformPrimIO (
- _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) `thenPrimIO` \ () ->
- returnPrimIO s)
+ unsafePerformIO ( -- the write is a side effect behind a pure type: it only happens when the result is demanded
+ _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
+ return s
+ )
+\end{code}
+
+-----------------------------------------------------------------------------
+-- Turn a String into a StringBuffer
+
+\begin{code}
+-- stringToStringBuffer copies a String into freshly malloc'd memory, one
+-- byte per character, and terminates it with a '\0' sentinel; the
+-- resulting buffer has its lexeme start and current position at 0.
+-- freeStringBuffer releases that memory again.
+--
+-- Both are only implemented for GHC >= 4.11; on older compilers the
+-- former panics and the latter does nothing.
+stringToStringBuffer :: String -> IO StringBuffer
+freeStringBuffer :: StringBuffer -> IO ()
+
+#if __GLASGOW_HASKELL__ >= 411
+stringToStringBuffer str =
+  do let len@(I# len#) = length str
+     (Ptr a#) <- mallocBytes (len+1)
+     let buf = A# a#
+     -- character number k of the string goes to offset k of the buffer
+     sequence_ [ writeCharOffAddr buf k ch | (k, ch) <- zip [0..] str ]
+     writeCharOffAddr buf len '\0' -- sentinel
+     return (StringBuffer a# len# 0# 0#)
+
+freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
+#else
+stringToStringBuffer = panic "stringToStringBuffer: not implemented"
+freeStringBuffer _ = return ()
+#endif
\end{code}
-Lookup
+-----------------------------------------------------------------------------
+This very disturbing bit of code is used for expanding the tabs in a
+file before we start parsing it. Expanding the tabs early makes the
+lexer a lot simpler: we only have to record the beginning of the line
+in order to be able to calculate the column offset of the current
+token.
+
+We guess the size of the buffer required as 20% extra for
+expanded tabs, and enlarge it if necessary.
\begin{code}
-currentChar# :: StringBuffer -> Char#
-currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
+getErrType :: IO Int -- result of the C function getErrType__; trySlurp compares it with 19 (ERR_EOF)
+getErrType = _ccall_ getErrType__
+
+-- Read the whole of fname into a malloc'd buffer with tabs expanded to
+-- spaces (the expansion itself is done by trySlurp). Returns the buffer
+-- together with the size reported by trySlurp. The file handle is
+-- closed on exit, whether or not reading succeeds.
+--
+-- Raises a user error if the file size does not fit in an Int.
+slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
+slurpFileExpandTabs fname = do
+  bracket (openFile fname ReadMode) (hClose)
+    (\ handle ->
+      do sz <- hFileSize handle
+         if sz > toInteger (maxBound::Int)
+           then ioError (userError "slurpFile: file too big")
+           else do
+             let sz_i = fromInteger sz
+                 -- Smallest buffer we ever start with. trySlurp grows
+                 -- the buffer by doubling its size, so a zero-byte
+                 -- chunk (empty file) could never be made big enough;
+                 -- 16 also exceeds the tAB_SIZE+1 bytes that trySlurp
+                 -- keeps in reserve at the end of the buffer.
+                 min_chunk = 16 :: Int
+                 -- add 20% for tabs
+                 sz_i' = max min_chunk ((sz_i * 12) `div` 10)
+             chunk <- allocMem sz_i'
+             trySlurp handle sz_i' chunk
+    )
+
+trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int) -- read all of handle into chunk (sz_i bytes), expanding tabs; returns (buffer, bytes written + 1)
+trySlurp handle sz_i chunk =
+#if __GLASGOW_HASKELL__ < 501
+ wantReadableHandle "hGetChar" handle $ \ handle_ ->
+ let fo = haFO__ handle_ in
+#else
+ wantReadableHandle "hGetChar" handle $
+ \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
+#endif
+ let
+ (I# chunk_sz) = sz_i
+
+ tAB_SIZE = 8# -- a tab advances to the next column that is a multiple of 8
+
+ slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int) -- args: column, write offset, buffer, buffer size, offset at which the buffer must grow
+ slurpFile c off chunk chunk_sz max_off = slurp c off
+ where
+
+ slurp :: Int# -> Int# -> IO (Addr, Int)
+ slurp c off | off >=# max_off = do -- reached the reserve: double the buffer and carry on
+ let new_sz = chunk_sz *# 2#
+ chunk' <- reAllocMem chunk (I# new_sz)
+ slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
+ slurp c off = do
+#if __GLASGOW_HASKELL__ < 501
+ intc <- mayBlock fo (_ccall_ fileGetc fo)
+ if intc == ((-1)::Int)
+ then do errtype <- getErrType
+ if errtype == (19{-ERR_EOF-} :: Int)
+ then return (chunk, I# off)
+ else constructErrorAndFail "slurpFile"
+ else case chr intc of
+#else
+ buf <- readIORef ref
+ ch <- (if not (bufferEmpty buf)
+ then hGetcBuffered fd ref buf
+ else do new_buf <- fillReadBuffer fd True buf
+ hGetcBuffered fd ref new_buf)
+ `catch` \e -> if isEOFError e
+ then return '\xFFFF' -- '\xFFFF' stands in for end-of-file
+ else ioError e
+ case ch of
+ '\xFFFF' -> return (chunk, I# off) -- NOTE(review): a real '\xFFFF' character read from the file would end the read here as well
+#endif
+ '\t' -> tabIt c off
+ ch -> do writeCharOffAddr chunk (I# off) ch
+ let c' | ch == '\n' = 0#
+ | otherwise = c +# 1#
+ slurp c' (off +# 1#)
+
+ tabIt :: Int# -> Int# -> IO (Addr, Int) -- write spaces until the column is a multiple of tAB_SIZE, then resume slurp
+ -- can't run out of buffer in here, because we reserved an
+ -- extra tAB_SIZE bytes at the end earlier.
+ tabIt c off = do
+ writeCharOffAddr chunk (I# off) ' '
+ let c' = c +# 1#
+ off' = off +# 1#
+ if c' `remInt#` tAB_SIZE ==# 0#
+ then slurp c' off'
+ else tabIt c' off'
+ in do
+
+ -- allow space for a full tab at the end of the buffer
+ -- (that's what the max_off thing is for),
+ -- and add 1 to allow room for the final sentinel \NUL at
+ -- the end of the file.
+ (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
+#if __GLASGOW_HASKELL__ < 404
+ writeHandle handle handle_
+#endif
+ return (chunk', rc+1 {- room for sentinel -})
+
+
+reAllocMem :: Addr -> Int -> IO Addr -- resize the block at ptr to sz bytes with C realloc
+reAllocMem ptr sz = do
+ chunk <- _ccall_ realloc ptr sz
+ if chunk == nullAddr -- realloc returned NULL
+ then fail "reAllocMem" -- NOTE(review): allocMem reports failure differently (constructErrorAndFail / ResourceExhausted); a sz of 0 may also end up here
+ else return chunk
+
+allocMem :: Int -> IO Addr -- allocate sz bytes with C malloc
+allocMem sz = do
+ chunk <- _ccall_ malloc sz
+ if chunk == nullAddr -- malloc returned NULL
+#if __GLASGOW_HASKELL__ < 501
+ then constructErrorAndFail "allocMem"
+#else
+ then ioException (IOError Nothing ResourceExhausted "malloc"
+ "out of memory" Nothing)
+#endif
+ else return chunk
+\end{code}
+
+Lookup
+\begin{code}
currentChar :: StringBuffer -> Char -- character at the current position
currentChar sb = case currentChar# sb of c -> C# c
-indexSBuffer# :: StringBuffer -> Int# -> Char#
-indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
+lookAhead :: StringBuffer -> Int -> Char -- character i# places past the current position
+lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
indexSBuffer :: StringBuffer -> Int -> Char -- character at absolute offset i# from the start of the buffer
indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
- -- relative lookup, i.e, currentChar = lookAhead 0
+currentChar# :: StringBuffer -> Char#
+indexSBuffer# :: StringBuffer -> Int# -> Char#
lookAhead# :: StringBuffer -> Int# -> Char#
+currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
+indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
+
+ -- relative lookup, i.e., currentChar = lookAhead 0
lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
-lookAhead :: StringBuffer -> Int -> Char
-lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
+-- Offset of the current position (the last field of the buffer).
+currentIndex# :: StringBuffer -> Int#
+currentIndex# (StringBuffer _ _ _ cur#) = cur#
+-- Offset at which the current lexeme starts (the third field of the buffer).
+lexemeIndex :: StringBuffer -> Int#
+lexemeIndex (StringBuffer _ _ start# _) = start#
\end{code}
moving the start point of the current lexeme.
stepOnTo# :: StringBuffer -> Int# -> StringBuffer -- set both the lexeme start and the current position to s#
stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
+-- Move the lexeme start forward by n# characters; the current position
+-- is left where it was.
+squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
+squeezeLexeme (StringBuffer buf len start# cur#) n# =
+  StringBuffer buf len (start# +# n#) cur#
+
+-- Build a buffer whose data, length and lexeme start come from the first
+-- argument and whose current position comes from the second.
+mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
+mergeLexemes (StringBuffer buf len start# _) (StringBuffer _ _ _ cur#) =
+  StringBuffer buf len start# cur#
+
stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
+
stepOnUntil pred (StringBuffer fo l# s# c#) =
loop c#
where
| ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
| otherwise -> StringBuffer fo l# s# c#
+-- Move the current position forward over every character that pred
+-- accepts, stopping at the first one it rejects. The lexeme start is
+-- left alone. Nothing but pred stops the scan, so it only halts at the
+-- end of the buffer if pred rejects the '\NUL' sentinel.
+expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
+expandWhile# pred (StringBuffer fo l# s# c#) = go c#
+ where
+  go i#
+    | pred (indexCharOffAddr# fo i#) = go (i# +# 1#)
+    | otherwise                      = StringBuffer fo l# s# i#
-scanNumLit :: Int -> StringBuffer -> (Int,StringBuffer)
-scanNumLit (I# acc#) (StringBuffer fo l# s# c#) =
- loop acc# c#
+scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer) -- fold the decimal digits at the current position onto acc; stops at the first non-digit
+scanNumLit acc (StringBuffer fo l# s# c#) =
+ loop acc c#
where
- loop acc# c# =
+ loop acc c# =
case indexCharOffAddr# fo c# of
- ch# | isDigit (C# ch#) -> loop (acc# *# 10# +# (ord# ch# -# ord# '0'#)) (c# +# 1#)
- | ch# `eqChar#` '\NUL'# && c# >=# l# -> (I# acc#, StringBuffer fo l# l# l#) -- EOB, return immediately.
- | otherwise -> (I# acc#,StringBuffer fo l# s# c#)
+ ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
+ | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately (same result as the branch below).
+ | otherwise -> (acc,StringBuffer fo l# s# c#)
-expandUntilMatch :: StringBuffer -> String -> StringBuffer
+-- Search forward from the current position for the first occurrence of
+-- str. On success the result is the buffer with its current position
+-- just past the match (lexeme start unchanged); Nothing is returned if
+-- the '\NUL' sentinel at the end of the buffer is reached first.
+expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
 expandUntilMatch (StringBuffer fo l# s# c#) str =
   loop c# str
  where
-    loop c# [] = StringBuffer fo l# s# c#
+    loop c# [] = Just (StringBuffer fo l# s# c#)
     loop c# ((C# x#):xs) =
-      if indexCharOffAddr# fo c# `eqChar#` x# then
-        loop (c# +# 1#) xs
-      else
-        loop (c# +# 1#) str
+      case indexCharOffAddr# fo c# of
+        ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
+            | ch# `eqChar#` x# -> loop (c# +# 1#) xs
+            | otherwise ->
+                -- Mismatch. The characters of str already consumed tell us
+                -- where this attempt began; retry from the character just
+                -- after that start. Resuming after the mismatching
+                -- character instead would skip overlapping candidates,
+                -- e.g. it would miss "#-}" inside "##-}".
+                case length str - length xs - 1 of
+                  I# matched# -> loop (c# -# matched# +# 1#) str
+
\end{code}
\begin{code}
loop c# str
where
loop c# [] = Just (StringBuffer fo l# s# c#)
- loop c# ((C# x#):xs) =
- if indexCharOffAddr# fo c# `eqChar#` x# then
- loop (c# +# 1#) xs
- else
- Nothing
+ loop c# ((C# x#):xs)
+ | indexCharOffAddr# fo c# `eqChar#` x#
+ = loop (c# +# 1#) xs
+ | otherwise
+ = Nothing
untilEndOfString# :: StringBuffer -> StringBuffer
untilEndOfString# (StringBuffer fo l# s# c#) =
loop c#
where
+ getch# i# = indexCharOffAddr# fo i#
+
loop c# =
- case indexCharOffAddr# fo c# of
+ case getch# c# of
'\"'# ->
- case indexCharOffAddr# fo (c# -# 1#) of
+ case getch# (c# -# 1#) of
'\\'# ->
-- looks like an escaped something or other to me,
-- better count the number of "\\"s that are immediately
-- preceding to decide if the " is escaped.
let
odd_slashes flg i# =
- case indexCharOffAddr# fo i# of
+ case getch# i# of
'\\'# -> odd_slashes (not flg) (i# -# 1#)
_ -> flg
in
_ -> loop (c# +# 1#)
-untilEndOfChar# :: StringBuffer -> StringBuffer
-untilEndOfChar# (StringBuffer fo l# s# c#) =
- loop c#
- where
- loop c# =
- case indexCharOffAddr# fo c# of
- '\''# ->
- case indexCharOffAddr# fo (c# -# 1#) of
- '\\'# ->
- case indexCharOffAddr# fo (c# -# 2#) of
- '\\'# -> -- end of char
- StringBuffer fo l# s# c#
- _ -> loop (c# +# 1#) -- false alarm
- _ -> StringBuffer fo l# s# c#
- '\NUL'# ->
- if c# >=# l# then -- hit sentinel, this doesn't look too good..
- StringBuffer fo l# l# l#
- else
- loop (c# +# 1#)
- _ -> loop (c# +# 1#)
-
-untilChar# :: StringBuffer -> Char# -> StringBuffer
-untilChar# (StringBuffer fo l# s# c#) x# =
+stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer -- skip forward to the first x#, or to offset l# if there is none
+stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
loop c#
where
- loop c# =
- if indexCharOffAddr# fo c# `eqChar#` x# then
- StringBuffer fo l# s# c#
- else
- loop (c# +# 1#)
+ loop c#
+ | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
+ = StringBuffer fo l# c# c# -- both lexeme start and current position move to the stopping point
+ | otherwise
+ = loop (c# +# 1#)
-- conversion
lexemeToString :: StringBuffer -> String
if start_pos# ==# current# then
""
else
-#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 205
- byteArrayToString (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
-#else
unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
-#endif
-lexemeToByteArray :: StringBuffer -> _ByteArray Int
+lexemeToByteArray :: StringBuffer -> ByteArray Int
lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
if start_pos# ==# current# then
error "lexemeToByteArray"