Buffers for scanning string input stored in external arrays.
\begin{code}
-{-# OPTIONS -fno-prune-tydecls #-}
+
+{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
+
module StringBuffer
(
StringBuffer,
- -- creation
- hGetStringBuffer, -- :: FilePath -> IO StringBuffer
+ -- creation/destruction
+ hGetStringBuffer, -- :: FilePath -> IO StringBuffer
+ stringToStringBuffer, -- :: String -> IO StringBuffer
+ freeStringBuffer, -- :: StringBuffer -> IO ()
-- Lookup
currentChar, -- :: StringBuffer -> Char
lexemeIndex, -- :: StringBuffer -> Int#
-- moving the end point of the current lexeme.
- setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
- incLexeme, -- :: StringBuffer -> StringBuffer
- decLexeme, -- :: StringBuffer -> StringBuffer
+ addToCurrentPos, -- :: StringBuffer -> Int# -> StringBuffer
+ incCurrentPos, -- :: StringBuffer -> StringBuffer
+ decCurrentPos, -- :: StringBuffer -> StringBuffer
-- move the start and end lexeme pointer on by x units.
stepOn, -- :: StringBuffer -> StringBuffer
stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
+ stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
stepOverLexeme, -- :: StringBuffer -> StringBuffer
scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
-- matching
prefixMatch, -- :: StringBuffer -> String -> Bool
untilEndOfString#, -- :: StringBuffer -> Int#
- untilChar#, -- :: StringBuffer -> Char# -> Int#
-- conversion
lexemeToString, -- :: StringBuffer -> String
- lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
lexemeToFastString, -- :: StringBuffer -> FastString
- lexemeToBuffer, -- :: StringBuffer -> StringBuffer
-
- FastString,
- ByteArray
) where
#include "HsVersions.h"
-import GlaExts
-import PrelAddr ( Addr(..) )
-import Foreign
-import ST
-import Char ( chr )
-
--- urk!
-#include "../lib/std/cbits/stgerror.h"
-#if __GLASGOW_HASKELL__ >= 303
-import IO ( openFile
-#if __GLASGOW_HASKELL__ < 407
- , slurpFile -- comes from PrelHandle or IOExts now
-#endif
- )
-import PrelIOBase
-import PrelHandle
-import Addr
+#if __GLASGOW_HASKELL__ < 502
+import Panic ( panic )
#else
-import IO ( openFile, hFileSize, hClose, IOMode(..) )
-import Addr
-#endif
-
-#if __GLASGOW_HASKELL__ < 301
-import IOBase ( Handle, IOError(..), IOErrorType(..),
- constructErrorAndFail )
-import IOHandle ( readHandle, writeHandle, filePtr )
-import PackBase ( unpackCStringBA )
+#if __GLASGOW_HASKELL__ < 503
+import Ptr ( Ptr(..) )
#else
-# if __GLASGOW_HASKELL__ <= 302
-import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
- constructErrorAndFail )
-import PrelHandle ( readHandle, writeHandle, filePtr )
-# endif
-import PrelPack ( unpackCStringBA )
+import GHC.Ptr ( Ptr(..) )
+#endif
#endif
-#if __GLASGOW_HASKELL__ < 402
-import Util ( bracket )
+#if __GLASGOW_HASKELL__ < 501
+import Char ( chr )
+#elif __GLASGOW_HASKELL__ < 503
+import PrelIO ( hGetcBuffered )
#else
-import Exception ( bracket )
+import GHC.IO ( hGetcBuffered )
#endif
import PrimPacked
import FastString
-import Char (isDigit)
+
+import GLAEXTS
+
+import Foreign
+
+#if __GLASGOW_HASKELL__ >= 502
+import CString ( newCString )
+#endif
+
+import IO ( openFile, isEOFError )
+import EXCEPTION ( bracket )
+
+#if __GLASGOW_HASKELL__ < 503
+import PrelIOBase
+import PrelHandle
+#else
+import GHC.IOBase
+import GHC.Handle
+#endif
+
+import Char ( isDigit )
\end{code}
\begin{code}
\begin{code}
-- Show instance for StringBuffer: showsPrec ignores both the precedence and
-- the buffer itself and emits a constant string.  This change replaces the
-- empty string with the placeholder "<stringbuffer>".
instance Show StringBuffer where
- showsPrec _ s = showString ""
+ showsPrec _ s = showString "<stringbuffer>"
\end{code}
\begin{code}
-hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
-hGetStringBuffer expand_tabs fname = do
- (a, read) <- if expand_tabs
- then slurpFileExpandTabs fname
- else slurpFile fname
-
- let (A# a#) = a; (I# read#) = read
-
- -- add sentinel '\NUL'
- _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
- return (StringBuffer a# read# 0# 0#)
-
-#if __GLASGOW_HASKELL__ < 303
-slurpFile fname =
- openFile fname ReadMode >>= \ hndl ->
- hFileSize hndl >>= \ len ->
- let len_i = fromInteger len in
- -- Allocate an array for system call to store its bytes into.
- -- ToDo: make it robust
--- trace (show ((len_i::Int)+1)) $
- _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
- if addr2Int# a# ==# 0# then
- fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
- else
- readHandle hndl >>= \ hndl_ ->
- writeHandle hndl hndl_ >>
- let ptr = filePtr hndl_ in
-#if __GLASGOW_HASKELL__ <= 302
- _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
+-- Read the file 'fname' into a freshly allocated buffer (tabs are expanded
+-- by slurpFileExpandTabs) and terminate the contents with a '\NUL' sentinel.
+-- The resulting StringBuffer has its length set to the sentinel's index and
+-- both lexeme positions set to 0.
+hGetStringBuffer :: FilePath -> IO StringBuffer
+hGetStringBuffer fname = do
+   (a, read) <- slurpFileExpandTabs fname
+
+   -- For a non-empty file 'read' is the number of bytes slurped plus one
+   -- slot reserved for the sentinel, so the sentinel belongs at index
+   -- (read - 1).  For an empty file slurpFileExpandTabs returns a count of
+   -- 0 together with a one-byte buffer; clamp the index to 0 so that we
+   -- never write at offset -1 (outside the allocation) and never build a
+   -- StringBuffer with a negative length.
+   let  (Ptr a#)  = a
+        (I# end#) = max 0 (read - 1)
+
+   -- add sentinel '\NUL'
+   writeCharOffPtr a (I# end#) '\0'
+
+   return (StringBuffer a# end# 0# 0#)
+\end{code}
+
+-----------------------------------------------------------------------------
+-- Turn a String into a StringBuffer
+
+\begin{code}
+stringToStringBuffer :: String -> IO StringBuffer
+freeStringBuffer :: StringBuffer -> IO ()
+
+#if __GLASGOW_HASKELL__ >= 502
+-- Copy 'str' into newly allocated storage with newCString and wrap the
+-- result as a StringBuffer.  The buffer length is the number of characters
+-- in 'str' and both lexeme positions start at 0.  The storage is not
+-- managed by the garbage collector: release it with freeStringBuffer.
+stringToStringBuffer str = do
+  let (I# sz#) = length str
+  Ptr a# <- newCString str
+  return (StringBuffer a# sz# 0# 0#)
+
+-- Release the storage behind a StringBuffer by passing its address to
+-- Foreign.free; the buffer must not be used afterwards.
+freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
#else
- _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
-#endif
- hClose hndl >>
- if read# ==# 0# then -- EOF or some other error
- fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
- else
- return (arr, I# read#)
-- Fallback when __GLASGOW_HASKELL__ < 502: building a buffer from a String
-- is unsupported (stringToStringBuffer is a panic) and freeStringBuffer does
-- nothing.
+stringToStringBuffer = panic "stringToStringBuffer: not implemented"
+freeStringBuffer sb = return ()
#endif
-unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
-unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
- unsafePerformIO (
- _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
- return s
- )
\end{code}
-----------------------------------------------------------------------------
expanded tabs, and enlarge it if necessary.
\begin{code}
-#if __GLASGOW_HASKELL__ < 303
-mayBlock fo thing = thing
-
-writeCharOffAddr :: Addr -> Int -> Char -> IO ()
-writeCharOffAddr addr off c
- = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
-#endif
-
+#if __GLASGOW_HASKELL__ < 501
-- Fetch an Int error code from the C routine getErrType__.  trySlurp
-- compares the result with 19 (ERR_EOF) to tell end-of-file apart from a
-- genuine read error.
getErrType :: IO Int
-#if __GLASGOW_HASKELL__ < 303
-getErrType = _casm_ ``%r = ghc_errtype;''
-#else
getErrType = _ccall_ getErrType__
#endif
-- Read the file 'fname' into a buffer obtained from allocMem.  The handle is
-- opened in ReadMode and released by 'bracket' through hClose.
--   * size > maxBound::Int : raises userError "slurpFile: file too big"
--   * size == 0 (new code) : returns a one-byte buffer holding '\0', count 0
--   * otherwise            : allocates size*12/10 bytes (20% slack for tab
--                            expansion) and hands over to trySlurp
-- NOTE(review): the empty-file count (0) does not include the sentinel slot
-- that trySlurp adds to its result (rc+1), so callers must cope with 0.
-slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
+slurpFileExpandTabs :: FilePath -> IO (Ptr (),Int)
slurpFileExpandTabs fname = do
bracket (openFile fname ReadMode) (hClose)
(\ handle ->
do sz <- hFileSize handle
if sz > toInteger (maxBound::Int)
- then IOERROR (userError "slurpFile: file too big")
+ then ioError (userError "slurpFile: file too big")
else do
let sz_i = fromInteger sz
- sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
- chunk <- allocMem sz_i'
- trySlurp handle sz_i' chunk
+ if sz_i == 0
+ -- empty file: just allocate a buffer containing '\0'
+ then do chunk <- allocMem 1
+ writeCharOffPtr chunk 0 '\0'
+ return (chunk, 0)
+ else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
+ chunk <- allocMem sz_i'
+ trySlurp handle sz_i' chunk
)
-trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
+trySlurp :: Handle -> Int -> Ptr () -> IO (Ptr (), Int)
trySlurp handle sz_i chunk =
-#if __GLASGOW_HASKELL__ == 303
- wantReadableHandle "hGetChar" handle >>= \ handle_ ->
- let fo = haFO__ handle_ in
-#elif __GLASGOW_HASKELL__ > 303
+#if __GLASGOW_HASKELL__ < 501
wantReadableHandle "hGetChar" handle $ \ handle_ ->
let fo = haFO__ handle_ in
#else
- readHandle handle >>= \ handle_ ->
- let fo = filePtr handle_ in
+ wantReadableHandle "hGetChar" handle $
+ \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
#endif
let
(I# chunk_sz) = sz_i
tAB_SIZE = 8#
- slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
+ slurpFile :: Int# -> Int# -> Ptr () -> Int# -> Int# -> IO (Ptr (), Int)
slurpFile c off chunk chunk_sz max_off = slurp c off
where
- slurp :: Int# -> Int# -> IO (Addr, Int)
+ slurp :: Int# -> Int# -> IO (Ptr (), Int)
slurp c off | off >=# max_off = do
let new_sz = chunk_sz *# 2#
chunk' <- reAllocMem chunk (I# new_sz)
slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
slurp c off = do
+#if __GLASGOW_HASKELL__ < 501
intc <- mayBlock fo (_ccall_ fileGetc fo)
if intc == ((-1)::Int)
then do errtype <- getErrType
- if errtype == (ERR_EOF :: Int)
+ if errtype == (19{-ERR_EOF-} :: Int)
then return (chunk, I# off)
else constructErrorAndFail "slurpFile"
else case chr intc of
+#else
+ buf <- readIORef ref
+ ch <- (if not (bufferEmpty buf)
+ then hGetcBuffered fd ref buf
+ else do
+#if __GLASGOW_HASKELL__ >= 503
+ new_buf <- fillReadBuffer fd True False buf
+#else
+ new_buf <- fillReadBuffer fd True buf
+#endif
+ hGetcBuffered fd ref new_buf)
+ `catch` \e -> if isEOFError e
+ then return '\xFFFF'
+ else ioError e
+ case ch of
+ '\xFFFF' -> return (chunk, I# off)
+#endif
'\t' -> tabIt c off
- ch -> do writeCharOffAddr chunk (I# off) ch
+ ch -> do writeCharOffPtr chunk (I# off) ch
let c' | ch == '\n' = 0#
| otherwise = c +# 1#
slurp c' (off +# 1#)
- tabIt :: Int# -> Int# -> IO (Addr, Int)
+ tabIt :: Int# -> Int# -> IO (Ptr (), Int)
-- can't run out of buffer in here, because we reserved an
-- extra tAB_SIZE bytes at the end earlier.
tabIt c off = do
- writeCharOffAddr chunk (I# off) ' '
+ writeCharOffPtr chunk (I# off) ' '
let c' = c +# 1#
off' = off +# 1#
if c' `remInt#` tAB_SIZE ==# 0#
-- and add 1 to allow room for the final sentinel \NUL at
-- the end of the file.
(chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
-#if __GLASGOW_HASKELL__ < 404
- writeHandle handle handle_
-#endif
- if rc < (0::Int)
- then constructErrorAndFail "slurpFile"
- else return (chunk', rc+1 {-room for sentinel-})
+ return (chunk', rc+1 {- room for sentinel -})
-- Resize the buffer 'ptr' to 'sz' bytes (new code: through c_realloc).
-- A null result is reported with fail "reAllocMem"; otherwise the pointer
-- returned by realloc is handed back and must be used in place of 'ptr'.
-reAllocMem :: Addr -> Int -> IO Addr
+reAllocMem :: Ptr () -> Int -> IO (Ptr ())
reAllocMem ptr sz = do
- chunk <- _ccall_ realloc ptr sz
- if chunk == nullAddr
-#if __GLASGOW_HASKELL__ >= 400
+ chunk <- c_realloc ptr sz
+ if chunk == nullPtr
then fail "reAllocMem"
-#else
- then fail (userError "reAllocMem")
-#endif
else return chunk
-- Allocate 'sz' bytes (new code: through c_malloc).  A null result is an
-- error: constructErrorAndFail "allocMem" when __GLASGOW_HASKELL__ < 501,
-- otherwise an IOError of type ResourceExhausted ("out of memory").
-allocMem :: Int -> IO Addr
+allocMem :: Int -> IO (Ptr ())
allocMem sz = do
-#if __GLASGOW_HASKELL__ < 303
- chunk <- _ccall_ malloc sz
- if chunk == nullAddr
- then fail (userError "allocMem")
- else return chunk
-#else
- chunk <- _ccall_ allocMemory__ sz
- if chunk == nullAddr
+ chunk <- c_malloc sz
+ if chunk == nullPtr
+#if __GLASGOW_HASKELL__ < 501
then constructErrorAndFail "allocMem"
+#else
+ then ioException (IOError Nothing ResourceExhausted "malloc"
+ "out of memory" Nothing)
+#endif
else return chunk
+
-- Bindings to the C functions malloc and realloc, exposed as c_malloc and
-- c_realloc.  When __GLASGOW_HASKELL__ <= 408 the foreign imports are
-- Addr-based (c_malloc', c_realloc') and thin wrappers convert to and from
-- Ptr; otherwise the functions are imported at Ptr types directly.
+#if __GLASGOW_HASKELL__ <= 408
+c_malloc sz = do A# a <- c_malloc' sz; return (Ptr a)
+foreign import ccall "malloc" unsafe
+ c_malloc' :: Int -> IO Addr
+
+c_realloc (Ptr a) sz = do A# a <- c_realloc' (A# a) sz; return (Ptr a)
+foreign import ccall "realloc" unsafe
+ c_realloc' :: Addr -> Int -> IO Addr
+#else
+foreign import ccall "malloc" unsafe
+ c_malloc :: Int -> IO (Ptr a)
+
+foreign import ccall "realloc" unsafe
+ c_realloc :: Ptr a -> Int -> IO (Ptr a)
#endif
\end{code}
\begin{code}
-- Move the end point of the current lexeme: the current position (the last
-- field) is offset by i#; the other three fields are copied unchanged.
-- Renamed from setCurrentPos#, whose body likewise added i# to c#.
-setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
-setCurrentPos# (StringBuffer fo l# s# c#) i# =
+addToCurrentPos :: StringBuffer -> Int# -> StringBuffer
+addToCurrentPos (StringBuffer fo l# s# c#) i# =
StringBuffer fo l# s# (c# +# i#)
-- Extend the current lexeme by one: the current position (last field) is
-- advanced by 1, all other fields are unchanged.  Renamed from incLexeme.
-incLexeme :: StringBuffer -> StringBuffer
-incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
+incCurrentPos :: StringBuffer -> StringBuffer
+incCurrentPos (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
-- Shrink the current lexeme by one: the current position (last field) is
-- moved back by 1, all other fields are unchanged.  Renamed from decLexeme.
-- No check is made that the result stays at or after the start position.
-decLexeme :: StringBuffer -> StringBuffer
-decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
+decCurrentPos :: StringBuffer -> StringBuffer
+decCurrentPos (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
\end{code}
_ -> loop (c# +# 1#)
-- Scan forward from the current position and stop at the first index that is
-- either at or beyond the buffer length l# or holds the character x#.
-- The renamed version returns a buffer with BOTH the start and the current
-- position set to that index; the old untilChar# kept the start s#.
-untilChar# :: StringBuffer -> Char# -> StringBuffer
-untilChar# (StringBuffer fo l# s# c#) x# =
+stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
+stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
loop c#
where
loop c#
| c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
- = StringBuffer fo l# s# c#
+ = StringBuffer fo l# c# c#
| otherwise
= loop (c# +# 1#)
-- conversion
-- Convert the current lexeme (from start_pos# up to, but not including,
-- current#) to a String.  An empty lexeme gives "".  Otherwise the new code
-- copies the lexeme with copySubStr and unpacks it with unpackNBytesBA,
-- passing the lexeme length explicitly to both.
-- The len# binding introduced in the new pattern is not used in the body.
-- This hunk also removes lexemeToByteArray.
lexemeToString :: StringBuffer -> String
-lexemeToString (StringBuffer fo _ start_pos# current#) =
+lexemeToString (StringBuffer fo len# start_pos# current#) =
if start_pos# ==# current# then
""
else
- unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
-
-lexemeToByteArray :: StringBuffer -> ByteArray Int
-lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
- if start_pos# ==# current# then
- error "lexemeToByteArray"
- else
- copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
+ let len = I# (current# -# start_pos#) in
+ unpackNBytesBA (copySubStr fo (I# start_pos#) len) len
-- Convert the current lexeme (from start_pos# up to, but not including,
-- current#) to a FastString.  An empty lexeme now yields mkFastString "";
-- otherwise mkFastSubString is given the buffer address, the start offset
-- and the lexeme length.
-- This hunk also removes lexemeToBuffer, the only user of unsafeWriteBuffer
-- visible here.
lexemeToFastString :: StringBuffer -> FastString
lexemeToFastString (StringBuffer fo l# start_pos# current#) =
if start_pos# ==# current# then
- mkFastCharString2 (A# fo) (I# 0#)
+ mkFastString ""
else
- mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
-
-{-
- Create a StringBuffer from the current lexeme, and add a sentinel
- at the end. Know What You're Doing before taking this function
- into use..
--}
-lexemeToBuffer :: StringBuffer -> StringBuffer
-lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
- if start_pos# ==# current# then
- StringBuffer fo 0# start_pos# current# -- an error, really.
- else
- unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)
- (current# -# 1#)
- '\NUL'#
-
+ mkFastSubString fo (I# start_pos#) (I# (current# -# start_pos#))
\end{code}