X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FStringBuffer.lhs;h=e52e7e78da9c7792ff9a9b79b747c5ab27888700;hb=28a464a75e14cece5db40f2765a29348273ff2d2;hp=f95f7a1ed713aa9703de9ca463f59c57c5c89147;hpb=6c9a37e31afc41d57417a3828877577d8d270266;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index f95f7a1..e52e7e7 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -1,573 +1,240 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 +% (c) The University of Glasgow, 1997-2006 % \section{String buffers} Buffers for scanning string input stored in external arrays. \begin{code} - module StringBuffer ( - StringBuffer, - - -- creation/destruction - hGetStringBuffer, -- :: FilePath -> IO StringBuffer -#ifdef GHCI - stringToStringBuffer, -- :: String -> IO StringBuffer - freeStringBuffer, -- :: StringBuffer -> IO () -#endif - - -- Lookup - currentChar, -- :: StringBuffer -> Char - currentChar#, -- :: StringBuffer -> Char# - indexSBuffer, -- :: StringBuffer -> Int -> Char - indexSBuffer#, -- :: StringBuffer -> Int# -> Char# - -- relative lookup, i.e, currentChar = lookAhead 0 - lookAhead, -- :: StringBuffer -> Int -> Char - lookAhead#, -- :: StringBuffer -> Int# -> Char# - - -- offsets - currentIndex#, -- :: StringBuffer -> Int# - lexemeIndex, -- :: StringBuffer -> Int# - - -- moving the end point of the current lexeme. - setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer - incLexeme, -- :: StringBuffer -> StringBuffer - decLexeme, -- :: StringBuffer -> StringBuffer - - -- move the start and end lexeme pointer on by x units. - stepOn, -- :: StringBuffer -> StringBuffer - stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer - stepOverLexeme, -- :: StringBuffer -> StringBuffer - scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer) - squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer - mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer - expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer - expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer - -- at or beyond end of buffer? - bufferExhausted, -- :: StringBuffer -> Bool - emptyLexeme, -- :: StringBuffer -> Bool - - -- matching - prefixMatch, -- :: StringBuffer -> String -> Bool - untilEndOfString#, -- :: StringBuffer -> Int# - - -- conversion - lexemeToString, -- :: StringBuffer -> String - lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int - lexemeToFastString, -- :: StringBuffer -> FastString - lexemeToBuffer, -- :: StringBuffer -> StringBuffer - - FastString, - ByteArray + StringBuffer(..), + -- non-abstract for vs\/HaskellService + + -- * Creation\/destruction + hGetStringBuffer, + hGetStringBufferBlock, + appendStringBuffers, + stringToStringBuffer, + + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, + + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, + + -- * Conversion + lexemeToString, + lexemeToFastString, + + -- * Parsing integers + parseInteger, ) where #include "HsVersions.h" -import GlaExts -#if __GLASGOW_HASKELL__ < 411 -import PrelAddr ( Addr(..) ) -#else -import Addr ( Addr(..) ) -#endif -import Foreign -import Char ( chr ) +import Encoding +import FastString ( FastString,mkFastString,mkFastStringBytes ) --- urk! -#include "../lib/std/cbits/stgerror.h" +import Foreign +import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose + , Handle, hTell ) -#if __GLASGOW_HASKELL__ >= 303 -import IO ( openFile -#if __GLASGOW_HASKELL__ < 407 - , slurpFile -- comes from PrelHandle or IOExts now -#endif - ) -import PrelIOBase -import PrelHandle -import Addr -#else -import IO ( openFile, hFileSize, hClose, IOMode(..) ) -import Addr -#endif -#if __GLASGOW_HASKELL__ >= 411 -import Ptr ( Ptr(..) ) -#endif +import GHC.Ptr ( Ptr(..) ) +import GHC.Exts +import GHC.IOBase ( IO(..) ) +import GHC.Base ( unsafeChr ) -#if __GLASGOW_HASKELL__ < 301 -import IOBase ( Handle, IOError(..), IOErrorType(..), - constructErrorAndFail ) -import IOHandle ( readHandle, writeHandle, filePtr ) -import PackBase ( unpackCStringBA ) +#if __GLASGOW_HASKELL__ >= 601 +import System.IO ( openBinaryFile ) #else -# if __GLASGOW_HASKELL__ <= 302 -import PrelIOBase ( Handle, IOError(..), IOErrorType(..), - constructErrorAndFail ) -import PrelHandle ( readHandle, writeHandle, filePtr ) -# endif -import PrelPack ( unpackCStringBA ) +import IOExts ( openFileEx, IOModeEx(..) ) #endif -#if __GLASGOW_HASKELL__ < 402 -import Util ( bracket ) -#else -import Exception ( bracket ) +#if __GLASGOW_HASKELL__ < 601 +openBinaryFile fp mode = openFileEx fp (BinaryMode mode) #endif -import PrimPacked -import FastString -import Char (isDigit) -\end{code} +-- ----------------------------------------------------------------------------- +-- The StringBuffer type -\begin{code} +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- data StringBuffer - = StringBuffer - Addr# - Int# -- length - Int# -- lexeme start - Int# -- current pos -\end{code} + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. -\begin{code} instance Show StringBuffer where - showsPrec _ s = showString "" -\end{code} - -\begin{code} -hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer -hGetStringBuffer expand_tabs fname = do - (a, read) <- if expand_tabs - then slurpFileExpandTabs fname -#if __GLASGOW_HASKELL__ < 411 - else slurpFile fname -#else - else do - (Ptr a#, read) <- slurpFile fname - return (A# a#, read) -#endif - - let (A# a#) = a; (I# read#) = read - - -- add sentinel '\NUL' - _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#)) - return (StringBuffer a# read# 0# 0#) - -#if __GLASGOW_HASKELL__ < 303 -slurpFile fname = - openFile fname ReadMode >>= \ hndl -> - hFileSize hndl >>= \ len -> - let len_i = fromInteger len in - -- Allocate an array for system call to store its bytes into. - -- ToDo: make it robust --- trace (show ((len_i::Int)+1)) $ - _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) -> - if addr2Int# a# ==# 0# then - fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes")) - else - readHandle hndl >>= \ hndl_ -> - writeHandle hndl hndl_ >> - let ptr = filePtr hndl_ in -#if __GLASGOW_HASKELL__ <= 302 - _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) -> -#else - _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) -> -#endif - hClose hndl >> - if read# ==# 0# then -- EOF or some other error - fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname)) - else - return (arr, I# read#) -#endif - -unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer -unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# = - unsafePerformIO ( - _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () -> - return s - ) -\end{code} - ------------------------------------------------------------------------------ --- Turn a String into a StringBuffer + showsPrec _ s = showString "" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction + +hGetStringBuffer :: FilePath -> IO StringBuffer +hGetStringBuffer fname = do + h <- openBinaryFile fname ReadMode + size_i <- hFileSize h + let size = fromIntegral size_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) + then ioError (userError "short read of file") + else do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer +hGetStringBufferBlock handle wanted + = do size_i <- hFileSize handle + offset_i <- hTell handle + let size = min wanted (fromIntegral $ size_i-offset_i) + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> + do r <- if size == 0 then return 0 else hGetBuf handle ptr size + if r /= size + then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle)) + else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + return (StringBuffer buf size 0) + +appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer +appendStringBuffers sb1 sb2 + = do newBuf <- mallocForeignPtrArray (size+3) + withForeignPtr newBuf $ \ptr -> + withForeignPtr (buf sb1) $ \sb1Ptr -> + withForeignPtr (buf sb2) $ \sb2Ptr -> + do copyArray (sb1Ptr `advancePtr` cur sb1) ptr (calcLen sb1) + copyArray (sb2Ptr `advancePtr` cur sb2) (ptr `advancePtr` cur sb1) (calcLen sb2) + pokeArray (ptr `advancePtr` size) [0,0,0] + return (StringBuffer newBuf size 0) + where calcLen sb = len sb - cur sb + size = calcLen sb1 + calcLen sb2 -\begin{code} -#ifdef GHCI stringToStringBuffer :: String -> IO StringBuffer -stringToStringBuffer str = - do let sz@(I# sz#) = length str + 1 - (Ptr a#) <- mallocBytes sz - fill_in str (A# a#) - writeCharOffAddr (A# a#) (sz-1) '\0' -- sentinel - return (StringBuffer a# sz# 0# 0#) - where - fill_in [] _ = return () - fill_in (c:cs) a = do - writeCharOffAddr a 0 c - fill_in cs (a `plusAddr` 1) - -freeStringBuffer :: StringBuffer -> IO () -freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#) -#endif -\end{code} - ------------------------------------------------------------------------------ -This very disturbing bit of code is used for expanding the tabs in a -file before we start parsing it. Expanding the tabs early makes the -lexer a lot simpler: we only have to record the beginning of the line -in order to be able to calculate the column offset of the current -token. - -We guess the size of the buffer required as 20% extra for -expanded tabs, and enlarge it if necessary. - -\begin{code} -#if __GLASGOW_HASKELL__ < 303 -mayBlock fo thing = thing - -writeCharOffAddr :: Addr -> Int -> Char -> IO () -writeCharOffAddr addr off c - = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c -#endif - -getErrType :: IO Int -#if __GLASGOW_HASKELL__ < 303 -getErrType = _casm_ ``%r = ghc_errtype;'' -#else -getErrType = _ccall_ getErrType__ -#endif - -slurpFileExpandTabs :: FilePath -> IO (Addr,Int) -slurpFileExpandTabs fname = do - bracket (openFile fname ReadMode) (hClose) - (\ handle -> - do sz <- hFileSize handle - if sz > toInteger (maxBound::Int) - then IOERROR (userError "slurpFile: file too big") - else do - let sz_i = fromInteger sz - sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs - chunk <- allocMem sz_i' - trySlurp handle sz_i' chunk - ) - -trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int) -trySlurp handle sz_i chunk = -#if __GLASGOW_HASKELL__ == 303 - wantReadableHandle "hGetChar" handle >>= \ handle_ -> - let fo = haFO__ handle_ in -#elif __GLASGOW_HASKELL__ > 303 - wantReadableHandle "hGetChar" handle $ \ handle_ -> - let fo = haFO__ handle_ in -#else - readHandle handle >>= \ handle_ -> - let fo = filePtr handle_ in -#endif - let - (I# chunk_sz) = sz_i - - tAB_SIZE = 8# - - slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int) - slurpFile c off chunk chunk_sz max_off = slurp c off - where - - slurp :: Int# -> Int# -> IO (Addr, Int) - slurp c off | off >=# max_off = do - let new_sz = chunk_sz *# 2# - chunk' <- reAllocMem chunk (I# new_sz) - slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#)) - slurp c off = do - intc <- mayBlock fo (_ccall_ fileGetc fo) - if intc == ((-1)::Int) - then do errtype <- getErrType - if errtype == (ERR_EOF :: Int) - then return (chunk, I# off) - else constructErrorAndFail "slurpFile" - else case chr intc of - '\t' -> tabIt c off - ch -> do writeCharOffAddr chunk (I# off) ch - let c' | ch == '\n' = 0# - | otherwise = c +# 1# - slurp c' (off +# 1#) - - tabIt :: Int# -> Int# -> IO (Addr, Int) - -- can't run out of buffer in here, because we reserved an - -- extra tAB_SIZE bytes at the end earlier. - tabIt c off = do - writeCharOffAddr chunk (I# off) ' ' - let c' = c +# 1# - off' = off +# 1# - if c' `remInt#` tAB_SIZE ==# 0# - then slurp c' off' - else tabIt c' off' - in do - - -- allow space for a full tab at the end of the buffer - -- (that's what the max_off thing is for), - -- and add 1 to allow room for the final sentinel \NUL at - -- the end of the file. - (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#)) -#if __GLASGOW_HASKELL__ < 404 - writeHandle handle handle_ -#endif - if rc < (0::Int) - then constructErrorAndFail "slurpFile" - else return (chunk', rc+1 {-room for sentinel-}) +stringToStringBuffer str = do + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +-- ----------------------------------------------------------------------------- +-- Grab a character + +-- Getting our fingers dirty a little here, but this is performance-critical +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') + +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar + +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer buf len 0) deflt = deflt +prevChar (StringBuffer buf len cur) deflt = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) + +-- ----------------------------------------------------------------------------- +-- Moving - -reAllocMem :: Addr -> Int -> IO Addr -reAllocMem ptr sz = do - chunk <- _ccall_ realloc ptr sz - if chunk == nullAddr -#if __GLASGOW_HASKELL__ >= 400 - then fail "reAllocMem" -#else - then fail (userError "reAllocMem") -#endif - else return chunk - -allocMem :: Int -> IO Addr -allocMem sz = do - chunk <- _ccall_ malloc sz -#if __GLASGOW_HASKELL__ < 303 - if chunk == nullAddr - then fail (userError "allocMem") - else return chunk -#else - if chunk == nullAddr - then constructErrorAndFail "allocMem" - else return chunk -#endif -\end{code} - -Lookup - -\begin{code} -currentChar :: StringBuffer -> Char -currentChar sb = case currentChar# sb of c -> C# c - -lookAhead :: StringBuffer -> Int -> Char -lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c - -indexSBuffer :: StringBuffer -> Int -> Char -indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c - -currentChar# :: StringBuffer -> Char# -indexSBuffer# :: StringBuffer -> Int# -> Char# -lookAhead# :: StringBuffer -> Int# -> Char# -currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# -indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# - - -- relative lookup, i.e, currentChar = lookAhead 0 -lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#) - -currentIndex# :: StringBuffer -> Int# -currentIndex# (StringBuffer fo# _ _ c#) = c# - -lexemeIndex :: StringBuffer -> Int# -lexemeIndex (StringBuffer fo# _ c# _) = c# -\end{code} - - moving the start point of the current lexeme. - -\begin{code} - -- moving the end point of the current lexeme. -setCurrentPos# :: StringBuffer -> Int# -> StringBuffer -setCurrentPos# (StringBuffer fo l# s# c#) i# = - StringBuffer fo l# s# (c# +# i#) - --- augmenting the current lexeme by one. -incLexeme :: StringBuffer -> StringBuffer -incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#) - -decLexeme :: StringBuffer -> StringBuffer -decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#) - -\end{code} - --- move the start and end point of the buffer on by --- x units. - -\begin{code} stepOn :: StringBuffer -> StringBuffer -stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same. - -stepOnBy# :: StringBuffer -> Int# -> StringBuffer -stepOnBy# (StringBuffer fo# l# s# c#) i# = - case s# +# i# of - new_s# -> StringBuffer fo# l# new_s# new_s# - --- jump to pos. -stepOnTo# :: StringBuffer -> Int# -> StringBuffer -stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s# - -squeezeLexeme :: StringBuffer -> Int# -> StringBuffer -squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c# - -mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer -mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#) - = StringBuffer fo l s# c# - -stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer - -stepOnUntil pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> StringBuffer fo l# c# c# - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> loop (c# +# 1#) - -stepOverLexeme :: StringBuffer -> StringBuffer -stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c# - -expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer -expandWhile pred (StringBuffer fo l# s# c#) = - loop c# +stepOn s = snd (nextChar s) + +offsetBytes :: Int -> StringBuffer -> StringBuffer +offsetBytes i s = s { cur = cur s + i } + +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 + +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l c) = l == c + +-- ----------------------------------------------------------------------------- +-- Conversion + +lexemeToString :: StringBuffer -> Int {-bytes-} -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer buf _ cur) bytes = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + utf8DecodeString (ptr `plusPtr` cur) bytes + +lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString +lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len + +-- ----------------------------------------------------------------------------- +-- Parsing integer strings in various bases + +byteOff :: StringBuffer -> Int -> Char +byteOff (StringBuffer buf _ cur) i = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + w <- peek (ptr `plusPtr` (cur+i)) + return (unsafeChr (fromIntegral (w::Word8))) + +-- | XXX assumes ASCII digits only +parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseInteger buf len radix to_int + = go 0 0 + where go i x | i == len = x + | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i))) + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +#if __GLASGOW_HASKELL__ < 600 +mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a) +mallocForeignPtrArray = doMalloc undefined where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - -expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer -expandWhile# pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred ch# -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - -scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer) -scanNumLit acc (StringBuffer fo l# s# c#) = - loop acc c# - where - loop acc c# = - case indexCharOffAddr# fo c# of - ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately. - | otherwise -> (acc,StringBuffer fo l# s# c#) - + doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b) + doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy) -expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer -expandUntilMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) = - case indexCharOffAddr# fo c# of - ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing - | ch# `eqChar#` x# -> loop (c# +# 1#) xs - | otherwise -> loop (c# +# 1#) str - -\end{code} - -\begin{code} - -- at or beyond end of buffer? -bufferExhausted :: StringBuffer -> Bool -bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l# - -emptyLexeme :: StringBuffer -> Bool -emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c# - - -- matching -prefixMatch :: StringBuffer -> String -> Maybe StringBuffer -prefixMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = Nothing - -untilEndOfString# :: StringBuffer -> StringBuffer -untilEndOfString# (StringBuffer fo l# s# c#) = - loop c# - where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\"'# -> - case getch# (c# -# 1#) of - '\\'# -> - -- looks like an escaped something or other to me, - -- better count the number of "\\"s that are immediately - -- preceeding to decide if the " is escaped. - let - odd_slashes flg i# = - case getch# i# of - '\\'# -> odd_slashes (not flg) (i# -# 1#) - _ -> flg - in - if odd_slashes True (c# -# 2#) then - -- odd number, " is ecaped. - loop (c# +# 1#) - else -- a real end of string delimiter after all. - StringBuffer fo l# s# c# - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - - -stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer -stepOnUntilChar# (StringBuffer fo l# s# c#) x# = - loop c# - where - loop c# - | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x# - = StringBuffer fo l# c# c# - | otherwise - = loop (c# +# 1#) - - -- conversion -lexemeToString :: StringBuffer -> String -lexemeToString (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - "" - else - unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))) - -lexemeToByteArray :: StringBuffer -> ByteArray Int -lexemeToByteArray (StringBuffer fo _ start_pos# current#) = - if start_pos# ==# current# then - error "lexemeToByteArray" - else - copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) - -lexemeToFastString :: StringBuffer -> FastString -lexemeToFastString (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - mkFastCharString2 (A# fo) (I# 0#) - else - mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#)) - -{- - Create a StringBuffer from the current lexeme, and add a sentinel - at the end. Know What You're Doing before taking this function - into use.. --} -lexemeToBuffer :: StringBuffer -> StringBuffer -lexemeToBuffer (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - StringBuffer fo 0# start_pos# current# -- an error, really. - else - unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#) - (current# -# 1#) - '\NUL'# +mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) +mallocForeignPtrBytes n = do + r <- mallocBytes n + newForeignPtr r (finalizerFree r) +foreign import ccall unsafe "stdlib.h free" + finalizerFree :: Ptr a -> IO () +#endif \end{code}