2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToFastString, -- :: StringBuffer -> FastString
65 #include "HsVersions.h"
68 #if __GLASGOW_HASKELL__ < 411
69 import PrelAddr ( Addr(..) )
70 import Panic ( panic )
72 import Addr ( Addr(..) )
73 import Ptr ( Ptr(..) )
76 #if __GLASGOW_HASKELL__ < 501
78 #elif __GLASGOW_HASKELL__ < 503
79 import PrelIO ( hGetcBuffered )
81 import GHC.IO ( hGetcBuffered )
89 import IO ( openFile, isEOFError )
91 import Exception ( bracket )
93 import CString ( unpackCStringBA )
95 #if __GLASGOW_HASKELL__ < 503
103 import Char ( isDigit )
116 instance Show StringBuffer where
117 showsPrec _ s = showString ""
121 hGetStringBuffer :: FilePath -> IO StringBuffer
122 hGetStringBuffer fname = do
123 (a, read) <- slurpFileExpandTabs fname
125 -- urk! slurpFile gives us a buffer that doesn't have room for
126 -- the sentinel. Assume it has a final newline for now, and overwrite
127 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
133 -- add sentinel '\NUL'
134 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
135 return (StringBuffer a# end# 0# 0#)
137 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
138 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
140 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
145 -----------------------------------------------------------------------------
146 -- Turn a String into a StringBuffer
149 stringToStringBuffer :: String -> IO StringBuffer
150 freeStringBuffer :: StringBuffer -> IO ()
152 #if __GLASGOW_HASKELL__ >= 411
153 stringToStringBuffer str =
154 do let sz@(I# sz#) = length str
155 (Ptr a#) <- mallocBytes (sz+1)
157 writeCharOffAddr (A# a#) sz '\0' -- sentinel
158 return (StringBuffer a# sz# 0# 0#)
160 fill_in [] _ = return ()
161 fill_in (c:cs) a = do
162 writeCharOffAddr a 0 c
163 fill_in cs (a `plusAddr` 1)
165 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
167 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
168 freeStringBuffer sb = return ()
173 -----------------------------------------------------------------------------
174 This very disturbing bit of code is used for expanding the tabs in a
175 file before we start parsing it. Expanding the tabs early makes the
176 lexer a lot simpler: we only have to record the beginning of the line
177 in order to be able to calculate the column offset of the current
180 We guess the size of the buffer required as 20% extra for
181 expanded tabs, and enlarge it if necessary.
185 getErrType = _ccall_ getErrType__
187 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
188 slurpFileExpandTabs fname = do
189 bracket (openFile fname ReadMode) (hClose)
191 do sz <- hFileSize handle
192 if sz > toInteger (maxBound::Int)
193 then ioError (userError "slurpFile: file too big")
195 let sz_i = fromInteger sz
197 -- empty file: just allocate a buffer containing '\0'
198 then do chunk <- allocMem 1
199 writeCharOffAddr chunk 0 '\0'
201 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
202 chunk <- allocMem sz_i'
203 trySlurp handle sz_i' chunk
206 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
207 trySlurp handle sz_i chunk =
208 #if __GLASGOW_HASKELL__ < 501
209 wantReadableHandle "hGetChar" handle $ \ handle_ ->
210 let fo = haFO__ handle_ in
212 wantReadableHandle "hGetChar" handle $
213 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
220 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
221 slurpFile c off chunk chunk_sz max_off = slurp c off
224 slurp :: Int# -> Int# -> IO (Addr, Int)
225 slurp c off | off >=# max_off = do
226 let new_sz = chunk_sz *# 2#
227 chunk' <- reAllocMem chunk (I# new_sz)
228 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
230 #if __GLASGOW_HASKELL__ < 501
231 intc <- mayBlock fo (_ccall_ fileGetc fo)
232 if intc == ((-1)::Int)
233 then do errtype <- getErrType
234 if errtype == (19{-ERR_EOF-} :: Int)
235 then return (chunk, I# off)
236 else constructErrorAndFail "slurpFile"
237 else case chr intc of
240 ch <- (if not (bufferEmpty buf)
241 then hGetcBuffered fd ref buf
243 #if __GLASGOW_HASKELL__ >= 503
244 new_buf <- fillReadBuffer fd True False buf
246 new_buf <- fillReadBuffer fd True buf
248 hGetcBuffered fd ref new_buf)
249 `catch` \e -> if isEOFError e
253 '\xFFFF' -> return (chunk, I# off)
256 ch -> do writeCharOffAddr chunk (I# off) ch
257 let c' | ch == '\n' = 0#
258 | otherwise = c +# 1#
261 tabIt :: Int# -> Int# -> IO (Addr, Int)
262 -- can't run out of buffer in here, because we reserved an
263 -- extra tAB_SIZE bytes at the end earlier.
265 writeCharOffAddr chunk (I# off) ' '
268 if c' `remInt#` tAB_SIZE ==# 0#
273 -- allow space for a full tab at the end of the buffer
274 -- (that's what the max_off thing is for),
275 -- and add 1 to allow room for the final sentinel \NUL at
276 -- the end of the file.
277 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
278 return (chunk', rc+1 {- room for sentinel -})
281 reAllocMem :: Addr -> Int -> IO Addr
282 reAllocMem ptr sz = do
283 chunk <- _ccall_ realloc ptr sz
285 then fail "reAllocMem"
288 allocMem :: Int -> IO Addr
290 chunk <- _ccall_ malloc sz
292 #if __GLASGOW_HASKELL__ < 501
293 then constructErrorAndFail "allocMem"
295 then ioException (IOError Nothing ResourceExhausted "malloc"
296 "out of memory" Nothing)
304 currentChar :: StringBuffer -> Char
305 currentChar sb = case currentChar# sb of c -> C# c
307 lookAhead :: StringBuffer -> Int -> Char
308 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
310 indexSBuffer :: StringBuffer -> Int -> Char
311 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
313 currentChar# :: StringBuffer -> Char#
314 indexSBuffer# :: StringBuffer -> Int# -> Char#
315 lookAhead# :: StringBuffer -> Int# -> Char#
316 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
317 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
319 -- relative lookup, i.e, currentChar = lookAhead 0
320 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
322 currentIndex# :: StringBuffer -> Int#
323 currentIndex# (StringBuffer fo# _ _ c#) = c#
325 lexemeIndex :: StringBuffer -> Int#
326 lexemeIndex (StringBuffer fo# _ c# _) = c#
329 moving the start point of the current lexeme.
332 -- moving the end point of the current lexeme.
333 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
334 setCurrentPos# (StringBuffer fo l# s# c#) i# =
335 StringBuffer fo l# s# (c# +# i#)
337 -- augmenting the current lexeme by one.
338 incLexeme :: StringBuffer -> StringBuffer
339 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
341 decLexeme :: StringBuffer -> StringBuffer
342 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
346 -- move the start and end point of the buffer on by
350 stepOn :: StringBuffer -> StringBuffer
351 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
353 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
354 stepOnBy# (StringBuffer fo# l# s# c#) i# =
356 new_s# -> StringBuffer fo# l# new_s# new_s#
359 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
360 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
362 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
363 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
365 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
366 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
367 = StringBuffer fo l s# c#
369 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
371 stepOnUntil pred (StringBuffer fo l# s# c#) =
375 case indexCharOffAddr# fo c# of
376 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
377 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
378 | otherwise -> loop (c# +# 1#)
380 stepOverLexeme :: StringBuffer -> StringBuffer
381 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
383 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
384 expandWhile pred (StringBuffer fo l# s# c#) =
388 case indexCharOffAddr# fo c# of
389 ch# | pred (C# ch#) -> loop (c# +# 1#)
390 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
391 | otherwise -> StringBuffer fo l# s# c#
393 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
394 expandWhile# pred (StringBuffer fo l# s# c#) =
398 case indexCharOffAddr# fo c# of
399 ch# | pred ch# -> loop (c# +# 1#)
400 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
401 | otherwise -> StringBuffer fo l# s# c#
403 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
404 scanNumLit acc (StringBuffer fo l# s# c#) =
408 case indexCharOffAddr# fo c# of
409 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
410 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
411 | otherwise -> (acc,StringBuffer fo l# s# c#)
414 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
415 expandUntilMatch (StringBuffer fo l# s# c#) str =
418 loop c# [] = Just (StringBuffer fo l# s# c#)
419 loop c# ((C# x#):xs) =
420 case indexCharOffAddr# fo c# of
421 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
422 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
423 | otherwise -> loop (c# +# 1#) str
428 -- at or beyond end of buffer?
429 bufferExhausted :: StringBuffer -> Bool
430 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
432 emptyLexeme :: StringBuffer -> Bool
433 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
436 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
437 prefixMatch (StringBuffer fo l# s# c#) str =
440 loop c# [] = Just (StringBuffer fo l# s# c#)
442 | indexCharOffAddr# fo c# `eqChar#` x#
447 untilEndOfString# :: StringBuffer -> StringBuffer
448 untilEndOfString# (StringBuffer fo l# s# c#) =
451 getch# i# = indexCharOffAddr# fo i#
456 case getch# (c# -# 1#) of
458 -- looks like an escaped something or other to me,
459 -- better count the number of "\\"s that are immediately
460 -- preceeding to decide if the " is escaped.
464 '\\'# -> odd_slashes (not flg) (i# -# 1#)
467 if odd_slashes True (c# -# 2#) then
468 -- odd number, " is ecaped.
470 else -- a real end of string delimiter after all.
471 StringBuffer fo l# s# c#
472 _ -> StringBuffer fo l# s# c#
474 if c# >=# l# then -- hit sentinel, this doesn't look too good..
475 StringBuffer fo l# l# l#
481 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
482 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
486 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
487 = StringBuffer fo l# c# c#
492 lexemeToString :: StringBuffer -> String
493 lexemeToString (StringBuffer fo _ start_pos# current#) =
494 if start_pos# ==# current# then
497 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
499 lexemeToFastString :: StringBuffer -> FastString
500 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
501 if start_pos# ==# current# then
504 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))