2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
75 #include "../lib/std/cbits/error.h"
77 #if __GLASGOW_HASKELL__ >= 303
78 import IO ( openFile, slurpFile )
83 import IO ( openFile, hFileSize, hClose, IOMode(..) )
87 #if __GLASGOW_HASKELL__ < 301
88 import IOBase ( IOError(..), IOErrorType(..) )
89 import IOHandle ( readHandle, writeHandle, filePtr )
90 import PackBase ( unpackCStringBA )
92 # if __GLASGOW_HASKELL__ <= 302
93 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
94 constructErrorAndFail )
95 import PrelHandle ( readHandle, writeHandle, filePtr )
97 import PrelPack ( unpackCStringBA )
100 #if __GLASGOW_HASKELL__ < 402
101 import Util ( bracket )
103 import Exception ( bracket )
108 import Char (isDigit)
121 instance Text StringBuffer where
122 showsPrec _ s = showString ""
126 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
127 hGetStringBuffer expand_tabs fname = do
128 (a, read) <- if expand_tabs
129 then slurpFileExpandTabs fname
132 let (A# a#) = a; (I# read#) = read
134 -- add sentinel '\NUL'
135 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
136 return (StringBuffer a# read# 0# 0#)
138 #if __GLASGOW_HASKELL__ < 303
140 openFile fname ReadMode >>= \ hndl ->
141 hFileSize hndl >>= \ len ->
142 let len_i = fromInteger len in
143 -- Allocate an array for system call to store its bytes into.
144 -- ToDo: make it robust
145 -- trace (show ((len_i::Int)+1)) $
146 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
147 if addr2Int# a# ==# 0# then
148 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
150 readHandle hndl >>= \ hndl_ ->
151 writeHandle hndl hndl_ >>
152 let ptr = filePtr hndl_ in
153 #if __GLASGOW_HASKELL__ <= 302
154 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
156 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
159 if read# ==# 0# then -- EOF or some other error
160 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
162 return (arr, I# read#)
165 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
166 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
168 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
173 -----------------------------------------------------------------------------
174 This very disturbing bit of code is used for expanding the tabs in a
175 file before we start parsing it. Expanding the tabs early makes the
176 lexer a lot simpler: we only have to record the beginning of the line
177 in order to be able to calculate the column offset of the current
180 We guess the size of the buffer required as 20% extra for
181 expanded tabs, and enlarge it if necessary.
184 #if __GLASGOW_HASKELL__ < 303
185 mayBlock fo thing = thing
187 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
188 writeCharOffAddr addr off c
189 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
193 #if __GLASGOW_HASKELL__ < 303
194 getErrType = _casm_ ``%r = ghc_errtype;''
196 getErrType = _ccall_ getErrType__
199 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
200 slurpFileExpandTabs fname = do
201 bracket (openFile fname ReadMode) (hClose)
203 do sz <- hFileSize handle
204 if sz > toInteger (maxBound::Int)
205 then IOERROR (userError "slurpFile: file too big")
207 let sz_i = fromInteger sz
208 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
209 chunk <- allocMem sz_i'
210 trySlurp handle sz_i' chunk
213 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
214 trySlurp handle sz_i chunk =
215 #if __GLASGOW_HASKELL__ >= 303
216 wantReadableHandle "hGetChar" handle $ \ handle_ ->
217 let fo = haFO__ handle_ in
219 readHandle handle >>= \ handle_ ->
220 let fo = filePtr handle_ in
227 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
228 slurpFile c off chunk chunk_sz max_off = slurp c off
231 slurp :: Int# -> Int# -> IO (Addr, Int)
232 slurp c off | off >=# max_off = do
233 let new_sz = chunk_sz *# 2#
234 chunk' <- reAllocMem chunk (I# new_sz)
235 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
237 intc <- mayBlock fo (_ccall_ fileGetc fo)
238 if intc == ((-1)::Int)
239 then do errtype <- getErrType
240 if errtype == (ERR_EOF :: Int)
241 then return (chunk, I# off)
242 else constructErrorAndFail "slurpFile"
243 else case chr intc of
245 ch -> do writeCharOffAddr chunk (I# off) ch
246 let c' | ch == '\n' = 0#
247 | otherwise = c +# 1#
250 tabIt :: Int# -> Int# -> IO (Addr, Int)
251 -- can't run out of buffer in here, because we reserved an
252 -- extra tAB_SIZE bytes at the end earlier.
254 writeCharOffAddr chunk (I# off) ' '
257 if c' `remInt#` tAB_SIZE ==# 0#
262 -- allow space for a full tab at the end of the buffer
263 -- (that's what the max_off thing is for),
264 -- and add 1 to allow room for the final sentinel \NUL at
265 -- the end of the file.
266 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
267 #if __GLASGOW_HASKELL__ < 404
268 writeHandle handle handle_
271 then constructErrorAndFail "slurpFile"
272 else return (chunk', rc+1 {-room for sentinel-})
275 reAllocMem :: Addr -> Int -> IO Addr
276 reAllocMem ptr sz = do
277 chunk <- _ccall_ realloc ptr sz
279 #if __GLASGOW_HASKELL__ < 303
280 then fail (userError "reAllocMem")
282 then fail "reAllocMem"
286 allocMem :: Int -> IO Addr
288 #if __GLASGOW_HASKELL__ < 303
289 chunk <- _ccall_ malloc sz
291 then fail (userError "allocMem")
294 chunk <- _ccall_ allocMemory__ sz
296 then constructErrorAndFail "allocMem"
304 currentChar :: StringBuffer -> Char
305 currentChar sb = case currentChar# sb of c -> C# c
307 lookAhead :: StringBuffer -> Int -> Char
308 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
310 indexSBuffer :: StringBuffer -> Int -> Char
311 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
313 currentChar# :: StringBuffer -> Char#
314 indexSBuffer# :: StringBuffer -> Int# -> Char#
315 lookAhead# :: StringBuffer -> Int# -> Char#
316 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
317 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
319 -- relative lookup, i.e, currentChar = lookAhead 0
320 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
322 currentIndex# :: StringBuffer -> Int#
323 currentIndex# (StringBuffer fo# _ _ c#) = c#
325 lexemeIndex :: StringBuffer -> Int#
326 lexemeIndex (StringBuffer fo# _ c# _) = c#
329 moving the start point of the current lexeme.
332 -- moving the end point of the current lexeme.
333 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
334 setCurrentPos# (StringBuffer fo l# s# c#) i# =
335 StringBuffer fo l# s# (c# +# i#)
337 -- augmenting the current lexeme by one.
338 incLexeme :: StringBuffer -> StringBuffer
339 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
341 decLexeme :: StringBuffer -> StringBuffer
342 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
346 -- move the start and end point of the buffer on by
350 stepOn :: StringBuffer -> StringBuffer
351 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
353 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
354 stepOnBy# (StringBuffer fo# l# s# c#) i# =
356 new_s# -> StringBuffer fo# l# new_s# new_s#
359 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
360 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
362 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
363 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
365 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
366 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
367 = StringBuffer fo l s# c#
369 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
371 stepOnUntil pred (StringBuffer fo l# s# c#) =
375 case indexCharOffAddr# fo c# of
376 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
377 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
378 | otherwise -> loop (c# +# 1#)
380 stepOverLexeme :: StringBuffer -> StringBuffer
381 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
383 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
384 expandWhile pred (StringBuffer fo l# s# c#) =
388 case indexCharOffAddr# fo c# of
389 ch# | pred (C# ch#) -> loop (c# +# 1#)
390 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
391 | otherwise -> StringBuffer fo l# s# c#
393 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
394 expandWhile# pred (StringBuffer fo l# s# c#) =
398 case indexCharOffAddr# fo c# of
399 ch# | pred ch# -> loop (c# +# 1#)
400 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
401 | otherwise -> StringBuffer fo l# s# c#
403 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
404 scanNumLit acc (StringBuffer fo l# s# c#) =
408 case indexCharOffAddr# fo c# of
409 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
410 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
411 | otherwise -> (acc,StringBuffer fo l# s# c#)
414 expandUntilMatch :: StringBuffer -> String -> StringBuffer
415 expandUntilMatch (StringBuffer fo l# s# c#) str =
418 loop c# [] = StringBuffer fo l# s# c#
420 | indexCharOffAddr# fo c# `eqChar#` x#
423 = loop (c# +# 1#) str
428 -- at or beyond end of buffer?
429 bufferExhausted :: StringBuffer -> Bool
430 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
432 emptyLexeme :: StringBuffer -> Bool
433 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
436 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
437 prefixMatch (StringBuffer fo l# s# c#) str =
440 loop c# [] = Just (StringBuffer fo l# s# c#)
442 | indexCharOffAddr# fo c# `eqChar#` x#
447 untilEndOfString# :: StringBuffer -> StringBuffer
448 untilEndOfString# (StringBuffer fo l# s# c#) =
451 getch# i# = indexCharOffAddr# fo i#
456 case getch# (c# -# 1#) of
458 -- looks like an escaped something or other to me,
459 -- better count the number of "\\"s that are immediately
460 -- preceeding to decide if the " is escaped.
464 '\\'# -> odd_slashes (not flg) (i# -# 1#)
467 if odd_slashes True (c# -# 2#) then
468 -- odd number, " is ecaped.
470 else -- a real end of string delimiter after all.
471 StringBuffer fo l# s# c#
472 _ -> StringBuffer fo l# s# c#
474 if c# >=# l# then -- hit sentinel, this doesn't look too good..
475 StringBuffer fo l# l# l#
481 untilChar# :: StringBuffer -> Char# -> StringBuffer
482 untilChar# (StringBuffer fo l# s# c#) x# =
486 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
487 = StringBuffer fo l# s# c#
492 lexemeToString :: StringBuffer -> String
493 lexemeToString (StringBuffer fo _ start_pos# current#) =
494 if start_pos# ==# current# then
497 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
499 lexemeToByteArray :: StringBuffer -> _ByteArray Int
500 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
501 if start_pos# ==# current# then
502 error "lexemeToByteArray"
504 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
506 lexemeToFastString :: StringBuffer -> FastString
507 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
508 if start_pos# ==# current# then
509 mkFastCharString2 (A# fo) (I# 0#)
511 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
514 Create a StringBuffer from the current lexeme, and add a sentinel
515 at the end. Know What You're Doing before taking this function
518 lexemeToBuffer :: StringBuffer -> StringBuffer
519 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
520 if start_pos# ==# current# then
521 StringBuffer fo 0# start_pos# current# -- an error, really.
523 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)