2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
75 #include "../lib/std/cbits/error.h"
77 #if __GLASGOW_HASKELL__ >= 303
78 import IO ( openFile, slurpFile )
83 import IO ( openFile, hFileSize, hClose, IOMode(..) )
87 #if __GLASGOW_HASKELL__ < 301
88 import IOBase ( IOError(..), IOErrorType(..) )
89 import IOHandle ( readHandle, writeHandle, filePtr )
90 import PackBase ( unpackCStringBA )
92 # if __GLASGOW_HASKELL__ <= 302
93 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
94 constructErrorAndFail )
95 import PrelHandle ( readHandle, writeHandle, filePtr )
97 import PrelPack ( unpackCStringBA )
100 #if __GLASGOW_HASKELL__ < 402
101 import Util ( bracket )
103 import Exception ( bracket )
108 import Char (isDigit)
121 instance Text StringBuffer where
122 showsPrec _ s = showString ""
126 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
127 hGetStringBuffer expand_tabs fname = do
128 (a, read) <- if expand_tabs
129 then slurpFileExpandTabs fname
132 let (A# a#) = a; (I# read#) = read
134 -- add sentinel '\NUL'
135 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
136 return (StringBuffer a# read# 0# 0#)
138 #if __GLASGOW_HASKELL__ < 303
140 openFile fname ReadMode >>= \ hndl ->
141 hFileSize hndl >>= \ len ->
142 let len_i = fromInteger len in
143 -- Allocate an array for system call to store its bytes into.
144 -- ToDo: make it robust
145 -- trace (show ((len_i::Int)+1)) $
146 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
147 if addr2Int# a# ==# 0# then
148 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
150 readHandle hndl >>= \ hndl_ ->
151 writeHandle hndl hndl_ >>
152 let ptr = filePtr hndl_ in
153 #if __GLASGOW_HASKELL__ <= 302
154 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
156 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
159 if read# ==# 0# then -- EOF or some other error
160 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
162 return (arr, I# read#)
165 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
166 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
168 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
173 -----------------------------------------------------------------------------
174 This very disturbing bit of code is used for expanding the tabs in a
175 file before we start parsing it. Expanding the tabs early makes the
176 lexer a lot simpler: we only have to record the beginning of the line
177 in order to be able to calculate the column offset of the current
180 We guess the size of the buffer required as 20% extra for
181 expanded tabs, and enlarge it if necessary.
184 #if __GLASGOW_HASKELL__ < 303
186 mayBlock fo thing = thing
188 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
189 writeCharOffAddr addr off c
190 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
194 #if __GLASGOW_HASKELL__ < 303
195 getErrType = _casm_ ``%r = ghc_errtype;''
197 getErrType = _ccall_ getErrType__
200 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
201 slurpFileExpandTabs fname = do
202 bracket (openFile fname ReadMode) (hClose)
204 do sz <- hFileSize handle
205 if sz > toInteger (maxBound::Int)
206 then ioError (userError "slurpFile: file too big")
208 let sz_i = fromInteger sz
209 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
210 chunk <- allocMem sz_i'
211 trySlurp handle sz_i' chunk
214 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
215 trySlurp handle sz_i chunk =
216 #if __GLASGOW_HASKELL__ >= 303
217 wantReadableHandle "hGetChar" handle $ \ handle_ ->
218 let fo = haFO__ handle_ in
220 readHandle handle >>= \ handle_ ->
221 let fo = filePtr handle_ in
228 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
229 slurpFile c off chunk chunk_sz max_off = slurp c off
232 slurp :: Int# -> Int# -> IO (Addr, Int)
233 slurp c off | off >=# max_off = do
234 let new_sz = chunk_sz *# 2#
235 chunk' <- reAllocMem chunk (I# new_sz)
236 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
238 intc <- mayBlock fo (_ccall_ fileGetc fo)
239 if intc == ((-1)::Int)
240 then do errtype <- getErrType
241 if errtype == (ERR_EOF :: Int)
242 then return (chunk, I# off)
243 else constructErrorAndFail "slurpFile"
244 else case chr intc of
246 ch -> do writeCharOffAddr chunk (I# off) ch
247 let c' | ch == '\n' = 0#
248 | otherwise = c +# 1#
251 tabIt :: Int# -> Int# -> IO (Addr, Int)
252 -- can't run out of buffer in here, because we reserved an
253 -- extra tAB_SIZE bytes at the end earlier.
255 writeCharOffAddr chunk (I# off) ' '
258 if c' `remInt#` tAB_SIZE ==# 0#
263 -- allow space for a full tab at the end of the buffer
264 -- (that's what the max_off thing is for),
265 -- and add 1 to allow room for the final sentinel \NUL at
266 -- the end of the file.
267 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
268 writeHandle handle handle_
270 then constructErrorAndFail "slurpFile"
271 else return (chunk', rc+1 {-room for sentinel-})
274 reAllocMem :: Addr -> Int -> IO Addr
275 reAllocMem ptr sz = do
276 chunk <- _ccall_ realloc ptr sz
278 #if __GLASGOW_HASKELL__ < 303
279 then fail (userError "reAllocMem")
281 then fail "reAllocMem"
285 allocMem :: Int -> IO Addr
287 #if __GLASGOW_HASKELL__ < 303
288 chunk <- _ccall_ malloc sz
290 then fail (userError "allocMem")
293 chunk <- _ccall_ allocMemory__ sz
295 then constructErrorAndFail "allocMem"
303 currentChar :: StringBuffer -> Char
304 currentChar sb = case currentChar# sb of c -> C# c
306 lookAhead :: StringBuffer -> Int -> Char
307 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
309 indexSBuffer :: StringBuffer -> Int -> Char
310 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
312 currentChar# :: StringBuffer -> Char#
313 indexSBuffer# :: StringBuffer -> Int# -> Char#
314 lookAhead# :: StringBuffer -> Int# -> Char#
315 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
316 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
318 -- relative lookup, i.e, currentChar = lookAhead 0
319 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
321 currentIndex# :: StringBuffer -> Int#
322 currentIndex# (StringBuffer fo# _ _ c#) = c#
324 lexemeIndex :: StringBuffer -> Int#
325 lexemeIndex (StringBuffer fo# _ c# _) = c#
328 moving the start point of the current lexeme.
331 -- moving the end point of the current lexeme.
332 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
333 setCurrentPos# (StringBuffer fo l# s# c#) i# =
334 StringBuffer fo l# s# (c# +# i#)
336 -- augmenting the current lexeme by one.
337 incLexeme :: StringBuffer -> StringBuffer
338 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
340 decLexeme :: StringBuffer -> StringBuffer
341 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
345 -- move the start and end point of the buffer on by
349 stepOn :: StringBuffer -> StringBuffer
350 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
352 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
353 stepOnBy# (StringBuffer fo# l# s# c#) i# =
355 new_s# -> StringBuffer fo# l# new_s# new_s#
358 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
359 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
361 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
362 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
364 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
365 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
366 = StringBuffer fo l s# c#
368 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
370 stepOnUntil pred (StringBuffer fo l# s# c#) =
374 case indexCharOffAddr# fo c# of
375 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
376 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
377 | otherwise -> loop (c# +# 1#)
379 stepOverLexeme :: StringBuffer -> StringBuffer
380 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
382 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
383 expandWhile pred (StringBuffer fo l# s# c#) =
387 case indexCharOffAddr# fo c# of
388 ch# | pred (C# ch#) -> loop (c# +# 1#)
389 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
390 | otherwise -> StringBuffer fo l# s# c#
392 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
393 expandWhile# pred (StringBuffer fo l# s# c#) =
397 case indexCharOffAddr# fo c# of
398 ch# | pred ch# -> loop (c# +# 1#)
399 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
400 | otherwise -> StringBuffer fo l# s# c#
402 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
403 scanNumLit acc (StringBuffer fo l# s# c#) =
407 case indexCharOffAddr# fo c# of
408 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
409 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
410 | otherwise -> (acc,StringBuffer fo l# s# c#)
413 expandUntilMatch :: StringBuffer -> String -> StringBuffer
414 expandUntilMatch (StringBuffer fo l# s# c#) str =
417 loop c# [] = StringBuffer fo l# s# c#
419 | indexCharOffAddr# fo c# `eqChar#` x#
422 = loop (c# +# 1#) str
427 -- at or beyond end of buffer?
428 bufferExhausted :: StringBuffer -> Bool
429 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
431 emptyLexeme :: StringBuffer -> Bool
432 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
435 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
436 prefixMatch (StringBuffer fo l# s# c#) str =
439 loop c# [] = Just (StringBuffer fo l# s# c#)
441 | indexCharOffAddr# fo c# `eqChar#` x#
446 untilEndOfString# :: StringBuffer -> StringBuffer
447 untilEndOfString# (StringBuffer fo l# s# c#) =
450 getch# i# = indexCharOffAddr# fo i#
455 case getch# (c# -# 1#) of
457 -- looks like an escaped something or other to me,
458 -- better count the number of "\\"s that are immediately
459 -- preceeding to decide if the " is escaped.
463 '\\'# -> odd_slashes (not flg) (i# -# 1#)
466 if odd_slashes True (c# -# 2#) then
467 -- odd number, " is ecaped.
469 else -- a real end of string delimiter after all.
470 StringBuffer fo l# s# c#
471 _ -> StringBuffer fo l# s# c#
473 if c# >=# l# then -- hit sentinel, this doesn't look too good..
474 StringBuffer fo l# l# l#
480 untilChar# :: StringBuffer -> Char# -> StringBuffer
481 untilChar# (StringBuffer fo l# s# c#) x# =
485 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
486 = StringBuffer fo l# s# c#
491 lexemeToString :: StringBuffer -> String
492 lexemeToString (StringBuffer fo _ start_pos# current#) =
493 if start_pos# ==# current# then
496 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
498 lexemeToByteArray :: StringBuffer -> _ByteArray Int
499 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
500 if start_pos# ==# current# then
501 error "lexemeToByteArray"
503 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
505 lexemeToFastString :: StringBuffer -> FastString
506 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
507 if start_pos# ==# current# then
508 mkFastCharString2 (A# fo) (I# 0#)
510 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
513 Create a StringBuffer from the current lexeme, and add a sentinel
514 at the end. Know What You're Doing before taking this function
517 lexemeToBuffer :: StringBuffer -> StringBuffer
518 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
519 if start_pos# ==# current# then
520 StringBuffer fo 0# start_pos# current# -- an error, really.
522 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)