2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
41 stepOverLexeme, -- :: StringBuffer -> StringBuffer
42 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
43 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
44 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
45 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
46 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
47 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
48 -- at or beyond end of buffer?
49 bufferExhausted, -- :: StringBuffer -> Bool
50 emptyLexeme, -- :: StringBuffer -> Bool
53 prefixMatch, -- :: StringBuffer -> String -> Bool
54 untilEndOfString#, -- :: StringBuffer -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import PrelAddr ( Addr(..) )
75 #include "../lib/std/cbits/stgerror.h"
77 #if __GLASGOW_HASKELL__ >= 303
79 #if __GLASGOW_HASKELL__ < 407
80 , slurpFile -- comes from PrelHandle or IOExts now
87 import IO ( openFile, hFileSize, hClose, IOMode(..) )
91 #if __GLASGOW_HASKELL__ < 301
92 import IOBase ( Handle, IOError(..), IOErrorType(..),
93 constructErrorAndFail )
94 import IOHandle ( readHandle, writeHandle, filePtr )
95 import PackBase ( unpackCStringBA )
97 # if __GLASGOW_HASKELL__ <= 302
98 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
99 constructErrorAndFail )
100 import PrelHandle ( readHandle, writeHandle, filePtr )
102 import PrelPack ( unpackCStringBA )
105 #if __GLASGOW_HASKELL__ < 402
106 import Util ( bracket )
108 import Exception ( bracket )
113 import Char (isDigit)
126 instance Show StringBuffer where
127 showsPrec _ s = showString ""
131 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
132 hGetStringBuffer expand_tabs fname = do
133 (a, read) <- if expand_tabs
134 then slurpFileExpandTabs fname
137 let (A# a#) = a; (I# read#) = read
139 -- add sentinel '\NUL'
140 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
141 return (StringBuffer a# read# 0# 0#)
143 #if __GLASGOW_HASKELL__ < 303
145 openFile fname ReadMode >>= \ hndl ->
146 hFileSize hndl >>= \ len ->
147 let len_i = fromInteger len in
148 -- Allocate an array for system call to store its bytes into.
149 -- ToDo: make it robust
150 -- trace (show ((len_i::Int)+1)) $
151 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
152 if addr2Int# a# ==# 0# then
153 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
155 readHandle hndl >>= \ hndl_ ->
156 writeHandle hndl hndl_ >>
157 let ptr = filePtr hndl_ in
158 #if __GLASGOW_HASKELL__ <= 302
159 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
161 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
164 if read# ==# 0# then -- EOF or some other error
165 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
167 return (arr, I# read#)
170 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
171 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
173 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
178 -----------------------------------------------------------------------------
179 This very disturbing bit of code is used for expanding the tabs in a
180 file before we start parsing it. Expanding the tabs early makes the
181 lexer a lot simpler: we only have to record the beginning of the line
182 in order to be able to calculate the column offset of the current
185 We guess the size of the buffer required as 20% extra for
186 expanded tabs, and enlarge it if necessary.
189 #if __GLASGOW_HASKELL__ < 303
190 mayBlock fo thing = thing
192 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
193 writeCharOffAddr addr off c
194 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
198 #if __GLASGOW_HASKELL__ < 303
199 getErrType = _casm_ ``%r = ghc_errtype;''
201 getErrType = _ccall_ getErrType__
204 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
205 slurpFileExpandTabs fname = do
206 bracket (openFile fname ReadMode) (hClose)
208 do sz <- hFileSize handle
209 if sz > toInteger (maxBound::Int)
210 then IOERROR (userError "slurpFile: file too big")
212 let sz_i = fromInteger sz
213 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
214 chunk <- allocMem sz_i'
215 trySlurp handle sz_i' chunk
218 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
219 trySlurp handle sz_i chunk =
220 #if __GLASGOW_HASKELL__ == 303
221 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
222 let fo = haFO__ handle_ in
223 #elif __GLASGOW_HASKELL__ > 303
224 wantReadableHandle "hGetChar" handle $ \ handle_ ->
225 let fo = haFO__ handle_ in
227 readHandle handle >>= \ handle_ ->
228 let fo = filePtr handle_ in
235 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
236 slurpFile c off chunk chunk_sz max_off = slurp c off
239 slurp :: Int# -> Int# -> IO (Addr, Int)
240 slurp c off | off >=# max_off = do
241 let new_sz = chunk_sz *# 2#
242 chunk' <- reAllocMem chunk (I# new_sz)
243 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
245 intc <- mayBlock fo (_ccall_ fileGetc fo)
246 if intc == ((-1)::Int)
247 then do errtype <- getErrType
248 if errtype == (ERR_EOF :: Int)
249 then return (chunk, I# off)
250 else constructErrorAndFail "slurpFile"
251 else case chr intc of
253 ch -> do writeCharOffAddr chunk (I# off) ch
254 let c' | ch == '\n' = 0#
255 | otherwise = c +# 1#
258 tabIt :: Int# -> Int# -> IO (Addr, Int)
259 -- can't run out of buffer in here, because we reserved an
260 -- extra tAB_SIZE bytes at the end earlier.
262 writeCharOffAddr chunk (I# off) ' '
265 if c' `remInt#` tAB_SIZE ==# 0#
270 -- allow space for a full tab at the end of the buffer
271 -- (that's what the max_off thing is for),
272 -- and add 1 to allow room for the final sentinel \NUL at
273 -- the end of the file.
274 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
275 #if __GLASGOW_HASKELL__ < 404
276 writeHandle handle handle_
279 then constructErrorAndFail "slurpFile"
280 else return (chunk', rc+1 {-room for sentinel-})
283 reAllocMem :: Addr -> Int -> IO Addr
284 reAllocMem ptr sz = do
285 chunk <- _ccall_ realloc ptr sz
287 #if __GLASGOW_HASKELL__ >= 400
288 then fail "reAllocMem"
290 then fail (userError "reAllocMem")
294 allocMem :: Int -> IO Addr
296 #if __GLASGOW_HASKELL__ < 303
297 chunk <- _ccall_ malloc sz
299 then fail (userError "allocMem")
302 chunk <- _ccall_ allocMemory__ sz
304 then constructErrorAndFail "allocMem"
312 currentChar :: StringBuffer -> Char
313 currentChar sb = case currentChar# sb of c -> C# c
315 lookAhead :: StringBuffer -> Int -> Char
316 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
318 indexSBuffer :: StringBuffer -> Int -> Char
319 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
321 currentChar# :: StringBuffer -> Char#
322 indexSBuffer# :: StringBuffer -> Int# -> Char#
323 lookAhead# :: StringBuffer -> Int# -> Char#
324 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
325 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
327 -- relative lookup, i.e, currentChar = lookAhead 0
328 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
330 currentIndex# :: StringBuffer -> Int#
331 currentIndex# (StringBuffer fo# _ _ c#) = c#
333 lexemeIndex :: StringBuffer -> Int#
334 lexemeIndex (StringBuffer fo# _ c# _) = c#
337 moving the start point of the current lexeme.
340 -- moving the end point of the current lexeme.
341 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
342 setCurrentPos# (StringBuffer fo l# s# c#) i# =
343 StringBuffer fo l# s# (c# +# i#)
345 -- augmenting the current lexeme by one.
346 incLexeme :: StringBuffer -> StringBuffer
347 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
349 decLexeme :: StringBuffer -> StringBuffer
350 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
354 -- move the start and end point of the buffer on by
358 stepOn :: StringBuffer -> StringBuffer
359 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
361 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
362 stepOnBy# (StringBuffer fo# l# s# c#) i# =
364 new_s# -> StringBuffer fo# l# new_s# new_s#
367 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
368 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
370 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
371 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
373 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
374 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
375 = StringBuffer fo l s# c#
377 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
379 stepOnUntil pred (StringBuffer fo l# s# c#) =
383 case indexCharOffAddr# fo c# of
384 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
385 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
386 | otherwise -> loop (c# +# 1#)
388 stepOverLexeme :: StringBuffer -> StringBuffer
389 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
391 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
392 expandWhile pred (StringBuffer fo l# s# c#) =
396 case indexCharOffAddr# fo c# of
397 ch# | pred (C# ch#) -> loop (c# +# 1#)
398 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
399 | otherwise -> StringBuffer fo l# s# c#
401 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
402 expandWhile# pred (StringBuffer fo l# s# c#) =
406 case indexCharOffAddr# fo c# of
407 ch# | pred ch# -> loop (c# +# 1#)
408 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
409 | otherwise -> StringBuffer fo l# s# c#
411 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
412 scanNumLit acc (StringBuffer fo l# s# c#) =
416 case indexCharOffAddr# fo c# of
417 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
418 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
419 | otherwise -> (acc,StringBuffer fo l# s# c#)
422 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
423 expandUntilMatch (StringBuffer fo l# s# c#) str =
426 loop c# [] = Just (StringBuffer fo l# s# c#)
427 loop c# ((C# x#):xs) =
428 case indexCharOffAddr# fo c# of
429 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
430 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
431 | otherwise -> loop (c# +# 1#) str
436 -- at or beyond end of buffer?
437 bufferExhausted :: StringBuffer -> Bool
438 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
440 emptyLexeme :: StringBuffer -> Bool
441 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
444 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
445 prefixMatch (StringBuffer fo l# s# c#) str =
448 loop c# [] = Just (StringBuffer fo l# s# c#)
450 | indexCharOffAddr# fo c# `eqChar#` x#
455 untilEndOfString# :: StringBuffer -> StringBuffer
456 untilEndOfString# (StringBuffer fo l# s# c#) =
459 getch# i# = indexCharOffAddr# fo i#
464 case getch# (c# -# 1#) of
466 -- looks like an escaped something or other to me,
467 -- better count the number of "\\"s that are immediately
468 -- preceeding to decide if the " is escaped.
472 '\\'# -> odd_slashes (not flg) (i# -# 1#)
475 if odd_slashes True (c# -# 2#) then
476 -- odd number, " is ecaped.
478 else -- a real end of string delimiter after all.
479 StringBuffer fo l# s# c#
480 _ -> StringBuffer fo l# s# c#
482 if c# >=# l# then -- hit sentinel, this doesn't look too good..
483 StringBuffer fo l# l# l#
489 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
490 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
494 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
495 = StringBuffer fo l# c# c#
500 lexemeToString :: StringBuffer -> String
501 lexemeToString (StringBuffer fo _ start_pos# current#) =
502 if start_pos# ==# current# then
505 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
507 lexemeToByteArray :: StringBuffer -> ByteArray Int
508 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
509 if start_pos# ==# current# then
510 error "lexemeToByteArray"
512 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
514 lexemeToFastString :: StringBuffer -> FastString
515 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
516 if start_pos# ==# current# then
517 mkFastCharString2 (A# fo) (I# 0#)
519 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
522 Create a StringBuffer from the current lexeme, and add a sentinel
523 at the end. Know What You're Doing before taking this function
526 lexemeToBuffer :: StringBuffer -> StringBuffer
527 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
528 if start_pos# ==# current# then
529 StringBuffer fo 0# start_pos# current# -- an error, really.
531 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)