2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
41 stepOverLexeme, -- :: StringBuffer -> StringBuffer
42 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
43 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
44 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
45 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
46 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
47 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
48 -- at or beyond end of buffer?
49 bufferExhausted, -- :: StringBuffer -> Bool
50 emptyLexeme, -- :: StringBuffer -> Bool
53 prefixMatch, -- :: StringBuffer -> String -> Bool
54 untilEndOfString#, -- :: StringBuffer -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import PrelAddr ( Addr(..) )
75 #include "../lib/std/cbits/stgerror.h"
77 #if __GLASGOW_HASKELL__ >= 303
79 #if __GLASGOW_HASKELL__ < 407
80 , slurpFile -- comes from PrelHandle or IOExts now
87 import IO ( openFile, hFileSize, hClose, IOMode(..) )
91 #if __GLASGOW_HASKELL__ < 301
92 import IOBase ( Handle, IOError(..), IOErrorType(..),
93 constructErrorAndFail )
94 import IOHandle ( readHandle, writeHandle, filePtr )
95 import PackBase ( unpackCStringBA )
97 # if __GLASGOW_HASKELL__ <= 302
98 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
99 constructErrorAndFail )
100 import PrelHandle ( readHandle, writeHandle, filePtr )
102 import PrelPack ( unpackCStringBA )
105 #if __GLASGOW_HASKELL__ < 402
106 import Util ( bracket )
108 import Exception ( bracket )
113 import Char (isDigit)
126 instance Show StringBuffer where
127 showsPrec _ s = showString ""
131 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
132 hGetStringBuffer expand_tabs fname = do
133 (a, read) <- if expand_tabs
134 then slurpFileExpandTabs fname
137 let (A# a#) = a; (I# read#) = read
139 -- add sentinel '\NUL'
140 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
141 return (StringBuffer a# read# 0# 0#)
143 #if __GLASGOW_HASKELL__ < 303
145 openFile fname ReadMode >>= \ hndl ->
146 hFileSize hndl >>= \ len ->
147 let len_i = fromInteger len in
148 -- Allocate an array for system call to store its bytes into.
149 -- ToDo: make it robust
150 -- trace (show ((len_i::Int)+1)) $
151 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
152 if addr2Int# a# ==# 0# then
153 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
155 readHandle hndl >>= \ hndl_ ->
156 writeHandle hndl hndl_ >>
157 let ptr = filePtr hndl_ in
158 #if __GLASGOW_HASKELL__ <= 302
159 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
161 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
164 if read# ==# 0# then -- EOF or some other error
165 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
167 return (arr, I# read#)
170 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
171 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
173 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
178 -----------------------------------------------------------------------------
179 This very disturbing bit of code is used for expanding the tabs in a
180 file before we start parsing it. Expanding the tabs early makes the
181 lexer a lot simpler: we only have to record the beginning of the line
182 in order to be able to calculate the column offset of the current
185 We guess the size of the buffer required as 20% extra for
186 expanded tabs, and enlarge it if necessary.
189 #if __GLASGOW_HASKELL__ < 303
190 mayBlock fo thing = thing
192 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
193 writeCharOffAddr addr off c
194 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
198 #if __GLASGOW_HASKELL__ < 303
199 getErrType = _casm_ ``%r = ghc_errtype;''
201 getErrType = _ccall_ getErrType__
204 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
205 slurpFileExpandTabs fname = do
206 bracket (openFile fname ReadMode) (hClose)
208 do sz <- hFileSize handle
209 if sz > toInteger (maxBound::Int)
210 then IOERROR (userError "slurpFile: file too big")
212 let sz_i = fromInteger sz
213 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
214 chunk <- allocMem sz_i'
215 trySlurp handle sz_i' chunk
218 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
219 trySlurp handle sz_i chunk =
220 #if __GLASGOW_HASKELL__ == 303
221 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
222 let fo = haFO__ handle_ in
223 #elif __GLASGOW_HASKELL__ > 303
224 wantReadableHandle "hGetChar" handle $ \ handle_ ->
225 let fo = haFO__ handle_ in
227 readHandle handle >>= \ handle_ ->
228 let fo = filePtr handle_ in
235 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
236 slurpFile c off chunk chunk_sz max_off = slurp c off
239 slurp :: Int# -> Int# -> IO (Addr, Int)
240 slurp c off | off >=# max_off = do
241 let new_sz = chunk_sz *# 2#
242 chunk' <- reAllocMem chunk (I# new_sz)
243 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
245 intc <- mayBlock fo (_ccall_ fileGetc fo)
246 if intc == ((-1)::Int)
247 then do errtype <- getErrType
248 if errtype == (ERR_EOF :: Int)
249 then return (chunk, I# off)
250 else constructErrorAndFail "slurpFile"
251 else case chr intc of
253 ch -> do writeCharOffAddr chunk (I# off) ch
254 let c' | ch == '\n' = 0#
255 | otherwise = c +# 1#
258 tabIt :: Int# -> Int# -> IO (Addr, Int)
259 -- can't run out of buffer in here, because we reserved an
260 -- extra tAB_SIZE bytes at the end earlier.
262 writeCharOffAddr chunk (I# off) ' '
265 if c' `remInt#` tAB_SIZE ==# 0#
270 -- allow space for a full tab at the end of the buffer
271 -- (that's what the max_off thing is for),
272 -- and add 1 to allow room for the final sentinel \NUL at
273 -- the end of the file.
274 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
275 #if __GLASGOW_HASKELL__ < 404
276 writeHandle handle handle_
279 then constructErrorAndFail "slurpFile"
280 else return (chunk', rc+1 {-room for sentinel-})
283 reAllocMem :: Addr -> Int -> IO Addr
284 reAllocMem ptr sz = do
285 chunk <- _ccall_ realloc ptr sz
287 #if __GLASGOW_HASKELL__ >= 400
288 then fail "reAllocMem"
290 then fail (userError "reAllocMem")
294 allocMem :: Int -> IO Addr
296 chunk <- _ccall_ malloc sz
297 #if __GLASGOW_HASKELL__ < 303
299 then fail (userError "allocMem")
303 then constructErrorAndFail "allocMem"
311 currentChar :: StringBuffer -> Char
312 currentChar sb = case currentChar# sb of c -> C# c
314 lookAhead :: StringBuffer -> Int -> Char
315 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
317 indexSBuffer :: StringBuffer -> Int -> Char
318 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
320 currentChar# :: StringBuffer -> Char#
321 indexSBuffer# :: StringBuffer -> Int# -> Char#
322 lookAhead# :: StringBuffer -> Int# -> Char#
323 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
324 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
326 -- relative lookup, i.e, currentChar = lookAhead 0
327 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
329 currentIndex# :: StringBuffer -> Int#
330 currentIndex# (StringBuffer fo# _ _ c#) = c#
332 lexemeIndex :: StringBuffer -> Int#
333 lexemeIndex (StringBuffer fo# _ c# _) = c#
336 moving the start point of the current lexeme.
339 -- moving the end point of the current lexeme.
340 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
341 setCurrentPos# (StringBuffer fo l# s# c#) i# =
342 StringBuffer fo l# s# (c# +# i#)
344 -- augmenting the current lexeme by one.
345 incLexeme :: StringBuffer -> StringBuffer
346 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
348 decLexeme :: StringBuffer -> StringBuffer
349 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
353 -- move the start and end point of the buffer on by
357 stepOn :: StringBuffer -> StringBuffer
358 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
360 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
361 stepOnBy# (StringBuffer fo# l# s# c#) i# =
363 new_s# -> StringBuffer fo# l# new_s# new_s#
366 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
367 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
369 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
370 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
372 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
373 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
374 = StringBuffer fo l s# c#
376 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
378 stepOnUntil pred (StringBuffer fo l# s# c#) =
382 case indexCharOffAddr# fo c# of
383 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
384 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
385 | otherwise -> loop (c# +# 1#)
387 stepOverLexeme :: StringBuffer -> StringBuffer
388 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
390 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
391 expandWhile pred (StringBuffer fo l# s# c#) =
395 case indexCharOffAddr# fo c# of
396 ch# | pred (C# ch#) -> loop (c# +# 1#)
397 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
398 | otherwise -> StringBuffer fo l# s# c#
400 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
401 expandWhile# pred (StringBuffer fo l# s# c#) =
405 case indexCharOffAddr# fo c# of
406 ch# | pred ch# -> loop (c# +# 1#)
407 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
408 | otherwise -> StringBuffer fo l# s# c#
410 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
411 scanNumLit acc (StringBuffer fo l# s# c#) =
415 case indexCharOffAddr# fo c# of
416 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
417 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
418 | otherwise -> (acc,StringBuffer fo l# s# c#)
421 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
422 expandUntilMatch (StringBuffer fo l# s# c#) str =
425 loop c# [] = Just (StringBuffer fo l# s# c#)
426 loop c# ((C# x#):xs) =
427 case indexCharOffAddr# fo c# of
428 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
429 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
430 | otherwise -> loop (c# +# 1#) str
435 -- at or beyond end of buffer?
436 bufferExhausted :: StringBuffer -> Bool
437 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
439 emptyLexeme :: StringBuffer -> Bool
440 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
443 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
444 prefixMatch (StringBuffer fo l# s# c#) str =
447 loop c# [] = Just (StringBuffer fo l# s# c#)
449 | indexCharOffAddr# fo c# `eqChar#` x#
454 untilEndOfString# :: StringBuffer -> StringBuffer
455 untilEndOfString# (StringBuffer fo l# s# c#) =
458 getch# i# = indexCharOffAddr# fo i#
463 case getch# (c# -# 1#) of
465 -- looks like an escaped something or other to me,
466 -- better count the number of "\\"s that are immediately
467 -- preceeding to decide if the " is escaped.
471 '\\'# -> odd_slashes (not flg) (i# -# 1#)
474 if odd_slashes True (c# -# 2#) then
475 -- odd number, " is ecaped.
477 else -- a real end of string delimiter after all.
478 StringBuffer fo l# s# c#
479 _ -> StringBuffer fo l# s# c#
481 if c# >=# l# then -- hit sentinel, this doesn't look too good..
482 StringBuffer fo l# l# l#
488 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
489 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
493 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
494 = StringBuffer fo l# c# c#
499 lexemeToString :: StringBuffer -> String
500 lexemeToString (StringBuffer fo _ start_pos# current#) =
501 if start_pos# ==# current# then
504 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
506 lexemeToByteArray :: StringBuffer -> ByteArray Int
507 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
508 if start_pos# ==# current# then
509 error "lexemeToByteArray"
511 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
513 lexemeToFastString :: StringBuffer -> FastString
514 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
515 if start_pos# ==# current# then
516 mkFastCharString2 (A# fo) (I# 0#)
518 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
521 Create a StringBuffer from the current lexeme, and add a sentinel
522 at the end. Know What You're Doing before taking this function
525 lexemeToBuffer :: StringBuffer -> StringBuffer
526 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
527 if start_pos# ==# current# then
528 StringBuffer fo 0# start_pos# current# -- an error, really.
530 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)