2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
75 #include "../lib/std/cbits/error.h"
77 #if __GLASGOW_HASKELL__ >= 303
78 import IO ( openFile, slurpFile )
83 import IO ( openFile, hFileSize, hClose, IOMode(..) )
87 #if __GLASGOW_HASKELL__ < 301
88 import IOBase ( IOError(..), IOErrorType(..) )
89 import IOHandle ( readHandle, writeHandle, filePtr )
90 import PackBase ( unpackCStringBA )
92 # if __GLASGOW_HASKELL__ <= 302
93 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
94 constructErrorAndFail )
95 import PrelHandle ( readHandle, writeHandle, filePtr )
97 import PrelPack ( unpackCStringBA )
100 #if __GLASGOW_HASKELL__ < 402
101 import Util ( bracket )
103 import Exception ( bracket )
108 import Char (isDigit)
121 instance Text StringBuffer where
122 showsPrec _ s = showString ""
126 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
127 hGetStringBuffer expand_tabs fname = do
128 (a, read) <- if expand_tabs
129 then slurpFileExpandTabs fname
132 let (A# a#) = a; (I# read#) = read
134 -- add sentinel '\NUL'
135 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
136 return (StringBuffer a# read# 0# 0#)
138 #if __GLASGOW_HASKELL__ < 303
140 openFile fname ReadMode >>= \ hndl ->
141 hFileSize hndl >>= \ len ->
142 let len_i = fromInteger len in
143 -- Allocate an array for system call to store its bytes into.
144 -- ToDo: make it robust
145 -- trace (show ((len_i::Int)+1)) $
146 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
147 if addr2Int# a# ==# 0# then
148 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
150 readHandle hndl >>= \ hndl_ ->
151 writeHandle hndl hndl_ >>
152 let ptr = filePtr hndl_ in
153 #if __GLASGOW_HASKELL__ <= 302
154 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
156 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
159 if read# ==# 0# then -- EOF or some other error
160 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
162 return (arr, I# read#)
165 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
166 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
168 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
173 -----------------------------------------------------------------------------
174 This very disturbing bit of code is used for expanding the tabs in a
175 file before we start parsing it. Expanding the tabs early makes the
176 lexer a lot simpler: we only have to record the beginning of the line
177 in order to be able to calculate the column offset of the current
180 We guess the size of the buffer required as 20% extra for
181 expanded tabs, and enlarge it if necessary.
184 #if __GLASGOW_HASKELL__ < 303
186 mayBlock fo thing = thing
188 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
189 writeCharOffAddr addr off c
190 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
194 #if __GLASGOW_HASKELL__ < 303
195 getErrType = _casm_ ``%r = ghc_errtype;''
197 getErrType = _ccall_ getErrType__
200 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
201 slurpFileExpandTabs fname = do
202 bracket (openFile fname ReadMode) (hClose)
204 do sz <- hFileSize handle
205 if sz > toInteger (maxBound::Int)
206 then ioError (userError "slurpFile: file too big")
208 let sz_i = fromInteger sz
209 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
210 chunk <- allocMem sz_i'
211 trySlurp handle sz_i' chunk
214 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
215 trySlurp handle sz_i chunk =
216 #if __GLASGOW_HASKELL__ >= 303
217 wantReadableHandle "hGetChar" handle $ \ handle_ ->
218 let fo = haFO__ handle_ in
220 readHandle handle >>= \ handle_ ->
221 let fo = filePtr handle_ in
228 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
229 slurpFile c off chunk chunk_sz max_off = slurp c off
232 slurp :: Int# -> Int# -> IO (Addr, Int)
233 slurp c off | off >=# max_off = do
234 let new_sz = chunk_sz *# 2#
235 chunk' <- reAllocMem chunk (I# new_sz)
236 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
238 intc <- mayBlock fo (_ccall_ fileGetc fo)
239 if intc == ((-1)::Int)
240 then do errtype <- getErrType
241 if errtype == (ERR_EOF :: Int)
242 then return (chunk, I# off)
243 else constructErrorAndFail "slurpFile"
244 else case chr intc of
246 ch -> do writeCharOffAddr chunk (I# off) ch
247 let c' | ch == '\n' = 0#
248 | otherwise = c +# 1#
251 tabIt :: Int# -> Int# -> IO (Addr, Int)
252 -- can't run out of buffer in here, because we reserved an
253 -- extra tAB_SIZE bytes at the end earlier.
255 writeCharOffAddr chunk (I# off) ' '
258 if c' `remInt#` tAB_SIZE ==# 0#
263 -- allow space for a full tab at the end of the buffer
264 -- (that's what the max_off thing is for),
265 -- and add 1 to allow room for the final sentinel \NUL at
266 -- the end of the file.
267 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
268 #if __GLASGOW_HASKELL__ < 404
269 writeHandle handle handle_
272 then constructErrorAndFail "slurpFile"
273 else return (chunk', rc+1 {-room for sentinel-})
276 reAllocMem :: Addr -> Int -> IO Addr
277 reAllocMem ptr sz = do
278 chunk <- _ccall_ realloc ptr sz
280 #if __GLASGOW_HASKELL__ < 303
281 then fail (userError "reAllocMem")
283 then fail "reAllocMem"
287 allocMem :: Int -> IO Addr
289 #if __GLASGOW_HASKELL__ < 303
290 chunk <- _ccall_ malloc sz
292 then fail (userError "allocMem")
295 chunk <- _ccall_ allocMemory__ sz
297 then constructErrorAndFail "allocMem"
305 currentChar :: StringBuffer -> Char
306 currentChar sb = case currentChar# sb of c -> C# c
308 lookAhead :: StringBuffer -> Int -> Char
309 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
311 indexSBuffer :: StringBuffer -> Int -> Char
312 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
314 currentChar# :: StringBuffer -> Char#
315 indexSBuffer# :: StringBuffer -> Int# -> Char#
316 lookAhead# :: StringBuffer -> Int# -> Char#
317 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
318 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
320 -- relative lookup, i.e, currentChar = lookAhead 0
321 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
323 currentIndex# :: StringBuffer -> Int#
324 currentIndex# (StringBuffer fo# _ _ c#) = c#
326 lexemeIndex :: StringBuffer -> Int#
327 lexemeIndex (StringBuffer fo# _ c# _) = c#
330 moving the start point of the current lexeme.
333 -- moving the end point of the current lexeme.
334 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
335 setCurrentPos# (StringBuffer fo l# s# c#) i# =
336 StringBuffer fo l# s# (c# +# i#)
338 -- augmenting the current lexeme by one.
339 incLexeme :: StringBuffer -> StringBuffer
340 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
342 decLexeme :: StringBuffer -> StringBuffer
343 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
347 -- move the start and end point of the buffer on by
351 stepOn :: StringBuffer -> StringBuffer
352 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
354 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
355 stepOnBy# (StringBuffer fo# l# s# c#) i# =
357 new_s# -> StringBuffer fo# l# new_s# new_s#
360 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
361 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
363 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
364 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
366 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
367 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
368 = StringBuffer fo l s# c#
370 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
372 stepOnUntil pred (StringBuffer fo l# s# c#) =
376 case indexCharOffAddr# fo c# of
377 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
378 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
379 | otherwise -> loop (c# +# 1#)
381 stepOverLexeme :: StringBuffer -> StringBuffer
382 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
384 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
385 expandWhile pred (StringBuffer fo l# s# c#) =
389 case indexCharOffAddr# fo c# of
390 ch# | pred (C# ch#) -> loop (c# +# 1#)
391 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
392 | otherwise -> StringBuffer fo l# s# c#
394 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
395 expandWhile# pred (StringBuffer fo l# s# c#) =
399 case indexCharOffAddr# fo c# of
400 ch# | pred ch# -> loop (c# +# 1#)
401 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
402 | otherwise -> StringBuffer fo l# s# c#
404 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
405 scanNumLit acc (StringBuffer fo l# s# c#) =
409 case indexCharOffAddr# fo c# of
410 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
411 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
412 | otherwise -> (acc,StringBuffer fo l# s# c#)
415 expandUntilMatch :: StringBuffer -> String -> StringBuffer
416 expandUntilMatch (StringBuffer fo l# s# c#) str =
419 loop c# [] = StringBuffer fo l# s# c#
421 | indexCharOffAddr# fo c# `eqChar#` x#
424 = loop (c# +# 1#) str
429 -- at or beyond end of buffer?
430 bufferExhausted :: StringBuffer -> Bool
431 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
433 emptyLexeme :: StringBuffer -> Bool
434 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
437 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
438 prefixMatch (StringBuffer fo l# s# c#) str =
441 loop c# [] = Just (StringBuffer fo l# s# c#)
443 | indexCharOffAddr# fo c# `eqChar#` x#
448 untilEndOfString# :: StringBuffer -> StringBuffer
449 untilEndOfString# (StringBuffer fo l# s# c#) =
452 getch# i# = indexCharOffAddr# fo i#
457 case getch# (c# -# 1#) of
459 -- looks like an escaped something or other to me,
460 -- better count the number of "\\"s that are immediately
461 -- preceeding to decide if the " is escaped.
465 '\\'# -> odd_slashes (not flg) (i# -# 1#)
468 if odd_slashes True (c# -# 2#) then
469 -- odd number, " is ecaped.
471 else -- a real end of string delimiter after all.
472 StringBuffer fo l# s# c#
473 _ -> StringBuffer fo l# s# c#
475 if c# >=# l# then -- hit sentinel, this doesn't look too good..
476 StringBuffer fo l# l# l#
482 untilChar# :: StringBuffer -> Char# -> StringBuffer
483 untilChar# (StringBuffer fo l# s# c#) x# =
487 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
488 = StringBuffer fo l# s# c#
493 lexemeToString :: StringBuffer -> String
494 lexemeToString (StringBuffer fo _ start_pos# current#) =
495 if start_pos# ==# current# then
498 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
500 lexemeToByteArray :: StringBuffer -> _ByteArray Int
501 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
502 if start_pos# ==# current# then
503 error "lexemeToByteArray"
505 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
507 lexemeToFastString :: StringBuffer -> FastString
508 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
509 if start_pos# ==# current# then
510 mkFastCharString2 (A# fo) (I# 0#)
512 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
515 Create a StringBuffer from the current lexeme, and add a sentinel
516 at the end. Know What You're Doing before taking this function
519 lexemeToBuffer :: StringBuffer -> StringBuffer
520 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
521 if start_pos# ==# current# then
522 StringBuffer fo 0# start_pos# current# -- an error, really.
524 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)