2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
75 #include "../lib/std/cbits/error.h"
77 #if __GLASGOW_HASKELL__ >= 303
78 import IO ( openFile, slurpFile )
83 import IO ( openFile, hFileSize, hClose, IOMode(..) )
87 #if __GLASGOW_HASKELL__ < 301
88 import IOBase ( IOError(..), IOErrorType(..) )
89 import IOHandle ( readHandle, writeHandle, filePtr )
90 import PackBase ( unpackCStringBA )
92 # if __GLASGOW_HASKELL__ <= 302
93 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
94 constructErrorAndFail )
95 import PrelHandle ( readHandle, writeHandle, filePtr )
97 import PrelPack ( unpackCStringBA )
100 #if __GLASGOW_HASKELL__ < 402
101 import Util ( bracket )
103 import Exception ( bracket )
108 import Char (isDigit)
121 instance Text StringBuffer where
122 showsPrec _ s = showString ""
126 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
127 hGetStringBuffer expand_tabs fname = do
128 (a, read) <- if expand_tabs
129 then slurpFileExpandTabs fname
132 let (A# a#) = a; (I# read#) = read
134 -- add sentinel '\NUL'
135 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
136 return (StringBuffer a# read# 0# 0#)
138 #if __GLASGOW_HASKELL__ < 303
140 openFile fname ReadMode >>= \ hndl ->
141 hFileSize hndl >>= \ len ->
142 let len_i = fromInteger len in
143 -- Allocate an array for system call to store its bytes into.
144 -- ToDo: make it robust
145 -- trace (show ((len_i::Int)+1)) $
146 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
147 if addr2Int# a# ==# 0# then
148 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
150 readHandle hndl >>= \ hndl_ ->
151 writeHandle hndl hndl_ >>
152 let ptr = filePtr hndl_ in
153 #if __GLASGOW_HASKELL__ <= 302
154 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
156 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
159 if read# ==# 0# then -- EOF or some other error
160 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
162 return (arr, I# read#)
165 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
166 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
168 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
173 -----------------------------------------------------------------------------
174 This very disturbing bit of code is used for expanding the tabs in a
175 file before we start parsing it. Expanding the tabs early makes the
176 lexer a lot simpler: we only have to record the beginning of the line
177 in order to be able to calculate the column offset of the current
180 We guess the size of the buffer required as 20% extra for
181 expanded tabs, and enlarge it if necessary.
184 #if __GLASGOW_HASKELL__ < 303
185 mayBlock fo thing = thing
187 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
188 writeCharOffAddr addr off c
189 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
193 #if __GLASGOW_HASKELL__ < 303
194 getErrType = _casm_ ``%r = ghc_errtype;''
196 getErrType = _ccall_ getErrType__
199 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
200 slurpFileExpandTabs fname = do
201 bracket (openFile fname ReadMode) (hClose)
203 do sz <- hFileSize handle
204 if sz > toInteger (maxBound::Int)
205 then IOERROR (userError "slurpFile: file too big")
207 let sz_i = fromInteger sz
208 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
209 chunk <- allocMem sz_i'
210 trySlurp handle sz_i' chunk
213 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
214 trySlurp handle sz_i chunk =
215 #if __GLASGOW_HASKELL__ == 303
216 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
217 let fo = haFO__ handle_ in
218 #elif __GLASGOW_HASKELL__ > 303
219 wantReadableHandle "hGetChar" handle $ \ handle_ ->
220 let fo = haFO__ handle_ in
222 readHandle handle >>= \ handle_ ->
223 let fo = filePtr handle_ in
230 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
231 slurpFile c off chunk chunk_sz max_off = slurp c off
234 slurp :: Int# -> Int# -> IO (Addr, Int)
235 slurp c off | off >=# max_off = do
236 let new_sz = chunk_sz *# 2#
237 chunk' <- reAllocMem chunk (I# new_sz)
238 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
240 intc <- mayBlock fo (_ccall_ fileGetc fo)
241 if intc == ((-1)::Int)
242 then do errtype <- getErrType
243 if errtype == (ERR_EOF :: Int)
244 then return (chunk, I# off)
245 else constructErrorAndFail "slurpFile"
246 else case chr intc of
248 ch -> do writeCharOffAddr chunk (I# off) ch
249 let c' | ch == '\n' = 0#
250 | otherwise = c +# 1#
253 tabIt :: Int# -> Int# -> IO (Addr, Int)
254 -- can't run out of buffer in here, because we reserved an
255 -- extra tAB_SIZE bytes at the end earlier.
257 writeCharOffAddr chunk (I# off) ' '
260 if c' `remInt#` tAB_SIZE ==# 0#
265 -- allow space for a full tab at the end of the buffer
266 -- (that's what the max_off thing is for),
267 -- and add 1 to allow room for the final sentinel \NUL at
268 -- the end of the file.
269 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
270 #if __GLASGOW_HASKELL__ < 404
271 writeHandle handle handle_
274 then constructErrorAndFail "slurpFile"
275 else return (chunk', rc+1 {-room for sentinel-})
278 reAllocMem :: Addr -> Int -> IO Addr
279 reAllocMem ptr sz = do
280 chunk <- _ccall_ realloc ptr sz
282 #ifndef __HASKELL98__
283 then fail (userError "reAllocMem")
285 then fail "reAllocMem"
289 allocMem :: Int -> IO Addr
291 #if __GLASGOW_HASKELL__ < 303
292 chunk <- _ccall_ malloc sz
294 then fail (userError "allocMem")
297 chunk <- _ccall_ allocMemory__ sz
299 then constructErrorAndFail "allocMem"
307 currentChar :: StringBuffer -> Char
308 currentChar sb = case currentChar# sb of c -> C# c
310 lookAhead :: StringBuffer -> Int -> Char
311 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
313 indexSBuffer :: StringBuffer -> Int -> Char
314 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
316 currentChar# :: StringBuffer -> Char#
317 indexSBuffer# :: StringBuffer -> Int# -> Char#
318 lookAhead# :: StringBuffer -> Int# -> Char#
319 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
320 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
322 -- relative lookup, i.e, currentChar = lookAhead 0
323 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
325 currentIndex# :: StringBuffer -> Int#
326 currentIndex# (StringBuffer fo# _ _ c#) = c#
328 lexemeIndex :: StringBuffer -> Int#
329 lexemeIndex (StringBuffer fo# _ c# _) = c#
332 moving the start point of the current lexeme.
335 -- moving the end point of the current lexeme.
336 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
337 setCurrentPos# (StringBuffer fo l# s# c#) i# =
338 StringBuffer fo l# s# (c# +# i#)
340 -- augmenting the current lexeme by one.
341 incLexeme :: StringBuffer -> StringBuffer
342 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
344 decLexeme :: StringBuffer -> StringBuffer
345 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
349 -- move the start and end point of the buffer on by
353 stepOn :: StringBuffer -> StringBuffer
354 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
356 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
357 stepOnBy# (StringBuffer fo# l# s# c#) i# =
359 new_s# -> StringBuffer fo# l# new_s# new_s#
362 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
363 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
365 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
366 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
368 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
369 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
370 = StringBuffer fo l s# c#
372 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
374 stepOnUntil pred (StringBuffer fo l# s# c#) =
378 case indexCharOffAddr# fo c# of
379 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
380 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
381 | otherwise -> loop (c# +# 1#)
383 stepOverLexeme :: StringBuffer -> StringBuffer
384 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
386 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
387 expandWhile pred (StringBuffer fo l# s# c#) =
391 case indexCharOffAddr# fo c# of
392 ch# | pred (C# ch#) -> loop (c# +# 1#)
393 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
394 | otherwise -> StringBuffer fo l# s# c#
396 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
397 expandWhile# pred (StringBuffer fo l# s# c#) =
401 case indexCharOffAddr# fo c# of
402 ch# | pred ch# -> loop (c# +# 1#)
403 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
404 | otherwise -> StringBuffer fo l# s# c#
406 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
407 scanNumLit acc (StringBuffer fo l# s# c#) =
411 case indexCharOffAddr# fo c# of
412 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
413 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
414 | otherwise -> (acc,StringBuffer fo l# s# c#)
417 expandUntilMatch :: StringBuffer -> String -> StringBuffer
418 expandUntilMatch (StringBuffer fo l# s# c#) str =
421 loop c# [] = StringBuffer fo l# s# c#
423 | indexCharOffAddr# fo c# `eqChar#` x#
426 = loop (c# +# 1#) str
431 -- at or beyond end of buffer?
432 bufferExhausted :: StringBuffer -> Bool
433 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
435 emptyLexeme :: StringBuffer -> Bool
436 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
439 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
440 prefixMatch (StringBuffer fo l# s# c#) str =
443 loop c# [] = Just (StringBuffer fo l# s# c#)
445 | indexCharOffAddr# fo c# `eqChar#` x#
450 untilEndOfString# :: StringBuffer -> StringBuffer
451 untilEndOfString# (StringBuffer fo l# s# c#) =
454 getch# i# = indexCharOffAddr# fo i#
459 case getch# (c# -# 1#) of
461 -- looks like an escaped something or other to me,
462 -- better count the number of "\\"s that are immediately
463 -- preceeding to decide if the " is escaped.
467 '\\'# -> odd_slashes (not flg) (i# -# 1#)
470 if odd_slashes True (c# -# 2#) then
471 -- odd number, " is ecaped.
473 else -- a real end of string delimiter after all.
474 StringBuffer fo l# s# c#
475 _ -> StringBuffer fo l# s# c#
477 if c# >=# l# then -- hit sentinel, this doesn't look too good..
478 StringBuffer fo l# l# l#
484 untilChar# :: StringBuffer -> Char# -> StringBuffer
485 untilChar# (StringBuffer fo l# s# c#) x# =
489 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
490 = StringBuffer fo l# s# c#
495 lexemeToString :: StringBuffer -> String
496 lexemeToString (StringBuffer fo _ start_pos# current#) =
497 if start_pos# ==# current# then
500 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
502 lexemeToByteArray :: StringBuffer -> _ByteArray Int
503 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
504 if start_pos# ==# current# then
505 error "lexemeToByteArray"
507 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
509 lexemeToFastString :: StringBuffer -> FastString
510 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
511 if start_pos# ==# current# then
512 mkFastCharString2 (A# fo) (I# 0#)
514 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
517 Create a StringBuffer from the current lexeme, and add a sentinel
518 at the end. Know What You're Doing before taking this function
521 lexemeToBuffer :: StringBuffer -> StringBuffer
522 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
523 if start_pos# ==# current# then
524 StringBuffer fo 0# start_pos# current# -- an error, really.
526 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)