2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
75 #include "../lib/std/cbits/stgerror.h"
77 #if __GLASGOW_HASKELL__ >= 303
78 import IO ( openFile, slurpFile )
83 import IO ( openFile, hFileSize, hClose, IOMode(..) )
87 #if __GLASGOW_HASKELL__ < 301
88 import IOBase ( Handle, IOError(..), IOErrorType(..),
89 constructErrorAndFail )
90 import IOHandle ( readHandle, writeHandle, filePtr )
91 import PackBase ( unpackCStringBA )
93 # if __GLASGOW_HASKELL__ <= 302
94 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
95 constructErrorAndFail )
96 import PrelHandle ( readHandle, writeHandle, filePtr )
98 import PrelPack ( unpackCStringBA )
101 #if __GLASGOW_HASKELL__ < 402
102 import Util ( bracket )
104 import Exception ( bracket )
109 import Char (isDigit)
122 instance Text StringBuffer where
123 showsPrec _ s = showString ""
127 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
128 hGetStringBuffer expand_tabs fname = do
129 (a, read) <- if expand_tabs
130 then slurpFileExpandTabs fname
133 let (A# a#) = a; (I# read#) = read
135 -- add sentinel '\NUL'
136 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
137 return (StringBuffer a# read# 0# 0#)
139 #if __GLASGOW_HASKELL__ < 303
141 openFile fname ReadMode >>= \ hndl ->
142 hFileSize hndl >>= \ len ->
143 let len_i = fromInteger len in
144 -- Allocate an array for system call to store its bytes into.
145 -- ToDo: make it robust
146 -- trace (show ((len_i::Int)+1)) $
147 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
148 if addr2Int# a# ==# 0# then
149 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
151 readHandle hndl >>= \ hndl_ ->
152 writeHandle hndl hndl_ >>
153 let ptr = filePtr hndl_ in
154 #if __GLASGOW_HASKELL__ <= 302
155 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
157 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
160 if read# ==# 0# then -- EOF or some other error
161 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
163 return (arr, I# read#)
166 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
167 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
169 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
174 -----------------------------------------------------------------------------
175 This very disturbing bit of code is used for expanding the tabs in a
176 file before we start parsing it. Expanding the tabs early makes the
177 lexer a lot simpler: we only have to record the beginning of the line
178 in order to be able to calculate the column offset of the current
181 We guess the size of the buffer required as 20% extra for
182 expanded tabs, and enlarge it if necessary.
185 #if __GLASGOW_HASKELL__ < 303
186 mayBlock fo thing = thing
188 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
189 writeCharOffAddr addr off c
190 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
194 #if __GLASGOW_HASKELL__ < 303
195 getErrType = _casm_ ``%r = ghc_errtype;''
197 getErrType = _ccall_ getErrType__
200 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
201 slurpFileExpandTabs fname = do
202 bracket (openFile fname ReadMode) (hClose)
204 do sz <- hFileSize handle
205 if sz > toInteger (maxBound::Int)
206 then IOERROR (userError "slurpFile: file too big")
208 let sz_i = fromInteger sz
209 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
210 chunk <- allocMem sz_i'
211 trySlurp handle sz_i' chunk
214 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
215 trySlurp handle sz_i chunk =
216 #if __GLASGOW_HASKELL__ == 303
217 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
218 let fo = haFO__ handle_ in
219 #elif __GLASGOW_HASKELL__ > 303
220 wantReadableHandle "hGetChar" handle $ \ handle_ ->
221 let fo = haFO__ handle_ in
223 readHandle handle >>= \ handle_ ->
224 let fo = filePtr handle_ in
231 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
232 slurpFile c off chunk chunk_sz max_off = slurp c off
235 slurp :: Int# -> Int# -> IO (Addr, Int)
236 slurp c off | off >=# max_off = do
237 let new_sz = chunk_sz *# 2#
238 chunk' <- reAllocMem chunk (I# new_sz)
239 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
241 intc <- mayBlock fo (_ccall_ fileGetc fo)
242 if intc == ((-1)::Int)
243 then do errtype <- getErrType
244 if errtype == (ERR_EOF :: Int)
245 then return (chunk, I# off)
246 else constructErrorAndFail "slurpFile"
247 else case chr intc of
249 ch -> do writeCharOffAddr chunk (I# off) ch
250 let c' | ch == '\n' = 0#
251 | otherwise = c +# 1#
254 tabIt :: Int# -> Int# -> IO (Addr, Int)
255 -- can't run out of buffer in here, because we reserved an
256 -- extra tAB_SIZE bytes at the end earlier.
258 writeCharOffAddr chunk (I# off) ' '
261 if c' `remInt#` tAB_SIZE ==# 0#
266 -- allow space for a full tab at the end of the buffer
267 -- (that's what the max_off thing is for),
268 -- and add 1 to allow room for the final sentinel \NUL at
269 -- the end of the file.
270 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
271 #if __GLASGOW_HASKELL__ < 404
272 writeHandle handle handle_
275 then constructErrorAndFail "slurpFile"
276 else return (chunk', rc+1 {-room for sentinel-})
279 reAllocMem :: Addr -> Int -> IO Addr
280 reAllocMem ptr sz = do
281 chunk <- _ccall_ realloc ptr sz
283 #if __GLASGOW_HASKELL__ >= 400
284 then fail "reAllocMem"
286 then fail (userError "reAllocMem")
290 allocMem :: Int -> IO Addr
292 #if __GLASGOW_HASKELL__ < 303
293 chunk <- _ccall_ malloc sz
295 then fail (userError "allocMem")
298 chunk <- _ccall_ allocMemory__ sz
300 then constructErrorAndFail "allocMem"
308 currentChar :: StringBuffer -> Char
309 currentChar sb = case currentChar# sb of c -> C# c
311 lookAhead :: StringBuffer -> Int -> Char
312 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
314 indexSBuffer :: StringBuffer -> Int -> Char
315 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
317 currentChar# :: StringBuffer -> Char#
318 indexSBuffer# :: StringBuffer -> Int# -> Char#
319 lookAhead# :: StringBuffer -> Int# -> Char#
320 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
321 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
323 -- relative lookup, i.e, currentChar = lookAhead 0
324 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
326 currentIndex# :: StringBuffer -> Int#
327 currentIndex# (StringBuffer fo# _ _ c#) = c#
329 lexemeIndex :: StringBuffer -> Int#
330 lexemeIndex (StringBuffer fo# _ c# _) = c#
333 moving the start point of the current lexeme.
336 -- moving the end point of the current lexeme.
337 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
338 setCurrentPos# (StringBuffer fo l# s# c#) i# =
339 StringBuffer fo l# s# (c# +# i#)
341 -- augmenting the current lexeme by one.
342 incLexeme :: StringBuffer -> StringBuffer
343 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
345 decLexeme :: StringBuffer -> StringBuffer
346 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
350 -- move the start and end point of the buffer on by
354 stepOn :: StringBuffer -> StringBuffer
355 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
357 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
358 stepOnBy# (StringBuffer fo# l# s# c#) i# =
360 new_s# -> StringBuffer fo# l# new_s# new_s#
363 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
364 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
366 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
367 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
369 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
370 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
371 = StringBuffer fo l s# c#
373 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
375 stepOnUntil pred (StringBuffer fo l# s# c#) =
379 case indexCharOffAddr# fo c# of
380 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
381 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
382 | otherwise -> loop (c# +# 1#)
384 stepOverLexeme :: StringBuffer -> StringBuffer
385 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
387 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
388 expandWhile pred (StringBuffer fo l# s# c#) =
392 case indexCharOffAddr# fo c# of
393 ch# | pred (C# ch#) -> loop (c# +# 1#)
394 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
395 | otherwise -> StringBuffer fo l# s# c#
397 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
398 expandWhile# pred (StringBuffer fo l# s# c#) =
402 case indexCharOffAddr# fo c# of
403 ch# | pred ch# -> loop (c# +# 1#)
404 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
405 | otherwise -> StringBuffer fo l# s# c#
407 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
408 scanNumLit acc (StringBuffer fo l# s# c#) =
412 case indexCharOffAddr# fo c# of
413 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
414 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
415 | otherwise -> (acc,StringBuffer fo l# s# c#)
418 expandUntilMatch :: StringBuffer -> String -> StringBuffer
419 expandUntilMatch (StringBuffer fo l# s# c#) str =
422 loop c# [] = StringBuffer fo l# s# c#
424 | indexCharOffAddr# fo c# `eqChar#` x#
427 = loop (c# +# 1#) str
432 -- at or beyond end of buffer?
433 bufferExhausted :: StringBuffer -> Bool
434 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
436 emptyLexeme :: StringBuffer -> Bool
437 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
440 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
441 prefixMatch (StringBuffer fo l# s# c#) str =
444 loop c# [] = Just (StringBuffer fo l# s# c#)
446 | indexCharOffAddr# fo c# `eqChar#` x#
451 untilEndOfString# :: StringBuffer -> StringBuffer
452 untilEndOfString# (StringBuffer fo l# s# c#) =
455 getch# i# = indexCharOffAddr# fo i#
460 case getch# (c# -# 1#) of
462 -- looks like an escaped something or other to me,
463 -- better count the number of "\\"s that are immediately
464 -- preceeding to decide if the " is escaped.
468 '\\'# -> odd_slashes (not flg) (i# -# 1#)
471 if odd_slashes True (c# -# 2#) then
472 -- odd number, " is ecaped.
474 else -- a real end of string delimiter after all.
475 StringBuffer fo l# s# c#
476 _ -> StringBuffer fo l# s# c#
478 if c# >=# l# then -- hit sentinel, this doesn't look too good..
479 StringBuffer fo l# l# l#
485 untilChar# :: StringBuffer -> Char# -> StringBuffer
486 untilChar# (StringBuffer fo l# s# c#) x# =
490 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
491 = StringBuffer fo l# s# c#
496 lexemeToString :: StringBuffer -> String
497 lexemeToString (StringBuffer fo _ start_pos# current#) =
498 if start_pos# ==# current# then
501 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
503 lexemeToByteArray :: StringBuffer -> _ByteArray Int
504 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
505 if start_pos# ==# current# then
506 error "lexemeToByteArray"
508 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
510 lexemeToFastString :: StringBuffer -> FastString
511 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
512 if start_pos# ==# current# then
513 mkFastCharString2 (A# fo) (I# 0#)
515 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
518 Create a StringBuffer from the current lexeme, and add a sentinel
519 at the end. Know What You're Doing before taking this function
522 lexemeToBuffer :: StringBuffer -> StringBuffer
523 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
524 if start_pos# ==# current# then
525 StringBuffer fo 0# start_pos# current# -- an error, really.
527 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)