2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
41 stepOverLexeme, -- :: StringBuffer -> StringBuffer
42 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
43 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
44 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
45 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
46 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
47 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
48 -- at or beyond end of buffer?
49 bufferExhausted, -- :: StringBuffer -> Bool
50 emptyLexeme, -- :: StringBuffer -> Bool
53 prefixMatch, -- :: StringBuffer -> String -> Bool
54 untilEndOfString#, -- :: StringBuffer -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import PrelAddr ( Addr(..) )
74 #include "../lib/std/cbits/stgerror.h"
76 #if __GLASGOW_HASKELL__ >= 303
78 #if __GLASGOW_HASKELL__ < 407
79 , slurpFile -- comes from PrelHandle or IOExts now
86 import IO ( openFile, hFileSize, hClose, IOMode(..) )
90 #if __GLASGOW_HASKELL__ < 301
91 import IOBase ( Handle, IOError(..), IOErrorType(..),
92 constructErrorAndFail )
93 import IOHandle ( readHandle, writeHandle, filePtr )
94 import PackBase ( unpackCStringBA )
96 # if __GLASGOW_HASKELL__ <= 302
97 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
98 constructErrorAndFail )
99 import PrelHandle ( readHandle, writeHandle, filePtr )
101 import PrelPack ( unpackCStringBA )
104 #if __GLASGOW_HASKELL__ < 402
105 import Util ( bracket )
107 import Exception ( bracket )
112 import Char (isDigit)
125 instance Show StringBuffer where
126 showsPrec _ s = showString ""
130 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
131 hGetStringBuffer expand_tabs fname = do
132 (a, read) <- if expand_tabs
133 then slurpFileExpandTabs fname
136 let (A# a#) = a; (I# read#) = read
138 -- add sentinel '\NUL'
139 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
140 return (StringBuffer a# read# 0# 0#)
142 #if __GLASGOW_HASKELL__ < 303
144 openFile fname ReadMode >>= \ hndl ->
145 hFileSize hndl >>= \ len ->
146 let len_i = fromInteger len in
147 -- Allocate an array for system call to store its bytes into.
148 -- ToDo: make it robust
149 -- trace (show ((len_i::Int)+1)) $
150 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
151 if addr2Int# a# ==# 0# then
152 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
154 readHandle hndl >>= \ hndl_ ->
155 writeHandle hndl hndl_ >>
156 let ptr = filePtr hndl_ in
157 #if __GLASGOW_HASKELL__ <= 302
158 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
160 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
163 if read# ==# 0# then -- EOF or some other error
164 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
166 return (arr, I# read#)
169 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
170 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
172 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
177 -----------------------------------------------------------------------------
178 This very disturbing bit of code is used for expanding the tabs in a
179 file before we start parsing it. Expanding the tabs early makes the
180 lexer a lot simpler: we only have to record the beginning of the line
181 in order to be able to calculate the column offset of the current
184 We guess the size of the buffer required as 20% extra for
185 expanded tabs, and enlarge it if necessary.
188 #if __GLASGOW_HASKELL__ < 303
189 mayBlock fo thing = thing
191 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
192 writeCharOffAddr addr off c
193 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
197 #if __GLASGOW_HASKELL__ < 303
198 getErrType = _casm_ ``%r = ghc_errtype;''
200 getErrType = _ccall_ getErrType__
203 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
204 slurpFileExpandTabs fname = do
205 bracket (openFile fname ReadMode) (hClose)
207 do sz <- hFileSize handle
208 if sz > toInteger (maxBound::Int)
209 then IOERROR (userError "slurpFile: file too big")
211 let sz_i = fromInteger sz
212 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
213 chunk <- allocMem sz_i'
214 trySlurp handle sz_i' chunk
217 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
218 trySlurp handle sz_i chunk =
219 #if __GLASGOW_HASKELL__ == 303
220 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
221 let fo = haFO__ handle_ in
222 #elif __GLASGOW_HASKELL__ > 303
223 wantReadableHandle "hGetChar" handle $ \ handle_ ->
224 let fo = haFO__ handle_ in
226 readHandle handle >>= \ handle_ ->
227 let fo = filePtr handle_ in
234 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
235 slurpFile c off chunk chunk_sz max_off = slurp c off
238 slurp :: Int# -> Int# -> IO (Addr, Int)
239 slurp c off | off >=# max_off = do
240 let new_sz = chunk_sz *# 2#
241 chunk' <- reAllocMem chunk (I# new_sz)
242 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
244 intc <- mayBlock fo (_ccall_ fileGetc fo)
245 if intc == ((-1)::Int)
246 then do errtype <- getErrType
247 if errtype == (ERR_EOF :: Int)
248 then return (chunk, I# off)
249 else constructErrorAndFail "slurpFile"
250 else case chr intc of
252 ch -> do writeCharOffAddr chunk (I# off) ch
253 let c' | ch == '\n' = 0#
254 | otherwise = c +# 1#
257 tabIt :: Int# -> Int# -> IO (Addr, Int)
258 -- can't run out of buffer in here, because we reserved an
259 -- extra tAB_SIZE bytes at the end earlier.
261 writeCharOffAddr chunk (I# off) ' '
264 if c' `remInt#` tAB_SIZE ==# 0#
269 -- allow space for a full tab at the end of the buffer
270 -- (that's what the max_off thing is for),
271 -- and add 1 to allow room for the final sentinel \NUL at
272 -- the end of the file.
273 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
274 #if __GLASGOW_HASKELL__ < 404
275 writeHandle handle handle_
278 then constructErrorAndFail "slurpFile"
279 else return (chunk', rc+1 {-room for sentinel-})
282 reAllocMem :: Addr -> Int -> IO Addr
283 reAllocMem ptr sz = do
284 chunk <- _ccall_ realloc ptr sz
286 #if __GLASGOW_HASKELL__ >= 400
287 then fail "reAllocMem"
289 then fail (userError "reAllocMem")
293 allocMem :: Int -> IO Addr
295 chunk <- _ccall_ malloc sz
296 #if __GLASGOW_HASKELL__ < 303
298 then fail (userError "allocMem")
302 then constructErrorAndFail "allocMem"
310 currentChar :: StringBuffer -> Char
311 currentChar sb = case currentChar# sb of c -> C# c
313 lookAhead :: StringBuffer -> Int -> Char
314 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
316 indexSBuffer :: StringBuffer -> Int -> Char
317 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
319 currentChar# :: StringBuffer -> Char#
320 indexSBuffer# :: StringBuffer -> Int# -> Char#
321 lookAhead# :: StringBuffer -> Int# -> Char#
322 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
323 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
325 -- relative lookup, i.e, currentChar = lookAhead 0
326 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
328 currentIndex# :: StringBuffer -> Int#
329 currentIndex# (StringBuffer fo# _ _ c#) = c#
331 lexemeIndex :: StringBuffer -> Int#
332 lexemeIndex (StringBuffer fo# _ c# _) = c#
335 moving the start point of the current lexeme.
338 -- moving the end point of the current lexeme.
339 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
340 setCurrentPos# (StringBuffer fo l# s# c#) i# =
341 StringBuffer fo l# s# (c# +# i#)
343 -- augmenting the current lexeme by one.
344 incLexeme :: StringBuffer -> StringBuffer
345 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
347 decLexeme :: StringBuffer -> StringBuffer
348 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
352 -- move the start and end point of the buffer on by
356 stepOn :: StringBuffer -> StringBuffer
357 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
359 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
360 stepOnBy# (StringBuffer fo# l# s# c#) i# =
362 new_s# -> StringBuffer fo# l# new_s# new_s#
365 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
366 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
368 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
369 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
371 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
372 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
373 = StringBuffer fo l s# c#
375 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
377 stepOnUntil pred (StringBuffer fo l# s# c#) =
381 case indexCharOffAddr# fo c# of
382 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
383 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
384 | otherwise -> loop (c# +# 1#)
386 stepOverLexeme :: StringBuffer -> StringBuffer
387 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
389 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
390 expandWhile pred (StringBuffer fo l# s# c#) =
394 case indexCharOffAddr# fo c# of
395 ch# | pred (C# ch#) -> loop (c# +# 1#)
396 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
397 | otherwise -> StringBuffer fo l# s# c#
399 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
400 expandWhile# pred (StringBuffer fo l# s# c#) =
404 case indexCharOffAddr# fo c# of
405 ch# | pred ch# -> loop (c# +# 1#)
406 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
407 | otherwise -> StringBuffer fo l# s# c#
409 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
410 scanNumLit acc (StringBuffer fo l# s# c#) =
414 case indexCharOffAddr# fo c# of
415 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
416 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
417 | otherwise -> (acc,StringBuffer fo l# s# c#)
420 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
421 expandUntilMatch (StringBuffer fo l# s# c#) str =
424 loop c# [] = Just (StringBuffer fo l# s# c#)
425 loop c# ((C# x#):xs) =
426 case indexCharOffAddr# fo c# of
427 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
428 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
429 | otherwise -> loop (c# +# 1#) str
434 -- at or beyond end of buffer?
435 bufferExhausted :: StringBuffer -> Bool
436 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
438 emptyLexeme :: StringBuffer -> Bool
439 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
442 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
443 prefixMatch (StringBuffer fo l# s# c#) str =
446 loop c# [] = Just (StringBuffer fo l# s# c#)
448 | indexCharOffAddr# fo c# `eqChar#` x#
453 untilEndOfString# :: StringBuffer -> StringBuffer
454 untilEndOfString# (StringBuffer fo l# s# c#) =
457 getch# i# = indexCharOffAddr# fo i#
462 case getch# (c# -# 1#) of
464 -- looks like an escaped something or other to me,
465 -- better count the number of "\\"s that are immediately
466 -- preceeding to decide if the " is escaped.
470 '\\'# -> odd_slashes (not flg) (i# -# 1#)
473 if odd_slashes True (c# -# 2#) then
474 -- odd number, " is ecaped.
476 else -- a real end of string delimiter after all.
477 StringBuffer fo l# s# c#
478 _ -> StringBuffer fo l# s# c#
480 if c# >=# l# then -- hit sentinel, this doesn't look too good..
481 StringBuffer fo l# l# l#
487 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
488 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
492 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
493 = StringBuffer fo l# c# c#
498 lexemeToString :: StringBuffer -> String
499 lexemeToString (StringBuffer fo _ start_pos# current#) =
500 if start_pos# ==# current# then
503 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
505 lexemeToByteArray :: StringBuffer -> ByteArray Int
506 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
507 if start_pos# ==# current# then
508 error "lexemeToByteArray"
510 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
512 lexemeToFastString :: StringBuffer -> FastString
513 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
514 if start_pos# ==# current# then
515 mkFastCharString2 (A# fo) (I# 0#)
517 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
520 Create a StringBuffer from the current lexeme, and add a sentinel
521 at the end. Know What You're Doing before taking this function
524 lexemeToBuffer :: StringBuffer -> StringBuffer
525 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
526 if start_pos# ==# current# then
527 StringBuffer fo 0# start_pos# current# -- an error, really.
529 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)