2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
75 #include "../lib/std/cbits/error.h"
77 #if __GLASGOW_HASKELL__ >= 303
78 import IO ( openFile, slurpFile )
83 import IO ( openFile, hFileSize, hClose, IOMode(..) )
87 #if __GLASGOW_HASKELL__ < 301
88 import IOBase ( IOError(..), IOErrorType(..) )
89 import IOHandle ( readHandle, writeHandle, filePtr )
90 import PackBase ( unpackCStringBA )
92 # if __GLASGOW_HASKELL__ <= 302
93 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
94 constructErrorAndFail )
95 import PrelHandle ( readHandle, writeHandle, filePtr )
97 import PrelPack ( unpackCStringBA )
100 #if __GLASGOW_HASKELL__ < 402
101 import Util ( bracket )
103 import Exception ( bracket )
108 import Char (isDigit)
121 instance Text StringBuffer where
122 showsPrec _ s = showString ""
126 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
127 hGetStringBuffer expand_tabs fname = do
128 (a, read) <- if expand_tabs
129 then slurpFileExpandTabs fname
132 let (A# a#) = a; (I# read#) = read
134 -- add sentinel '\NUL'
135 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
136 return (StringBuffer a# read# 0# 0#)
138 #if __GLASGOW_HASKELL__ < 303
140 openFile fname ReadMode >>= \ hndl ->
141 hFileSize hndl >>= \ len ->
142 let len_i = fromInteger len in
143 -- Allocate an array for system call to store its bytes into.
144 -- ToDo: make it robust
145 -- trace (show ((len_i::Int)+1)) $
146 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
147 if addr2Int# a# ==# 0# then
148 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
150 readHandle hndl >>= \ hndl_ ->
151 writeHandle hndl hndl_ >>
152 let ptr = filePtr hndl_ in
153 #if __GLASGOW_HASKELL__ <= 302
154 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
156 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
159 if read# ==# 0# then -- EOF or some other error
160 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
162 return (arr, I# read#)
165 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
166 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
168 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
173 -----------------------------------------------------------------------------
174 This very disturbing bit of code is used for expanding the tabs in a
175 file before we start parsing it. Expanding the tabs early makes the
176 lexer a lot simpler: we only have to record the beginning of the line
177 in order to be able to calculate the column offset of the current
180 We guess the size of the buffer required as 20% extra for
181 expanded tabs, and enlarge it if necessary.
184 #if __GLASGOW_HASKELL__ < 303
186 mayBlock fo thing = thing
188 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
189 writeCharOffAddr addr off c
190 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
194 #if __GLASGOW_HASKELL__ < 303
195 getErrType = _casm_ ``%r = ghc_errtype;''
197 getErrType = _ccall_ getErrType__
200 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
201 slurpFileExpandTabs fname = do
202 bracket (openFile fname ReadMode) (hClose)
204 do sz <- hFileSize handle
205 if sz > toInteger (maxBound::Int)
206 then ioError (userError "slurpFile: file too big")
208 let sz_i = fromInteger sz
209 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
210 chunk <- allocMem sz_i'
211 trySlurp handle sz_i' chunk
214 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
215 trySlurp handle sz_i chunk =
216 #if __GLASGOW_HASKELL__ >= 303
217 wantReadableHandle "hGetChar" handle $ \ handle_ ->
218 let fo = haFO__ handle_ in
220 readHandle handle >>= \ handle_ ->
221 let fo = filePtr handle_ in
228 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
229 slurpFile c off chunk chunk_sz max_off = slurp c off
232 slurp :: Int# -> Int# -> IO (Addr, Int)
233 slurp c off | off >=# max_off = do
234 let new_sz = chunk_sz *# 2#
235 chunk' <- reAllocMem chunk (I# new_sz)
236 slurpFile c off chunk' new_sz (new_sz -# tAB_SIZE)
238 intc <- mayBlock fo (_ccall_ fileGetc fo)
239 if intc == ((-1)::Int)
240 then do errtype <- getErrType
241 if errtype == (ERR_EOF :: Int)
242 then return (chunk, I# off)
243 else constructErrorAndFail "slurpFile"
244 else case chr intc of
246 ch -> do writeCharOffAddr chunk (I# off) ch
247 let c' | ch == '\n' = 0#
248 | otherwise = c +# 1#
251 tabIt :: Int# -> Int# -> IO (Addr, Int)
252 -- can't run out of buffer in here, because we reserved an
253 -- extra tAB_SIZE bytes at the end earlier.
255 writeCharOffAddr chunk (I# off) ' '
258 if c' `remInt#` tAB_SIZE ==# 0#
263 -- allow space for a full tab at the end of the buffer
264 -- (that's what the max_off thing is for)
265 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# tAB_SIZE)
266 writeHandle handle handle_
268 then constructErrorAndFail "slurpFile"
269 else return (chunk', rc)
272 reAllocMem :: Addr -> Int -> IO Addr
273 reAllocMem ptr sz = do
274 chunk <- _ccall_ realloc ptr sz
276 #if __GLASGOW_HASKELL__ < 303
277 then fail (userError "reAllocMem")
279 then fail "reAllocMem"
283 allocMem :: Int -> IO Addr
285 #if __GLASGOW_HASKELL__ < 303
286 chunk <- _ccall_ malloc sz
288 then fail (userError "allocMem")
291 chunk <- _ccall_ allocMemory__ sz
293 then constructErrorAndFail "allocMem"
301 currentChar :: StringBuffer -> Char
302 currentChar sb = case currentChar# sb of c -> C# c
304 lookAhead :: StringBuffer -> Int -> Char
305 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
307 indexSBuffer :: StringBuffer -> Int -> Char
308 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
310 currentChar# :: StringBuffer -> Char#
311 indexSBuffer# :: StringBuffer -> Int# -> Char#
312 lookAhead# :: StringBuffer -> Int# -> Char#
313 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
314 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
316 -- relative lookup, i.e, currentChar = lookAhead 0
317 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
319 currentIndex# :: StringBuffer -> Int#
320 currentIndex# (StringBuffer fo# _ _ c#) = c#
322 lexemeIndex :: StringBuffer -> Int#
323 lexemeIndex (StringBuffer fo# _ c# _) = c#
326 moving the start point of the current lexeme.
329 -- moving the end point of the current lexeme.
330 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
331 setCurrentPos# (StringBuffer fo l# s# c#) i# =
332 StringBuffer fo l# s# (c# +# i#)
334 -- augmenting the current lexeme by one.
335 incLexeme :: StringBuffer -> StringBuffer
336 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
338 decLexeme :: StringBuffer -> StringBuffer
339 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
343 -- move the start and end point of the buffer on by
347 stepOn :: StringBuffer -> StringBuffer
348 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
350 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
351 stepOnBy# (StringBuffer fo# l# s# c#) i# =
353 new_s# -> StringBuffer fo# l# new_s# new_s#
356 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
357 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
359 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
360 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
362 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
363 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
364 = StringBuffer fo l s# c#
366 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
368 stepOnUntil pred (StringBuffer fo l# s# c#) =
372 case indexCharOffAddr# fo c# of
373 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
374 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
375 | otherwise -> loop (c# +# 1#)
377 stepOverLexeme :: StringBuffer -> StringBuffer
378 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
380 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
381 expandWhile pred (StringBuffer fo l# s# c#) =
385 case indexCharOffAddr# fo c# of
386 ch# | pred (C# ch#) -> loop (c# +# 1#)
387 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
388 | otherwise -> StringBuffer fo l# s# c#
390 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
391 expandWhile# pred (StringBuffer fo l# s# c#) =
395 case indexCharOffAddr# fo c# of
396 ch# | pred ch# -> loop (c# +# 1#)
397 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
398 | otherwise -> StringBuffer fo l# s# c#
400 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
401 scanNumLit acc (StringBuffer fo l# s# c#) =
405 case indexCharOffAddr# fo c# of
406 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
407 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
408 | otherwise -> (acc,StringBuffer fo l# s# c#)
411 expandUntilMatch :: StringBuffer -> String -> StringBuffer
412 expandUntilMatch (StringBuffer fo l# s# c#) str =
415 loop c# [] = StringBuffer fo l# s# c#
417 | indexCharOffAddr# fo c# `eqChar#` x#
420 = loop (c# +# 1#) str
425 -- at or beyond end of buffer?
426 bufferExhausted :: StringBuffer -> Bool
427 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
429 emptyLexeme :: StringBuffer -> Bool
430 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
433 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
434 prefixMatch (StringBuffer fo l# s# c#) str =
437 loop c# [] = Just (StringBuffer fo l# s# c#)
439 | indexCharOffAddr# fo c# `eqChar#` x#
444 untilEndOfString# :: StringBuffer -> StringBuffer
445 untilEndOfString# (StringBuffer fo l# s# c#) =
448 getch# i# = indexCharOffAddr# fo i#
453 case getch# (c# -# 1#) of
455 -- looks like an escaped something or other to me,
456 -- better count the number of "\\"s that are immediately
457 -- preceeding to decide if the " is escaped.
461 '\\'# -> odd_slashes (not flg) (i# -# 1#)
464 if odd_slashes True (c# -# 2#) then
465 -- odd number, " is ecaped.
467 else -- a real end of string delimiter after all.
468 StringBuffer fo l# s# c#
469 _ -> StringBuffer fo l# s# c#
471 if c# >=# l# then -- hit sentinel, this doesn't look too good..
472 StringBuffer fo l# l# l#
478 untilChar# :: StringBuffer -> Char# -> StringBuffer
479 untilChar# (StringBuffer fo l# s# c#) x# =
483 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
484 = StringBuffer fo l# s# c#
489 lexemeToString :: StringBuffer -> String
490 lexemeToString (StringBuffer fo _ start_pos# current#) =
491 if start_pos# ==# current# then
494 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
496 lexemeToByteArray :: StringBuffer -> _ByteArray Int
497 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
498 if start_pos# ==# current# then
499 error "lexemeToByteArray"
501 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
503 lexemeToFastString :: StringBuffer -> FastString
504 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
505 if start_pos# ==# current# then
506 mkFastCharString2 (A# fo) (I# 0#)
508 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
511 Create a StringBuffer from the current lexeme, and add a sentinel
512 at the end. Know What You're Doing before taking this function
515 lexemeToBuffer :: StringBuffer -> StringBuffer
516 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
517 if start_pos# ==# current# then
518 StringBuffer fo 0# start_pos# current# -- an error, really.
520 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)