2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
14 -- creation/destruction
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
17 stringToStringBuffer, -- :: String -> IO StringBuffer
18 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
76 import Addr ( Addr(..) )
82 #include "../lib/std/cbits/stgerror.h"
84 #if __GLASGOW_HASKELL__ >= 303
86 #if __GLASGOW_HASKELL__ < 407
87 , slurpFile -- comes from PrelHandle or IOExts now
94 import IO ( openFile, hFileSize, hClose, IOMode(..) )
97 #if __GLASGOW_HASKELL__ >= 411
98 import Ptr ( Ptr(..) )
101 #if __GLASGOW_HASKELL__ < 301
102 import IOBase ( Handle, IOError(..), IOErrorType(..),
103 constructErrorAndFail )
104 import IOHandle ( readHandle, writeHandle, filePtr )
105 import PackBase ( unpackCStringBA )
107 # if __GLASGOW_HASKELL__ <= 302
108 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
109 constructErrorAndFail )
110 import PrelHandle ( readHandle, writeHandle, filePtr )
112 import PrelPack ( unpackCStringBA )
115 #if __GLASGOW_HASKELL__ < 402
116 import Util ( bracket )
118 import Exception ( bracket )
123 import Char (isDigit)
136 instance Show StringBuffer where
137 showsPrec _ s = showString ""
141 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
142 hGetStringBuffer expand_tabs fname = do
143 (a, read) <- if expand_tabs
144 then slurpFileExpandTabs fname
145 #if __GLASGOW_HASKELL__ < 411
149 (Ptr a#, read) <- slurpFile fname
153 let (A# a#) = a; (I# read#) = read
155 -- add sentinel '\NUL'
156 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
157 return (StringBuffer a# read# 0# 0#)
159 #if __GLASGOW_HASKELL__ < 303
161 openFile fname ReadMode >>= \ hndl ->
162 hFileSize hndl >>= \ len ->
163 let len_i = fromInteger len in
164 -- Allocate an array for system call to store its bytes into.
165 -- ToDo: make it robust
166 -- trace (show ((len_i::Int)+1)) $
167 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
168 if addr2Int# a# ==# 0# then
169 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
171 readHandle hndl >>= \ hndl_ ->
172 writeHandle hndl hndl_ >>
173 let ptr = filePtr hndl_ in
174 #if __GLASGOW_HASKELL__ <= 302
175 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
177 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
180 if read# ==# 0# then -- EOF or some other error
181 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
183 return (arr, I# read#)
186 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
187 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
189 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
194 -----------------------------------------------------------------------------
195 -- Turn a String into a StringBuffer
199 stringToStringBuffer :: String -> IO StringBuffer
200 stringToStringBuffer str =
201 do let sz@(I# sz#) = length str + 1
202 (Ptr a#) <- mallocBytes sz
204 writeCharOffAddr (A# a#) (sz-1) '\0' -- sentinel
205 return (StringBuffer a# sz# 0# 0#)
207 fill_in [] _ = return ()
208 fill_in (c:cs) a = do
209 writeCharOffAddr a 0 c
210 fill_in cs (a `plusAddr` 1)
212 freeStringBuffer :: StringBuffer -> IO ()
213 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
217 -----------------------------------------------------------------------------
218 This very disturbing bit of code is used for expanding the tabs in a
219 file before we start parsing it. Expanding the tabs early makes the
220 lexer a lot simpler: we only have to record the beginning of the line
221 in order to be able to calculate the column offset of the current
224 We guess the size of the buffer required as 20% extra for
225 expanded tabs, and enlarge it if necessary.
228 #if __GLASGOW_HASKELL__ < 303
229 mayBlock fo thing = thing
231 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
232 writeCharOffAddr addr off c
233 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
237 #if __GLASGOW_HASKELL__ < 303
238 getErrType = _casm_ ``%r = ghc_errtype;''
240 getErrType = _ccall_ getErrType__
243 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
244 slurpFileExpandTabs fname = do
245 bracket (openFile fname ReadMode) (hClose)
247 do sz <- hFileSize handle
248 if sz > toInteger (maxBound::Int)
249 then IOERROR (userError "slurpFile: file too big")
251 let sz_i = fromInteger sz
252 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
253 chunk <- allocMem sz_i'
254 trySlurp handle sz_i' chunk
257 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
258 trySlurp handle sz_i chunk =
259 #if __GLASGOW_HASKELL__ == 303
260 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
261 let fo = haFO__ handle_ in
262 #elif __GLASGOW_HASKELL__ > 303
263 wantReadableHandle "hGetChar" handle $ \ handle_ ->
264 let fo = haFO__ handle_ in
266 readHandle handle >>= \ handle_ ->
267 let fo = filePtr handle_ in
274 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
275 slurpFile c off chunk chunk_sz max_off = slurp c off
278 slurp :: Int# -> Int# -> IO (Addr, Int)
279 slurp c off | off >=# max_off = do
280 let new_sz = chunk_sz *# 2#
281 chunk' <- reAllocMem chunk (I# new_sz)
282 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
284 intc <- mayBlock fo (_ccall_ fileGetc fo)
285 if intc == ((-1)::Int)
286 then do errtype <- getErrType
287 if errtype == (ERR_EOF :: Int)
288 then return (chunk, I# off)
289 else constructErrorAndFail "slurpFile"
290 else case chr intc of
292 ch -> do writeCharOffAddr chunk (I# off) ch
293 let c' | ch == '\n' = 0#
294 | otherwise = c +# 1#
297 tabIt :: Int# -> Int# -> IO (Addr, Int)
298 -- can't run out of buffer in here, because we reserved an
299 -- extra tAB_SIZE bytes at the end earlier.
301 writeCharOffAddr chunk (I# off) ' '
304 if c' `remInt#` tAB_SIZE ==# 0#
309 -- allow space for a full tab at the end of the buffer
310 -- (that's what the max_off thing is for),
311 -- and add 1 to allow room for the final sentinel \NUL at
312 -- the end of the file.
313 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
314 #if __GLASGOW_HASKELL__ < 404
315 writeHandle handle handle_
318 then constructErrorAndFail "slurpFile"
319 else return (chunk', rc+1 {-room for sentinel-})
322 reAllocMem :: Addr -> Int -> IO Addr
323 reAllocMem ptr sz = do
324 chunk <- _ccall_ realloc ptr sz
326 #if __GLASGOW_HASKELL__ >= 400
327 then fail "reAllocMem"
329 then fail (userError "reAllocMem")
333 allocMem :: Int -> IO Addr
335 chunk <- _ccall_ malloc sz
336 #if __GLASGOW_HASKELL__ < 303
338 then fail (userError "allocMem")
342 then constructErrorAndFail "allocMem"
350 currentChar :: StringBuffer -> Char
351 currentChar sb = case currentChar# sb of c -> C# c
353 lookAhead :: StringBuffer -> Int -> Char
354 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
356 indexSBuffer :: StringBuffer -> Int -> Char
357 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
359 currentChar# :: StringBuffer -> Char#
360 indexSBuffer# :: StringBuffer -> Int# -> Char#
361 lookAhead# :: StringBuffer -> Int# -> Char#
362 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
363 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
365 -- relative lookup, i.e, currentChar = lookAhead 0
366 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
368 currentIndex# :: StringBuffer -> Int#
369 currentIndex# (StringBuffer fo# _ _ c#) = c#
371 lexemeIndex :: StringBuffer -> Int#
372 lexemeIndex (StringBuffer fo# _ c# _) = c#
375 moving the start point of the current lexeme.
378 -- moving the end point of the current lexeme.
379 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
380 setCurrentPos# (StringBuffer fo l# s# c#) i# =
381 StringBuffer fo l# s# (c# +# i#)
383 -- augmenting the current lexeme by one.
384 incLexeme :: StringBuffer -> StringBuffer
385 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
387 decLexeme :: StringBuffer -> StringBuffer
388 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
392 -- move the start and end point of the buffer on by
396 stepOn :: StringBuffer -> StringBuffer
397 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
399 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
400 stepOnBy# (StringBuffer fo# l# s# c#) i# =
402 new_s# -> StringBuffer fo# l# new_s# new_s#
405 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
406 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
408 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
409 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
411 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
412 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
413 = StringBuffer fo l s# c#
415 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
417 stepOnUntil pred (StringBuffer fo l# s# c#) =
421 case indexCharOffAddr# fo c# of
422 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
423 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
424 | otherwise -> loop (c# +# 1#)
426 stepOverLexeme :: StringBuffer -> StringBuffer
427 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
429 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
430 expandWhile pred (StringBuffer fo l# s# c#) =
434 case indexCharOffAddr# fo c# of
435 ch# | pred (C# ch#) -> loop (c# +# 1#)
436 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
437 | otherwise -> StringBuffer fo l# s# c#
439 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
440 expandWhile# pred (StringBuffer fo l# s# c#) =
444 case indexCharOffAddr# fo c# of
445 ch# | pred ch# -> loop (c# +# 1#)
446 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
447 | otherwise -> StringBuffer fo l# s# c#
449 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
450 scanNumLit acc (StringBuffer fo l# s# c#) =
454 case indexCharOffAddr# fo c# of
455 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
456 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
457 | otherwise -> (acc,StringBuffer fo l# s# c#)
460 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
461 expandUntilMatch (StringBuffer fo l# s# c#) str =
464 loop c# [] = Just (StringBuffer fo l# s# c#)
465 loop c# ((C# x#):xs) =
466 case indexCharOffAddr# fo c# of
467 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
468 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
469 | otherwise -> loop (c# +# 1#) str
474 -- at or beyond end of buffer?
475 bufferExhausted :: StringBuffer -> Bool
476 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
478 emptyLexeme :: StringBuffer -> Bool
479 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
482 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
483 prefixMatch (StringBuffer fo l# s# c#) str =
486 loop c# [] = Just (StringBuffer fo l# s# c#)
488 | indexCharOffAddr# fo c# `eqChar#` x#
493 untilEndOfString# :: StringBuffer -> StringBuffer
494 untilEndOfString# (StringBuffer fo l# s# c#) =
497 getch# i# = indexCharOffAddr# fo i#
502 case getch# (c# -# 1#) of
504 -- looks like an escaped something or other to me,
505 -- better count the number of "\\"s that are immediately
506 -- preceeding to decide if the " is escaped.
510 '\\'# -> odd_slashes (not flg) (i# -# 1#)
513 if odd_slashes True (c# -# 2#) then
514 -- odd number, " is ecaped.
516 else -- a real end of string delimiter after all.
517 StringBuffer fo l# s# c#
518 _ -> StringBuffer fo l# s# c#
520 if c# >=# l# then -- hit sentinel, this doesn't look too good..
521 StringBuffer fo l# l# l#
527 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
528 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
532 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
533 = StringBuffer fo l# c# c#
538 lexemeToString :: StringBuffer -> String
539 lexemeToString (StringBuffer fo _ start_pos# current#) =
540 if start_pos# ==# current# then
543 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
545 lexemeToByteArray :: StringBuffer -> ByteArray Int
546 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
547 if start_pos# ==# current# then
548 error "lexemeToByteArray"
550 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
552 lexemeToFastString :: StringBuffer -> FastString
553 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
554 if start_pos# ==# current# then
555 mkFastCharString2 (A# fo) (I# 0#)
557 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
560 Create a StringBuffer from the current lexeme, and add a sentinel
561 at the end. Know What You're Doing before taking this function
564 lexemeToBuffer :: StringBuffer -> StringBuffer
565 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
566 if start_pos# ==# current# then
567 StringBuffer fo 0# start_pos# current# -- an error, really.
569 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)