2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
14 -- creation/destruction
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
16 stringToStringBuffer, -- :: String -> IO StringBuffer
17 freeStringBuffer, -- :: StringBuffer -> IO ()
20 currentChar, -- :: StringBuffer -> Char
21 currentChar#, -- :: StringBuffer -> Char#
22 indexSBuffer, -- :: StringBuffer -> Int -> Char
23 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
24 -- relative lookup, i.e, currentChar = lookAhead 0
25 lookAhead, -- :: StringBuffer -> Int -> Char
26 lookAhead#, -- :: StringBuffer -> Int# -> Char#
29 currentIndex#, -- :: StringBuffer -> Int#
30 lexemeIndex, -- :: StringBuffer -> Int#
32 -- moving the end point of the current lexeme.
33 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
34 incLexeme, -- :: StringBuffer -> StringBuffer
35 decLexeme, -- :: StringBuffer -> StringBuffer
37 -- move the start and end lexeme pointer on by x units.
38 stepOn, -- :: StringBuffer -> StringBuffer
39 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
40 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
41 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
42 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
43 stepOverLexeme, -- :: StringBuffer -> StringBuffer
44 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
45 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
46 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
47 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
48 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
49 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
50 -- at or beyond end of buffer?
51 bufferExhausted, -- :: StringBuffer -> Bool
52 emptyLexeme, -- :: StringBuffer -> Bool
55 prefixMatch, -- :: StringBuffer -> String -> Bool
56 untilEndOfString#, -- :: StringBuffer -> Int#
59 lexemeToString, -- :: StringBuffer -> String
60 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
61 lexemeToFastString, -- :: StringBuffer -> FastString
62 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
68 #include "HsVersions.h"
71 #if __GLASGOW_HASKELL__ < 411
72 import PrelAddr ( Addr(..) )
74 import Addr ( Addr(..) )
78 import Panic ( panic )
81 #include "../lib/std/cbits/stgerror.h"
83 #if __GLASGOW_HASKELL__ >= 303
85 #if __GLASGOW_HASKELL__ < 407
86 , slurpFile -- comes from PrelHandle or IOExts now
93 import IO ( openFile, hFileSize, hClose, IOMode(..) )
96 #if __GLASGOW_HASKELL__ >= 411
97 import Ptr ( Ptr(..) )
100 #if __GLASGOW_HASKELL__ < 301
101 import IOBase ( Handle, IOError(..), IOErrorType(..),
102 constructErrorAndFail )
103 import IOHandle ( readHandle, writeHandle, filePtr )
104 import PackBase ( unpackCStringBA )
106 # if __GLASGOW_HASKELL__ <= 302
107 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
108 constructErrorAndFail )
109 import PrelHandle ( readHandle, writeHandle, filePtr )
111 import PrelPack ( unpackCStringBA )
114 #if __GLASGOW_HASKELL__ < 402
115 import Util ( bracket )
117 import Exception ( bracket )
122 import Char (isDigit)
135 instance Show StringBuffer where
136 showsPrec _ s = showString ""
140 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
141 hGetStringBuffer expand_tabs fname = do
142 (a, read) <- if expand_tabs
143 then slurpFileExpandTabs fname
144 #if __GLASGOW_HASKELL__ < 411
148 (Ptr a#, read) <- slurpFile fname
152 let (A# a#) = a; (I# read#) = read
154 -- add sentinel '\NUL'
155 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
156 return (StringBuffer a# read# 0# 0#)
158 #if __GLASGOW_HASKELL__ < 303
160 openFile fname ReadMode >>= \ hndl ->
161 hFileSize hndl >>= \ len ->
162 let len_i = fromInteger len in
163 -- Allocate an array for system call to store its bytes into.
164 -- ToDo: make it robust
165 -- trace (show ((len_i::Int)+1)) $
166 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
167 if addr2Int# a# ==# 0# then
168 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
170 readHandle hndl >>= \ hndl_ ->
171 writeHandle hndl hndl_ >>
172 let ptr = filePtr hndl_ in
173 #if __GLASGOW_HASKELL__ <= 302
174 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
176 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
179 if read# ==# 0# then -- EOF or some other error
180 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
182 return (arr, I# read#)
185 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
186 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
188 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
193 -----------------------------------------------------------------------------
194 -- Turn a String into a StringBuffer
197 stringToStringBuffer :: String -> IO StringBuffer
198 freeStringBuffer :: StringBuffer -> IO ()
200 #if __GLASGOW_HASKELL__ >= 411
201 stringToStringBuffer str =
202 do let sz@(I# sz#) = length str + 1
203 (Ptr a#) <- mallocBytes sz
205 writeCharOffAddr (A# a#) (sz-1) '\0' -- sentinel
206 return (StringBuffer a# sz# 0# 0#)
208 fill_in [] _ = return ()
209 fill_in (c:cs) a = do
210 writeCharOffAddr a 0 c
211 fill_in cs (a `plusAddr` 1)
213 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
215 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
216 freeStringBuffer sb = return ()
221 -----------------------------------------------------------------------------
222 This very disturbing bit of code is used for expanding the tabs in a
223 file before we start parsing it. Expanding the tabs early makes the
224 lexer a lot simpler: we only have to record the beginning of the line
225 in order to be able to calculate the column offset of the current
228 We guess the size of the buffer required as 20% extra for
229 expanded tabs, and enlarge it if necessary.
232 #if __GLASGOW_HASKELL__ < 303
233 mayBlock fo thing = thing
235 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
236 writeCharOffAddr addr off c
237 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
241 #if __GLASGOW_HASKELL__ < 303
242 getErrType = _casm_ ``%r = ghc_errtype;''
244 getErrType = _ccall_ getErrType__
247 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
248 slurpFileExpandTabs fname = do
249 bracket (openFile fname ReadMode) (hClose)
251 do sz <- hFileSize handle
252 if sz > toInteger (maxBound::Int)
253 then IOERROR (userError "slurpFile: file too big")
255 let sz_i = fromInteger sz
256 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
257 chunk <- allocMem sz_i'
258 trySlurp handle sz_i' chunk
261 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
262 trySlurp handle sz_i chunk =
263 #if __GLASGOW_HASKELL__ == 303
264 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
265 let fo = haFO__ handle_ in
266 #elif __GLASGOW_HASKELL__ > 303
267 wantReadableHandle "hGetChar" handle $ \ handle_ ->
268 let fo = haFO__ handle_ in
270 readHandle handle >>= \ handle_ ->
271 let fo = filePtr handle_ in
278 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
279 slurpFile c off chunk chunk_sz max_off = slurp c off
282 slurp :: Int# -> Int# -> IO (Addr, Int)
283 slurp c off | off >=# max_off = do
284 let new_sz = chunk_sz *# 2#
285 chunk' <- reAllocMem chunk (I# new_sz)
286 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
288 intc <- mayBlock fo (_ccall_ fileGetc fo)
289 if intc == ((-1)::Int)
290 then do errtype <- getErrType
291 if errtype == (ERR_EOF :: Int)
292 then return (chunk, I# off)
293 else constructErrorAndFail "slurpFile"
294 else case chr intc of
296 ch -> do writeCharOffAddr chunk (I# off) ch
297 let c' | ch == '\n' = 0#
298 | otherwise = c +# 1#
301 tabIt :: Int# -> Int# -> IO (Addr, Int)
302 -- can't run out of buffer in here, because we reserved an
303 -- extra tAB_SIZE bytes at the end earlier.
305 writeCharOffAddr chunk (I# off) ' '
308 if c' `remInt#` tAB_SIZE ==# 0#
313 -- allow space for a full tab at the end of the buffer
314 -- (that's what the max_off thing is for),
315 -- and add 1 to allow room for the final sentinel \NUL at
316 -- the end of the file.
317 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
318 #if __GLASGOW_HASKELL__ < 404
319 writeHandle handle handle_
322 then constructErrorAndFail "slurpFile"
323 else return (chunk', rc+1 {-room for sentinel-})
326 reAllocMem :: Addr -> Int -> IO Addr
327 reAllocMem ptr sz = do
328 chunk <- _ccall_ realloc ptr sz
330 #if __GLASGOW_HASKELL__ >= 400
331 then fail "reAllocMem"
333 then fail (userError "reAllocMem")
337 allocMem :: Int -> IO Addr
339 chunk <- _ccall_ malloc sz
340 #if __GLASGOW_HASKELL__ < 303
342 then fail (userError "allocMem")
346 then constructErrorAndFail "allocMem"
354 currentChar :: StringBuffer -> Char
355 currentChar sb = case currentChar# sb of c -> C# c
357 lookAhead :: StringBuffer -> Int -> Char
358 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
360 indexSBuffer :: StringBuffer -> Int -> Char
361 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
363 currentChar# :: StringBuffer -> Char#
364 indexSBuffer# :: StringBuffer -> Int# -> Char#
365 lookAhead# :: StringBuffer -> Int# -> Char#
366 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
367 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
369 -- relative lookup, i.e, currentChar = lookAhead 0
370 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
372 currentIndex# :: StringBuffer -> Int#
373 currentIndex# (StringBuffer fo# _ _ c#) = c#
375 lexemeIndex :: StringBuffer -> Int#
376 lexemeIndex (StringBuffer fo# _ c# _) = c#
379 moving the start point of the current lexeme.
382 -- moving the end point of the current lexeme.
383 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
384 setCurrentPos# (StringBuffer fo l# s# c#) i# =
385 StringBuffer fo l# s# (c# +# i#)
387 -- augmenting the current lexeme by one.
388 incLexeme :: StringBuffer -> StringBuffer
389 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
391 decLexeme :: StringBuffer -> StringBuffer
392 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
396 -- move the start and end point of the buffer on by
400 stepOn :: StringBuffer -> StringBuffer
401 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
403 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
404 stepOnBy# (StringBuffer fo# l# s# c#) i# =
406 new_s# -> StringBuffer fo# l# new_s# new_s#
409 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
410 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
412 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
413 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
415 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
416 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
417 = StringBuffer fo l s# c#
419 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
421 stepOnUntil pred (StringBuffer fo l# s# c#) =
425 case indexCharOffAddr# fo c# of
426 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
427 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
428 | otherwise -> loop (c# +# 1#)
430 stepOverLexeme :: StringBuffer -> StringBuffer
431 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
433 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
434 expandWhile pred (StringBuffer fo l# s# c#) =
438 case indexCharOffAddr# fo c# of
439 ch# | pred (C# ch#) -> loop (c# +# 1#)
440 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
441 | otherwise -> StringBuffer fo l# s# c#
443 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
444 expandWhile# pred (StringBuffer fo l# s# c#) =
448 case indexCharOffAddr# fo c# of
449 ch# | pred ch# -> loop (c# +# 1#)
450 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
451 | otherwise -> StringBuffer fo l# s# c#
453 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
454 scanNumLit acc (StringBuffer fo l# s# c#) =
458 case indexCharOffAddr# fo c# of
459 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
460 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
461 | otherwise -> (acc,StringBuffer fo l# s# c#)
464 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
465 expandUntilMatch (StringBuffer fo l# s# c#) str =
468 loop c# [] = Just (StringBuffer fo l# s# c#)
469 loop c# ((C# x#):xs) =
470 case indexCharOffAddr# fo c# of
471 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
472 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
473 | otherwise -> loop (c# +# 1#) str
478 -- at or beyond end of buffer?
479 bufferExhausted :: StringBuffer -> Bool
480 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
482 emptyLexeme :: StringBuffer -> Bool
483 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
486 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
487 prefixMatch (StringBuffer fo l# s# c#) str =
490 loop c# [] = Just (StringBuffer fo l# s# c#)
492 | indexCharOffAddr# fo c# `eqChar#` x#
497 untilEndOfString# :: StringBuffer -> StringBuffer
498 untilEndOfString# (StringBuffer fo l# s# c#) =
501 getch# i# = indexCharOffAddr# fo i#
506 case getch# (c# -# 1#) of
508 -- looks like an escaped something or other to me,
509 -- better count the number of "\\"s that are immediately
510 -- preceeding to decide if the " is escaped.
514 '\\'# -> odd_slashes (not flg) (i# -# 1#)
517 if odd_slashes True (c# -# 2#) then
518 -- odd number, " is ecaped.
520 else -- a real end of string delimiter after all.
521 StringBuffer fo l# s# c#
522 _ -> StringBuffer fo l# s# c#
524 if c# >=# l# then -- hit sentinel, this doesn't look too good..
525 StringBuffer fo l# l# l#
531 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
532 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
536 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
537 = StringBuffer fo l# c# c#
542 lexemeToString :: StringBuffer -> String
543 lexemeToString (StringBuffer fo _ start_pos# current#) =
544 if start_pos# ==# current# then
547 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
549 lexemeToByteArray :: StringBuffer -> ByteArray Int
550 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
551 if start_pos# ==# current# then
552 error "lexemeToByteArray"
554 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
556 lexemeToFastString :: StringBuffer -> FastString
557 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
558 if start_pos# ==# current# then
559 mkFastCharString2 (A# fo) (I# 0#)
561 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
564 Create a StringBuffer from the current lexeme, and add a sentinel
565 at the end. Know What You're Doing before taking this function
568 lexemeToBuffer :: StringBuffer -> StringBuffer
569 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
570 if start_pos# ==# current# then
571 StringBuffer fo 0# start_pos# current# -- an error, really.
573 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)