2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
14 -- creation/destruction
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
16 stringToStringBuffer, -- :: String -> IO StringBuffer
17 freeStringBuffer, -- :: StringBuffer -> IO ()
20 currentChar, -- :: StringBuffer -> Char
21 currentChar#, -- :: StringBuffer -> Char#
22 indexSBuffer, -- :: StringBuffer -> Int -> Char
23 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
24 -- relative lookup, i.e, currentChar = lookAhead 0
25 lookAhead, -- :: StringBuffer -> Int -> Char
26 lookAhead#, -- :: StringBuffer -> Int# -> Char#
29 currentIndex#, -- :: StringBuffer -> Int#
30 lexemeIndex, -- :: StringBuffer -> Int#
32 -- moving the end point of the current lexeme.
33 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
34 incLexeme, -- :: StringBuffer -> StringBuffer
35 decLexeme, -- :: StringBuffer -> StringBuffer
37 -- move the start and end lexeme pointer on by x units.
38 stepOn, -- :: StringBuffer -> StringBuffer
39 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
40 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
41 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
42 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
43 stepOverLexeme, -- :: StringBuffer -> StringBuffer
44 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
45 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
46 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
47 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
48 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
49 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
50 -- at or beyond end of buffer?
51 bufferExhausted, -- :: StringBuffer -> Bool
52 emptyLexeme, -- :: StringBuffer -> Bool
55 prefixMatch, -- :: StringBuffer -> String -> Bool
56 untilEndOfString#, -- :: StringBuffer -> Int#
59 lexemeToString, -- :: StringBuffer -> String
60 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
61 lexemeToFastString, -- :: StringBuffer -> FastString
62 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
68 #include "HsVersions.h"
71 #if __GLASGOW_HASKELL__ < 411
72 import PrelAddr ( Addr(..) )
74 import Addr ( Addr(..) )
78 import Panic ( panic )
80 #if __GLASGOW_HASKELL__ >= 303
82 #if __GLASGOW_HASKELL__ < 407
83 , slurpFile -- comes from PrelHandle or IOExts now
88 #if __GLASGOW_HASKELL__ >= 501
89 import IOExts ( slurpFile )
93 import IO ( openFile, hFileSize, hClose, IOMode(..) )
96 #if __GLASGOW_HASKELL__ >= 411
97 import Ptr ( Ptr(..) )
100 #if __GLASGOW_HASKELL__ < 301
101 import IOBase ( Handle, IOError(..), IOErrorType(..),
102 constructErrorAndFail )
103 import IOHandle ( readHandle, writeHandle, filePtr )
104 import PackBase ( unpackCStringBA )
106 # if __GLASGOW_HASKELL__ <= 302
107 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
108 constructErrorAndFail )
109 import PrelHandle ( readHandle, writeHandle, filePtr )
111 import PrelPack ( unpackCStringBA )
113 #if __GLASGOW_HASKELL__ >= 501
114 import PrelIO ( hGetcBuffered )
115 import PrelCError ( throwErrnoIfMinus1RetryMayBlock )
116 import PrelConc ( threadWaitRead )
119 #if __GLASGOW_HASKELL__ < 402
120 import Util ( bracket )
122 import Exception ( bracket )
127 import Char (isDigit)
140 instance Show StringBuffer where
141 showsPrec _ s = showString ""
145 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
146 hGetStringBuffer expand_tabs fname = do
147 (a, read) <- if expand_tabs
148 then slurpFileExpandTabs fname
149 #if __GLASGOW_HASKELL__ < 411
153 (Ptr a#, read) <- slurpFile fname
157 let (A# a#) = a; (I# read#) = read
159 -- add sentinel '\NUL'
160 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
161 return (StringBuffer a# read# 0# 0#)
163 #if __GLASGOW_HASKELL__ < 303
165 openFile fname ReadMode >>= \ hndl ->
166 hFileSize hndl >>= \ len ->
167 let len_i = fromInteger len in
168 -- Allocate an array for system call to store its bytes into.
169 -- ToDo: make it robust
170 -- trace (show ((len_i::Int)+1)) $
171 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
172 if addr2Int# a# ==# 0# then
173 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
175 readHandle hndl >>= \ hndl_ ->
176 writeHandle hndl hndl_ >>
177 let ptr = filePtr hndl_ in
178 #if __GLASGOW_HASKELL__ <= 302
179 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
181 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
184 if read# ==# 0# then -- EOF or some other error
185 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
187 return (arr, I# read#)
190 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
191 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
193 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
198 -----------------------------------------------------------------------------
199 -- Turn a String into a StringBuffer
202 stringToStringBuffer :: String -> IO StringBuffer
203 freeStringBuffer :: StringBuffer -> IO ()
205 #if __GLASGOW_HASKELL__ >= 411
206 stringToStringBuffer str =
207 do let sz@(I# sz#) = length str
208 (Ptr a#) <- mallocBytes (sz+1)
210 writeCharOffAddr (A# a#) sz '\0' -- sentinel
211 return (StringBuffer a# sz# 0# 0#)
213 fill_in [] _ = return ()
214 fill_in (c:cs) a = do
215 writeCharOffAddr a 0 c
216 fill_in cs (a `plusAddr` 1)
218 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
220 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
221 freeStringBuffer sb = return ()
226 -----------------------------------------------------------------------------
227 This very disturbing bit of code is used for expanding the tabs in a
228 file before we start parsing it. Expanding the tabs early makes the
229 lexer a lot simpler: we only have to record the beginning of the line
230 in order to be able to calculate the column offset of the current
233 We guess the size of the buffer required as 20% extra for
234 expanded tabs, and enlarge it if necessary.
237 #if __GLASGOW_HASKELL__ < 303
238 mayBlock fo thing = thing
240 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
241 writeCharOffAddr addr off c
242 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
246 #if __GLASGOW_HASKELL__ < 303
247 getErrType = _casm_ ``%r = ghc_errtype;''
249 getErrType = _ccall_ getErrType__
252 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
253 slurpFileExpandTabs fname = do
254 bracket (openFile fname ReadMode) (hClose)
256 do sz <- hFileSize handle
257 if sz > toInteger (maxBound::Int)
258 then IOERROR (userError "slurpFile: file too big")
260 let sz_i = fromInteger sz
261 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
262 chunk <- allocMem sz_i'
263 trySlurp handle sz_i' chunk
266 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
267 trySlurp handle sz_i chunk =
268 #if __GLASGOW_HASKELL__ < 303
269 readHandle handle >>= \ handle_ ->
270 let fo = filePtr handle_ in
271 #elif __GLASGOW_HASKELL__ == 303
272 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
273 let fo = haFO__ handle_ in
274 #elif __GLASGOW_HASKELL__ < 501
275 wantReadableHandle "hGetChar" handle $ \ handle_ ->
276 let fo = haFO__ handle_ in
278 wantReadableHandle "hGetChar" handle $ \handle_ ->
279 let fd = haFD handle_
280 ref = haBuffer handle_ in
287 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
288 slurpFile c off chunk chunk_sz max_off = slurp c off
291 slurp :: Int# -> Int# -> IO (Addr, Int)
292 slurp c off | off >=# max_off = do
293 let new_sz = chunk_sz *# 2#
294 chunk' <- reAllocMem chunk (I# new_sz)
295 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
297 #if __GLASGOW_HASKELL__ < 501
298 intc <- mayBlock fo (_ccall_ fileGetc fo)
299 if intc == ((-1)::Int)
300 then do errtype <- getErrType
301 if errtype == (19{-ERR_EOF-} :: Int)
302 then return (chunk, I# off)
303 else constructErrorAndFail "slurpFile"
304 else case chr intc of
307 ch <- (if not (bufferEmpty buf)
308 then hGetcBuffered fd ref buf
309 else -- buffer is empty.
310 case haBufferMode handle_ of
312 new_buf <- fillReadBuffer fd True buf
313 hGetcBuffered fd ref new_buf
314 BlockBuffering _ -> do
315 new_buf <- fillReadBuffer fd False buf
316 hGetcBuffered fd ref new_buf
318 -- make use of the minimal buffer we already have
320 r <- throwErrnoIfMinus1RetryMayBlock "hGetChar"
321 (read_off (fromIntegral fd) raw 0 1)
325 else do (c,_) <- readCharFromBuffer raw 0
327 `catch` \e -> if isEOFError e
331 '\xFFFF' -> return (chunk, I# off)
334 ch -> do writeCharOffAddr chunk (I# off) ch
335 let c' | ch == '\n' = 0#
336 | otherwise = c +# 1#
339 tabIt :: Int# -> Int# -> IO (Addr, Int)
340 -- can't run out of buffer in here, because we reserved an
341 -- extra tAB_SIZE bytes at the end earlier.
343 writeCharOffAddr chunk (I# off) ' '
346 if c' `remInt#` tAB_SIZE ==# 0#
351 -- allow space for a full tab at the end of the buffer
352 -- (that's what the max_off thing is for),
353 -- and add 1 to allow room for the final sentinel \NUL at
354 -- the end of the file.
355 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
356 #if __GLASGOW_HASKELL__ < 404
357 writeHandle handle handle_
359 return (chunk', rc+1 {-room for sentinel-})
362 reAllocMem :: Addr -> Int -> IO Addr
363 reAllocMem ptr sz = do
364 chunk <- _ccall_ realloc ptr sz
366 #if __GLASGOW_HASKELL__ >= 400
367 then fail "reAllocMem"
369 then fail (userError "reAllocMem")
373 allocMem :: Int -> IO Addr
375 chunk <- _ccall_ malloc sz
377 #if __GLASGOW_HASKELL__ < 303
378 then fail (userError "allocMem")
379 #elif __GLASGOW_HASKELL__ < 501
380 then constructErrorAndFail "allocMem"
382 then ioException (IOError Nothing ResourceExhausted "malloc"
383 "out of memory" Nothing)
391 currentChar :: StringBuffer -> Char
392 currentChar sb = case currentChar# sb of c -> C# c
394 lookAhead :: StringBuffer -> Int -> Char
395 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
397 indexSBuffer :: StringBuffer -> Int -> Char
398 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
400 currentChar# :: StringBuffer -> Char#
401 indexSBuffer# :: StringBuffer -> Int# -> Char#
402 lookAhead# :: StringBuffer -> Int# -> Char#
403 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
404 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
406 -- relative lookup, i.e, currentChar = lookAhead 0
407 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
409 currentIndex# :: StringBuffer -> Int#
410 currentIndex# (StringBuffer fo# _ _ c#) = c#
412 lexemeIndex :: StringBuffer -> Int#
413 lexemeIndex (StringBuffer fo# _ c# _) = c#
416 moving the start point of the current lexeme.
419 -- moving the end point of the current lexeme.
420 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
421 setCurrentPos# (StringBuffer fo l# s# c#) i# =
422 StringBuffer fo l# s# (c# +# i#)
424 -- augmenting the current lexeme by one.
425 incLexeme :: StringBuffer -> StringBuffer
426 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
428 decLexeme :: StringBuffer -> StringBuffer
429 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
433 -- move the start and end point of the buffer on by
437 stepOn :: StringBuffer -> StringBuffer
438 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
440 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
441 stepOnBy# (StringBuffer fo# l# s# c#) i# =
443 new_s# -> StringBuffer fo# l# new_s# new_s#
446 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
447 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
449 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
450 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
452 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
453 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
454 = StringBuffer fo l s# c#
456 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
458 stepOnUntil pred (StringBuffer fo l# s# c#) =
462 case indexCharOffAddr# fo c# of
463 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
464 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
465 | otherwise -> loop (c# +# 1#)
467 stepOverLexeme :: StringBuffer -> StringBuffer
468 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
470 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
471 expandWhile pred (StringBuffer fo l# s# c#) =
475 case indexCharOffAddr# fo c# of
476 ch# | pred (C# ch#) -> loop (c# +# 1#)
477 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
478 | otherwise -> StringBuffer fo l# s# c#
480 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
481 expandWhile# pred (StringBuffer fo l# s# c#) =
485 case indexCharOffAddr# fo c# of
486 ch# | pred ch# -> loop (c# +# 1#)
487 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
488 | otherwise -> StringBuffer fo l# s# c#
490 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
491 scanNumLit acc (StringBuffer fo l# s# c#) =
495 case indexCharOffAddr# fo c# of
496 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
497 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
498 | otherwise -> (acc,StringBuffer fo l# s# c#)
501 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
502 expandUntilMatch (StringBuffer fo l# s# c#) str =
505 loop c# [] = Just (StringBuffer fo l# s# c#)
506 loop c# ((C# x#):xs) =
507 case indexCharOffAddr# fo c# of
508 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
509 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
510 | otherwise -> loop (c# +# 1#) str
515 -- at or beyond end of buffer?
516 bufferExhausted :: StringBuffer -> Bool
517 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
519 emptyLexeme :: StringBuffer -> Bool
520 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
523 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
524 prefixMatch (StringBuffer fo l# s# c#) str =
527 loop c# [] = Just (StringBuffer fo l# s# c#)
529 | indexCharOffAddr# fo c# `eqChar#` x#
534 untilEndOfString# :: StringBuffer -> StringBuffer
535 untilEndOfString# (StringBuffer fo l# s# c#) =
538 getch# i# = indexCharOffAddr# fo i#
543 case getch# (c# -# 1#) of
545 -- looks like an escaped something or other to me,
546 -- better count the number of "\\"s that are immediately
547 -- preceeding to decide if the " is escaped.
551 '\\'# -> odd_slashes (not flg) (i# -# 1#)
554 if odd_slashes True (c# -# 2#) then
555 -- odd number, " is ecaped.
557 else -- a real end of string delimiter after all.
558 StringBuffer fo l# s# c#
559 _ -> StringBuffer fo l# s# c#
561 if c# >=# l# then -- hit sentinel, this doesn't look too good..
562 StringBuffer fo l# l# l#
568 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
569 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
573 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
574 = StringBuffer fo l# c# c#
579 lexemeToString :: StringBuffer -> String
580 lexemeToString (StringBuffer fo _ start_pos# current#) =
581 if start_pos# ==# current# then
584 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
586 lexemeToByteArray :: StringBuffer -> ByteArray Int
587 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
588 if start_pos# ==# current# then
589 error "lexemeToByteArray"
591 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
593 lexemeToFastString :: StringBuffer -> FastString
594 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
595 if start_pos# ==# current# then
596 mkFastCharString2 (A# fo) (I# 0#)
598 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
601 Create a StringBuffer from the current lexeme, and add a sentinel
602 at the end. Know What You're Doing before taking this function
605 lexemeToBuffer :: StringBuffer -> StringBuffer
606 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
607 if start_pos# ==# current# then
608 StringBuffer fo 0# start_pos# current# -- an error, really.
610 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)