2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
75 import Panic ( panic )
77 import Addr ( Addr(..) )
78 import Ptr ( Ptr(..) )
81 #if __GLASGOW_HASKELL__ >= 501
82 import PrelIO ( hGetcBuffered )
90 import IO ( openFile )
91 import IOExts ( slurpFile )
96 import PrelPack ( unpackCStringBA )
98 import Exception ( bracket )
101 import Char ( isDigit )
114 instance Show StringBuffer where
115 showsPrec _ s = showString ""
119 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
120 hGetStringBuffer expand_tabs fname = do
121 (a, read) <- if expand_tabs
122 then slurpFileExpandTabs fname
123 #if __GLASGOW_HASKELL__ < 411
127 (Ptr a#, read) <- slurpFile fname
131 -- urk! slurpFile gives us a buffer that doesn't have room for
132 -- the sentinel. Assume it has a final newline for now, and overwrite
133 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
139 -- add sentinel '\NUL'
140 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
141 return (StringBuffer a# end# 0# 0#)
143 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
144 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
146 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
151 -----------------------------------------------------------------------------
152 -- Turn a String into a StringBuffer
155 stringToStringBuffer :: String -> IO StringBuffer
156 freeStringBuffer :: StringBuffer -> IO ()
158 #if __GLASGOW_HASKELL__ >= 411
159 stringToStringBuffer str =
160 do let sz@(I# sz#) = length str
161 (Ptr a#) <- mallocBytes (sz+1)
163 writeCharOffAddr (A# a#) sz '\0' -- sentinel
164 return (StringBuffer a# sz# 0# 0#)
166 fill_in [] _ = return ()
167 fill_in (c:cs) a = do
168 writeCharOffAddr a 0 c
169 fill_in cs (a `plusAddr` 1)
171 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
173 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
174 freeStringBuffer sb = return ()
179 -----------------------------------------------------------------------------
180 This very disturbing bit of code is used for expanding the tabs in a
181 file before we start parsing it. Expanding the tabs early makes the
182 lexer a lot simpler: we only have to record the beginning of the line
183 in order to be able to calculate the column offset of the current
186 We guess the size of the buffer required as 20% extra for
187 expanded tabs, and enlarge it if necessary.
191 getErrType = _ccall_ getErrType__
193 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
194 slurpFileExpandTabs fname = do
195 bracket (openFile fname ReadMode) (hClose)
197 do sz <- hFileSize handle
198 if sz > toInteger (maxBound::Int)
199 then ioError (userError "slurpFile: file too big")
201 let sz_i = fromInteger sz
203 -- empty file: just allocate a buffer containing '\0'
204 then do chunk <- allocMem 1
205 writeCharOffAddr chunk 0 '\0'
207 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
208 chunk <- allocMem sz_i'
209 trySlurp handle sz_i' chunk
212 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
213 trySlurp handle sz_i chunk =
214 #if __GLASGOW_HASKELL__ < 501
215 wantReadableHandle "hGetChar" handle $ \ handle_ ->
216 let fo = haFO__ handle_ in
218 wantReadableHandle "hGetChar" handle $
219 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
226 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
227 slurpFile c off chunk chunk_sz max_off = slurp c off
230 slurp :: Int# -> Int# -> IO (Addr, Int)
231 slurp c off | off >=# max_off = do
232 let new_sz = chunk_sz *# 2#
233 chunk' <- reAllocMem chunk (I# new_sz)
234 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
236 #if __GLASGOW_HASKELL__ < 501
237 intc <- mayBlock fo (_ccall_ fileGetc fo)
238 if intc == ((-1)::Int)
239 then do errtype <- getErrType
240 if errtype == (19{-ERR_EOF-} :: Int)
241 then return (chunk, I# off)
242 else constructErrorAndFail "slurpFile"
243 else case chr intc of
246 ch <- (if not (bufferEmpty buf)
247 then hGetcBuffered fd ref buf
249 #if __GLASGOW_HASKELL__ >= 503
250 new_buf <- fillReadBuffer fd True False buf
252 new_buf <- fillReadBuffer fd True buf
254 hGetcBuffered fd ref new_buf)
255 `catch` \e -> if isEOFError e
259 '\xFFFF' -> return (chunk, I# off)
262 ch -> do writeCharOffAddr chunk (I# off) ch
263 let c' | ch == '\n' = 0#
264 | otherwise = c +# 1#
267 tabIt :: Int# -> Int# -> IO (Addr, Int)
268 -- can't run out of buffer in here, because we reserved an
269 -- extra tAB_SIZE bytes at the end earlier.
271 writeCharOffAddr chunk (I# off) ' '
274 if c' `remInt#` tAB_SIZE ==# 0#
279 -- allow space for a full tab at the end of the buffer
280 -- (that's what the max_off thing is for),
281 -- and add 1 to allow room for the final sentinel \NUL at
282 -- the end of the file.
283 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
284 #if __GLASGOW_HASKELL__ < 404
285 writeHandle handle handle_
287 return (chunk', rc+1 {- room for sentinel -})
290 reAllocMem :: Addr -> Int -> IO Addr
291 reAllocMem ptr sz = do
292 chunk <- _ccall_ realloc ptr sz
294 then fail "reAllocMem"
297 allocMem :: Int -> IO Addr
299 chunk <- _ccall_ malloc sz
301 #if __GLASGOW_HASKELL__ < 501
302 then constructErrorAndFail "allocMem"
304 then ioException (IOError Nothing ResourceExhausted "malloc"
305 "out of memory" Nothing)
313 currentChar :: StringBuffer -> Char
314 currentChar sb = case currentChar# sb of c -> C# c
316 lookAhead :: StringBuffer -> Int -> Char
317 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
319 indexSBuffer :: StringBuffer -> Int -> Char
320 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
322 currentChar# :: StringBuffer -> Char#
323 indexSBuffer# :: StringBuffer -> Int# -> Char#
324 lookAhead# :: StringBuffer -> Int# -> Char#
325 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
326 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
328 -- relative lookup, i.e, currentChar = lookAhead 0
329 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
331 currentIndex# :: StringBuffer -> Int#
332 currentIndex# (StringBuffer fo# _ _ c#) = c#
334 lexemeIndex :: StringBuffer -> Int#
335 lexemeIndex (StringBuffer fo# _ c# _) = c#
338 moving the start point of the current lexeme.
341 -- moving the end point of the current lexeme.
342 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
343 setCurrentPos# (StringBuffer fo l# s# c#) i# =
344 StringBuffer fo l# s# (c# +# i#)
346 -- augmenting the current lexeme by one.
347 incLexeme :: StringBuffer -> StringBuffer
348 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
350 decLexeme :: StringBuffer -> StringBuffer
351 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
355 -- move the start and end point of the buffer on by
359 stepOn :: StringBuffer -> StringBuffer
360 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
362 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
363 stepOnBy# (StringBuffer fo# l# s# c#) i# =
365 new_s# -> StringBuffer fo# l# new_s# new_s#
368 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
369 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
371 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
372 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
374 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
375 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
376 = StringBuffer fo l s# c#
378 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
380 stepOnUntil pred (StringBuffer fo l# s# c#) =
384 case indexCharOffAddr# fo c# of
385 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
386 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
387 | otherwise -> loop (c# +# 1#)
389 stepOverLexeme :: StringBuffer -> StringBuffer
390 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
392 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
393 expandWhile pred (StringBuffer fo l# s# c#) =
397 case indexCharOffAddr# fo c# of
398 ch# | pred (C# ch#) -> loop (c# +# 1#)
399 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
400 | otherwise -> StringBuffer fo l# s# c#
402 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
403 expandWhile# pred (StringBuffer fo l# s# c#) =
407 case indexCharOffAddr# fo c# of
408 ch# | pred ch# -> loop (c# +# 1#)
409 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
410 | otherwise -> StringBuffer fo l# s# c#
412 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
413 scanNumLit acc (StringBuffer fo l# s# c#) =
417 case indexCharOffAddr# fo c# of
418 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
419 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
420 | otherwise -> (acc,StringBuffer fo l# s# c#)
423 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
424 expandUntilMatch (StringBuffer fo l# s# c#) str =
427 loop c# [] = Just (StringBuffer fo l# s# c#)
428 loop c# ((C# x#):xs) =
429 case indexCharOffAddr# fo c# of
430 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
431 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
432 | otherwise -> loop (c# +# 1#) str
437 -- at or beyond end of buffer?
438 bufferExhausted :: StringBuffer -> Bool
439 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
441 emptyLexeme :: StringBuffer -> Bool
442 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
445 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
446 prefixMatch (StringBuffer fo l# s# c#) str =
449 loop c# [] = Just (StringBuffer fo l# s# c#)
451 | indexCharOffAddr# fo c# `eqChar#` x#
456 untilEndOfString# :: StringBuffer -> StringBuffer
457 untilEndOfString# (StringBuffer fo l# s# c#) =
460 getch# i# = indexCharOffAddr# fo i#
465 case getch# (c# -# 1#) of
467 -- looks like an escaped something or other to me,
468 -- better count the number of "\\"s that are immediately
469 -- preceeding to decide if the " is escaped.
473 '\\'# -> odd_slashes (not flg) (i# -# 1#)
476 if odd_slashes True (c# -# 2#) then
477 -- odd number, " is ecaped.
479 else -- a real end of string delimiter after all.
480 StringBuffer fo l# s# c#
481 _ -> StringBuffer fo l# s# c#
483 if c# >=# l# then -- hit sentinel, this doesn't look too good..
484 StringBuffer fo l# l# l#
490 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
491 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
495 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
496 = StringBuffer fo l# c# c#
501 lexemeToString :: StringBuffer -> String
502 lexemeToString (StringBuffer fo _ start_pos# current#) =
503 if start_pos# ==# current# then
506 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
508 lexemeToByteArray :: StringBuffer -> ByteArray Int
509 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
510 if start_pos# ==# current# then
511 error "lexemeToByteArray"
513 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
515 lexemeToFastString :: StringBuffer -> FastString
516 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
517 if start_pos# ==# current# then
518 mkFastCharString2 (A# fo) (I# 0#)
520 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
523 Create a StringBuffer from the current lexeme, and add a sentinel
524 at the end. Know What You're Doing before taking this function
527 lexemeToBuffer :: StringBuffer -> StringBuffer
528 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
529 if start_pos# ==# current# then
530 StringBuffer fo 0# start_pos# current# -- an error, really.
532 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)