2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
75 import Panic ( panic )
77 import Addr ( Addr(..) )
78 import Ptr ( Ptr(..) )
81 #if __GLASGOW_HASKELL__ < 501
83 #elif __GLASGOW_HASKELL__ < 503
84 import PrelIO ( hGetcBuffered )
86 import GHC.IO ( hGetcBuffered )
94 import IO ( openFile, isEOFError )
95 import IOExts ( slurpFile )
97 import Exception ( bracket )
99 import CString ( unpackCStringBA )
101 #if __GLASGOW_HASKELL__ < 503
109 import Char ( isDigit )
122 instance Show StringBuffer where
123 showsPrec _ s = showString ""
127 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
128 hGetStringBuffer expand_tabs fname = do
129 (a, read) <- if expand_tabs
130 then slurpFileExpandTabs fname
131 #if __GLASGOW_HASKELL__ < 411
135 (Ptr a#, read) <- slurpFile fname
139 -- urk! slurpFile gives us a buffer that doesn't have room for
140 -- the sentinel. Assume it has a final newline for now, and overwrite
141 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
147 -- add sentinel '\NUL'
148 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
149 return (StringBuffer a# end# 0# 0#)
151 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
152 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
154 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
159 -----------------------------------------------------------------------------
160 -- Turn a String into a StringBuffer
163 stringToStringBuffer :: String -> IO StringBuffer
164 freeStringBuffer :: StringBuffer -> IO ()
166 #if __GLASGOW_HASKELL__ >= 411
167 stringToStringBuffer str =
168 do let sz@(I# sz#) = length str
169 (Ptr a#) <- mallocBytes (sz+1)
171 writeCharOffAddr (A# a#) sz '\0' -- sentinel
172 return (StringBuffer a# sz# 0# 0#)
174 fill_in [] _ = return ()
175 fill_in (c:cs) a = do
176 writeCharOffAddr a 0 c
177 fill_in cs (a `plusAddr` 1)
179 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
181 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
182 freeStringBuffer sb = return ()
187 -----------------------------------------------------------------------------
188 This very disturbing bit of code is used for expanding the tabs in a
189 file before we start parsing it. Expanding the tabs early makes the
190 lexer a lot simpler: we only have to record the beginning of the line
191 in order to be able to calculate the column offset of the current
194 We guess the size of the buffer required as 20% extra for
195 expanded tabs, and enlarge it if necessary.
199 getErrType = _ccall_ getErrType__
201 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
202 slurpFileExpandTabs fname = do
203 bracket (openFile fname ReadMode) (hClose)
205 do sz <- hFileSize handle
206 if sz > toInteger (maxBound::Int)
207 then ioError (userError "slurpFile: file too big")
209 let sz_i = fromInteger sz
211 -- empty file: just allocate a buffer containing '\0'
212 then do chunk <- allocMem 1
213 writeCharOffAddr chunk 0 '\0'
215 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
216 chunk <- allocMem sz_i'
217 trySlurp handle sz_i' chunk
220 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
221 trySlurp handle sz_i chunk =
222 #if __GLASGOW_HASKELL__ < 501
223 wantReadableHandle "hGetChar" handle $ \ handle_ ->
224 let fo = haFO__ handle_ in
226 wantReadableHandle "hGetChar" handle $
227 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
234 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
235 slurpFile c off chunk chunk_sz max_off = slurp c off
238 slurp :: Int# -> Int# -> IO (Addr, Int)
239 slurp c off | off >=# max_off = do
240 let new_sz = chunk_sz *# 2#
241 chunk' <- reAllocMem chunk (I# new_sz)
242 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
244 #if __GLASGOW_HASKELL__ < 501
245 intc <- mayBlock fo (_ccall_ fileGetc fo)
246 if intc == ((-1)::Int)
247 then do errtype <- getErrType
248 if errtype == (19{-ERR_EOF-} :: Int)
249 then return (chunk, I# off)
250 else constructErrorAndFail "slurpFile"
251 else case chr intc of
254 ch <- (if not (bufferEmpty buf)
255 then hGetcBuffered fd ref buf
257 #if __GLASGOW_HASKELL__ >= 503
258 new_buf <- fillReadBuffer fd True False buf
260 new_buf <- fillReadBuffer fd True buf
262 hGetcBuffered fd ref new_buf)
263 `catch` \e -> if isEOFError e
267 '\xFFFF' -> return (chunk, I# off)
270 ch -> do writeCharOffAddr chunk (I# off) ch
271 let c' | ch == '\n' = 0#
272 | otherwise = c +# 1#
275 tabIt :: Int# -> Int# -> IO (Addr, Int)
276 -- can't run out of buffer in here, because we reserved an
277 -- extra tAB_SIZE bytes at the end earlier.
279 writeCharOffAddr chunk (I# off) ' '
282 if c' `remInt#` tAB_SIZE ==# 0#
287 -- allow space for a full tab at the end of the buffer
288 -- (that's what the max_off thing is for),
289 -- and add 1 to allow room for the final sentinel \NUL at
290 -- the end of the file.
291 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
292 #if __GLASGOW_HASKELL__ < 404
293 writeHandle handle handle_
295 return (chunk', rc+1 {- room for sentinel -})
298 reAllocMem :: Addr -> Int -> IO Addr
299 reAllocMem ptr sz = do
300 chunk <- _ccall_ realloc ptr sz
302 then fail "reAllocMem"
305 allocMem :: Int -> IO Addr
307 chunk <- _ccall_ malloc sz
309 #if __GLASGOW_HASKELL__ < 501
310 then constructErrorAndFail "allocMem"
312 then ioException (IOError Nothing ResourceExhausted "malloc"
313 "out of memory" Nothing)
321 currentChar :: StringBuffer -> Char
322 currentChar sb = case currentChar# sb of c -> C# c
324 lookAhead :: StringBuffer -> Int -> Char
325 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
327 indexSBuffer :: StringBuffer -> Int -> Char
328 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
330 currentChar# :: StringBuffer -> Char#
331 indexSBuffer# :: StringBuffer -> Int# -> Char#
332 lookAhead# :: StringBuffer -> Int# -> Char#
333 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
334 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
336 -- relative lookup, i.e, currentChar = lookAhead 0
337 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
339 currentIndex# :: StringBuffer -> Int#
340 currentIndex# (StringBuffer fo# _ _ c#) = c#
342 lexemeIndex :: StringBuffer -> Int#
343 lexemeIndex (StringBuffer fo# _ c# _) = c#
346 moving the start point of the current lexeme.
349 -- moving the end point of the current lexeme.
350 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
351 setCurrentPos# (StringBuffer fo l# s# c#) i# =
352 StringBuffer fo l# s# (c# +# i#)
354 -- augmenting the current lexeme by one.
355 incLexeme :: StringBuffer -> StringBuffer
356 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
358 decLexeme :: StringBuffer -> StringBuffer
359 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
363 -- move the start and end point of the buffer on by
367 stepOn :: StringBuffer -> StringBuffer
368 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
370 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
371 stepOnBy# (StringBuffer fo# l# s# c#) i# =
373 new_s# -> StringBuffer fo# l# new_s# new_s#
376 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
377 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
379 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
380 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
382 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
383 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
384 = StringBuffer fo l s# c#
386 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
388 stepOnUntil pred (StringBuffer fo l# s# c#) =
392 case indexCharOffAddr# fo c# of
393 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
394 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
395 | otherwise -> loop (c# +# 1#)
397 stepOverLexeme :: StringBuffer -> StringBuffer
398 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
400 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
401 expandWhile pred (StringBuffer fo l# s# c#) =
405 case indexCharOffAddr# fo c# of
406 ch# | pred (C# ch#) -> loop (c# +# 1#)
407 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
408 | otherwise -> StringBuffer fo l# s# c#
410 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
411 expandWhile# pred (StringBuffer fo l# s# c#) =
415 case indexCharOffAddr# fo c# of
416 ch# | pred ch# -> loop (c# +# 1#)
417 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
418 | otherwise -> StringBuffer fo l# s# c#
420 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
421 scanNumLit acc (StringBuffer fo l# s# c#) =
425 case indexCharOffAddr# fo c# of
426 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
427 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
428 | otherwise -> (acc,StringBuffer fo l# s# c#)
431 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
432 expandUntilMatch (StringBuffer fo l# s# c#) str =
435 loop c# [] = Just (StringBuffer fo l# s# c#)
436 loop c# ((C# x#):xs) =
437 case indexCharOffAddr# fo c# of
438 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
439 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
440 | otherwise -> loop (c# +# 1#) str
445 -- at or beyond end of buffer?
446 bufferExhausted :: StringBuffer -> Bool
447 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
449 emptyLexeme :: StringBuffer -> Bool
450 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
453 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
454 prefixMatch (StringBuffer fo l# s# c#) str =
457 loop c# [] = Just (StringBuffer fo l# s# c#)
459 | indexCharOffAddr# fo c# `eqChar#` x#
464 untilEndOfString# :: StringBuffer -> StringBuffer
465 untilEndOfString# (StringBuffer fo l# s# c#) =
468 getch# i# = indexCharOffAddr# fo i#
473 case getch# (c# -# 1#) of
475 -- looks like an escaped something or other to me,
476 -- better count the number of "\\"s that are immediately
477 -- preceeding to decide if the " is escaped.
481 '\\'# -> odd_slashes (not flg) (i# -# 1#)
484 if odd_slashes True (c# -# 2#) then
485 -- odd number, " is ecaped.
487 else -- a real end of string delimiter after all.
488 StringBuffer fo l# s# c#
489 _ -> StringBuffer fo l# s# c#
491 if c# >=# l# then -- hit sentinel, this doesn't look too good..
492 StringBuffer fo l# l# l#
498 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
499 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
503 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
504 = StringBuffer fo l# c# c#
509 lexemeToString :: StringBuffer -> String
510 lexemeToString (StringBuffer fo _ start_pos# current#) =
511 if start_pos# ==# current# then
514 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
516 lexemeToByteArray :: StringBuffer -> ByteArray Int
517 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
518 if start_pos# ==# current# then
519 error "lexemeToByteArray"
521 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
523 lexemeToFastString :: StringBuffer -> FastString
524 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
525 if start_pos# ==# current# then
528 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
531 Create a StringBuffer from the current lexeme, and add a sentinel
532 at the end. Know What You're Doing before taking this function
535 lexemeToBuffer :: StringBuffer -> StringBuffer
536 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
537 if start_pos# ==# current# then
538 StringBuffer fo 0# start_pos# current# -- an error, really.
540 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)