2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
76 import Addr ( Addr(..) )
80 import Panic ( panic )
82 import IO ( openFile )
83 import IOExts ( slurpFile )
87 #if __GLASGOW_HASKELL__ >= 411
88 import Ptr ( Ptr(..) )
91 import PrelPack ( unpackCStringBA )
93 #if __GLASGOW_HASKELL__ >= 501
94 import PrelIO ( hGetcBuffered )
95 import PrelCError ( throwErrnoIfMinus1RetryMayBlock )
96 import PrelConc ( threadWaitRead )
99 import Exception ( bracket )
102 import Char ( isDigit )
115 instance Show StringBuffer where
116 showsPrec _ s = showString ""
120 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
121 hGetStringBuffer expand_tabs fname = do
122 (a, read) <- if expand_tabs
123 then slurpFileExpandTabs fname
124 #if __GLASGOW_HASKELL__ < 411
128 (Ptr a#, read) <- slurpFile fname
132 -- urk! slurpFile gives us a buffer that doesn't have room for
133 -- the sentinel. Assume it has a final newline for now, and overwrite
134 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
140 -- add sentinel '\NUL'
141 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
142 return (StringBuffer a# end# 0# 0#)
144 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
145 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
147 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
152 -----------------------------------------------------------------------------
153 -- Turn a String into a StringBuffer
156 stringToStringBuffer :: String -> IO StringBuffer
157 freeStringBuffer :: StringBuffer -> IO ()
159 #if __GLASGOW_HASKELL__ >= 411
160 stringToStringBuffer str =
161 do let sz@(I# sz#) = length str
162 (Ptr a#) <- mallocBytes (sz+1)
164 writeCharOffAddr (A# a#) sz '\0' -- sentinel
165 return (StringBuffer a# sz# 0# 0#)
167 fill_in [] _ = return ()
168 fill_in (c:cs) a = do
169 writeCharOffAddr a 0 c
170 fill_in cs (a `plusAddr` 1)
172 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
174 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
175 freeStringBuffer sb = return ()
180 -----------------------------------------------------------------------------
181 This very disturbing bit of code is used for expanding the tabs in a
182 file before we start parsing it. Expanding the tabs early makes the
183 lexer a lot simpler: we only have to record the beginning of the line
184 in order to be able to calculate the column offset of the current
187 We guess the size of the buffer required as 20% extra for
188 expanded tabs, and enlarge it if necessary.
192 getErrType = _ccall_ getErrType__
194 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
195 slurpFileExpandTabs fname = do
196 bracket (openFile fname ReadMode) (hClose)
198 do sz <- hFileSize handle
199 if sz > toInteger (maxBound::Int)
200 then IOERROR (userError "slurpFile: file too big")
202 let sz_i = fromInteger sz
203 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
204 chunk <- allocMem sz_i'
205 trySlurp handle sz_i' chunk
208 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
209 trySlurp handle sz_i chunk =
210 #if __GLASGOW_HASKELL__ < 501
211 wantReadableHandle "hGetChar" handle $ \ handle_ ->
212 let fo = haFO__ handle_ in
214 wantReadableHandle "hGetChar" handle $
215 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
222 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
223 slurpFile c off chunk chunk_sz max_off = slurp c off
226 slurp :: Int# -> Int# -> IO (Addr, Int)
227 slurp c off | off >=# max_off = do
228 let new_sz = chunk_sz *# 2#
229 chunk' <- reAllocMem chunk (I# new_sz)
230 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
232 #if __GLASGOW_HASKELL__ < 501
233 intc <- mayBlock fo (_ccall_ fileGetc fo)
234 if intc == ((-1)::Int)
235 then do errtype <- getErrType
236 if errtype == (19{-ERR_EOF-} :: Int)
237 then return (chunk, I# off)
238 else constructErrorAndFail "slurpFile"
239 else case chr intc of
242 ch <- (if not (bufferEmpty buf)
243 then hGetcBuffered fd ref buf
244 else do new_buf <- fillReadBuffer fd True buf
245 hGetcBuffered fd ref new_buf)
246 `catch` \e -> if isEOFError e
250 '\xFFFF' -> return (chunk, I# off)
253 ch -> do writeCharOffAddr chunk (I# off) ch
254 let c' | ch == '\n' = 0#
255 | otherwise = c +# 1#
258 tabIt :: Int# -> Int# -> IO (Addr, Int)
259 -- can't run out of buffer in here, because we reserved an
260 -- extra tAB_SIZE bytes at the end earlier.
262 writeCharOffAddr chunk (I# off) ' '
265 if c' `remInt#` tAB_SIZE ==# 0#
270 -- allow space for a full tab at the end of the buffer
271 -- (that's what the max_off thing is for),
272 -- and add 1 to allow room for the final sentinel \NUL at
273 -- the end of the file.
274 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
275 #if __GLASGOW_HASKELL__ < 404
276 writeHandle handle handle_
278 return (chunk', rc+1 {- room for sentinel -})
281 reAllocMem :: Addr -> Int -> IO Addr
282 reAllocMem ptr sz = do
283 chunk <- _ccall_ realloc ptr sz
285 then fail "reAllocMem"
288 allocMem :: Int -> IO Addr
290 chunk <- _ccall_ malloc sz
292 #if __GLASGOW_HASKELL__ < 501
293 then constructErrorAndFail "allocMem"
295 then ioException (IOError Nothing ResourceExhausted "malloc"
296 "out of memory" Nothing)
304 currentChar :: StringBuffer -> Char
305 currentChar sb = case currentChar# sb of c -> C# c
307 lookAhead :: StringBuffer -> Int -> Char
308 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
310 indexSBuffer :: StringBuffer -> Int -> Char
311 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
313 currentChar# :: StringBuffer -> Char#
314 indexSBuffer# :: StringBuffer -> Int# -> Char#
315 lookAhead# :: StringBuffer -> Int# -> Char#
316 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
317 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
319 -- relative lookup, i.e, currentChar = lookAhead 0
320 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
322 currentIndex# :: StringBuffer -> Int#
323 currentIndex# (StringBuffer fo# _ _ c#) = c#
325 lexemeIndex :: StringBuffer -> Int#
326 lexemeIndex (StringBuffer fo# _ c# _) = c#
329 moving the start point of the current lexeme.
332 -- moving the end point of the current lexeme.
333 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
334 setCurrentPos# (StringBuffer fo l# s# c#) i# =
335 StringBuffer fo l# s# (c# +# i#)
337 -- augmenting the current lexeme by one.
338 incLexeme :: StringBuffer -> StringBuffer
339 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
341 decLexeme :: StringBuffer -> StringBuffer
342 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
346 -- move the start and end point of the buffer on by
350 stepOn :: StringBuffer -> StringBuffer
351 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
353 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
354 stepOnBy# (StringBuffer fo# l# s# c#) i# =
356 new_s# -> StringBuffer fo# l# new_s# new_s#
359 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
360 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
362 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
363 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
365 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
366 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
367 = StringBuffer fo l s# c#
369 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
371 stepOnUntil pred (StringBuffer fo l# s# c#) =
375 case indexCharOffAddr# fo c# of
376 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
377 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
378 | otherwise -> loop (c# +# 1#)
380 stepOverLexeme :: StringBuffer -> StringBuffer
381 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
383 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
384 expandWhile pred (StringBuffer fo l# s# c#) =
388 case indexCharOffAddr# fo c# of
389 ch# | pred (C# ch#) -> loop (c# +# 1#)
390 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
391 | otherwise -> StringBuffer fo l# s# c#
393 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
394 expandWhile# pred (StringBuffer fo l# s# c#) =
398 case indexCharOffAddr# fo c# of
399 ch# | pred ch# -> loop (c# +# 1#)
400 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
401 | otherwise -> StringBuffer fo l# s# c#
403 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
404 scanNumLit acc (StringBuffer fo l# s# c#) =
408 case indexCharOffAddr# fo c# of
409 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
410 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
411 | otherwise -> (acc,StringBuffer fo l# s# c#)
414 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
415 expandUntilMatch (StringBuffer fo l# s# c#) str =
418 loop c# [] = Just (StringBuffer fo l# s# c#)
419 loop c# ((C# x#):xs) =
420 case indexCharOffAddr# fo c# of
421 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
422 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
423 | otherwise -> loop (c# +# 1#) str
428 -- at or beyond end of buffer?
429 bufferExhausted :: StringBuffer -> Bool
430 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
432 emptyLexeme :: StringBuffer -> Bool
433 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
436 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
437 prefixMatch (StringBuffer fo l# s# c#) str =
440 loop c# [] = Just (StringBuffer fo l# s# c#)
442 | indexCharOffAddr# fo c# `eqChar#` x#
447 untilEndOfString# :: StringBuffer -> StringBuffer
448 untilEndOfString# (StringBuffer fo l# s# c#) =
451 getch# i# = indexCharOffAddr# fo i#
456 case getch# (c# -# 1#) of
458 -- looks like an escaped something or other to me,
459 -- better count the number of "\\"s that are immediately
460 -- preceeding to decide if the " is escaped.
464 '\\'# -> odd_slashes (not flg) (i# -# 1#)
467 if odd_slashes True (c# -# 2#) then
468 -- odd number, " is ecaped.
470 else -- a real end of string delimiter after all.
471 StringBuffer fo l# s# c#
472 _ -> StringBuffer fo l# s# c#
474 if c# >=# l# then -- hit sentinel, this doesn't look too good..
475 StringBuffer fo l# l# l#
481 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
482 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
486 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
487 = StringBuffer fo l# c# c#
492 lexemeToString :: StringBuffer -> String
493 lexemeToString (StringBuffer fo _ start_pos# current#) =
494 if start_pos# ==# current# then
497 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
499 lexemeToByteArray :: StringBuffer -> ByteArray Int
500 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
501 if start_pos# ==# current# then
502 error "lexemeToByteArray"
504 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
506 lexemeToFastString :: StringBuffer -> FastString
507 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
508 if start_pos# ==# current# then
509 mkFastCharString2 (A# fo) (I# 0#)
511 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
514 Create a StringBuffer from the current lexeme, and add a sentinel
515 at the end. Know What You're Doing before taking this function
518 lexemeToBuffer :: StringBuffer -> StringBuffer
519 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
520 if start_pos# ==# current# then
521 StringBuffer fo 0# start_pos# current# -- an error, really.
523 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)