2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
76 import Addr ( Addr(..) )
80 import Panic ( panic )
82 import IO ( openFile )
83 import IOExts ( slurpFile )
87 #if __GLASGOW_HASKELL__ >= 411
88 import Ptr ( Ptr(..) )
91 import PrelPack ( unpackCStringBA )
93 #if __GLASGOW_HASKELL__ >= 501
94 import PrelIO ( hGetcBuffered )
97 import Exception ( bracket )
100 import Char ( isDigit )
113 instance Show StringBuffer where
114 showsPrec _ s = showString ""
118 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
119 hGetStringBuffer expand_tabs fname = do
120 (a, read) <- if expand_tabs
121 then slurpFileExpandTabs fname
122 #if __GLASGOW_HASKELL__ < 411
126 (Ptr a#, read) <- slurpFile fname
130 -- urk! slurpFile gives us a buffer that doesn't have room for
131 -- the sentinel. Assume it has a final newline for now, and overwrite
132 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
138 -- add sentinel '\NUL'
139 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
140 return (StringBuffer a# end# 0# 0#)
142 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
143 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
145 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
150 -----------------------------------------------------------------------------
151 -- Turn a String into a StringBuffer
154 stringToStringBuffer :: String -> IO StringBuffer
155 freeStringBuffer :: StringBuffer -> IO ()
157 #if __GLASGOW_HASKELL__ >= 411
158 stringToStringBuffer str =
159 do let sz@(I# sz#) = length str
160 (Ptr a#) <- mallocBytes (sz+1)
162 writeCharOffAddr (A# a#) sz '\0' -- sentinel
163 return (StringBuffer a# sz# 0# 0#)
165 fill_in [] _ = return ()
166 fill_in (c:cs) a = do
167 writeCharOffAddr a 0 c
168 fill_in cs (a `plusAddr` 1)
170 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
172 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
173 freeStringBuffer sb = return ()
178 -----------------------------------------------------------------------------
179 This very disturbing bit of code is used for expanding the tabs in a
180 file before we start parsing it. Expanding the tabs early makes the
181 lexer a lot simpler: we only have to record the beginning of the line
182 in order to be able to calculate the column offset of the current
185 We guess the size of the buffer required as 20% extra for
186 expanded tabs, and enlarge it if necessary.
190 getErrType = _ccall_ getErrType__
192 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
193 slurpFileExpandTabs fname = do
194 bracket (openFile fname ReadMode) (hClose)
196 do sz <- hFileSize handle
197 if sz > toInteger (maxBound::Int)
198 then IOERROR (userError "slurpFile: file too big")
200 let sz_i = fromInteger sz
201 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
202 chunk <- allocMem sz_i'
203 trySlurp handle sz_i' chunk
206 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
207 trySlurp handle sz_i chunk =
208 #if __GLASGOW_HASKELL__ < 501
209 wantReadableHandle "hGetChar" handle $ \ handle_ ->
210 let fo = haFO__ handle_ in
212 wantReadableHandle "hGetChar" handle $
213 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
220 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
221 slurpFile c off chunk chunk_sz max_off = slurp c off
224 slurp :: Int# -> Int# -> IO (Addr, Int)
225 slurp c off | off >=# max_off = do
226 let new_sz = chunk_sz *# 2#
227 chunk' <- reAllocMem chunk (I# new_sz)
228 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
230 #if __GLASGOW_HASKELL__ < 501
231 intc <- mayBlock fo (_ccall_ fileGetc fo)
232 if intc == ((-1)::Int)
233 then do errtype <- getErrType
234 if errtype == (19{-ERR_EOF-} :: Int)
235 then return (chunk, I# off)
236 else constructErrorAndFail "slurpFile"
237 else case chr intc of
240 ch <- (if not (bufferEmpty buf)
241 then hGetcBuffered fd ref buf
242 else do new_buf <- fillReadBuffer fd True buf
243 hGetcBuffered fd ref new_buf)
244 `catch` \e -> if isEOFError e
248 '\xFFFF' -> return (chunk, I# off)
251 ch -> do writeCharOffAddr chunk (I# off) ch
252 let c' | ch == '\n' = 0#
253 | otherwise = c +# 1#
256 tabIt :: Int# -> Int# -> IO (Addr, Int)
257 -- can't run out of buffer in here, because we reserved an
258 -- extra tAB_SIZE bytes at the end earlier.
260 writeCharOffAddr chunk (I# off) ' '
263 if c' `remInt#` tAB_SIZE ==# 0#
268 -- allow space for a full tab at the end of the buffer
269 -- (that's what the max_off thing is for),
270 -- and add 1 to allow room for the final sentinel \NUL at
271 -- the end of the file.
272 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
273 #if __GLASGOW_HASKELL__ < 404
274 writeHandle handle handle_
276 return (chunk', rc+1 {- room for sentinel -})
279 reAllocMem :: Addr -> Int -> IO Addr
280 reAllocMem ptr sz = do
281 chunk <- _ccall_ realloc ptr sz
283 then fail "reAllocMem"
286 allocMem :: Int -> IO Addr
288 chunk <- _ccall_ malloc sz
290 #if __GLASGOW_HASKELL__ < 501
291 then constructErrorAndFail "allocMem"
293 then ioException (IOError Nothing ResourceExhausted "malloc"
294 "out of memory" Nothing)
302 currentChar :: StringBuffer -> Char
303 currentChar sb = case currentChar# sb of c -> C# c
305 lookAhead :: StringBuffer -> Int -> Char
306 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
308 indexSBuffer :: StringBuffer -> Int -> Char
309 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
311 currentChar# :: StringBuffer -> Char#
312 indexSBuffer# :: StringBuffer -> Int# -> Char#
313 lookAhead# :: StringBuffer -> Int# -> Char#
314 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
315 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
317 -- relative lookup, i.e, currentChar = lookAhead 0
318 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
320 currentIndex# :: StringBuffer -> Int#
321 currentIndex# (StringBuffer fo# _ _ c#) = c#
323 lexemeIndex :: StringBuffer -> Int#
324 lexemeIndex (StringBuffer fo# _ c# _) = c#
327 moving the start point of the current lexeme.
330 -- moving the end point of the current lexeme.
331 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
332 setCurrentPos# (StringBuffer fo l# s# c#) i# =
333 StringBuffer fo l# s# (c# +# i#)
335 -- augmenting the current lexeme by one.
336 incLexeme :: StringBuffer -> StringBuffer
337 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
339 decLexeme :: StringBuffer -> StringBuffer
340 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
344 -- move the start and end point of the buffer on by
348 stepOn :: StringBuffer -> StringBuffer
349 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
351 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
352 stepOnBy# (StringBuffer fo# l# s# c#) i# =
354 new_s# -> StringBuffer fo# l# new_s# new_s#
357 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
358 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
360 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
361 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
363 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
364 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
365 = StringBuffer fo l s# c#
367 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
369 stepOnUntil pred (StringBuffer fo l# s# c#) =
373 case indexCharOffAddr# fo c# of
374 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
375 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
376 | otherwise -> loop (c# +# 1#)
378 stepOverLexeme :: StringBuffer -> StringBuffer
379 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
381 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
382 expandWhile pred (StringBuffer fo l# s# c#) =
386 case indexCharOffAddr# fo c# of
387 ch# | pred (C# ch#) -> loop (c# +# 1#)
388 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
389 | otherwise -> StringBuffer fo l# s# c#
391 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
392 expandWhile# pred (StringBuffer fo l# s# c#) =
396 case indexCharOffAddr# fo c# of
397 ch# | pred ch# -> loop (c# +# 1#)
398 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
399 | otherwise -> StringBuffer fo l# s# c#
401 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
402 scanNumLit acc (StringBuffer fo l# s# c#) =
406 case indexCharOffAddr# fo c# of
407 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
408 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
409 | otherwise -> (acc,StringBuffer fo l# s# c#)
412 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
413 expandUntilMatch (StringBuffer fo l# s# c#) str =
416 loop c# [] = Just (StringBuffer fo l# s# c#)
417 loop c# ((C# x#):xs) =
418 case indexCharOffAddr# fo c# of
419 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
420 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
421 | otherwise -> loop (c# +# 1#) str
426 -- at or beyond end of buffer?
427 bufferExhausted :: StringBuffer -> Bool
428 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
430 emptyLexeme :: StringBuffer -> Bool
431 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
434 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
435 prefixMatch (StringBuffer fo l# s# c#) str =
438 loop c# [] = Just (StringBuffer fo l# s# c#)
440 | indexCharOffAddr# fo c# `eqChar#` x#
445 untilEndOfString# :: StringBuffer -> StringBuffer
446 untilEndOfString# (StringBuffer fo l# s# c#) =
449 getch# i# = indexCharOffAddr# fo i#
454 case getch# (c# -# 1#) of
456 -- looks like an escaped something or other to me,
457 -- better count the number of "\\"s that are immediately
458 -- preceeding to decide if the " is escaped.
462 '\\'# -> odd_slashes (not flg) (i# -# 1#)
465 if odd_slashes True (c# -# 2#) then
466 -- odd number, " is ecaped.
468 else -- a real end of string delimiter after all.
469 StringBuffer fo l# s# c#
470 _ -> StringBuffer fo l# s# c#
472 if c# >=# l# then -- hit sentinel, this doesn't look too good..
473 StringBuffer fo l# l# l#
479 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
480 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
484 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
485 = StringBuffer fo l# c# c#
490 lexemeToString :: StringBuffer -> String
491 lexemeToString (StringBuffer fo _ start_pos# current#) =
492 if start_pos# ==# current# then
495 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
497 lexemeToByteArray :: StringBuffer -> ByteArray Int
498 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
499 if start_pos# ==# current# then
500 error "lexemeToByteArray"
502 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
504 lexemeToFastString :: StringBuffer -> FastString
505 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
506 if start_pos# ==# current# then
507 mkFastCharString2 (A# fo) (I# 0#)
509 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
512 Create a StringBuffer from the current lexeme, and add a sentinel
513 at the end. Know What You're Doing before taking this function
516 lexemeToBuffer :: StringBuffer -> StringBuffer
517 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
518 if start_pos# ==# current# then
519 StringBuffer fo 0# start_pos# current# -- an error, really.
521 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)