2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToFastString, -- :: StringBuffer -> FastString
65 #include "HsVersions.h"
68 #if __GLASGOW_HASKELL__ < 411
69 import PrelAddr ( Addr(..) )
70 import Panic ( panic )
72 import Addr ( Addr(..) )
73 #if __GLASGOW_HASKELL__ < 503
74 import Ptr ( Ptr(..) )
76 import GHC.Ptr ( Ptr(..) )
80 #if __GLASGOW_HASKELL__ < 501
82 #elif __GLASGOW_HASKELL__ < 503
83 import PrelIO ( hGetcBuffered )
85 import GHC.IO ( hGetcBuffered )
93 import IO ( openFile, isEOFError )
95 import Exception ( bracket )
97 import CString ( unpackCStringBA )
99 #if __GLASGOW_HASKELL__ < 503
107 import Char ( isDigit )
120 instance Show StringBuffer where
121 showsPrec _ s = showString ""
125 hGetStringBuffer :: FilePath -> IO StringBuffer
126 hGetStringBuffer fname = do
127 (a, read) <- slurpFileExpandTabs fname
129 -- urk! slurpFile gives us a buffer that doesn't have room for
130 -- the sentinel. Assume it has a final newline for now, and overwrite
131 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
137 -- add sentinel '\NUL'
138 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
139 return (StringBuffer a# end# 0# 0#)
141 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
142 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
144 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
149 -----------------------------------------------------------------------------
150 -- Turn a String into a StringBuffer
153 stringToStringBuffer :: String -> IO StringBuffer
154 freeStringBuffer :: StringBuffer -> IO ()
156 #if __GLASGOW_HASKELL__ >= 411
157 stringToStringBuffer str =
158 do let sz@(I# sz#) = length str
159 (Ptr a#) <- mallocBytes (sz+1)
161 writeCharOffAddr (A# a#) sz '\0' -- sentinel
162 return (StringBuffer a# sz# 0# 0#)
164 fill_in [] _ = return ()
165 fill_in (c:cs) a = do
166 writeCharOffAddr a 0 c
167 fill_in cs (a `plusAddr` 1)
169 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
171 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
172 freeStringBuffer sb = return ()
177 -----------------------------------------------------------------------------
178 This very disturbing bit of code is used for expanding the tabs in a
179 file before we start parsing it. Expanding the tabs early makes the
180 lexer a lot simpler: we only have to record the beginning of the line
181 in order to be able to calculate the column offset of the current
184 We guess the size of the buffer required as 20% extra for
185 expanded tabs, and enlarge it if necessary.
189 getErrType = _ccall_ getErrType__
191 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
192 slurpFileExpandTabs fname = do
193 bracket (openFile fname ReadMode) (hClose)
195 do sz <- hFileSize handle
196 if sz > toInteger (maxBound::Int)
197 then ioError (userError "slurpFile: file too big")
199 let sz_i = fromInteger sz
201 -- empty file: just allocate a buffer containing '\0'
202 then do chunk <- allocMem 1
203 writeCharOffAddr chunk 0 '\0'
205 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
206 chunk <- allocMem sz_i'
207 trySlurp handle sz_i' chunk
210 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
211 trySlurp handle sz_i chunk =
212 #if __GLASGOW_HASKELL__ < 501
213 wantReadableHandle "hGetChar" handle $ \ handle_ ->
214 let fo = haFO__ handle_ in
216 wantReadableHandle "hGetChar" handle $
217 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
224 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
225 slurpFile c off chunk chunk_sz max_off = slurp c off
228 slurp :: Int# -> Int# -> IO (Addr, Int)
229 slurp c off | off >=# max_off = do
230 let new_sz = chunk_sz *# 2#
231 chunk' <- reAllocMem chunk (I# new_sz)
232 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
234 #if __GLASGOW_HASKELL__ < 501
235 intc <- mayBlock fo (_ccall_ fileGetc fo)
236 if intc == ((-1)::Int)
237 then do errtype <- getErrType
238 if errtype == (19{-ERR_EOF-} :: Int)
239 then return (chunk, I# off)
240 else constructErrorAndFail "slurpFile"
241 else case chr intc of
244 ch <- (if not (bufferEmpty buf)
245 then hGetcBuffered fd ref buf
247 #if __GLASGOW_HASKELL__ >= 503
248 new_buf <- fillReadBuffer fd True False buf
250 new_buf <- fillReadBuffer fd True buf
252 hGetcBuffered fd ref new_buf)
253 `catch` \e -> if isEOFError e
257 '\xFFFF' -> return (chunk, I# off)
260 ch -> do writeCharOffAddr chunk (I# off) ch
261 let c' | ch == '\n' = 0#
262 | otherwise = c +# 1#
265 tabIt :: Int# -> Int# -> IO (Addr, Int)
266 -- can't run out of buffer in here, because we reserved an
267 -- extra tAB_SIZE bytes at the end earlier.
269 writeCharOffAddr chunk (I# off) ' '
272 if c' `remInt#` tAB_SIZE ==# 0#
277 -- allow space for a full tab at the end of the buffer
278 -- (that's what the max_off thing is for),
279 -- and add 1 to allow room for the final sentinel \NUL at
280 -- the end of the file.
281 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
282 return (chunk', rc+1 {- room for sentinel -})
285 reAllocMem :: Addr -> Int -> IO Addr
286 reAllocMem ptr sz = do
287 chunk <- _ccall_ realloc ptr sz
289 then fail "reAllocMem"
292 allocMem :: Int -> IO Addr
294 chunk <- _ccall_ malloc sz
296 #if __GLASGOW_HASKELL__ < 501
297 then constructErrorAndFail "allocMem"
299 then ioException (IOError Nothing ResourceExhausted "malloc"
300 "out of memory" Nothing)
308 currentChar :: StringBuffer -> Char
309 currentChar sb = case currentChar# sb of c -> C# c
311 lookAhead :: StringBuffer -> Int -> Char
312 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
314 indexSBuffer :: StringBuffer -> Int -> Char
315 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
317 currentChar# :: StringBuffer -> Char#
318 indexSBuffer# :: StringBuffer -> Int# -> Char#
319 lookAhead# :: StringBuffer -> Int# -> Char#
320 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
321 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
323 -- relative lookup, i.e, currentChar = lookAhead 0
324 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
326 currentIndex# :: StringBuffer -> Int#
327 currentIndex# (StringBuffer fo# _ _ c#) = c#
329 lexemeIndex :: StringBuffer -> Int#
330 lexemeIndex (StringBuffer fo# _ c# _) = c#
333 moving the start point of the current lexeme.
336 -- moving the end point of the current lexeme.
337 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
338 setCurrentPos# (StringBuffer fo l# s# c#) i# =
339 StringBuffer fo l# s# (c# +# i#)
341 -- augmenting the current lexeme by one.
342 incLexeme :: StringBuffer -> StringBuffer
343 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
345 decLexeme :: StringBuffer -> StringBuffer
346 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
350 -- move the start and end point of the buffer on by
354 stepOn :: StringBuffer -> StringBuffer
355 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
357 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
358 stepOnBy# (StringBuffer fo# l# s# c#) i# =
360 new_s# -> StringBuffer fo# l# new_s# new_s#
363 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
364 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
366 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
367 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
369 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
370 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
371 = StringBuffer fo l s# c#
373 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
375 stepOnUntil pred (StringBuffer fo l# s# c#) =
379 case indexCharOffAddr# fo c# of
380 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
381 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
382 | otherwise -> loop (c# +# 1#)
384 stepOverLexeme :: StringBuffer -> StringBuffer
385 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
387 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
388 expandWhile pred (StringBuffer fo l# s# c#) =
392 case indexCharOffAddr# fo c# of
393 ch# | pred (C# ch#) -> loop (c# +# 1#)
394 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
395 | otherwise -> StringBuffer fo l# s# c#
397 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
398 expandWhile# pred (StringBuffer fo l# s# c#) =
402 case indexCharOffAddr# fo c# of
403 ch# | pred ch# -> loop (c# +# 1#)
404 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
405 | otherwise -> StringBuffer fo l# s# c#
407 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
408 scanNumLit acc (StringBuffer fo l# s# c#) =
412 case indexCharOffAddr# fo c# of
413 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
414 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
415 | otherwise -> (acc,StringBuffer fo l# s# c#)
418 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
419 expandUntilMatch (StringBuffer fo l# s# c#) str =
422 loop c# [] = Just (StringBuffer fo l# s# c#)
423 loop c# ((C# x#):xs) =
424 case indexCharOffAddr# fo c# of
425 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
426 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
427 | otherwise -> loop (c# +# 1#) str
432 -- at or beyond end of buffer?
433 bufferExhausted :: StringBuffer -> Bool
434 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
436 emptyLexeme :: StringBuffer -> Bool
437 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
440 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
441 prefixMatch (StringBuffer fo l# s# c#) str =
444 loop c# [] = Just (StringBuffer fo l# s# c#)
446 | indexCharOffAddr# fo c# `eqChar#` x#
451 untilEndOfString# :: StringBuffer -> StringBuffer
452 untilEndOfString# (StringBuffer fo l# s# c#) =
455 getch# i# = indexCharOffAddr# fo i#
460 case getch# (c# -# 1#) of
462 -- looks like an escaped something or other to me,
463 -- better count the number of "\\"s that are immediately
464 -- preceeding to decide if the " is escaped.
468 '\\'# -> odd_slashes (not flg) (i# -# 1#)
471 if odd_slashes True (c# -# 2#) then
472 -- odd number, " is ecaped.
474 else -- a real end of string delimiter after all.
475 StringBuffer fo l# s# c#
476 _ -> StringBuffer fo l# s# c#
478 if c# >=# l# then -- hit sentinel, this doesn't look too good..
479 StringBuffer fo l# l# l#
485 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
486 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
490 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
491 = StringBuffer fo l# c# c#
496 lexemeToString :: StringBuffer -> String
497 lexemeToString (StringBuffer fo _ start_pos# current#) =
498 if start_pos# ==# current# then
501 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
503 lexemeToFastString :: StringBuffer -> FastString
504 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
505 if start_pos# ==# current# then
508 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))