2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 addToCurrentPos, -- :: StringBuffer -> Int# -> StringBuffer
36 incCurrentPos, -- :: StringBuffer -> StringBuffer
37 decCurrentPos, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToFastString, -- :: StringBuffer -> FastString
65 #include "HsVersions.h"
68 #if __GLASGOW_HASKELL__ < 502
69 import Panic ( panic )
71 #if __GLASGOW_HASKELL__ < 503
72 import Ptr ( Ptr(..) )
74 import GHC.Ptr ( Ptr(..) )
78 #if __GLASGOW_HASKELL__ < 501
80 #elif __GLASGOW_HASKELL__ < 503
81 import PrelIO ( hGetcBuffered )
83 import GHC.IO ( hGetcBuffered )
93 #if __GLASGOW_HASKELL__ >= 502
94 import CString ( newCString )
97 import IO ( openFile, isEOFError )
98 import EXCEPTION ( bracket )
100 #if __GLASGOW_HASKELL__ < 503
108 import Char ( isDigit )
121 instance Show StringBuffer where
122 showsPrec _ s = showString "<stringbuffer>"
126 hGetStringBuffer :: FilePath -> IO StringBuffer
127 hGetStringBuffer fname = do
128 (a, read) <- slurpFileExpandTabs fname
130 -- urk! slurpFile gives us a buffer that doesn't have room for
131 -- the sentinel. Assume it has a final newline for now, and overwrite
132 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
138 -- add sentinel '\NUL'
139 writeCharOffPtr a (I# end#) '\0'
141 return (StringBuffer a# end# 0# 0#)
144 -----------------------------------------------------------------------------
145 -- Turn a String into a StringBuffer
148 stringToStringBuffer :: String -> IO StringBuffer
149 freeStringBuffer :: StringBuffer -> IO ()
151 #if __GLASGOW_HASKELL__ >= 502
152 stringToStringBuffer str = do
153 let sz@(I# sz#) = length str
154 Ptr a# <- newCString str
155 return (StringBuffer a# sz# 0# 0#)
157 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
159 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
160 freeStringBuffer sb = return ()
165 -----------------------------------------------------------------------------
166 This very disturbing bit of code is used for expanding the tabs in a
167 file before we start parsing it. Expanding the tabs early makes the
168 lexer a lot simpler: we only have to record the beginning of the line
169 in order to be able to calculate the column offset of the current
172 We guess the size of the buffer required as 20% extra for
173 expanded tabs, and enlarge it if necessary.
176 #if __GLASGOW_HASKELL__ < 501
178 getErrType = _ccall_ getErrType__
181 slurpFileExpandTabs :: FilePath -> IO (Ptr (),Int)
182 slurpFileExpandTabs fname = do
183 bracket (openFile fname ReadMode) (hClose)
185 do sz <- hFileSize handle
186 if sz > toInteger (maxBound::Int)
187 then ioError (userError "slurpFile: file too big")
189 let sz_i = fromInteger sz
191 -- empty file: just allocate a buffer containing '\0'
192 then do chunk <- allocMem 1
193 writeCharOffPtr chunk 0 '\0'
195 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
196 chunk <- allocMem sz_i'
197 trySlurp handle sz_i' chunk
200 trySlurp :: Handle -> Int -> Ptr () -> IO (Ptr (), Int)
201 trySlurp handle sz_i chunk =
202 #if __GLASGOW_HASKELL__ < 501
203 wantReadableHandle "hGetChar" handle $ \ handle_ ->
204 let fo = haFO__ handle_ in
206 wantReadableHandle "hGetChar" handle $
207 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
214 slurpFile :: Int# -> Int# -> Ptr () -> Int# -> Int# -> IO (Ptr (), Int)
215 slurpFile c off chunk chunk_sz max_off = slurp c off
218 slurp :: Int# -> Int# -> IO (Ptr (), Int)
219 slurp c off | off >=# max_off = do
220 let new_sz = chunk_sz *# 2#
221 chunk' <- reAllocMem chunk (I# new_sz)
222 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
224 #if __GLASGOW_HASKELL__ < 501
225 intc <- mayBlock fo (_ccall_ fileGetc fo)
226 if intc == ((-1)::Int)
227 then do errtype <- getErrType
228 if errtype == (19{-ERR_EOF-} :: Int)
229 then return (chunk, I# off)
230 else constructErrorAndFail "slurpFile"
231 else case chr intc of
234 ch <- (if not (bufferEmpty buf)
235 then hGetcBuffered fd ref buf
237 #if __GLASGOW_HASKELL__ >= 503
238 new_buf <- fillReadBuffer fd True False buf
240 new_buf <- fillReadBuffer fd True buf
242 hGetcBuffered fd ref new_buf)
243 `catch` \e -> if isEOFError e
247 '\xFFFF' -> return (chunk, I# off)
250 ch -> do writeCharOffPtr chunk (I# off) ch
251 let c' | ch == '\n' = 0#
252 | otherwise = c +# 1#
255 tabIt :: Int# -> Int# -> IO (Ptr (), Int)
256 -- can't run out of buffer in here, because we reserved an
257 -- extra tAB_SIZE bytes at the end earlier.
259 writeCharOffPtr chunk (I# off) ' '
262 if c' `remInt#` tAB_SIZE ==# 0#
267 -- allow space for a full tab at the end of the buffer
268 -- (that's what the max_off thing is for),
269 -- and add 1 to allow room for the final sentinel \NUL at
270 -- the end of the file.
271 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
272 return (chunk', rc+1 {- room for sentinel -})
275 reAllocMem :: Ptr () -> Int -> IO (Ptr ())
276 reAllocMem ptr sz = do
277 chunk <- c_realloc ptr sz
279 then fail "reAllocMem"
282 allocMem :: Int -> IO (Ptr ())
286 #if __GLASGOW_HASKELL__ < 501
287 then constructErrorAndFail "allocMem"
289 then ioException (IOError Nothing ResourceExhausted "malloc"
290 "out of memory" Nothing)
294 #if __GLASGOW_HASKELL__ <= 408
295 c_malloc sz = do A# a <- c_malloc' sz; return (Ptr a)
296 foreign import ccall "malloc" unsafe
297 c_malloc' :: Int -> IO Addr
299 c_realloc (Ptr a) sz = do A# a <- c_realloc' (A# a) sz; return (Ptr a)
300 foreign import ccall "realloc" unsafe
301 c_realloc' :: Addr -> Int -> IO Addr
303 foreign import ccall "malloc" unsafe
304 c_malloc :: Int -> IO (Ptr a)
306 foreign import ccall "realloc" unsafe
307 c_realloc :: Ptr a -> Int -> IO (Ptr a)
314 currentChar :: StringBuffer -> Char
315 currentChar sb = case currentChar# sb of c -> C# c
317 lookAhead :: StringBuffer -> Int -> Char
318 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
320 indexSBuffer :: StringBuffer -> Int -> Char
321 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
323 currentChar# :: StringBuffer -> Char#
324 indexSBuffer# :: StringBuffer -> Int# -> Char#
325 lookAhead# :: StringBuffer -> Int# -> Char#
326 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
327 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
329 -- relative lookup, i.e, currentChar = lookAhead 0
330 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
332 currentIndex# :: StringBuffer -> Int#
333 currentIndex# (StringBuffer fo# _ _ c#) = c#
335 lexemeIndex :: StringBuffer -> Int#
336 lexemeIndex (StringBuffer fo# _ c# _) = c#
339 moving the start point of the current lexeme.
342 -- moving the end point of the current lexeme.
343 addToCurrentPos :: StringBuffer -> Int# -> StringBuffer
344 addToCurrentPos (StringBuffer fo l# s# c#) i# =
345 StringBuffer fo l# s# (c# +# i#)
347 -- augmenting the current lexeme by one.
348 incCurrentPos :: StringBuffer -> StringBuffer
349 incCurrentPos (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
351 decCurrentPos :: StringBuffer -> StringBuffer
352 decCurrentPos (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
356 -- move the start and end point of the buffer on by
360 stepOn :: StringBuffer -> StringBuffer
361 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
363 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
364 stepOnBy# (StringBuffer fo# l# s# c#) i# =
366 new_s# -> StringBuffer fo# l# new_s# new_s#
369 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
370 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
372 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
373 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
375 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
376 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
377 = StringBuffer fo l s# c#
379 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
381 stepOnUntil pred (StringBuffer fo l# s# c#) =
385 case indexCharOffAddr# fo c# of
386 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
387 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
388 | otherwise -> loop (c# +# 1#)
390 stepOverLexeme :: StringBuffer -> StringBuffer
391 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
393 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
394 expandWhile pred (StringBuffer fo l# s# c#) =
398 case indexCharOffAddr# fo c# of
399 ch# | pred (C# ch#) -> loop (c# +# 1#)
400 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
401 | otherwise -> StringBuffer fo l# s# c#
403 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
404 expandWhile# pred (StringBuffer fo l# s# c#) =
408 case indexCharOffAddr# fo c# of
409 ch# | pred ch# -> loop (c# +# 1#)
410 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
411 | otherwise -> StringBuffer fo l# s# c#
413 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
414 scanNumLit acc (StringBuffer fo l# s# c#) =
418 case indexCharOffAddr# fo c# of
419 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
420 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
421 | otherwise -> (acc,StringBuffer fo l# s# c#)
424 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
425 expandUntilMatch (StringBuffer fo l# s# c#) str =
428 loop c# [] = Just (StringBuffer fo l# s# c#)
429 loop c# ((C# x#):xs) =
430 case indexCharOffAddr# fo c# of
431 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
432 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
433 | otherwise -> loop (c# +# 1#) str
438 -- at or beyond end of buffer?
439 bufferExhausted :: StringBuffer -> Bool
440 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
442 emptyLexeme :: StringBuffer -> Bool
443 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
446 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
447 prefixMatch (StringBuffer fo l# s# c#) str =
450 loop c# [] = Just (StringBuffer fo l# s# c#)
452 | indexCharOffAddr# fo c# `eqChar#` x#
457 untilEndOfString# :: StringBuffer -> StringBuffer
458 untilEndOfString# (StringBuffer fo l# s# c#) =
461 getch# i# = indexCharOffAddr# fo i#
466 case getch# (c# -# 1#) of
468 -- looks like an escaped something or other to me,
469 -- better count the number of "\\"s that are immediately
470 -- preceeding to decide if the " is escaped.
474 '\\'# -> odd_slashes (not flg) (i# -# 1#)
477 if odd_slashes True (c# -# 2#) then
478 -- odd number, " is ecaped.
480 else -- a real end of string delimiter after all.
481 StringBuffer fo l# s# c#
482 _ -> StringBuffer fo l# s# c#
484 if c# >=# l# then -- hit sentinel, this doesn't look too good..
485 StringBuffer fo l# l# l#
491 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
492 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
496 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
497 = StringBuffer fo l# c# c#
502 lexemeToString :: StringBuffer -> String
503 lexemeToString (StringBuffer fo len# start_pos# current#) =
504 if start_pos# ==# current# then
508 (copySubStr fo (I# start_pos#) (I# (current# -# start_pos#)))
511 lexemeToFastString :: StringBuffer -> FastString
512 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
513 if start_pos# ==# current# then
516 mkFastSubString fo (I# start_pos#) (I# (current# -# start_pos#))