2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
14 -- creation/destruction
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
16 stringToStringBuffer, -- :: String -> IO StringBuffer
17 freeStringBuffer, -- :: StringBuffer -> IO ()
20 currentChar, -- :: StringBuffer -> Char
21 currentChar#, -- :: StringBuffer -> Char#
22 indexSBuffer, -- :: StringBuffer -> Int -> Char
23 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
24 -- relative lookup, i.e, currentChar = lookAhead 0
25 lookAhead, -- :: StringBuffer -> Int -> Char
26 lookAhead#, -- :: StringBuffer -> Int# -> Char#
29 currentIndex#, -- :: StringBuffer -> Int#
30 lexemeIndex, -- :: StringBuffer -> Int#
32 -- moving the end point of the current lexeme.
33 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
34 incLexeme, -- :: StringBuffer -> StringBuffer
35 decLexeme, -- :: StringBuffer -> StringBuffer
37 -- move the start and end lexeme pointer on by x units.
38 stepOn, -- :: StringBuffer -> StringBuffer
39 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
40 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
41 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
42 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
43 stepOverLexeme, -- :: StringBuffer -> StringBuffer
44 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
45 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
46 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
47 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
48 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
49 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
50 -- at or beyond end of buffer?
51 bufferExhausted, -- :: StringBuffer -> Bool
52 emptyLexeme, -- :: StringBuffer -> Bool
55 prefixMatch, -- :: StringBuffer -> String -> Bool
56 untilEndOfString#, -- :: StringBuffer -> Int#
59 lexemeToString, -- :: StringBuffer -> String
60 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
61 lexemeToFastString, -- :: StringBuffer -> FastString
62 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
68 #include "HsVersions.h"
71 #if __GLASGOW_HASKELL__ < 411
72 import PrelAddr ( Addr(..) )
74 import Addr ( Addr(..) )
78 import Panic ( panic )
80 import IO ( openFile )
81 import IOExts ( slurpFile )
85 #if __GLASGOW_HASKELL__ >= 411
86 import Ptr ( Ptr(..) )
89 import PrelPack ( unpackCStringBA )
91 #if __GLASGOW_HASKELL__ >= 501
92 import PrelIO ( hGetcBuffered )
93 import PrelCError ( throwErrnoIfMinus1RetryMayBlock )
94 import PrelConc ( threadWaitRead )
97 import Exception ( bracket )
100 import Char ( isDigit )
113 instance Show StringBuffer where
114 showsPrec _ s = showString ""
118 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
119 hGetStringBuffer expand_tabs fname = do
120 (a, read) <- if expand_tabs
121 then slurpFileExpandTabs fname
122 #if __GLASGOW_HASKELL__ < 411
126 (Ptr a#, read) <- slurpFile fname
130 let (A# a#) = a; (I# read#) = read
132 -- add sentinel '\NUL'
133 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
134 return (StringBuffer a# read# 0# 0#)
136 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
137 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
139 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
144 -----------------------------------------------------------------------------
145 -- Turn a String into a StringBuffer
148 stringToStringBuffer :: String -> IO StringBuffer
149 freeStringBuffer :: StringBuffer -> IO ()
151 #if __GLASGOW_HASKELL__ >= 411
152 stringToStringBuffer str =
153 do let sz@(I# sz#) = length str
154 (Ptr a#) <- mallocBytes (sz+1)
156 writeCharOffAddr (A# a#) sz '\0' -- sentinel
157 return (StringBuffer a# sz# 0# 0#)
159 fill_in [] _ = return ()
160 fill_in (c:cs) a = do
161 writeCharOffAddr a 0 c
162 fill_in cs (a `plusAddr` 1)
164 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
166 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
167 freeStringBuffer sb = return ()
172 -----------------------------------------------------------------------------
173 This very disturbing bit of code is used for expanding the tabs in a
174 file before we start parsing it. Expanding the tabs early makes the
175 lexer a lot simpler: we only have to record the beginning of the line
176 in order to be able to calculate the column offset of the current
179 We guess the size of the buffer required as 20% extra for
180 expanded tabs, and enlarge it if necessary.
184 getErrType = _ccall_ getErrType__
186 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
187 slurpFileExpandTabs fname = do
188 bracket (openFile fname ReadMode) (hClose)
190 do sz <- hFileSize handle
191 if sz > toInteger (maxBound::Int)
192 then IOERROR (userError "slurpFile: file too big")
194 let sz_i = fromInteger sz
195 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
196 chunk <- allocMem sz_i'
197 trySlurp handle sz_i' chunk
200 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
201 trySlurp handle sz_i chunk =
202 #if __GLASGOW_HASKELL__ < 501
203 wantReadableHandle "hGetChar" handle $ \ handle_ ->
204 let fo = haFO__ handle_ in
206 wantReadableHandle "hGetChar" handle $
207 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
214 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
215 slurpFile c off chunk chunk_sz max_off = slurp c off
218 slurp :: Int# -> Int# -> IO (Addr, Int)
219 slurp c off | off >=# max_off = do
220 let new_sz = chunk_sz *# 2#
221 chunk' <- reAllocMem chunk (I# new_sz)
222 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
224 #if __GLASGOW_HASKELL__ < 501
225 intc <- mayBlock fo (_ccall_ fileGetc fo)
226 if intc == ((-1)::Int)
227 then do errtype <- getErrType
228 if errtype == (19{-ERR_EOF-} :: Int)
229 then return (chunk, I# off)
230 else constructErrorAndFail "slurpFile"
231 else case chr intc of
234 ch <- (if not (bufferEmpty buf)
235 then hGetcBuffered fd ref buf
236 else do new_buf <- fillReadBuffer fd True buf
237 hGetcBuffered fd ref new_buf)
238 `catch` \e -> if isEOFError e
242 '\xFFFF' -> return (chunk, I# off)
245 ch -> do writeCharOffAddr chunk (I# off) ch
246 let c' | ch == '\n' = 0#
247 | otherwise = c +# 1#
250 tabIt :: Int# -> Int# -> IO (Addr, Int)
251 -- can't run out of buffer in here, because we reserved an
252 -- extra tAB_SIZE bytes at the end earlier.
254 writeCharOffAddr chunk (I# off) ' '
257 if c' `remInt#` tAB_SIZE ==# 0#
262 -- allow space for a full tab at the end of the buffer
263 -- (that's what the max_off thing is for),
264 -- and add 1 to allow room for the final sentinel \NUL at
265 -- the end of the file.
266 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
267 #if __GLASGOW_HASKELL__ < 404
268 writeHandle handle handle_
270 return (chunk', rc+1 {-room for sentinel-})
273 reAllocMem :: Addr -> Int -> IO Addr
274 reAllocMem ptr sz = do
275 chunk <- _ccall_ realloc ptr sz
277 then fail "reAllocMem"
280 allocMem :: Int -> IO Addr
282 chunk <- _ccall_ malloc sz
284 #if __GLASGOW_HASKELL__ < 501
285 then constructErrorAndFail "allocMem"
287 then ioException (IOError Nothing ResourceExhausted "malloc"
288 "out of memory" Nothing)
296 currentChar :: StringBuffer -> Char
297 currentChar sb = case currentChar# sb of c -> C# c
299 lookAhead :: StringBuffer -> Int -> Char
300 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
302 indexSBuffer :: StringBuffer -> Int -> Char
303 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
305 currentChar# :: StringBuffer -> Char#
306 indexSBuffer# :: StringBuffer -> Int# -> Char#
307 lookAhead# :: StringBuffer -> Int# -> Char#
308 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
309 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
311 -- relative lookup, i.e, currentChar = lookAhead 0
312 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
314 currentIndex# :: StringBuffer -> Int#
315 currentIndex# (StringBuffer fo# _ _ c#) = c#
317 lexemeIndex :: StringBuffer -> Int#
318 lexemeIndex (StringBuffer fo# _ c# _) = c#
321 moving the start point of the current lexeme.
324 -- moving the end point of the current lexeme.
325 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
326 setCurrentPos# (StringBuffer fo l# s# c#) i# =
327 StringBuffer fo l# s# (c# +# i#)
329 -- augmenting the current lexeme by one.
330 incLexeme :: StringBuffer -> StringBuffer
331 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
333 decLexeme :: StringBuffer -> StringBuffer
334 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
338 -- move the start and end point of the buffer on by
342 stepOn :: StringBuffer -> StringBuffer
343 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
345 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
346 stepOnBy# (StringBuffer fo# l# s# c#) i# =
348 new_s# -> StringBuffer fo# l# new_s# new_s#
351 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
352 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
354 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
355 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
357 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
358 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
359 = StringBuffer fo l s# c#
361 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
363 stepOnUntil pred (StringBuffer fo l# s# c#) =
367 case indexCharOffAddr# fo c# of
368 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
369 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
370 | otherwise -> loop (c# +# 1#)
372 stepOverLexeme :: StringBuffer -> StringBuffer
373 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
375 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
376 expandWhile pred (StringBuffer fo l# s# c#) =
380 case indexCharOffAddr# fo c# of
381 ch# | pred (C# ch#) -> loop (c# +# 1#)
382 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
383 | otherwise -> StringBuffer fo l# s# c#
385 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
386 expandWhile# pred (StringBuffer fo l# s# c#) =
390 case indexCharOffAddr# fo c# of
391 ch# | pred ch# -> loop (c# +# 1#)
392 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
393 | otherwise -> StringBuffer fo l# s# c#
395 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
396 scanNumLit acc (StringBuffer fo l# s# c#) =
400 case indexCharOffAddr# fo c# of
401 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
402 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
403 | otherwise -> (acc,StringBuffer fo l# s# c#)
406 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
407 expandUntilMatch (StringBuffer fo l# s# c#) str =
410 loop c# [] = Just (StringBuffer fo l# s# c#)
411 loop c# ((C# x#):xs) =
412 case indexCharOffAddr# fo c# of
413 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
414 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
415 | otherwise -> loop (c# +# 1#) str
420 -- at or beyond end of buffer?
421 bufferExhausted :: StringBuffer -> Bool
422 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
424 emptyLexeme :: StringBuffer -> Bool
425 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
428 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
429 prefixMatch (StringBuffer fo l# s# c#) str =
432 loop c# [] = Just (StringBuffer fo l# s# c#)
434 | indexCharOffAddr# fo c# `eqChar#` x#
439 untilEndOfString# :: StringBuffer -> StringBuffer
440 untilEndOfString# (StringBuffer fo l# s# c#) =
443 getch# i# = indexCharOffAddr# fo i#
448 case getch# (c# -# 1#) of
450 -- looks like an escaped something or other to me,
451 -- better count the number of "\\"s that are immediately
452 -- preceeding to decide if the " is escaped.
456 '\\'# -> odd_slashes (not flg) (i# -# 1#)
459 if odd_slashes True (c# -# 2#) then
460 -- odd number, " is ecaped.
462 else -- a real end of string delimiter after all.
463 StringBuffer fo l# s# c#
464 _ -> StringBuffer fo l# s# c#
466 if c# >=# l# then -- hit sentinel, this doesn't look too good..
467 StringBuffer fo l# l# l#
473 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
474 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
478 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
479 = StringBuffer fo l# c# c#
484 lexemeToString :: StringBuffer -> String
485 lexemeToString (StringBuffer fo _ start_pos# current#) =
486 if start_pos# ==# current# then
489 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
491 lexemeToByteArray :: StringBuffer -> ByteArray Int
492 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
493 if start_pos# ==# current# then
494 error "lexemeToByteArray"
496 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
498 lexemeToFastString :: StringBuffer -> FastString
499 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
500 if start_pos# ==# current# then
501 mkFastCharString2 (A# fo) (I# 0#)
503 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
506 Create a StringBuffer from the current lexeme, and add a sentinel
507 at the end. Know What You're Doing before taking this function
510 lexemeToBuffer :: StringBuffer -> StringBuffer
511 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
512 if start_pos# ==# current# then
513 StringBuffer fo 0# start_pos# current# -- an error, really.
515 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)