2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
76 import Addr ( Addr(..) )
80 import Panic ( panic )
82 import IO ( openFile )
83 import IOExts ( slurpFile )
87 #if __GLASGOW_HASKELL__ >= 411
88 import Ptr ( Ptr(..) )
91 import PrelPack ( unpackCStringBA )
93 #if __GLASGOW_HASKELL__ >= 501
94 import PrelIO ( hGetcBuffered )
97 import Exception ( bracket )
100 import Char ( isDigit )
113 instance Show StringBuffer where
114 showsPrec _ s = showString ""
118 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
119 hGetStringBuffer expand_tabs fname = do
120 (a, read) <- if expand_tabs
121 then slurpFileExpandTabs fname
122 #if __GLASGOW_HASKELL__ < 411
126 (Ptr a#, read) <- slurpFile fname
130 -- urk! slurpFile gives us a buffer that doesn't have room for
131 -- the sentinel. Assume it has a final newline for now, and overwrite
132 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
138 -- add sentinel '\NUL'
139 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
140 return (StringBuffer a# end# 0# 0#)
142 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
143 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
145 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
150 -----------------------------------------------------------------------------
151 -- Turn a String into a StringBuffer
154 stringToStringBuffer :: String -> IO StringBuffer
155 freeStringBuffer :: StringBuffer -> IO ()
157 #if __GLASGOW_HASKELL__ >= 411
158 stringToStringBuffer str =
159 do let sz@(I# sz#) = length str
160 (Ptr a#) <- mallocBytes (sz+1)
162 writeCharOffAddr (A# a#) sz '\0' -- sentinel
163 return (StringBuffer a# sz# 0# 0#)
165 fill_in [] _ = return ()
166 fill_in (c:cs) a = do
167 writeCharOffAddr a 0 c
168 fill_in cs (a `plusAddr` 1)
170 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
172 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
173 freeStringBuffer sb = return ()
178 -----------------------------------------------------------------------------
179 This very disturbing bit of code is used for expanding the tabs in a
180 file before we start parsing it. Expanding the tabs early makes the
181 lexer a lot simpler: we only have to record the beginning of the line
182 in order to be able to calculate the column offset of the current
185 We guess the size of the buffer required as 20% extra for
186 expanded tabs, and enlarge it if necessary.
190 getErrType = _ccall_ getErrType__
192 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
193 slurpFileExpandTabs fname = do
194 bracket (openFile fname ReadMode) (hClose)
196 do sz <- hFileSize handle
197 if sz > toInteger (maxBound::Int)
198 then ioError (userError "slurpFile: file too big")
200 let sz_i = fromInteger sz
202 -- empty file: just allocate a buffer containing '\0'
203 then do chunk <- allocMem 1
204 writeCharOffAddr chunk 0 '\0'
206 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
207 chunk <- allocMem sz_i'
208 trySlurp handle sz_i' chunk
211 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
212 trySlurp handle sz_i chunk =
213 #if __GLASGOW_HASKELL__ < 501
214 wantReadableHandle "hGetChar" handle $ \ handle_ ->
215 let fo = haFO__ handle_ in
217 wantReadableHandle "hGetChar" handle $
218 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
225 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
226 slurpFile c off chunk chunk_sz max_off = slurp c off
229 slurp :: Int# -> Int# -> IO (Addr, Int)
230 slurp c off | off >=# max_off = do
231 let new_sz = chunk_sz *# 2#
232 chunk' <- reAllocMem chunk (I# new_sz)
233 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
235 #if __GLASGOW_HASKELL__ < 501
236 intc <- mayBlock fo (_ccall_ fileGetc fo)
237 if intc == ((-1)::Int)
238 then do errtype <- getErrType
239 if errtype == (19{-ERR_EOF-} :: Int)
240 then return (chunk, I# off)
241 else constructErrorAndFail "slurpFile"
242 else case chr intc of
245 ch <- (if not (bufferEmpty buf)
246 then hGetcBuffered fd ref buf
247 else do new_buf <- fillReadBuffer fd True buf
248 hGetcBuffered fd ref new_buf)
249 `catch` \e -> if isEOFError e
253 '\xFFFF' -> return (chunk, I# off)
256 ch -> do writeCharOffAddr chunk (I# off) ch
257 let c' | ch == '\n' = 0#
258 | otherwise = c +# 1#
261 tabIt :: Int# -> Int# -> IO (Addr, Int)
262 -- can't run out of buffer in here, because we reserved an
263 -- extra tAB_SIZE bytes at the end earlier.
265 writeCharOffAddr chunk (I# off) ' '
268 if c' `remInt#` tAB_SIZE ==# 0#
273 -- allow space for a full tab at the end of the buffer
274 -- (that's what the max_off thing is for),
275 -- and add 1 to allow room for the final sentinel \NUL at
276 -- the end of the file.
277 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
278 #if __GLASGOW_HASKELL__ < 404
279 writeHandle handle handle_
281 return (chunk', rc+1 {- room for sentinel -})
284 reAllocMem :: Addr -> Int -> IO Addr
285 reAllocMem ptr sz = do
286 chunk <- _ccall_ realloc ptr sz
288 then fail "reAllocMem"
291 allocMem :: Int -> IO Addr
293 chunk <- _ccall_ malloc sz
295 #if __GLASGOW_HASKELL__ < 501
296 then constructErrorAndFail "allocMem"
298 then ioException (IOError Nothing ResourceExhausted "malloc"
299 "out of memory" Nothing)
307 currentChar :: StringBuffer -> Char
308 currentChar sb = case currentChar# sb of c -> C# c
310 lookAhead :: StringBuffer -> Int -> Char
311 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
313 indexSBuffer :: StringBuffer -> Int -> Char
314 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
316 currentChar# :: StringBuffer -> Char#
317 indexSBuffer# :: StringBuffer -> Int# -> Char#
318 lookAhead# :: StringBuffer -> Int# -> Char#
319 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
320 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
322 -- relative lookup, i.e, currentChar = lookAhead 0
323 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
325 currentIndex# :: StringBuffer -> Int#
326 currentIndex# (StringBuffer fo# _ _ c#) = c#
328 lexemeIndex :: StringBuffer -> Int#
329 lexemeIndex (StringBuffer fo# _ c# _) = c#
332 moving the start point of the current lexeme.
335 -- moving the end point of the current lexeme.
336 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
337 setCurrentPos# (StringBuffer fo l# s# c#) i# =
338 StringBuffer fo l# s# (c# +# i#)
340 -- augmenting the current lexeme by one.
341 incLexeme :: StringBuffer -> StringBuffer
342 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
344 decLexeme :: StringBuffer -> StringBuffer
345 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
349 -- move the start and end point of the buffer on by
353 stepOn :: StringBuffer -> StringBuffer
354 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
356 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
357 stepOnBy# (StringBuffer fo# l# s# c#) i# =
359 new_s# -> StringBuffer fo# l# new_s# new_s#
362 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
363 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
365 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
366 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
368 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
369 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
370 = StringBuffer fo l s# c#
372 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
374 stepOnUntil pred (StringBuffer fo l# s# c#) =
378 case indexCharOffAddr# fo c# of
379 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
380 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
381 | otherwise -> loop (c# +# 1#)
383 stepOverLexeme :: StringBuffer -> StringBuffer
384 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
386 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
387 expandWhile pred (StringBuffer fo l# s# c#) =
391 case indexCharOffAddr# fo c# of
392 ch# | pred (C# ch#) -> loop (c# +# 1#)
393 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
394 | otherwise -> StringBuffer fo l# s# c#
396 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
397 expandWhile# pred (StringBuffer fo l# s# c#) =
401 case indexCharOffAddr# fo c# of
402 ch# | pred ch# -> loop (c# +# 1#)
403 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
404 | otherwise -> StringBuffer fo l# s# c#
406 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
407 scanNumLit acc (StringBuffer fo l# s# c#) =
411 case indexCharOffAddr# fo c# of
412 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
413 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
414 | otherwise -> (acc,StringBuffer fo l# s# c#)
417 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
418 expandUntilMatch (StringBuffer fo l# s# c#) str =
421 loop c# [] = Just (StringBuffer fo l# s# c#)
422 loop c# ((C# x#):xs) =
423 case indexCharOffAddr# fo c# of
424 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
425 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
426 | otherwise -> loop (c# +# 1#) str
431 -- at or beyond end of buffer?
432 bufferExhausted :: StringBuffer -> Bool
433 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
435 emptyLexeme :: StringBuffer -> Bool
436 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
439 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
440 prefixMatch (StringBuffer fo l# s# c#) str =
443 loop c# [] = Just (StringBuffer fo l# s# c#)
445 | indexCharOffAddr# fo c# `eqChar#` x#
450 untilEndOfString# :: StringBuffer -> StringBuffer
451 untilEndOfString# (StringBuffer fo l# s# c#) =
454 getch# i# = indexCharOffAddr# fo i#
459 case getch# (c# -# 1#) of
461 -- looks like an escaped something or other to me,
462 -- better count the number of "\\"s that are immediately
463 -- preceeding to decide if the " is escaped.
467 '\\'# -> odd_slashes (not flg) (i# -# 1#)
470 if odd_slashes True (c# -# 2#) then
471 -- odd number, " is ecaped.
473 else -- a real end of string delimiter after all.
474 StringBuffer fo l# s# c#
475 _ -> StringBuffer fo l# s# c#
477 if c# >=# l# then -- hit sentinel, this doesn't look too good..
478 StringBuffer fo l# l# l#
484 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
485 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
489 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
490 = StringBuffer fo l# c# c#
495 lexemeToString :: StringBuffer -> String
496 lexemeToString (StringBuffer fo _ start_pos# current#) =
497 if start_pos# ==# current# then
500 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
502 lexemeToByteArray :: StringBuffer -> ByteArray Int
503 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
504 if start_pos# ==# current# then
505 error "lexemeToByteArray"
507 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
509 lexemeToFastString :: StringBuffer -> FastString
510 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
511 if start_pos# ==# current# then
512 mkFastCharString2 (A# fo) (I# 0#)
514 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
517 Create a StringBuffer from the current lexeme, and add a sentinel
518 at the end. Know What You're Doing before taking this function
521 lexemeToBuffer :: StringBuffer -> StringBuffer
522 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
523 if start_pos# ==# current# then
524 StringBuffer fo 0# start_pos# current# -- an error, really.
526 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)