2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
75 import Panic ( panic )
77 import Addr ( Addr(..) )
78 import Ptr ( Ptr(..) )
81 #if __GLASGOW_HASKELL__ >= 501
82 import PrelIO ( hGetcBuffered )
90 import IO ( openFile )
91 import IOExts ( slurpFile )
96 import PrelPack ( unpackCStringBA )
98 import Exception ( bracket )
101 import Char ( isDigit )
114 instance Show StringBuffer where
115 showsPrec _ s = showString ""
119 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
120 hGetStringBuffer expand_tabs fname = do
121 (a, read) <- if expand_tabs
122 then slurpFileExpandTabs fname
123 #if __GLASGOW_HASKELL__ < 411
127 (Ptr a#, read) <- slurpFile fname
131 -- urk! slurpFile gives us a buffer that doesn't have room for
132 -- the sentinel. Assume it has a final newline for now, and overwrite
133 -- that with the sentinel. slurpFileExpandTabs (below) leaves room
139 -- add sentinel '\NUL'
140 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# end#)
141 return (StringBuffer a# end# 0# 0#)
143 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
144 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
146 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
151 -----------------------------------------------------------------------------
152 -- Turn a String into a StringBuffer
155 stringToStringBuffer :: String -> IO StringBuffer
156 freeStringBuffer :: StringBuffer -> IO ()
158 #if __GLASGOW_HASKELL__ >= 411
159 stringToStringBuffer str =
160 do let sz@(I# sz#) = length str
161 (Ptr a#) <- mallocBytes (sz+1)
163 writeCharOffAddr (A# a#) sz '\0' -- sentinel
164 return (StringBuffer a# sz# 0# 0#)
166 fill_in [] _ = return ()
167 fill_in (c:cs) a = do
168 writeCharOffAddr a 0 c
169 fill_in cs (a `plusAddr` 1)
171 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
173 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
174 freeStringBuffer sb = return ()
179 -----------------------------------------------------------------------------
180 This very disturbing bit of code is used for expanding the tabs in a
181 file before we start parsing it. Expanding the tabs early makes the
182 lexer a lot simpler: we only have to record the beginning of the line
183 in order to be able to calculate the column offset of the current
186 We guess the size of the buffer required as 20% extra for
187 expanded tabs, and enlarge it if necessary.
191 getErrType = _ccall_ getErrType__
193 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
194 slurpFileExpandTabs fname = do
195 bracket (openFile fname ReadMode) (hClose)
197 do sz <- hFileSize handle
198 if sz > toInteger (maxBound::Int)
199 then ioError (userError "slurpFile: file too big")
201 let sz_i = fromInteger sz
203 -- empty file: just allocate a buffer containing '\0'
204 then do chunk <- allocMem 1
205 writeCharOffAddr chunk 0 '\0'
207 else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
208 chunk <- allocMem sz_i'
209 trySlurp handle sz_i' chunk
212 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
213 trySlurp handle sz_i chunk =
214 #if __GLASGOW_HASKELL__ < 501
215 wantReadableHandle "hGetChar" handle $ \ handle_ ->
216 let fo = haFO__ handle_ in
218 wantReadableHandle "hGetChar" handle $
219 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
226 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
227 slurpFile c off chunk chunk_sz max_off = slurp c off
230 slurp :: Int# -> Int# -> IO (Addr, Int)
231 slurp c off | off >=# max_off = do
232 let new_sz = chunk_sz *# 2#
233 chunk' <- reAllocMem chunk (I# new_sz)
234 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
236 #if __GLASGOW_HASKELL__ < 501
237 intc <- mayBlock fo (_ccall_ fileGetc fo)
238 if intc == ((-1)::Int)
239 then do errtype <- getErrType
240 if errtype == (19{-ERR_EOF-} :: Int)
241 then return (chunk, I# off)
242 else constructErrorAndFail "slurpFile"
243 else case chr intc of
246 ch <- (if not (bufferEmpty buf)
247 then hGetcBuffered fd ref buf
248 else do new_buf <- fillReadBuffer fd True buf
249 hGetcBuffered fd ref new_buf)
250 `catch` \e -> if isEOFError e
254 '\xFFFF' -> return (chunk, I# off)
257 ch -> do writeCharOffAddr chunk (I# off) ch
258 let c' | ch == '\n' = 0#
259 | otherwise = c +# 1#
262 tabIt :: Int# -> Int# -> IO (Addr, Int)
263 -- can't run out of buffer in here, because we reserved an
264 -- extra tAB_SIZE bytes at the end earlier.
266 writeCharOffAddr chunk (I# off) ' '
269 if c' `remInt#` tAB_SIZE ==# 0#
274 -- allow space for a full tab at the end of the buffer
275 -- (that's what the max_off thing is for),
276 -- and add 1 to allow room for the final sentinel \NUL at
277 -- the end of the file.
278 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
279 #if __GLASGOW_HASKELL__ < 404
280 writeHandle handle handle_
282 return (chunk', rc+1 {- room for sentinel -})
285 reAllocMem :: Addr -> Int -> IO Addr
286 reAllocMem ptr sz = do
287 chunk <- _ccall_ realloc ptr sz
289 then fail "reAllocMem"
292 allocMem :: Int -> IO Addr
294 chunk <- _ccall_ malloc sz
296 #if __GLASGOW_HASKELL__ < 501
297 then constructErrorAndFail "allocMem"
299 then ioException (IOError Nothing ResourceExhausted "malloc"
300 "out of memory" Nothing)
308 currentChar :: StringBuffer -> Char
309 currentChar sb = case currentChar# sb of c -> C# c
311 lookAhead :: StringBuffer -> Int -> Char
312 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
314 indexSBuffer :: StringBuffer -> Int -> Char
315 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
317 currentChar# :: StringBuffer -> Char#
318 indexSBuffer# :: StringBuffer -> Int# -> Char#
319 lookAhead# :: StringBuffer -> Int# -> Char#
320 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
321 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
323 -- relative lookup, i.e, currentChar = lookAhead 0
324 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
326 currentIndex# :: StringBuffer -> Int#
327 currentIndex# (StringBuffer fo# _ _ c#) = c#
329 lexemeIndex :: StringBuffer -> Int#
330 lexemeIndex (StringBuffer fo# _ c# _) = c#
333 moving the start point of the current lexeme.
336 -- moving the end point of the current lexeme.
337 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
338 setCurrentPos# (StringBuffer fo l# s# c#) i# =
339 StringBuffer fo l# s# (c# +# i#)
341 -- augmenting the current lexeme by one.
342 incLexeme :: StringBuffer -> StringBuffer
343 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
345 decLexeme :: StringBuffer -> StringBuffer
346 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
350 -- move the start and end point of the buffer on by
354 stepOn :: StringBuffer -> StringBuffer
355 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
357 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
358 stepOnBy# (StringBuffer fo# l# s# c#) i# =
360 new_s# -> StringBuffer fo# l# new_s# new_s#
363 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
364 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
366 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
367 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
369 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
370 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
371 = StringBuffer fo l s# c#
373 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
375 stepOnUntil pred (StringBuffer fo l# s# c#) =
379 case indexCharOffAddr# fo c# of
380 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
381 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
382 | otherwise -> loop (c# +# 1#)
384 stepOverLexeme :: StringBuffer -> StringBuffer
385 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
387 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
388 expandWhile pred (StringBuffer fo l# s# c#) =
392 case indexCharOffAddr# fo c# of
393 ch# | pred (C# ch#) -> loop (c# +# 1#)
394 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
395 | otherwise -> StringBuffer fo l# s# c#
397 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
398 expandWhile# pred (StringBuffer fo l# s# c#) =
402 case indexCharOffAddr# fo c# of
403 ch# | pred ch# -> loop (c# +# 1#)
404 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
405 | otherwise -> StringBuffer fo l# s# c#
407 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
408 scanNumLit acc (StringBuffer fo l# s# c#) =
412 case indexCharOffAddr# fo c# of
413 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
414 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
415 | otherwise -> (acc,StringBuffer fo l# s# c#)
418 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
419 expandUntilMatch (StringBuffer fo l# s# c#) str =
422 loop c# [] = Just (StringBuffer fo l# s# c#)
423 loop c# ((C# x#):xs) =
424 case indexCharOffAddr# fo c# of
425 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
426 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
427 | otherwise -> loop (c# +# 1#) str
432 -- at or beyond end of buffer?
433 bufferExhausted :: StringBuffer -> Bool
434 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
436 emptyLexeme :: StringBuffer -> Bool
437 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
440 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
441 prefixMatch (StringBuffer fo l# s# c#) str =
444 loop c# [] = Just (StringBuffer fo l# s# c#)
446 | indexCharOffAddr# fo c# `eqChar#` x#
451 untilEndOfString# :: StringBuffer -> StringBuffer
452 untilEndOfString# (StringBuffer fo l# s# c#) =
455 getch# i# = indexCharOffAddr# fo i#
460 case getch# (c# -# 1#) of
462 -- looks like an escaped something or other to me,
463 -- better count the number of "\\"s that are immediately
464 -- preceeding to decide if the " is escaped.
468 '\\'# -> odd_slashes (not flg) (i# -# 1#)
471 if odd_slashes True (c# -# 2#) then
472 -- odd number, " is ecaped.
474 else -- a real end of string delimiter after all.
475 StringBuffer fo l# s# c#
476 _ -> StringBuffer fo l# s# c#
478 if c# >=# l# then -- hit sentinel, this doesn't look too good..
479 StringBuffer fo l# l# l#
485 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
486 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
490 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
491 = StringBuffer fo l# c# c#
496 lexemeToString :: StringBuffer -> String
497 lexemeToString (StringBuffer fo _ start_pos# current#) =
498 if start_pos# ==# current# then
501 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
503 lexemeToByteArray :: StringBuffer -> ByteArray Int
504 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
505 if start_pos# ==# current# then
506 error "lexemeToByteArray"
508 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
510 lexemeToFastString :: StringBuffer -> FastString
511 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
512 if start_pos# ==# current# then
513 mkFastCharString2 (A# fo) (I# 0#)
515 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
518 Create a StringBuffer from the current lexeme, and add a sentinel
519 at the end. Know What You're Doing before taking this function
522 lexemeToBuffer :: StringBuffer -> StringBuffer
523 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
524 if start_pos# ==# current# then
525 StringBuffer fo 0# start_pos# current# -- an error, really.
527 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)