2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 {-# OPTIONS -fvia-C #-}
16 -- creation/destruction
17 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 stringToStringBuffer, -- :: String -> IO StringBuffer
19 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 #if __GLASGOW_HASKELL__ < 411
74 import PrelAddr ( Addr(..) )
76 import Addr ( Addr(..) )
80 import Panic ( panic )
82 import IO ( openFile )
83 import IOExts ( slurpFile )
87 #if __GLASGOW_HASKELL__ >= 411
88 import Ptr ( Ptr(..) )
91 import PrelPack ( unpackCStringBA )
93 #if __GLASGOW_HASKELL__ >= 501
94 import PrelIO ( hGetcBuffered )
95 import PrelCError ( throwErrnoIfMinus1RetryMayBlock )
96 import PrelConc ( threadWaitRead )
99 import Exception ( bracket )
102 import Char ( isDigit )
115 instance Show StringBuffer where
116 showsPrec _ s = showString ""
120 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
121 hGetStringBuffer expand_tabs fname = do
122 (a, read) <- if expand_tabs
123 then slurpFileExpandTabs fname
124 #if __GLASGOW_HASKELL__ < 411
128 (Ptr a#, read) <- slurpFile fname
132 let (A# a#) = a; (I# read#) = read
134 -- add sentinel '\NUL'
135 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
136 return (StringBuffer a# read# 0# 0#)
138 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
139 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
141 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
146 -----------------------------------------------------------------------------
147 -- Turn a String into a StringBuffer
150 stringToStringBuffer :: String -> IO StringBuffer
151 freeStringBuffer :: StringBuffer -> IO ()
153 #if __GLASGOW_HASKELL__ >= 411
154 stringToStringBuffer str =
155 do let sz@(I# sz#) = length str
156 (Ptr a#) <- mallocBytes (sz+1)
158 writeCharOffAddr (A# a#) sz '\0' -- sentinel
159 return (StringBuffer a# sz# 0# 0#)
161 fill_in [] _ = return ()
162 fill_in (c:cs) a = do
163 writeCharOffAddr a 0 c
164 fill_in cs (a `plusAddr` 1)
166 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
168 stringToStringBuffer = panic "stringToStringBuffer: not implemented"
169 freeStringBuffer sb = return ()
174 -----------------------------------------------------------------------------
175 This very disturbing bit of code is used for expanding the tabs in a
176 file before we start parsing it. Expanding the tabs early makes the
177 lexer a lot simpler: we only have to record the beginning of the line
178 in order to be able to calculate the column offset of the current
181 We guess the size of the buffer required as 20% extra for
182 expanded tabs, and enlarge it if necessary.
186 getErrType = _ccall_ getErrType__
188 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
189 slurpFileExpandTabs fname = do
190 bracket (openFile fname ReadMode) (hClose)
192 do sz <- hFileSize handle
193 if sz > toInteger (maxBound::Int)
194 then IOERROR (userError "slurpFile: file too big")
196 let sz_i = fromInteger sz
197 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
198 chunk <- allocMem sz_i'
199 trySlurp handle sz_i' chunk
202 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
203 trySlurp handle sz_i chunk =
204 #if __GLASGOW_HASKELL__ < 501
205 wantReadableHandle "hGetChar" handle $ \ handle_ ->
206 let fo = haFO__ handle_ in
208 wantReadableHandle "hGetChar" handle $
209 \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } ->
216 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
217 slurpFile c off chunk chunk_sz max_off = slurp c off
220 slurp :: Int# -> Int# -> IO (Addr, Int)
221 slurp c off | off >=# max_off = do
222 let new_sz = chunk_sz *# 2#
223 chunk' <- reAllocMem chunk (I# new_sz)
224 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
226 #if __GLASGOW_HASKELL__ < 501
227 intc <- mayBlock fo (_ccall_ fileGetc fo)
228 if intc == ((-1)::Int)
229 then do errtype <- getErrType
230 if errtype == (19{-ERR_EOF-} :: Int)
231 then return (chunk, I# off)
232 else constructErrorAndFail "slurpFile"
233 else case chr intc of
236 ch <- (if not (bufferEmpty buf)
237 then hGetcBuffered fd ref buf
238 else do new_buf <- fillReadBuffer fd True buf
239 hGetcBuffered fd ref new_buf)
240 `catch` \e -> if isEOFError e
244 '\xFFFF' -> return (chunk, I# off)
247 ch -> do writeCharOffAddr chunk (I# off) ch
248 let c' | ch == '\n' = 0#
249 | otherwise = c +# 1#
252 tabIt :: Int# -> Int# -> IO (Addr, Int)
253 -- can't run out of buffer in here, because we reserved an
254 -- extra tAB_SIZE bytes at the end earlier.
256 writeCharOffAddr chunk (I# off) ' '
259 if c' `remInt#` tAB_SIZE ==# 0#
264 -- allow space for a full tab at the end of the buffer
265 -- (that's what the max_off thing is for),
266 -- and add 1 to allow room for the final sentinel \NUL at
267 -- the end of the file.
268 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
269 #if __GLASGOW_HASKELL__ < 404
270 writeHandle handle handle_
272 return (chunk', rc+1 {-room for sentinel-})
275 reAllocMem :: Addr -> Int -> IO Addr
276 reAllocMem ptr sz = do
277 chunk <- _ccall_ realloc ptr sz
279 then fail "reAllocMem"
282 allocMem :: Int -> IO Addr
284 chunk <- _ccall_ malloc sz
286 #if __GLASGOW_HASKELL__ < 501
287 then constructErrorAndFail "allocMem"
289 then ioException (IOError Nothing ResourceExhausted "malloc"
290 "out of memory" Nothing)
298 currentChar :: StringBuffer -> Char
299 currentChar sb = case currentChar# sb of c -> C# c
301 lookAhead :: StringBuffer -> Int -> Char
302 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
304 indexSBuffer :: StringBuffer -> Int -> Char
305 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
307 currentChar# :: StringBuffer -> Char#
308 indexSBuffer# :: StringBuffer -> Int# -> Char#
309 lookAhead# :: StringBuffer -> Int# -> Char#
310 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
311 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
313 -- relative lookup, i.e, currentChar = lookAhead 0
314 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
316 currentIndex# :: StringBuffer -> Int#
317 currentIndex# (StringBuffer fo# _ _ c#) = c#
319 lexemeIndex :: StringBuffer -> Int#
320 lexemeIndex (StringBuffer fo# _ c# _) = c#
323 moving the start point of the current lexeme.
326 -- moving the end point of the current lexeme.
327 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
328 setCurrentPos# (StringBuffer fo l# s# c#) i# =
329 StringBuffer fo l# s# (c# +# i#)
331 -- augmenting the current lexeme by one.
332 incLexeme :: StringBuffer -> StringBuffer
333 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
335 decLexeme :: StringBuffer -> StringBuffer
336 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
340 -- move the start and end point of the buffer on by
344 stepOn :: StringBuffer -> StringBuffer
345 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
347 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
348 stepOnBy# (StringBuffer fo# l# s# c#) i# =
350 new_s# -> StringBuffer fo# l# new_s# new_s#
353 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
354 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
356 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
357 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
359 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
360 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
361 = StringBuffer fo l s# c#
363 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
365 stepOnUntil pred (StringBuffer fo l# s# c#) =
369 case indexCharOffAddr# fo c# of
370 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
371 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
372 | otherwise -> loop (c# +# 1#)
374 stepOverLexeme :: StringBuffer -> StringBuffer
375 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
377 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
378 expandWhile pred (StringBuffer fo l# s# c#) =
382 case indexCharOffAddr# fo c# of
383 ch# | pred (C# ch#) -> loop (c# +# 1#)
384 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
385 | otherwise -> StringBuffer fo l# s# c#
387 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
388 expandWhile# pred (StringBuffer fo l# s# c#) =
392 case indexCharOffAddr# fo c# of
393 ch# | pred ch# -> loop (c# +# 1#)
394 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
395 | otherwise -> StringBuffer fo l# s# c#
397 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
398 scanNumLit acc (StringBuffer fo l# s# c#) =
402 case indexCharOffAddr# fo c# of
403 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
404 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
405 | otherwise -> (acc,StringBuffer fo l# s# c#)
408 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
409 expandUntilMatch (StringBuffer fo l# s# c#) str =
412 loop c# [] = Just (StringBuffer fo l# s# c#)
413 loop c# ((C# x#):xs) =
414 case indexCharOffAddr# fo c# of
415 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
416 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
417 | otherwise -> loop (c# +# 1#) str
422 -- at or beyond end of buffer?
423 bufferExhausted :: StringBuffer -> Bool
424 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
426 emptyLexeme :: StringBuffer -> Bool
427 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
430 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
431 prefixMatch (StringBuffer fo l# s# c#) str =
434 loop c# [] = Just (StringBuffer fo l# s# c#)
436 | indexCharOffAddr# fo c# `eqChar#` x#
441 untilEndOfString# :: StringBuffer -> StringBuffer
442 untilEndOfString# (StringBuffer fo l# s# c#) =
445 getch# i# = indexCharOffAddr# fo i#
450 case getch# (c# -# 1#) of
452 -- looks like an escaped something or other to me,
453 -- better count the number of "\\"s that are immediately
454 -- preceeding to decide if the " is escaped.
458 '\\'# -> odd_slashes (not flg) (i# -# 1#)
461 if odd_slashes True (c# -# 2#) then
462 -- odd number, " is ecaped.
464 else -- a real end of string delimiter after all.
465 StringBuffer fo l# s# c#
466 _ -> StringBuffer fo l# s# c#
468 if c# >=# l# then -- hit sentinel, this doesn't look too good..
469 StringBuffer fo l# l# l#
475 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
476 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
480 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
481 = StringBuffer fo l# c# c#
486 lexemeToString :: StringBuffer -> String
487 lexemeToString (StringBuffer fo _ start_pos# current#) =
488 if start_pos# ==# current# then
491 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
493 lexemeToByteArray :: StringBuffer -> ByteArray Int
494 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
495 if start_pos# ==# current# then
496 error "lexemeToByteArray"
498 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
500 lexemeToFastString :: StringBuffer -> FastString
501 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
502 if start_pos# ==# current# then
503 mkFastCharString2 (A# fo) (I# 0#)
505 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
508 Create a StringBuffer from the current lexeme, and add a sentinel
509 at the end. Know What You're Doing before taking this function
512 lexemeToBuffer :: StringBuffer -> StringBuffer
513 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
514 if start_pos# ==# current# then
515 StringBuffer fo 0# start_pos# current# -- an error, really.
517 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)