2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls -#include "../lib/std/cbits/stgio.h" #-}
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
18 currentChar, -- :: StringBuffer -> Char
19 currentChar#, -- :: StringBuffer -> Char#
20 indexSBuffer, -- :: StringBuffer -> Int -> Char
21 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
22 -- relative lookup, i.e, currentChar = lookAhead 0
23 lookAhead, -- :: StringBuffer -> Int -> Char
24 lookAhead#, -- :: StringBuffer -> Int# -> Char#
27 currentIndex#, -- :: StringBuffer -> Int#
28 lexemeIndex, -- :: StringBuffer -> Int#
30 -- moving the end point of the current lexeme.
31 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
32 incLexeme, -- :: StringBuffer -> StringBuffer
33 decLexeme, -- :: StringBuffer -> StringBuffer
35 -- move the start and end lexeme pointer on by x units.
36 stepOn, -- :: StringBuffer -> StringBuffer
37 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
38 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
39 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
40 stepOverLexeme, -- :: StringBuffer -> StringBuffer
41 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
42 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
43 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
44 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
45 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
46 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
47 -- at or beyond end of buffer?
48 bufferExhausted, -- :: StringBuffer -> Bool
49 emptyLexeme, -- :: StringBuffer -> Bool
52 prefixMatch, -- :: StringBuffer -> String -> Bool
53 untilEndOfString#, -- :: StringBuffer -> Int#
54 untilChar#, -- :: StringBuffer -> Char# -> Int#
57 lexemeToString, -- :: StringBuffer -> String
58 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
59 lexemeToFastString, -- :: StringBuffer -> FastString
60 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
66 #include "HsVersions.h"
69 import Addr ( Addr(..) )
74 #if __GLASGOW_HASKELL__ >= 303
75 import IO ( openFile, slurpFile )
79 #include "../lib/std/cbits/error.h"
82 import IO ( openFile, hFileSize, hClose, IOMode(..) )
85 #if __GLASGOW_HASKELL__ < 301
86 import IOBase ( IOError(..), IOErrorType(..) )
87 import IOHandle ( readHandle, writeHandle, filePtr )
88 import PackBase ( unpackCStringBA )
90 # if __GLASGOW_HASKELL__ <= 302
91 import PrelIOBase ( IOError(..), IOErrorType(..) )
92 import PrelHandle ( readHandle, writeHandle, filePtr )
94 import PrelPack ( unpackCStringBA )
97 #if __GLASGOW_HASKELL__ < 402
98 import Util ( bracket )
100 import Exception ( bracket )
105 import Char (isDigit)
118 instance Text StringBuffer where
119 showsPrec _ s = showString ""
123 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
124 hGetStringBuffer expand_tabs fname =
125 #if __GLASGOW_HASKELL__ >= 303
127 then slurpFileExpandTabs fname
128 else slurpFile fname)
133 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' a (I# (read# -# 1#)) >>= \ () ->
134 return (StringBuffer a# read# 0# 0#)
136 openFile fname ReadMode >>= \ hndl ->
137 hFileSize hndl >>= \ len ->
138 let len_i = fromInteger len in
139 -- Allocate an array for system call to store its bytes into.
140 -- ToDo: make it robust
141 -- trace (show ((len_i::Int)+1)) $
142 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
143 if addr2Int# a# ==# 0# then
144 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
146 readHandle hndl >>= \ hndl_ ->
147 writeHandle hndl hndl_ >>
148 let ptr = filePtr hndl_ in
149 #if __GLASGOW_HASKELL__ <= 302
150 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
152 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
155 if read# ==# 0# then -- EOF or some other error
156 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
158 -- Add a sentinel NUL
159 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' arr (I# (read# -# 1#)) >>= \ () ->
160 return (StringBuffer a# read# 0# 0#)
164 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
165 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
167 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
172 -----------------------------------------------------------------------------
173 This very disturbing bit of code is used for expanding the tabs in a
174 file before we start parsing it. Expanding the tabs early makes the
175 lexer a lot simpler: we only have to record the beginning of the line
176 in order to be able to calculate the column offset of the current
179 We guess the size of the buffer required as 20% extra for
180 expanded tabs, and enlarge it if necessary.
183 slurpFileExpandTabs :: FilePath -> IO (Addr, Int)
184 slurpFileExpandTabs fname = do
185 bracket (openFile fname ReadMode) (hClose)
187 do sz <- hFileSize handle
188 if sz > toInteger (maxBound::Int)
189 then ioError (userError "slurpFile: file too big")
191 let sz_i = fromInteger sz
192 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
193 chunk <- allocMem sz_i'
194 trySlurp handle sz_i' chunk
197 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
198 trySlurp handle sz_i chunk =
199 wantReadableHandle "hGetChar" handle $ \ handle_ ->
207 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO Int
208 slurpFile c off chunk chunk_sz max_off = slurp c off
211 slurp :: Int# -> Int# -> IO Int
212 slurp c off | off >=# max_off = do
213 let new_sz = chunk_sz *# 2#
214 chunk' <- reAllocMem chunk (I# new_sz)
215 slurpFile c off chunk' new_sz (new_sz -# tAB_SIZE)
217 intc <- mayBlock fo (_ccall_ fileGetc fo)
218 if intc == ((-1)::Int)
219 then do errtype <- _ccall_ getErrType__
220 if errtype == (ERR_EOF :: Int)
222 else constructErrorAndFail "slurpFile"
223 else case chr intc of
225 ch -> do writeCharOffAddr chunk (I# off) ch
226 let c' | ch == '\n' = 0#
227 | otherwise = c +# 1#
230 tabIt :: Int# -> Int# -> IO Int
231 -- can't run out of buffer in here, because we reserved an
232 -- extra tAB_SIZE bytes at the end earlier.
234 writeCharOffAddr chunk (I# off) ' '
237 if c' `remInt#` tAB_SIZE ==# 0#
242 -- allow space for a full tab at the end of the buffer
243 -- (that's what the max_off thing is for)
244 rc <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# tAB_SIZE)
245 writeHandle handle handle_
247 then constructErrorAndFail "slurpFile"
248 else return (chunk, rc)
251 reAllocMem :: Addr -> Int -> IO Addr
252 reAllocMem ptr sz = do
253 chunk <- _ccall_ realloc ptr sz
255 then constructErrorAndFail "reAllocMem"
258 allocMem :: Int -> IO Addr
260 chunk <- _ccall_ allocMemory__ sz
262 then constructErrorAndFail "allocMem"
269 currentChar :: StringBuffer -> Char
270 currentChar sb = case currentChar# sb of c -> C# c
272 lookAhead :: StringBuffer -> Int -> Char
273 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
275 indexSBuffer :: StringBuffer -> Int -> Char
276 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
278 currentChar# :: StringBuffer -> Char#
279 indexSBuffer# :: StringBuffer -> Int# -> Char#
280 lookAhead# :: StringBuffer -> Int# -> Char#
281 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
282 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
284 -- relative lookup, i.e, currentChar = lookAhead 0
285 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
287 currentIndex# :: StringBuffer -> Int#
288 currentIndex# (StringBuffer fo# _ _ c#) = c#
290 lexemeIndex :: StringBuffer -> Int#
291 lexemeIndex (StringBuffer fo# _ c# _) = c#
294 moving the start point of the current lexeme.
297 -- moving the end point of the current lexeme.
298 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
299 setCurrentPos# (StringBuffer fo l# s# c#) i# =
300 StringBuffer fo l# s# (c# +# i#)
302 -- augmenting the current lexeme by one.
303 incLexeme :: StringBuffer -> StringBuffer
304 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
306 decLexeme :: StringBuffer -> StringBuffer
307 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
311 -- move the start and end point of the buffer on by
315 stepOn :: StringBuffer -> StringBuffer
316 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
318 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
319 stepOnBy# (StringBuffer fo# l# s# c#) i# =
321 new_s# -> StringBuffer fo# l# new_s# new_s#
324 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
325 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
327 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
328 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
330 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
331 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
332 = StringBuffer fo l s# c#
334 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
336 stepOnUntil pred (StringBuffer fo l# s# c#) =
340 case indexCharOffAddr# fo c# of
341 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
342 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
343 | otherwise -> loop (c# +# 1#)
345 stepOverLexeme :: StringBuffer -> StringBuffer
346 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
348 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
349 expandWhile pred (StringBuffer fo l# s# c#) =
353 case indexCharOffAddr# fo c# of
354 ch# | pred (C# ch#) -> loop (c# +# 1#)
355 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
356 | otherwise -> StringBuffer fo l# s# c#
358 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
359 expandWhile# pred (StringBuffer fo l# s# c#) =
363 case indexCharOffAddr# fo c# of
364 ch# | pred ch# -> loop (c# +# 1#)
365 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
366 | otherwise -> StringBuffer fo l# s# c#
368 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
369 scanNumLit acc (StringBuffer fo l# s# c#) =
373 case indexCharOffAddr# fo c# of
374 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
375 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
376 | otherwise -> (acc,StringBuffer fo l# s# c#)
379 expandUntilMatch :: StringBuffer -> String -> StringBuffer
380 expandUntilMatch (StringBuffer fo l# s# c#) str =
383 loop c# [] = StringBuffer fo l# s# c#
385 | indexCharOffAddr# fo c# `eqChar#` x#
388 = loop (c# +# 1#) str
393 -- at or beyond end of buffer?
394 bufferExhausted :: StringBuffer -> Bool
395 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
397 emptyLexeme :: StringBuffer -> Bool
398 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
401 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
402 prefixMatch (StringBuffer fo l# s# c#) str =
405 loop c# [] = Just (StringBuffer fo l# s# c#)
407 | indexCharOffAddr# fo c# `eqChar#` x#
412 untilEndOfString# :: StringBuffer -> StringBuffer
413 untilEndOfString# (StringBuffer fo l# s# c#) =
416 getch# i# = indexCharOffAddr# fo i#
421 case getch# (c# -# 1#) of
423 -- looks like an escaped something or other to me,
424 -- better count the number of "\\"s that are immediately
425 -- preceeding to decide if the " is escaped.
429 '\\'# -> odd_slashes (not flg) (i# -# 1#)
432 if odd_slashes True (c# -# 2#) then
433 -- odd number, " is ecaped.
435 else -- a real end of string delimiter after all.
436 StringBuffer fo l# s# c#
437 _ -> StringBuffer fo l# s# c#
439 if c# >=# l# then -- hit sentinel, this doesn't look too good..
440 StringBuffer fo l# l# l#
446 untilChar# :: StringBuffer -> Char# -> StringBuffer
447 untilChar# (StringBuffer fo l# s# c#) x# =
451 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
452 = StringBuffer fo l# s# c#
457 lexemeToString :: StringBuffer -> String
458 lexemeToString (StringBuffer fo _ start_pos# current#) =
459 if start_pos# ==# current# then
462 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
464 lexemeToByteArray :: StringBuffer -> _ByteArray Int
465 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
466 if start_pos# ==# current# then
467 error "lexemeToByteArray"
469 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
471 lexemeToFastString :: StringBuffer -> FastString
472 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
473 if start_pos# ==# current# then
474 mkFastCharString2 (A# fo) (I# 0#)
476 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
479 Create a StringBuffer from the current lexeme, and add a sentinel
480 at the end. Know What You're Doing before taking this function
483 lexemeToBuffer :: StringBuffer -> StringBuffer
484 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
485 if start_pos# ==# current# then
486 StringBuffer fo 0# start_pos# current# -- an error, really.
488 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)