2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
14 -- creation/destruction
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
17 stringToStringBuffer, -- :: String -> IO StringBuffer
18 freeStringBuffer, -- :: StringBuffer -> IO ()
22 currentChar, -- :: StringBuffer -> Char
23 currentChar#, -- :: StringBuffer -> Char#
24 indexSBuffer, -- :: StringBuffer -> Int -> Char
25 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
26 -- relative lookup, i.e, currentChar = lookAhead 0
27 lookAhead, -- :: StringBuffer -> Int -> Char
28 lookAhead#, -- :: StringBuffer -> Int# -> Char#
31 currentIndex#, -- :: StringBuffer -> Int#
32 lexemeIndex, -- :: StringBuffer -> Int#
34 -- moving the end point of the current lexeme.
35 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
36 incLexeme, -- :: StringBuffer -> StringBuffer
37 decLexeme, -- :: StringBuffer -> StringBuffer
39 -- move the start and end lexeme pointer on by x units.
40 stepOn, -- :: StringBuffer -> StringBuffer
41 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
42 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
43 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
44 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
45 stepOverLexeme, -- :: StringBuffer -> StringBuffer
46 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
47 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
48 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
49 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
50 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
51 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
52 -- at or beyond end of buffer?
53 bufferExhausted, -- :: StringBuffer -> Bool
54 emptyLexeme, -- :: StringBuffer -> Bool
57 prefixMatch, -- :: StringBuffer -> String -> Bool
58 untilEndOfString#, -- :: StringBuffer -> Int#
61 lexemeToString, -- :: StringBuffer -> String
62 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
63 lexemeToFastString, -- :: StringBuffer -> FastString
64 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
70 #include "HsVersions.h"
73 import PrelAddr ( Addr(..) )
78 #include "../lib/std/cbits/stgerror.h"
80 #if __GLASGOW_HASKELL__ >= 303
82 #if __GLASGOW_HASKELL__ < 407
83 , slurpFile -- comes from PrelHandle or IOExts now
90 import IO ( openFile, hFileSize, hClose, IOMode(..) )
94 #if __GLASGOW_HASKELL__ < 301
95 import IOBase ( Handle, IOError(..), IOErrorType(..),
96 constructErrorAndFail )
97 import IOHandle ( readHandle, writeHandle, filePtr )
98 import PackBase ( unpackCStringBA )
100 # if __GLASGOW_HASKELL__ <= 302
101 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
102 constructErrorAndFail )
103 import PrelHandle ( readHandle, writeHandle, filePtr )
105 import PrelPack ( unpackCStringBA )
108 #if __GLASGOW_HASKELL__ < 402
109 import Util ( bracket )
111 import Exception ( bracket )
116 import Char (isDigit)
129 instance Show StringBuffer where
130 showsPrec _ s = showString ""
134 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
135 hGetStringBuffer expand_tabs fname = do
136 (a, read) <- if expand_tabs
137 then slurpFileExpandTabs fname
140 let (A# a#) = a; (I# read#) = read
142 -- add sentinel '\NUL'
143 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
144 return (StringBuffer a# read# 0# 0#)
146 #if __GLASGOW_HASKELL__ < 303
148 openFile fname ReadMode >>= \ hndl ->
149 hFileSize hndl >>= \ len ->
150 let len_i = fromInteger len in
151 -- Allocate an array for system call to store its bytes into.
152 -- ToDo: make it robust
153 -- trace (show ((len_i::Int)+1)) $
154 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
155 if addr2Int# a# ==# 0# then
156 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
158 readHandle hndl >>= \ hndl_ ->
159 writeHandle hndl hndl_ >>
160 let ptr = filePtr hndl_ in
161 #if __GLASGOW_HASKELL__ <= 302
162 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
164 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
167 if read# ==# 0# then -- EOF or some other error
168 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
170 return (arr, I# read#)
173 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
174 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
176 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
181 -----------------------------------------------------------------------------
182 -- Turn a String into a StringBuffer
186 stringToStringBuffer :: String -> IO StringBuffer
187 stringToStringBuffer str =
188 do let sz@(I# sz#) = length str + 1
189 (Ptr a@(A# a#)) <- mallocBytes sz
191 writeCharOffAddr a (sz-1) '\0' -- sentinel
192 return (StringBuffer a# sz# 0# 0#)
194 fill_in [] _ = return ()
195 fill_in (c:cs) a = do
196 writeCharOffAddr a 0 c
197 fill_in cs (a `plusAddr` 1)
199 freeStringBuffer :: StringBuffer -> IO ()
200 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr (A# a#))
204 -----------------------------------------------------------------------------
205 This very disturbing bit of code is used for expanding the tabs in a
206 file before we start parsing it. Expanding the tabs early makes the
207 lexer a lot simpler: we only have to record the beginning of the line
208 in order to be able to calculate the column offset of the current
211 We guess the size of the buffer required as 20% extra for
212 expanded tabs, and enlarge it if necessary.
215 #if __GLASGOW_HASKELL__ < 303
216 mayBlock fo thing = thing
218 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
219 writeCharOffAddr addr off c
220 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
224 #if __GLASGOW_HASKELL__ < 303
225 getErrType = _casm_ ``%r = ghc_errtype;''
227 getErrType = _ccall_ getErrType__
230 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
231 slurpFileExpandTabs fname = do
232 bracket (openFile fname ReadMode) (hClose)
234 do sz <- hFileSize handle
235 if sz > toInteger (maxBound::Int)
236 then IOERROR (userError "slurpFile: file too big")
238 let sz_i = fromInteger sz
239 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
240 chunk <- allocMem sz_i'
241 trySlurp handle sz_i' chunk
244 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
245 trySlurp handle sz_i chunk =
246 #if __GLASGOW_HASKELL__ == 303
247 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
248 let fo = haFO__ handle_ in
249 #elif __GLASGOW_HASKELL__ > 303
250 wantReadableHandle "hGetChar" handle $ \ handle_ ->
251 let fo = haFO__ handle_ in
253 readHandle handle >>= \ handle_ ->
254 let fo = filePtr handle_ in
261 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
262 slurpFile c off chunk chunk_sz max_off = slurp c off
265 slurp :: Int# -> Int# -> IO (Addr, Int)
266 slurp c off | off >=# max_off = do
267 let new_sz = chunk_sz *# 2#
268 chunk' <- reAllocMem chunk (I# new_sz)
269 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
271 intc <- mayBlock fo (_ccall_ fileGetc fo)
272 if intc == ((-1)::Int)
273 then do errtype <- getErrType
274 if errtype == (ERR_EOF :: Int)
275 then return (chunk, I# off)
276 else constructErrorAndFail "slurpFile"
277 else case chr intc of
279 ch -> do writeCharOffAddr chunk (I# off) ch
280 let c' | ch == '\n' = 0#
281 | otherwise = c +# 1#
284 tabIt :: Int# -> Int# -> IO (Addr, Int)
285 -- can't run out of buffer in here, because we reserved an
286 -- extra tAB_SIZE bytes at the end earlier.
288 writeCharOffAddr chunk (I# off) ' '
291 if c' `remInt#` tAB_SIZE ==# 0#
296 -- allow space for a full tab at the end of the buffer
297 -- (that's what the max_off thing is for),
298 -- and add 1 to allow room for the final sentinel \NUL at
299 -- the end of the file.
300 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
301 #if __GLASGOW_HASKELL__ < 404
302 writeHandle handle handle_
305 then constructErrorAndFail "slurpFile"
306 else return (chunk', rc+1 {-room for sentinel-})
309 reAllocMem :: Addr -> Int -> IO Addr
310 reAllocMem ptr sz = do
311 chunk <- _ccall_ realloc ptr sz
313 #if __GLASGOW_HASKELL__ >= 400
314 then fail "reAllocMem"
316 then fail (userError "reAllocMem")
320 allocMem :: Int -> IO Addr
322 chunk <- _ccall_ malloc sz
323 #if __GLASGOW_HASKELL__ < 303
325 then fail (userError "allocMem")
329 then constructErrorAndFail "allocMem"
337 currentChar :: StringBuffer -> Char
338 currentChar sb = case currentChar# sb of c -> C# c
340 lookAhead :: StringBuffer -> Int -> Char
341 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
343 indexSBuffer :: StringBuffer -> Int -> Char
344 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
346 currentChar# :: StringBuffer -> Char#
347 indexSBuffer# :: StringBuffer -> Int# -> Char#
348 lookAhead# :: StringBuffer -> Int# -> Char#
349 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
350 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
352 -- relative lookup, i.e, currentChar = lookAhead 0
353 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
355 currentIndex# :: StringBuffer -> Int#
356 currentIndex# (StringBuffer fo# _ _ c#) = c#
358 lexemeIndex :: StringBuffer -> Int#
359 lexemeIndex (StringBuffer fo# _ c# _) = c#
362 moving the start point of the current lexeme.
365 -- moving the end point of the current lexeme.
366 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
367 setCurrentPos# (StringBuffer fo l# s# c#) i# =
368 StringBuffer fo l# s# (c# +# i#)
370 -- augmenting the current lexeme by one.
371 incLexeme :: StringBuffer -> StringBuffer
372 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
374 decLexeme :: StringBuffer -> StringBuffer
375 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
379 -- move the start and end point of the buffer on by
383 stepOn :: StringBuffer -> StringBuffer
384 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
386 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
387 stepOnBy# (StringBuffer fo# l# s# c#) i# =
389 new_s# -> StringBuffer fo# l# new_s# new_s#
392 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
393 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
395 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
396 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
398 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
399 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
400 = StringBuffer fo l s# c#
402 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
404 stepOnUntil pred (StringBuffer fo l# s# c#) =
408 case indexCharOffAddr# fo c# of
409 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
410 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
411 | otherwise -> loop (c# +# 1#)
413 stepOverLexeme :: StringBuffer -> StringBuffer
414 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
416 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
417 expandWhile pred (StringBuffer fo l# s# c#) =
421 case indexCharOffAddr# fo c# of
422 ch# | pred (C# ch#) -> loop (c# +# 1#)
423 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
424 | otherwise -> StringBuffer fo l# s# c#
426 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
427 expandWhile# pred (StringBuffer fo l# s# c#) =
431 case indexCharOffAddr# fo c# of
432 ch# | pred ch# -> loop (c# +# 1#)
433 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
434 | otherwise -> StringBuffer fo l# s# c#
436 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
437 scanNumLit acc (StringBuffer fo l# s# c#) =
441 case indexCharOffAddr# fo c# of
442 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
443 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
444 | otherwise -> (acc,StringBuffer fo l# s# c#)
447 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
448 expandUntilMatch (StringBuffer fo l# s# c#) str =
451 loop c# [] = Just (StringBuffer fo l# s# c#)
452 loop c# ((C# x#):xs) =
453 case indexCharOffAddr# fo c# of
454 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
455 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
456 | otherwise -> loop (c# +# 1#) str
461 -- at or beyond end of buffer?
462 bufferExhausted :: StringBuffer -> Bool
463 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
465 emptyLexeme :: StringBuffer -> Bool
466 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
469 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
470 prefixMatch (StringBuffer fo l# s# c#) str =
473 loop c# [] = Just (StringBuffer fo l# s# c#)
475 | indexCharOffAddr# fo c# `eqChar#` x#
480 untilEndOfString# :: StringBuffer -> StringBuffer
481 untilEndOfString# (StringBuffer fo l# s# c#) =
484 getch# i# = indexCharOffAddr# fo i#
489 case getch# (c# -# 1#) of
491 -- looks like an escaped something or other to me,
492 -- better count the number of "\\"s that are immediately
493 -- preceeding to decide if the " is escaped.
497 '\\'# -> odd_slashes (not flg) (i# -# 1#)
500 if odd_slashes True (c# -# 2#) then
501 -- odd number, " is ecaped.
503 else -- a real end of string delimiter after all.
504 StringBuffer fo l# s# c#
505 _ -> StringBuffer fo l# s# c#
507 if c# >=# l# then -- hit sentinel, this doesn't look too good..
508 StringBuffer fo l# l# l#
514 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
515 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
519 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
520 = StringBuffer fo l# c# c#
525 lexemeToString :: StringBuffer -> String
526 lexemeToString (StringBuffer fo _ start_pos# current#) =
527 if start_pos# ==# current# then
530 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
532 lexemeToByteArray :: StringBuffer -> ByteArray Int
533 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
534 if start_pos# ==# current# then
535 error "lexemeToByteArray"
537 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
539 lexemeToFastString :: StringBuffer -> FastString
540 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
541 if start_pos# ==# current# then
542 mkFastCharString2 (A# fo) (I# 0#)
544 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
547 Create a StringBuffer from the current lexeme, and add a sentinel
548 at the end. Know What You're Doing before taking this function
551 lexemeToBuffer :: StringBuffer -> StringBuffer
552 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
553 if start_pos# ==# current# then
554 StringBuffer fo 0# start_pos# current# -- an error, really.
556 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)