2 % (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
9 {-# OPTIONS -fno-prune-tydecls #-}
14 -- creation/destruction
15 hGetStringBuffer, -- :: FilePath -> IO StringBuffer
16 stringToStringBuffer, -- :: String -> IO StringBuffer
17 freeStringBuffer, -- :: StringBuffer -> IO ()
20 currentChar, -- :: StringBuffer -> Char
21 currentChar#, -- :: StringBuffer -> Char#
22 indexSBuffer, -- :: StringBuffer -> Int -> Char
23 indexSBuffer#, -- :: StringBuffer -> Int# -> Char#
24 -- relative lookup, i.e, currentChar = lookAhead 0
25 lookAhead, -- :: StringBuffer -> Int -> Char
26 lookAhead#, -- :: StringBuffer -> Int# -> Char#
29 currentIndex#, -- :: StringBuffer -> Int#
30 lexemeIndex, -- :: StringBuffer -> Int#
32 -- moving the end point of the current lexeme.
33 setCurrentPos#, -- :: StringBuffer -> Int# -> StringBuffer
34 incLexeme, -- :: StringBuffer -> StringBuffer
35 decLexeme, -- :: StringBuffer -> StringBuffer
37 -- move the start and end lexeme pointer on by x units.
38 stepOn, -- :: StringBuffer -> StringBuffer
39 stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer
40 stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer
41 stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
42 stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
43 stepOverLexeme, -- :: StringBuffer -> StringBuffer
44 scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer)
45 squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer
46 mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer
47 expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
48 expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
49 expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
50 -- at or beyond end of buffer?
51 bufferExhausted, -- :: StringBuffer -> Bool
52 emptyLexeme, -- :: StringBuffer -> Bool
55 prefixMatch, -- :: StringBuffer -> String -> Bool
56 untilEndOfString#, -- :: StringBuffer -> Int#
59 lexemeToString, -- :: StringBuffer -> String
60 lexemeToByteArray, -- :: StringBuffer -> _ByteArray Int
61 lexemeToFastString, -- :: StringBuffer -> FastString
62 lexemeToBuffer, -- :: StringBuffer -> StringBuffer
68 #include "HsVersions.h"
71 import PrelAddr ( Addr(..) )
76 #include "../lib/std/cbits/stgerror.h"
78 #if __GLASGOW_HASKELL__ >= 303
80 #if __GLASGOW_HASKELL__ < 407
81 , slurpFile -- comes from PrelHandle or IOExts now
88 import IO ( openFile, hFileSize, hClose, IOMode(..) )
92 #if __GLASGOW_HASKELL__ < 301
93 import IOBase ( Handle, IOError(..), IOErrorType(..),
94 constructErrorAndFail )
95 import IOHandle ( readHandle, writeHandle, filePtr )
96 import PackBase ( unpackCStringBA )
98 # if __GLASGOW_HASKELL__ <= 302
99 import PrelIOBase ( Handle, IOError(..), IOErrorType(..),
100 constructErrorAndFail )
101 import PrelHandle ( readHandle, writeHandle, filePtr )
103 import PrelPack ( unpackCStringBA )
106 #if __GLASGOW_HASKELL__ < 402
107 import Util ( bracket )
109 import Exception ( bracket )
114 import Char (isDigit)
127 instance Show StringBuffer where
128 showsPrec _ s = showString ""
132 hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
133 hGetStringBuffer expand_tabs fname = do
134 (a, read) <- if expand_tabs
135 then slurpFileExpandTabs fname
138 let (A# a#) = a; (I# read#) = read
140 -- add sentinel '\NUL'
141 _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
142 return (StringBuffer a# read# 0# 0#)
144 #if __GLASGOW_HASKELL__ < 303
146 openFile fname ReadMode >>= \ hndl ->
147 hFileSize hndl >>= \ len ->
148 let len_i = fromInteger len in
149 -- Allocate an array for system call to store its bytes into.
150 -- ToDo: make it robust
151 -- trace (show ((len_i::Int)+1)) $
152 _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
153 if addr2Int# a# ==# 0# then
154 fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
156 readHandle hndl >>= \ hndl_ ->
157 writeHandle hndl hndl_ >>
158 let ptr = filePtr hndl_ in
159 #if __GLASGOW_HASKELL__ <= 302
160 _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
162 _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
165 if read# ==# 0# then -- EOF or some other error
166 fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
168 return (arr, I# read#)
171 unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
172 unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
174 _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
179 -----------------------------------------------------------------------------
180 -- Turn a String into a StringBuffer
183 stringToStringBuffer :: String -> IO StringBuffer
184 stringToStringBuffer str =
185 do let sz@(I# sz#) = length str + 1
186 (Ptr a@(A# a#)) <- mallocBytes sz
188 writeCharOffAddr a (sz-1) '\0' -- sentinel
189 return (StringBuffer a# sz# 0# 0#)
191 fill_in [] _ = return ()
192 fill_in (c:cs) a = do
193 writeCharOffAddr a 0 c
194 fill_in cs (a `plusAddr` 1)
196 freeStringBuffer :: StringBuffer -> IO ()
197 freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr (A# a#))
200 -----------------------------------------------------------------------------
201 This very disturbing bit of code is used for expanding the tabs in a
202 file before we start parsing it. Expanding the tabs early makes the
203 lexer a lot simpler: we only have to record the beginning of the line
204 in order to be able to calculate the column offset of the current
207 We guess the size of the buffer required as 20% extra for
208 expanded tabs, and enlarge it if necessary.
211 #if __GLASGOW_HASKELL__ < 303
212 mayBlock fo thing = thing
214 writeCharOffAddr :: Addr -> Int -> Char -> IO ()
215 writeCharOffAddr addr off c
216 = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
220 #if __GLASGOW_HASKELL__ < 303
221 getErrType = _casm_ ``%r = ghc_errtype;''
223 getErrType = _ccall_ getErrType__
226 slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
227 slurpFileExpandTabs fname = do
228 bracket (openFile fname ReadMode) (hClose)
230 do sz <- hFileSize handle
231 if sz > toInteger (maxBound::Int)
232 then IOERROR (userError "slurpFile: file too big")
234 let sz_i = fromInteger sz
235 sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
236 chunk <- allocMem sz_i'
237 trySlurp handle sz_i' chunk
240 trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
241 trySlurp handle sz_i chunk =
242 #if __GLASGOW_HASKELL__ == 303
243 wantReadableHandle "hGetChar" handle >>= \ handle_ ->
244 let fo = haFO__ handle_ in
245 #elif __GLASGOW_HASKELL__ > 303
246 wantReadableHandle "hGetChar" handle $ \ handle_ ->
247 let fo = haFO__ handle_ in
249 readHandle handle >>= \ handle_ ->
250 let fo = filePtr handle_ in
257 slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
258 slurpFile c off chunk chunk_sz max_off = slurp c off
261 slurp :: Int# -> Int# -> IO (Addr, Int)
262 slurp c off | off >=# max_off = do
263 let new_sz = chunk_sz *# 2#
264 chunk' <- reAllocMem chunk (I# new_sz)
265 slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
267 intc <- mayBlock fo (_ccall_ fileGetc fo)
268 if intc == ((-1)::Int)
269 then do errtype <- getErrType
270 if errtype == (ERR_EOF :: Int)
271 then return (chunk, I# off)
272 else constructErrorAndFail "slurpFile"
273 else case chr intc of
275 ch -> do writeCharOffAddr chunk (I# off) ch
276 let c' | ch == '\n' = 0#
277 | otherwise = c +# 1#
280 tabIt :: Int# -> Int# -> IO (Addr, Int)
281 -- can't run out of buffer in here, because we reserved an
282 -- extra tAB_SIZE bytes at the end earlier.
284 writeCharOffAddr chunk (I# off) ' '
287 if c' `remInt#` tAB_SIZE ==# 0#
292 -- allow space for a full tab at the end of the buffer
293 -- (that's what the max_off thing is for),
294 -- and add 1 to allow room for the final sentinel \NUL at
295 -- the end of the file.
296 (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
297 #if __GLASGOW_HASKELL__ < 404
298 writeHandle handle handle_
301 then constructErrorAndFail "slurpFile"
302 else return (chunk', rc+1 {-room for sentinel-})
305 reAllocMem :: Addr -> Int -> IO Addr
306 reAllocMem ptr sz = do
307 chunk <- _ccall_ realloc ptr sz
309 #if __GLASGOW_HASKELL__ >= 400
310 then fail "reAllocMem"
312 then fail (userError "reAllocMem")
316 allocMem :: Int -> IO Addr
318 chunk <- _ccall_ malloc sz
319 #if __GLASGOW_HASKELL__ < 303
321 then fail (userError "allocMem")
325 then constructErrorAndFail "allocMem"
333 currentChar :: StringBuffer -> Char
334 currentChar sb = case currentChar# sb of c -> C# c
336 lookAhead :: StringBuffer -> Int -> Char
337 lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
339 indexSBuffer :: StringBuffer -> Int -> Char
340 indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
342 currentChar# :: StringBuffer -> Char#
343 indexSBuffer# :: StringBuffer -> Int# -> Char#
344 lookAhead# :: StringBuffer -> Int# -> Char#
345 currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
346 indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
348 -- relative lookup, i.e, currentChar = lookAhead 0
349 lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
351 currentIndex# :: StringBuffer -> Int#
352 currentIndex# (StringBuffer fo# _ _ c#) = c#
354 lexemeIndex :: StringBuffer -> Int#
355 lexemeIndex (StringBuffer fo# _ c# _) = c#
358 moving the start point of the current lexeme.
361 -- moving the end point of the current lexeme.
362 setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
363 setCurrentPos# (StringBuffer fo l# s# c#) i# =
364 StringBuffer fo l# s# (c# +# i#)
366 -- augmenting the current lexeme by one.
367 incLexeme :: StringBuffer -> StringBuffer
368 incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)
370 decLexeme :: StringBuffer -> StringBuffer
371 decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)
375 -- move the start and end point of the buffer on by
379 stepOn :: StringBuffer -> StringBuffer
380 stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.
382 stepOnBy# :: StringBuffer -> Int# -> StringBuffer
383 stepOnBy# (StringBuffer fo# l# s# c#) i# =
385 new_s# -> StringBuffer fo# l# new_s# new_s#
388 stepOnTo# :: StringBuffer -> Int# -> StringBuffer
389 stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#
391 squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
392 squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#
394 mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
395 mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
396 = StringBuffer fo l s# c#
398 stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer
400 stepOnUntil pred (StringBuffer fo l# s# c#) =
404 case indexCharOffAddr# fo c# of
405 ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
406 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
407 | otherwise -> loop (c# +# 1#)
409 stepOverLexeme :: StringBuffer -> StringBuffer
410 stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#
412 expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
413 expandWhile pred (StringBuffer fo l# s# c#) =
417 case indexCharOffAddr# fo c# of
418 ch# | pred (C# ch#) -> loop (c# +# 1#)
419 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
420 | otherwise -> StringBuffer fo l# s# c#
422 expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
423 expandWhile# pred (StringBuffer fo l# s# c#) =
427 case indexCharOffAddr# fo c# of
428 ch# | pred ch# -> loop (c# +# 1#)
429 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
430 | otherwise -> StringBuffer fo l# s# c#
432 scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
433 scanNumLit acc (StringBuffer fo l# s# c#) =
437 case indexCharOffAddr# fo c# of
438 ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
439 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
440 | otherwise -> (acc,StringBuffer fo l# s# c#)
443 expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
444 expandUntilMatch (StringBuffer fo l# s# c#) str =
447 loop c# [] = Just (StringBuffer fo l# s# c#)
448 loop c# ((C# x#):xs) =
449 case indexCharOffAddr# fo c# of
450 ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
451 | ch# `eqChar#` x# -> loop (c# +# 1#) xs
452 | otherwise -> loop (c# +# 1#) str
457 -- at or beyond end of buffer?
458 bufferExhausted :: StringBuffer -> Bool
459 bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#
461 emptyLexeme :: StringBuffer -> Bool
462 emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#
465 prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
466 prefixMatch (StringBuffer fo l# s# c#) str =
469 loop c# [] = Just (StringBuffer fo l# s# c#)
471 | indexCharOffAddr# fo c# `eqChar#` x#
476 untilEndOfString# :: StringBuffer -> StringBuffer
477 untilEndOfString# (StringBuffer fo l# s# c#) =
480 getch# i# = indexCharOffAddr# fo i#
485 case getch# (c# -# 1#) of
487 -- looks like an escaped something or other to me,
488 -- better count the number of "\\"s that are immediately
489 -- preceeding to decide if the " is escaped.
493 '\\'# -> odd_slashes (not flg) (i# -# 1#)
496 if odd_slashes True (c# -# 2#) then
497 -- odd number, " is ecaped.
499 else -- a real end of string delimiter after all.
500 StringBuffer fo l# s# c#
501 _ -> StringBuffer fo l# s# c#
503 if c# >=# l# then -- hit sentinel, this doesn't look too good..
504 StringBuffer fo l# l# l#
510 stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
511 stepOnUntilChar# (StringBuffer fo l# s# c#) x# =
515 | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
516 = StringBuffer fo l# c# c#
521 lexemeToString :: StringBuffer -> String
522 lexemeToString (StringBuffer fo _ start_pos# current#) =
523 if start_pos# ==# current# then
526 unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
528 lexemeToByteArray :: StringBuffer -> ByteArray Int
529 lexemeToByteArray (StringBuffer fo _ start_pos# current#) =
530 if start_pos# ==# current# then
531 error "lexemeToByteArray"
533 copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
535 lexemeToFastString :: StringBuffer -> FastString
536 lexemeToFastString (StringBuffer fo l# start_pos# current#) =
537 if start_pos# ==# current# then
538 mkFastCharString2 (A# fo) (I# 0#)
540 mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))
543 Create a StringBuffer from the current lexeme, and add a sentinel
544 at the end. Know What You're Doing before taking this function
547 lexemeToBuffer :: StringBuffer -> StringBuffer
548 lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
549 if start_pos# ==# current# then
550 StringBuffer fo 0# start_pos# current# -- an error, really.
552 unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)