2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
26 import CmdLineOpts ( opt_IgnoreIfacePragmas )
27 import Demand ( Demand(..) {- instance Read -} )
28 import UniqFM ( UniqFM, listToUFM, lookupUFM)
29 --import FiniteMap ( FiniteMap, listToFM, lookupFM )
30 #if __GLASGOW_HASKELL__ >= 202
31 import Maybes ( MaybeErr(..) )
33 import Maybes ( Maybe(..), MaybeErr(..) )
39 import ErrUtils ( Error(..) )
40 import Outputable ( Outputable(..) )
41 import PprStyle ( PprStyle(..) )
42 import Util ( nOfThem, panic )
47 #if __GLASGOW_HASKELL__ <= 201
54 %************************************************************************
56 \subsection{Lexical categories}
58 %************************************************************************
60 These functions test strings to see if they fit the lexical categories
61 defined in the Haskell report. Normally applied as in e.g. @isCon
65 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
66 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
68 isLexCon cs = isLexConId cs || isLexConSym cs
69 isLexVar cs = isLexVarId cs || isLexVarSym cs
71 isLexId cs = isLexConId cs || isLexVarId cs
72 isLexSym cs = isLexConSym cs || isLexVarSym cs
78 | cs == SLIT("[]") = True
79 | c == '(' = True -- (), (,), (,,), ...
80 | otherwise = isUpper c || isUpperISO c
86 | otherwise = isLower c || isLowerISO c
92 | otherwise = c == ':'
99 | otherwise = isSymbolASCII c
105 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
106 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
107 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
108 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
109 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
110 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
114 %************************************************************************
116 \subsection{Tuple strings -- ugh!}
118 %************************************************************************
121 mkTupNameStr 0 = SLIT("()")
122 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
123 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
124 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
125 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
126 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
131 %************************************************************************
133 \subsection{Data types}
135 %************************************************************************
137 The token data type, fairly un-interesting except from two constructors,
138 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
139 strictness, unfolding etc) and types for id decls.
141 The Idea/Observation here is that the renamer needs to scan through
142 all of an interface file before it can continue. But only a fraction
143 of the information contained in the file turns out to be useful, so
144 delaying as much as possible of the scanning and parsing of an
145 interface file Makes Sense (Heap profiles of the compiler
146 show at a reduction in heap usage by at least a factor of two,
149 Hence, the interface file lexer spots when value declarations are
150 being scanned and return the @ITidinfo@ and @ITtype@ constructors
151 for the type and any other id info for that binding (unfolding, strictness
152 etc). These constructors are applied to the result of lexing these sub-chunks.
154 The lexing of the type and id info is all done lazily, of course, so
155 the scanning (and subsequent parsing) will be done *only* on the ids the
156 renamer finds out that it is interested in. The rest will just be junked.
157 Laziness, you know it makes sense :-)
161 = ITinterface -- keywords
181 | ITbang -- magic symbols
196 | ITvarid FAST_STRING
197 | ITconid FAST_STRING
198 | ITvarsym FAST_STRING
199 | ITconsym FAST_STRING
200 | ITqvarid (FAST_STRING,FAST_STRING)
201 | ITqconid (FAST_STRING,FAST_STRING)
202 | ITqvarsym (FAST_STRING,FAST_STRING)
203 | ITqconsym (FAST_STRING,FAST_STRING)
205 | ITidinfo [IfaceToken] -- lazily return the stream of tokens for
206 -- the info attached to an id.
207 | ITtysig [IfaceToken] -- lazily return the stream of tokens for
208 -- the info attached to an id.
209 -- Stuff for reading unfoldings
210 | ITarity | ITstrict | ITunfold
211 | ITdemand [Demand] | ITbottom
212 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
213 | ITcoerce_in | ITcoerce_out | ITatsign
214 | ITccall (Bool,Bool) -- (is_casm, may_gc)
216 | ITchar Char | ITstring FAST_STRING
217 | ITinteger Integer | ITdouble Double
218 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
219 | ITunknown String -- Used when the lexer can't make sense of it
220 deriving Text -- debugging
222 instance Text CostCentre -- cheat!
226 %************************************************************************
228 \subsection{The lexical analyser}
230 %************************************************************************
233 lexIface :: StringBuffer -> [IfaceToken]
236 -- if bufferExhausted buf then
239 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
240 case currentChar# buf of
241 -- whitespace and comments, ignore.
242 ' '# -> lexIface (stepOn buf)
243 '\t'# -> lexIface (stepOn buf)
244 '\n'# -> lexIface (stepOn buf)
246 -- Numbers and comments
248 case lookAhead# buf 1# of
249 '-'# -> lex_comment (stepOnBy# buf 2#)
252 then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
255 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
256 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
259 case prefixMatch (stepOn buf) "..)" of
260 Just buf' -> ITdotdot : lexIface (stepOverLexeme buf')
262 case lookAhead# buf 1# of
263 ','# -> lex_tuple Nothing (stepOnBy# buf 2#)
264 ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
265 _ -> IToparen : lexIface (stepOn buf)
267 '{'# -> ITocurly : lexIface (stepOn buf)
268 '}'# -> ITccurly : lexIface (stepOn buf)
269 ')'# -> ITcparen : lexIface (stepOn buf)
271 case lookAhead# buf 1# of
272 ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
273 _ -> ITobrack : lexIface (stepOn buf)
274 ']'# -> ITcbrack : lexIface (stepOn buf)
275 ','# -> ITcomma : lexIface (stepOn buf)
276 ':'# -> case lookAhead# buf 1# of
277 ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#)
278 _ -> lex_id (incLexeme buf)
279 ';'# -> ITsemi : lexIface (stepOn buf)
280 '\"'# -> case untilEndOfString# (stepOn buf) of
282 -- the string literal does *not* include the dquotes
283 case lexemeToFastString buf' of
284 v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
287 -- untilEndOfChar# extends the current lexeme until
288 -- it hits a non-escaped single quote. The lexeme of the
289 -- StringBuffer returned does *not* include the closing quote,
290 -- hence we augment the lexeme and make sure to add the
291 -- starting quote, before `read'ing the string.
293 case untilEndOfChar# (stepOn buf) of
294 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
295 [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
297 -- ``thingy'' form for casm
299 case lookAhead# buf 1# of
300 '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
301 _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume
302 -- scanning an id of some sort.
305 case lookAhead# buf 1# of
306 'S'# -> case lookAhead# buf 2# of
308 lex_demand (stepOnUntil (not . isSpace)
309 (stepOnBy# buf 3#)) -- past _S_
310 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
311 Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
312 Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
314 _ -> lex_keyword (stepOn buf)
317 if bufferExhausted (stepOn buf) then
322 if isDigit (C# c) then
323 lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
328 -- _trace ("comment: "++[C# (currentChar# buf)]) $
329 case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
333 -- _trace ("demand: "++[C# (currentChar# buf)]) $
334 case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
336 -- code snatched from Demand.lhs
338 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
339 case currentChar# buf of
340 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
341 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
342 'S'# -> read_em (WwStrict : acc) (stepOn buf)
343 'P'# -> read_em (WwPrim : acc) (stepOn buf)
344 'E'# -> read_em (WwEnum : acc) (stepOn buf)
345 ')'# -> (reverse acc, stepOn buf)
346 'U'# -> do_unpack True acc (stepOnBy# buf 2#)
347 'u'# -> do_unpack False acc (stepOnBy# buf 2#)
348 _ -> (reverse acc, buf)
350 do_unpack wrapper_unpacks acc buf
351 = case read_em [] buf of
352 (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest
356 -- _trace ("scc: "++[C# (currentChar# buf)]) $
357 case currentChar# buf of
360 case prefixMatch (stepOn buf) "NO_CC\"" of
361 Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
363 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
364 Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
366 case prefixMatch (stepOn buf) "OVERHEAD\"" of
367 Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
369 case prefixMatch (stepOn buf) "DONT_CARE\"" of
370 Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
372 case prefixMatch (stepOn buf) "SUBSUMED\"" of
373 Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
375 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
376 Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
378 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
380 case untilChar# (stepOverLexeme buf') '\"'# of
381 buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
382 lexIface (stepOn (stepOverLexeme buf''))
384 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
385 Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
387 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
389 case untilChar# (stepOverLexeme buf') '\"'# of
390 buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
391 lexIface (stepOn (stepOverLexeme buf''))
395 case untilChar# buf '/'# of
397 let mod_name = lexemeToFastString buf' in
398 case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
400 let grp_name = lexemeToFastString buf'' in
401 case untilChar# (stepOn (stepOverLexeme buf'')) '\"'# of
403 let cc_name = lexemeToFastString buf''' in
404 (mkUserCC cc_name mod_name grp_name,
405 stepOn (stepOverLexeme buf'''))
407 case prefixMatch (stepOn buf) "CAF:" of
409 case match_user_cc (stepOverLexeme buf') of
410 (cc, buf'') -> ITscc (cafifyCC cc) : lexIface buf''
412 case match_user_cc (stepOn buf) of
413 (cc, buf'') -> ITscc cc : lexIface buf''
414 c -> ITunknown [C# c] : lexIface (stepOn buf)
418 lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
419 lex_num minus acc# buf =
420 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
421 case scanNumLit (I# acc#) buf of
423 case currentChar# buf' of
425 -- this case is not optimised at all, as the
426 -- presence of floating point numbers in interface
427 -- files is not that common. (ToDo)
428 case expandWhile (isDigit) (incLexeme buf') of
429 buf'' -> -- points to first non digit char
430 case reads (lexemeToString buf'') of
431 [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
432 _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
434 -- case reads (lexemeToString buf') of
435 -- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
439 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
440 case currentChar# buf of
441 ':'# -> case lookAhead# buf 1# of
442 '_'# -> -- a binding, type (and other id-info) follows,
443 -- to make the parser ever so slightly, we push
445 lex_decl (stepOnBy# buf 2#)
446 v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
448 case expandWhile (is_kwd_char) buf of
450 let kw = lexemeToFastString buf' in
451 -- _trace ("kw: "++lexemeToString buf') $
452 case lookupUFM ifaceKeywordsFM kw of
453 Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
454 lexIface (stepOverLexeme buf')
455 Just xx -> xx : lexIface (stepOverLexeme buf')
458 case doDiscard False buf of -- spin until ;; is found
460 {- _trace (show (lexemeToString buf')) $ -}
461 case currentChar# buf' of
462 '\n'# -> -- newline, no id info.
463 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
464 lexIface (stepOverLexeme buf')
465 '\r'# -> -- just to be sure for those Win* boxes..
466 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
467 lexIface (stepOverLexeme buf')
469 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
470 lexIface (stepOverLexeme buf')
471 c -> -- run all over the id info
472 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
474 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
475 --_trace (show (lexemeToString (decLexeme buf''))) $
476 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
477 let ls = lexIface (stepOverLexeme buf'') in
478 if opt_IgnoreIfacePragmas then
481 let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
486 is_kwd_char c@(C# c#) =
487 isAlphanum c || -- OLD: c `elem` "_@/\\"
499 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
500 case expandUntilMatch buf "\'\'" of
501 buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
502 lexIface (stepOverLexeme buf')
505 lex_tuple module_dot buf =
506 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
510 case currentChar# buf of
511 ','# -> go (n+1) (stepOn buf)
512 ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
513 _ -> ITunknown ("tuple " ++ show n) : lexIface buf
515 -- Similarly ' itself is ok inside an identifier, but not at the start
517 id_arr :: _ByteArray Int
519 unsafePerformPrimIO (
520 newCharArray (0,255) `thenPrimIO` \ barr ->
522 loop 256# = returnPrimIO ()
524 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
525 writeCharArray barr (I# i#) '\1' `seqPrimIO`
528 writeCharArray barr (I# i#) '\0' `seqPrimIO`
532 unsafeFreezeByteArray barr)
536 _ByteArray _ arr# = id_arr
538 case ord# (indexCharArray# arr# (ord# c#)) of
542 --is_id_char c@(C# c#) = isAlphanum c || is_sym c#
546 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
547 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
548 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
549 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
550 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
551 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
553 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
556 mod_arr :: _ByteArray Int
558 unsafePerformPrimIO (
559 newCharArray (0,255) `thenPrimIO` \ barr ->
561 loop 256# = returnPrimIO ()
563 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
564 writeCharArray barr (I# i#) '\1' `seqPrimIO`
567 writeCharArray barr (I# i#) '\0' `seqPrimIO`
571 unsafeFreezeByteArray barr)
574 is_mod_char (C# c#) =
576 _ByteArray _ arr# = mod_arr
578 case ord# (indexCharArray# arr# (ord# c#)) of
582 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
586 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
590 [] -> lex_id2 Nothing cs
591 _ -> lex_id3 Nothing len xs cs
595 [] -> lex_id2 Nothing cs
598 pk_str = _PK_ (xs::String)
599 len = lengthPS pk_str
602 error "Well, I never!"
604 lex_id2 (Just pk_str) cs''
606 [] -> lex_id2 Nothing cs
607 _ -> lex_id3 Nothing len xs cs'
612 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
613 case expandWhile (is_mod_char) buf of
615 case currentChar# buf' of
617 if not (emptyLexeme buf') then
618 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
619 case lexemeToFastString buf' of
620 l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
621 (stepOn (stepOverLexeme buf'))
624 _ -> lex_id2 Nothing buf'
626 -- Dealt with the Module.part
627 lex_id2 module_dot buf =
628 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
629 case currentChar# buf of
631 case lookAhead# buf 1# of
632 ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
633 _ -> lex_id3 module_dot buf
635 case lookAhead# buf 1# of
636 ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
637 ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
638 _ -> lex_id3 module_dot buf
639 ':'# -> lex_id3 module_dot (incLexeme buf)
640 _ -> lex_id3 module_dot buf
644 -- Dealt with [], (), : special cases
646 lex_id3 module_dot buf =
647 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
648 case expandWhile (is_id_char) buf of
652 end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
654 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
655 Just kwd_token -> kwd_token : lexIface new_buf
656 Nothing -> mk_var_token lexeme : lexIface new_buf
658 lexeme = lexemeToFastString buf'
659 new_buf = stepOverLexeme buf'
663 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
664 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
665 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
666 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
667 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
671 -- Dealt with [], (), : special cases
674 lex_id3 module_dot len_xs xs cs =
675 case my_span' (is_id_char) cs of
676 (xs1,len_xs1,rest) ->
678 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
680 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
681 Just kwd_token -> kwd_token : lexIface rest
682 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
684 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
686 mk_var_token pk_str =
691 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
692 -- remove the second half of disjunction when using a 1.3 prelude.
694 if isUpper f then ITconid pk_str
695 else if isLower f then ITvarid pk_str
696 else if f == ':' then ITconsym pk_str
697 else if isLowerISO f then ITvarid pk_str
698 else if isUpperISO f then ITconid pk_str
702 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
703 | f == ':' = ITconsym n
704 | isAlpha f = ITvarid n
705 | otherwise = ITvarsym n
710 end_lex_id Nothing token buf = token : lexIface buf
711 end_lex_id (Just m) token buf =
713 ITconid n -> ITqconid (m,n) : lexIface buf
714 ITvarid n -> ITqvarid (m,n) : lexIface buf
715 ITconsym n -> ITqconsym (m,n) : lexIface buf
716 ITvarsym n -> ITqvarsym (m,n) : lexIface buf
717 ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf
718 _ -> ITunknown (show token) : lexIface buf
721 ifaceKeywordsFM :: UniqFM IfaceToken
722 ifaceKeywordsFM = listToUFM $
723 map (\ (x,y) -> (_PK_ x,y))
726 ,("letrec_", ITletrec)
727 ,("interface_", ITinterface)
728 ,("usages_", ITusages)
729 ,("versions_", ITversions)
730 ,("exports_", ITexports)
731 ,("instance_modules_", ITinstance_modules)
732 ,("instances_", ITinstances)
733 ,("fixities_", ITfixities)
734 ,("declarations_", ITdeclarations)
735 ,("pragmas_", ITpragmas)
736 ,("forall_", ITforall)
739 ,("coerce_in_", ITcoerce_in)
740 ,("coerce_out_", ITcoerce_out)
742 ,("integer_", ITinteger_lit)
743 ,("rational_", ITrational_lit)
744 ,("addr_", ITaddr_lit)
745 ,("float_", ITfloat_lit)
746 ,("string_", ITstring_lit)
747 ,("litlit_", ITlit_lit)
748 ,("ccall_", ITccall (False, False))
749 ,("ccall_GC_", ITccall (False, True))
750 ,("casm_", ITccall (True, False))
751 ,("casm_GC_", ITccall (True, True))
754 haskellKeywordsFM = listToUFM $
755 map (\ (x,y) -> (_PK_ x,y))
758 ,("newtype", ITnewtype)
761 ,("instance", ITinstance)
762 ,("infixl", ITinfixl)
763 ,("infixr", ITinfixr)
766 ,("case#", ITprim_case)
770 ,("deriving", ITderiving)
781 -- doDiscard rips along really fast looking for a double semicolon,
782 -- indicating the end of the pragma we're skipping
783 doDiscard inStr buf =
784 -- _trace (show (C# (currentChar# buf))) $
785 case currentChar# buf of
788 case lookAhead# buf 1# of
789 ';'# -> incLexeme (incLexeme buf)
790 _ -> doDiscard inStr (incLexeme buf)
792 doDiscard inStr (incLexeme buf)
795 odd_slashes buf flg i# =
796 case lookAhead# buf i# of
797 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
800 case lookAhead# buf (negateInt# 1#) of --backwards, actually
801 '\\'# -> -- escaping something..
802 if odd_slashes buf True (negateInt# 2#) then
803 -- odd number of slashes, " is escaped.
804 doDiscard inStr (incLexeme buf)
806 -- even number of slashes, \ is escaped.
807 doDiscard (not inStr) (incLexeme buf)
808 _ -> case inStr of -- forced to avoid build-up
809 True -> doDiscard False (incLexeme buf)
810 False -> doDiscard True (incLexeme buf)
811 _ -> doDiscard inStr (incLexeme buf)
816 my_span :: (a -> Bool) -> [a] -> ([a],[a])
817 my_span p xs = go [] xs
819 go so_far (x:xs') | p x = go (x:so_far) xs'
820 go so_far xs = (reverse so_far, xs)
822 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
823 my_span' p xs = go [] 0 xs
825 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
826 go so_far n xs = (reverse so_far,n, xs)
830 %************************************************************************
832 \subsection{Other utility functions
834 %************************************************************************
837 type IfM a = MaybeErr a Error
839 returnIf :: a -> IfM a
840 thenIf :: IfM a -> (a -> IfM b) -> IfM b
841 happyError :: Int -> [IfaceToken] -> IfM a
843 returnIf a = Succeeded a
845 thenIf (Succeeded a) k = k a
846 thenIf (Failed err) _ = Failed err
848 happyError ln toks = Failed (ifaceParseErr ln toks)
850 -----------------------------------------------------------------
852 ifaceParseErr ln toks sty
853 = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]