2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 201
26 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
28 import {-# SOURCE #-} CostCentre
29 # if __GLASGOW_HASKELL__ == 202
30 import PrelBase ( Char(..) )
34 import CmdLineOpts ( opt_IgnoreIfacePragmas )
35 import Demand ( Demand(..) {- instance Read -} )
36 import UniqFM ( UniqFM, listToUFM, lookupUFM)
37 import BasicTypes ( NewOrData(..) )
39 #if __GLASGOW_HASKELL__ >= 202
40 import Maybes ( MaybeErr(..) )
42 import Maybes ( Maybe(..), MaybeErr(..) )
48 import ErrUtils ( Error(..) )
49 import Outputable ( Outputable(..), PprStyle(..) )
50 import Util ( nOfThem, panic )
55 #if __GLASGOW_HASKELL__ <= 201
62 %************************************************************************
64 \subsection{Lexical categories}
66 %************************************************************************
68 These functions test strings to see if they fit the lexical categories
69 defined in the Haskell report. Normally applied as in e.g. @isCon
73 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
74 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
76 isLexCon cs = isLexConId cs || isLexConSym cs
77 isLexVar cs = isLexVarId cs || isLexVarSym cs
79 isLexId cs = isLexConId cs || isLexVarId cs
80 isLexSym cs = isLexConSym cs || isLexVarSym cs
86 | cs == SLIT("[]") = True
87 | c == '(' = True -- (), (,), (,,), ...
88 | otherwise = isUpper c || isUpperISO c
94 | otherwise = isLower c || isLowerISO c
100 | otherwise = c == ':'
107 | otherwise = isSymbolASCII c
113 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
114 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
115 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
116 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
117 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
118 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
122 %************************************************************************
124 \subsection{Tuple strings -- ugh!}
126 %************************************************************************
129 mkTupNameStr 0 = SLIT("()")
130 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
131 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
132 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
133 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
134 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
139 %************************************************************************
141 \subsection{Data types}
143 %************************************************************************
145 The token data type, fairly un-interesting except from two constructors,
146 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
147 strictness, unfolding etc) and types for id decls.
149 The Idea/Observation here is that the renamer needs to scan through
150 all of an interface file before it can continue. But only a fraction
151 of the information contained in the file turns out to be useful, so
152 delaying as much as possible of the scanning and parsing of an
153 interface file Makes Sense (Heap profiles of the compiler
154 show at a reduction in heap usage by at least a factor of two,
157 Hence, the interface file lexer spots when value declarations are
158 being scanned and return the @ITidinfo@ and @ITtype@ constructors
159 for the type and any other id info for that binding (unfolding, strictness
160 etc). These constructors are applied to the result of lexing these sub-chunks.
162 The lexing of the type and id info is all done lazily, of course, so
163 the scanning (and subsequent parsing) will be done *only* on the ids the
164 renamer finds out that it is interested in. The rest will just be junked.
165 Laziness, you know it makes sense :-)
169 = ITinterface -- keywords
189 | ITbang -- magic symbols
204 | ITvarid FAST_STRING
205 | ITconid FAST_STRING
206 | ITvarsym FAST_STRING
207 | ITconsym FAST_STRING
208 | ITqvarid (FAST_STRING,FAST_STRING)
209 | ITqconid (FAST_STRING,FAST_STRING)
210 | ITqvarsym (FAST_STRING,FAST_STRING)
211 | ITqconsym (FAST_STRING,FAST_STRING)
213 | ITidinfo [IfaceToken] -- lazily return the stream of tokens for
214 -- the info attached to an id.
215 | ITtysig [IfaceToken] -- lazily return the stream of tokens for
216 -- the info attached to an id.
217 -- Stuff for reading unfoldings
219 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
220 | ITdemand [Demand] | ITbottom
221 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
222 | ITcoerce_in | ITcoerce_out | ITatsign
223 | ITccall (Bool,Bool) -- (is_casm, may_gc)
225 | ITchar Char | ITstring FAST_STRING
226 | ITinteger Integer | ITdouble Double
227 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
228 | ITunknown String -- Used when the lexer can't make sense of it
229 deriving Text -- debugging
231 instance Text CostCentre -- cheat!
235 %************************************************************************
237 \subsection{The lexical analyser}
239 %************************************************************************
242 lexIface :: StringBuffer -> [IfaceToken]
245 -- if bufferExhausted buf then
248 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
249 case currentChar# buf of
250 -- whitespace and comments, ignore.
251 ' '# -> lexIface (stepOn buf)
252 '\t'# -> lexIface (stepOn buf)
253 '\n'# -> lexIface (stepOn buf)
255 -- Numbers and comments
257 case lookAhead# buf 1# of
258 '-'# -> lex_comment (stepOnBy# buf 2#)
261 then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
264 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
265 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
268 case prefixMatch (stepOn buf) "..)" of
269 Just buf' -> ITdotdot : lexIface (stepOverLexeme buf')
271 case lookAhead# buf 1# of
272 ','# -> lex_tuple Nothing (stepOnBy# buf 2#)
273 ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
274 _ -> IToparen : lexIface (stepOn buf)
276 '{'# -> ITocurly : lexIface (stepOn buf)
277 '}'# -> ITccurly : lexIface (stepOn buf)
278 ')'# -> ITcparen : lexIface (stepOn buf)
280 case lookAhead# buf 1# of
281 ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
282 _ -> ITobrack : lexIface (stepOn buf)
283 ']'# -> ITcbrack : lexIface (stepOn buf)
284 ','# -> ITcomma : lexIface (stepOn buf)
285 ':'# -> case lookAhead# buf 1# of
286 ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#)
287 _ -> lex_id (incLexeme buf)
288 ';'# -> ITsemi : lexIface (stepOn buf)
289 '\"'# -> case untilEndOfString# (stepOn buf) of
291 -- the string literal does *not* include the dquotes
292 case lexemeToFastString buf' of
293 v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
296 -- untilEndOfChar# extends the current lexeme until
297 -- it hits a non-escaped single quote. The lexeme of the
298 -- StringBuffer returned does *not* include the closing quote,
299 -- hence we augment the lexeme and make sure to add the
300 -- starting quote, before `read'ing the string.
302 case untilEndOfChar# (stepOn buf) of
303 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
304 [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
306 -- ``thingy'' form for casm
308 case lookAhead# buf 1# of
309 '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
310 _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume
311 -- scanning an id of some sort.
314 case lookAhead# buf 1# of
315 'S'# -> case lookAhead# buf 2# of
317 lex_demand (stepOnUntil (not . isSpace)
318 (stepOnBy# buf 3#)) -- past _S_
319 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
320 Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
321 Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
323 _ -> lex_keyword (stepOn buf)
326 if bufferExhausted (stepOn buf) then
331 if isDigit (C# c) then
332 lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
337 -- _trace ("comment: "++[C# (currentChar# buf)]) $
338 case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
342 -- _trace ("demand: "++[C# (currentChar# buf)]) $
343 case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
345 -- code snatched from Demand.lhs
347 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
348 case currentChar# buf of
349 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
350 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
351 'S'# -> read_em (WwStrict : acc) (stepOn buf)
352 'P'# -> read_em (WwPrim : acc) (stepOn buf)
353 'E'# -> read_em (WwEnum : acc) (stepOn buf)
354 ')'# -> (reverse acc, stepOn buf)
355 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
356 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
357 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
358 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
359 _ -> (reverse acc, buf)
361 do_unpack new_or_data wrapper_unpacks acc buf
362 = case read_em [] buf of
363 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
367 -- _trace ("scc: "++[C# (currentChar# buf)]) $
368 case currentChar# buf of
371 case prefixMatch (stepOn buf) "NO_CC\"" of
372 Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
374 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
375 Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
377 case prefixMatch (stepOn buf) "OVERHEAD\"" of
378 Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
380 case prefixMatch (stepOn buf) "DONT_CARE\"" of
381 Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
383 case prefixMatch (stepOn buf) "SUBSUMED\"" of
384 Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
386 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
387 Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
389 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
391 case untilChar# (stepOverLexeme buf') '\"'# of
392 buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
393 lexIface (stepOn (stepOverLexeme buf''))
395 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
396 Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
398 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
400 case untilChar# (stepOverLexeme buf') '\"'# of
401 buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
402 lexIface (stepOn (stepOverLexeme buf''))
406 case untilChar# buf '/'# of
408 let mod_name = lexemeToFastString buf' in
409 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
411 -- let grp_name = lexemeToFastString buf'' in
412 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
414 let cc_name = lexemeToFastString buf'' in
415 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
416 stepOn (stepOverLexeme buf''))
418 case prefixMatch (stepOn buf) "CAF:" of
420 case match_user_cc (stepOverLexeme buf') of
421 (cc, buf'') -> ITscc (cafifyCC cc) : lexIface buf''
423 case match_user_cc (stepOn buf) of
424 (cc, buf'') -> ITscc cc : lexIface buf''
425 c -> ITunknown [C# c] : lexIface (stepOn buf)
429 lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
430 lex_num minus acc# buf =
431 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
432 case scanNumLit (I# acc#) buf of
434 case currentChar# buf' of
436 -- this case is not optimised at all, as the
437 -- presence of floating point numbers in interface
438 -- files is not that common. (ToDo)
439 case expandWhile (isDigit) (incLexeme buf') of
440 buf'' -> -- points to first non digit char
441 case reads (lexemeToString buf'') of
442 [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
443 _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
445 -- case reads (lexemeToString buf') of
446 -- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
450 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
451 case currentChar# buf of
452 ':'# -> case lookAhead# buf 1# of
453 '_'# -> -- a binding, type (and other id-info) follows,
454 -- to make the parser ever so slightly, we push
456 lex_decl (stepOnBy# buf 2#)
457 v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
459 case expandWhile (is_kwd_char) buf of
461 let kw = lexemeToFastString buf' in
462 -- _trace ("kw: "++lexemeToString buf') $
463 case lookupUFM ifaceKeywordsFM kw of
464 Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
465 lexIface (stepOverLexeme buf')
466 Just xx -> xx : lexIface (stepOverLexeme buf')
469 case doDiscard False buf of -- spin until ;; is found
471 {- _trace (show (lexemeToString buf')) $ -}
472 case currentChar# buf' of
473 '\n'# -> -- newline, no id info.
474 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
475 lexIface (stepOverLexeme buf')
476 '\r'# -> -- just to be sure for those Win* boxes..
477 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
478 lexIface (stepOverLexeme buf')
480 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
481 lexIface (stepOverLexeme buf')
482 c -> -- run all over the id info
483 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
485 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
486 --_trace (show (lexemeToString (decLexeme buf''))) $
487 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
488 let ls = lexIface (stepOverLexeme buf'') in
489 if opt_IgnoreIfacePragmas then
492 let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
497 is_kwd_char c@(C# c#) =
498 isAlphanum c || -- OLD: c `elem` "_@/\\"
510 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
511 case expandUntilMatch buf "\'\'" of
512 buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
513 lexIface (stepOverLexeme buf')
516 lex_tuple module_dot buf =
517 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
521 case currentChar# buf of
522 ','# -> go (n+1) (stepOn buf)
523 ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
524 _ -> ITunknown ("tuple " ++ show n) : lexIface buf
526 -- Similarly ' itself is ok inside an identifier, but not at the start
528 id_arr :: _ByteArray Int
530 unsafePerformPrimIO (
531 newCharArray (0,255) `thenPrimIO` \ barr ->
533 loop 256# = returnPrimIO ()
535 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
536 writeCharArray barr (I# i#) '\1' `seqPrimIO`
539 writeCharArray barr (I# i#) '\0' `seqPrimIO`
543 unsafeFreezeByteArray barr)
547 _ByteArray _ arr# = id_arr
549 case ord# (indexCharArray# arr# (ord# c#)) of
553 --is_id_char c@(C# c#) = isAlphanum c || is_sym c#
557 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
558 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
559 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
560 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
561 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
562 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
564 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
567 mod_arr :: _ByteArray Int
569 unsafePerformPrimIO (
570 newCharArray (0,255) `thenPrimIO` \ barr ->
572 loop 256# = returnPrimIO ()
574 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
575 writeCharArray barr (I# i#) '\1' `seqPrimIO`
578 writeCharArray barr (I# i#) '\0' `seqPrimIO`
582 unsafeFreezeByteArray barr)
585 is_mod_char (C# c#) =
587 _ByteArray _ arr# = mod_arr
589 case ord# (indexCharArray# arr# (ord# c#)) of
593 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
597 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
601 [] -> lex_id2 Nothing cs
602 _ -> lex_id3 Nothing len xs cs
606 [] -> lex_id2 Nothing cs
609 pk_str = _PK_ (xs::String)
610 len = lengthPS pk_str
613 error "Well, I never!"
615 lex_id2 (Just pk_str) cs''
617 [] -> lex_id2 Nothing cs
618 _ -> lex_id3 Nothing len xs cs'
623 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
624 case expandWhile (is_mod_char) buf of
626 case currentChar# buf' of
628 if not (emptyLexeme buf') then
629 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
630 case lexemeToFastString buf' of
631 l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
632 (stepOn (stepOverLexeme buf'))
635 _ -> lex_id2 Nothing buf'
637 -- Dealt with the Module.part
638 lex_id2 module_dot buf =
639 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
640 case currentChar# buf of
642 case lookAhead# buf 1# of
643 ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
644 _ -> lex_id3 module_dot buf
646 case lookAhead# buf 1# of
647 ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
648 ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
649 _ -> lex_id3 module_dot buf
650 ':'# -> lex_id3 module_dot (incLexeme buf)
651 _ -> lex_id3 module_dot buf
655 -- Dealt with [], (), : special cases
657 lex_id3 module_dot buf =
658 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
659 case expandWhile (is_id_char) buf of
663 end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
665 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
666 Just kwd_token -> kwd_token : lexIface new_buf
667 Nothing -> mk_var_token lexeme : lexIface new_buf
669 lexeme = lexemeToFastString buf'
670 new_buf = stepOverLexeme buf'
674 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
675 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
676 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
677 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
678 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
682 -- Dealt with [], (), : special cases
685 lex_id3 module_dot len_xs xs cs =
686 case my_span' (is_id_char) cs of
687 (xs1,len_xs1,rest) ->
689 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
691 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
692 Just kwd_token -> kwd_token : lexIface rest
693 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
695 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
697 mk_var_token pk_str =
702 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
703 -- remove the second half of disjunction when using a 1.3 prelude.
705 if isUpper f then ITconid pk_str
706 else if isLower f then ITvarid pk_str
707 else if f == ':' then ITconsym pk_str
708 else if isLowerISO f then ITvarid pk_str
709 else if isUpperISO f then ITconid pk_str
713 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
714 | f == ':' = ITconsym n
715 | isAlpha f = ITvarid n
716 | otherwise = ITvarsym n
721 end_lex_id Nothing token buf = token : lexIface buf
722 end_lex_id (Just m) token buf =
724 ITconid n -> ITqconid (m,n) : lexIface buf
725 ITvarid n -> ITqvarid (m,n) : lexIface buf
726 ITconsym n -> ITqconsym (m,n) : lexIface buf
727 ITvarsym n -> ITqvarsym (m,n) : lexIface buf
728 ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf
729 _ -> ITunknown (show token) : lexIface buf
732 ifaceKeywordsFM :: UniqFM IfaceToken
733 ifaceKeywordsFM = listToUFM $
734 map (\ (x,y) -> (_PK_ x,y))
737 ,("letrec_", ITletrec)
738 ,("interface_", ITinterface)
739 ,("usages_", ITusages)
740 ,("versions_", ITversions)
741 ,("exports_", ITexports)
742 ,("instance_modules_", ITinstance_modules)
743 ,("instances_", ITinstances)
744 ,("fixities_", ITfixities)
745 ,("declarations_", ITdeclarations)
746 ,("pragmas_", ITpragmas)
747 ,("forall_", ITforall)
748 ,("U_", ITunfold False)
749 ,("U!_", ITunfold True)
751 ,("coerce_in_", ITcoerce_in)
752 ,("coerce_out_", ITcoerce_out)
754 ,("integer_", ITinteger_lit)
755 ,("rational_", ITrational_lit)
756 ,("addr_", ITaddr_lit)
757 ,("float_", ITfloat_lit)
758 ,("string_", ITstring_lit)
759 ,("litlit_", ITlit_lit)
760 ,("ccall_", ITccall (False, False))
761 ,("ccall_GC_", ITccall (False, True))
762 ,("casm_", ITccall (True, False))
763 ,("casm_GC_", ITccall (True, True))
766 haskellKeywordsFM = listToUFM $
767 map (\ (x,y) -> (_PK_ x,y))
770 ,("newtype", ITnewtype)
773 ,("instance", ITinstance)
774 ,("infixl", ITinfixl)
775 ,("infixr", ITinfixr)
778 ,("case#", ITprim_case)
782 ,("deriving", ITderiving)
793 -- doDiscard rips along really fast looking for a double semicolon,
794 -- indicating the end of the pragma we're skipping
795 doDiscard inStr buf =
796 -- _trace (show (C# (currentChar# buf))) $
797 case currentChar# buf of
800 case lookAhead# buf 1# of
801 ';'# -> incLexeme (incLexeme buf)
802 _ -> doDiscard inStr (incLexeme buf)
804 doDiscard inStr (incLexeme buf)
807 odd_slashes buf flg i# =
808 case lookAhead# buf i# of
809 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
812 case lookAhead# buf (negateInt# 1#) of --backwards, actually
813 '\\'# -> -- escaping something..
814 if odd_slashes buf True (negateInt# 2#) then
815 -- odd number of slashes, " is escaped.
816 doDiscard inStr (incLexeme buf)
818 -- even number of slashes, \ is escaped.
819 doDiscard (not inStr) (incLexeme buf)
820 _ -> case inStr of -- forced to avoid build-up
821 True -> doDiscard False (incLexeme buf)
822 False -> doDiscard True (incLexeme buf)
823 _ -> doDiscard inStr (incLexeme buf)
828 my_span :: (a -> Bool) -> [a] -> ([a],[a])
829 my_span p xs = go [] xs
831 go so_far (x:xs') | p x = go (x:so_far) xs'
832 go so_far xs = (reverse so_far, xs)
834 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
835 my_span' p xs = go [] 0 xs
837 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
838 go so_far n xs = (reverse so_far,n, xs)
842 %************************************************************************
844 \subsection{Other utility functions
846 %************************************************************************
849 type IfM a = MaybeErr a Error
851 returnIf :: a -> IfM a
852 thenIf :: IfM a -> (a -> IfM b) -> IfM b
853 happyError :: Int -> [IfaceToken] -> IfM a
855 returnIf a = Succeeded a
857 thenIf (Succeeded a) k = k a
858 thenIf (Failed err) _ = Failed err
860 happyError ln toks = Failed (ifaceParseErr ln toks)
862 -----------------------------------------------------------------
864 ifaceParseErr ln toks sty
865 = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]