2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 \section[Lexical analysis]{Lexical analysis}
6 --------------------------------------------------------
8 There's a known bug in here:
10 If an interface file ends prematurely, Lex tries to
11 do headFS of an empty FastString.
13 An example that provokes the error is
15 f _:_ _forall_ [a] <<<END OF FILE>>>
16 --------------------------------------------------------
22 ifaceParseErr, srcParseErr,
25 Token(..), lexer, ParseResult(..), PState(..),
26 checkVersion, ExtFlags(..), mkPState,
29 P, thenP, thenP_, returnP, mapP, failP, failMsgP,
30 getSrcLocP, setSrcLocP, getSrcFile,
31 layoutOn, layoutOff, pushContext, popContext
34 #include "HsVersions.h"
36 import Char ( isSpace, toUpper )
37 import List ( isSuffixOf )
39 import PrelNames ( mkTupNameStr )
40 import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck )
41 import ForeignCall ( Safety(..) )
42 import NewDemand ( StrictSig(..), Demand(..), Demands(..),
43 DmdResult(..), mkTopDmdType, evalDmd, lazyDmd )
44 import UniqFM ( listToUFM, lookupUFM )
45 import BasicTypes ( Boxity(..) )
46 import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
47 replaceSrcLine, mkSrcLoc )
49 import ErrUtils ( Message )
56 import Char ( chr, ord )
58 import Bits ( Bits(..) ) -- non-std
60 #if __GLASGOW_HASKELL__ >= 503
61 import GHC.Read ( readRational__ ) -- Glasgow non-std
63 import PrelRead ( readRational__ ) -- Glasgow non-std
68 %************************************************************************
70 \subsection{Data types}
72 %************************************************************************
74 The token data type, fairly un-interesting except from one
75 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
76 strictness, unfolding etc).
78 The Idea/Observation here is that the renamer needs to scan through
79 all of an interface file before it can continue. But only a fraction
80 of the information contained in the file turns out to be useful, so
81 delaying as much as possible of the scanning and parsing of an
82 interface file Makes Sense (Heap profiles of the compiler
83 show a reduction in heap usage by at least a factor of two,
86 Hence, the interface file lexer spots when value declarations are
87 being scanned and return the @ITidinfo@ and @ITtype@ constructors
88 for the type and any other id info for that binding (unfolding, strictness
89 etc). These constructors are applied to the result of lexing these sub-chunks.
91 The lexing of the type and id info is all done lazily, of course, so
92 the scanning (and subsequent parsing) will be done *only* on the ids the
93 renamer finds out that it is interested in. The rest will just be junked.
94 Laziness, you know it makes sense :-)
98 = ITas -- Haskell keywords
122 | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now)
124 | ITforall -- GHC extension keywords
137 | ITinterface -- interface keywords
145 | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc)
172 | ITspecialise_prag -- Pragmas
182 | ITdotdot -- reserved symbols
197 | ITbiglam -- GHC-extension symbols
199 | ITocurly -- special symbols
201 | ITocurlybar -- {|, for type applications
202 | ITccurlybar -- |}, for type applications
205 | ITopabrack -- [:, for parallel arrays with -fparr
206 | ITcpabrack -- :], for parallel arrays with -fparr
217 | ITvarid FAST_STRING -- identifiers
218 | ITconid FAST_STRING
219 | ITvarsym FAST_STRING
220 | ITconsym FAST_STRING
221 | ITqvarid (FAST_STRING,FAST_STRING)
222 | ITqconid (FAST_STRING,FAST_STRING)
223 | ITqvarsym (FAST_STRING,FAST_STRING)
224 | ITqconsym (FAST_STRING,FAST_STRING)
226 | ITdupipvarid FAST_STRING -- GHC extension: implicit param: ?x
227 | ITsplitipvarid FAST_STRING -- GHC extension: implicit param: %x
229 | ITpragma StringBuffer
232 | ITstring FAST_STRING
234 | ITrational Rational
237 | ITprimstring FAST_STRING
239 | ITprimfloat Rational
240 | ITprimdouble Rational
241 | ITlitlit FAST_STRING
243 | ITunknown String -- Used when the lexer can't make sense of it
244 | ITeof -- end of file token
245 deriving Show -- debugging
248 -----------------------------------------------------------------------------
252 pragmaKeywordsFM = listToUFM $
253 map (\ (x,y) -> (_PK_ x,y))
254 [( "SPECIALISE", ITspecialise_prag ),
255 ( "SPECIALIZE", ITspecialise_prag ),
256 ( "SOURCE", ITsource_prag ),
257 ( "INLINE", ITinline_prag ),
258 ( "NOINLINE", ITnoinline_prag ),
259 ( "NOTINLINE", ITnoinline_prag ),
260 ( "LINE", ITline_prag ),
261 ( "RULES", ITrules_prag ),
262 ( "RULEZ", ITrules_prag ), -- american spelling :-)
263 ( "SCC", ITscc_prag ),
264 ( "DEPRECATED", ITdeprecated_prag )
267 haskellKeywordsFM = listToUFM $
268 map (\ (x,y) -> (_PK_ x,y))
269 [( "_", ITunderscore ),
272 ( "class", ITclass ),
274 ( "default", ITdefault ),
275 ( "deriving", ITderiving ),
278 ( "hiding", IThiding ),
280 ( "import", ITimport ),
282 ( "infix", ITinfix ),
283 ( "infixl", ITinfixl ),
284 ( "infixr", ITinfixr ),
285 ( "instance", ITinstance ),
287 ( "module", ITmodule ),
288 ( "newtype", ITnewtype ),
290 ( "qualified", ITqualified ),
293 ( "where", ITwhere ),
294 ( "_scc_", ITscc ) -- ToDo: remove
297 isSpecial :: Token -> Bool
298 -- If we see M.x, where x is a keyword, but
299 -- is special, we treat is as just plain M.x,
301 isSpecial ITas = True
302 isSpecial IThiding = True
303 isSpecial ITqualified = True
304 isSpecial ITforall = True
305 isSpecial ITexport = True
306 isSpecial ITlabel = True
307 isSpecial ITdynamic = True
308 isSpecial ITsafe = True
309 isSpecial ITthreadsafe = True
310 isSpecial ITunsafe = True
311 isSpecial ITwith = True
312 isSpecial ITccallconv = True
313 isSpecial ITstdcallconv = True
316 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
317 ghcExtensionKeywordsFM = listToUFM $
318 map (\ (x,y) -> (_PK_ x,y))
319 [ ( "forall", ITforall ),
320 ( "foreign", ITforeign ),
321 ( "export", ITexport ),
322 ( "label", ITlabel ),
323 ( "dynamic", ITdynamic ),
325 ( "threadsafe", ITthreadsafe ),
326 ( "unsafe", ITunsafe ),
328 ( "stdcall", ITstdcallconv),
329 ( "ccall", ITccallconv),
330 ( "dotnet", ITdotnet),
331 ("_ccall_", ITccall (False, False, PlayRisky)),
332 ("_ccall_GC_", ITccall (False, False, PlaySafe False)),
333 ("_casm_", ITccall (False, True, PlayRisky)),
334 ("_casm_GC_", ITccall (False, True, PlaySafe False)),
336 -- interface keywords
337 ("__interface", ITinterface),
338 ("__export", IT__export),
339 ("__depends", ITdepends),
340 ("__forall", IT__forall),
341 ("__letrec", ITletrec),
342 ("__coerce", ITcoerce),
343 ("__inline_me", ITinlineMe),
344 ("__inline_call", ITinlineCall),
345 ("__depends", ITdepends),
346 ("__DEFAULT", ITdefaultbranch),
348 ("__integer", ITinteger_lit),
349 ("__float", ITfloat_lit),
350 ("__int64", ITint64_lit),
351 ("__word", ITword_lit),
352 ("__word64", ITword64_lit),
353 ("__rational", ITrational_lit),
354 ("__addr", ITaddr_lit),
355 ("__label", ITlabel_lit),
356 ("__litlit", ITlit_lit),
357 ("__string", ITstring_lit),
360 ("__fuall", ITfuall),
362 ("__P", ITspecialise),
365 ("__D", ITdeprecated),
368 ("__ccall", ITccall (False, False, PlayRisky)),
369 ("__ccall_GC", ITccall (False, False, PlaySafe False)),
370 ("__dyn_ccall", ITccall (True, False, PlayRisky)),
371 ("__dyn_ccall_GC", ITccall (True, False, PlaySafe False)),
372 ("__casm", ITccall (False, True, PlayRisky)),
373 ("__dyn_casm", ITccall (True, True, PlayRisky)),
374 ("__casm_GC", ITccall (False, True, PlaySafe False)),
375 ("__dyn_casm_GC", ITccall (True, True, PlaySafe False)),
381 haskellKeySymsFM = listToUFM $
382 map (\ (x,y) -> (_PK_ x,y))
396 ,(".", ITdot) -- sadly, for 'forall a . t'
400 -----------------------------------------------------------------------------
405 - (exts) lexing a source with extensions, eg, an interface file or
407 - (bol) pointer to beginning of line (for column calculations)
408 - (buf) pointer to beginning of token
409 - (buf) pointer to current char
410 - (atbol) flag indicating whether we're at the beginning of a line
413 lexer :: (Token -> P a) -> P a
414 lexer cont buf s@(PState{
422 -- first, start a new lexeme and lose all the whitespace
424 tab line bol atbol (stepOverLexeme buf)
426 line = srcLocLine loc
428 tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
429 case currentChar# buf of
432 if bufferExhausted (stepOn buf)
433 then cont ITeof buf s'
434 else trace "lexer: misplaced NUL?" $
435 tab y bol atbol (stepOn buf)
437 '\n'# -> let buf' = stepOn buf
438 in tab (y +# 1#) (currentIndex# buf') 1# buf'
440 -- find comments. This got harder in Haskell 98.
441 '-'# -> let trundle n =
442 let next = lookAhead# buf n in
443 if next `eqChar#` '-'# then trundle (n +# 1#)
444 else if is_symbol next || n <# 2#
447 (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
450 -- comments and pragmas. We deal with LINE pragmas here,
451 -- and throw out any unrecognised pragmas as comments. Any
452 -- pragmas we know about are dealt with later (after any layout
453 -- processing if necessary).
454 '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
455 if lookAhead# buf 2# `eqChar#` '#'# then
456 if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
457 case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
458 case expandWhile# is_ident (stepOverLexeme buf1) of { buf2->
459 let lexeme = mkFastString -- ToDo: too slow
460 (map toUpper (lexemeToString buf2)) in
461 case lookupUFM pragmaKeywordsFM lexeme of
462 -- ignore RULES pragmas when -fglasgow-exts is off
463 Just ITrules_prag | not (glaExtsEnabled exts) ->
464 skip_to_end (stepOnBy# buf 2#) s'
466 line_prag skip_to_end buf2 s'
467 Just other -> is_a_token
468 Nothing -> skip_to_end (stepOnBy# buf 2#) s'
471 else skip_to_end (stepOnBy# buf 2#) s'
473 skip_to_end = skipNestedComment (lexer cont)
475 -- special GHC extension: we grok cpp-style #line pragmas
476 '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0
477 let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
478 lookAhead# buf 2# `eqChar#` 'i'# &&
479 lookAhead# buf 3# `eqChar#` 'n'# &&
480 lookAhead# buf 4# `eqChar#` 'e'# = stepOnBy# buf 5#
481 | otherwise = stepOn buf
483 case expandWhile# is_space buf1 of { buf2 ->
484 if is_digit (currentChar# buf2)
485 then line_prag next_line buf2 s'
489 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
491 -- tabs have been expanded beforehand
492 c | is_space c -> tab y bol atbol (stepOn buf)
493 | otherwise -> is_a_token
495 where s' = s{loc = replaceSrcLine loc y,
499 is_a_token | atbol /=# 0# = lexBOL cont buf s'
500 | otherwise = lexToken cont exts buf s'
502 -- {-# LINE .. #-} pragmas. yeuch.
503 line_prag cont buf s@PState{loc=loc} =
504 case expandWhile# is_space buf of { buf1 ->
505 case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) ->
506 -- subtract one: the line number refers to the *following* line.
507 let real_line = line - 1 in
508 case fromInteger real_line of { i@(I# l) ->
509 -- ToDo, if no filename then we skip the newline.... d'oh
510 case expandWhile# is_space buf2 of { buf3 ->
511 case currentChar# buf3 of
513 case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
515 file = lexemeToFastString buf4
516 new_buf = stepOn (stepOverLexeme buf4)
518 if nullFastString file
519 then cont new_buf s{loc = replaceSrcLine loc l}
520 else cont new_buf s{loc = mkSrcLoc file i}
522 _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
525 skipNestedComment :: P a -> P a
526 skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
528 skipNestedComment' :: SrcLoc -> P a -> P a
529 skipNestedComment' orig_loc cont buf = loop buf
532 case currentChar# buf of
533 '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
535 '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
537 (skipNestedComment' orig_loc cont)
540 '\n'# -> \ s@PState{loc=loc} ->
541 let buf' = stepOn buf in
542 loop buf' s{loc = incSrcLine loc,
543 bol = currentIndex# buf',
546 -- pass the original SrcLoc to lexError so that the error is
547 -- reported at the line it was originally on, not the line at
548 -- the end of the file.
549 '\NUL'# | bufferExhausted (stepOn buf) ->
550 \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
552 _ -> loop (stepOn buf)
554 -- When we are lexing the first token of a line, check whether we need to
555 -- insert virtual semicolons or close braces due to layout.
557 lexBOL :: (Token -> P a) -> P a
558 lexBOL cont buf s@(PState{
565 if need_close_curly then
566 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
567 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
568 else if need_semi_colon then
569 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
570 cont ITsemi buf s{atbol = 0#}
572 lexToken cont exts buf s{atbol = 0#}
574 col = currentIndex# buf -# bol
587 Layout n -> col ==# n
590 lexToken :: (Token -> P a) -> Int# -> P a
591 lexToken cont exts buf =
592 -- trace "lexToken" $
593 case currentChar# buf of
595 -- special symbols ----------------------------------------------------
596 '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
597 -> cont IToubxparen (setCurrentPos# buf 2#)
599 -> cont IToparen (incLexeme buf)
601 ')'# -> cont ITcparen (incLexeme buf)
602 '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
603 cont ITopabrack (setCurrentPos# buf 2#)
605 cont ITobrack (incLexeme buf)
606 ']'# -> cont ITcbrack (incLexeme buf)
607 ','# -> cont ITcomma (incLexeme buf)
608 ';'# -> cont ITsemi (incLexeme buf)
609 '}'# -> \ s@PState{context = ctx} ->
611 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
612 _ -> lexError "too many '}'s" buf s
613 '|'# -> case lookAhead# buf 1# of
614 '}'# | glaExtsEnabled exts -> cont ITccurlybar
615 (setCurrentPos# buf 2#)
616 _ -> lex_sym cont (incLexeme buf)
617 ':'# -> case lookAhead# buf 1# of
618 ']'# | parrEnabled exts -> cont ITcpabrack
619 (setCurrentPos# buf 2#)
620 _ -> lex_sym cont (incLexeme buf)
623 '#'# -> case lookAhead# buf 1# of
624 ')'# | glaExtsEnabled exts
625 -> cont ITcubxparen (setCurrentPos# buf 2#)
626 '-'# -> case lookAhead# buf 2# of
627 '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
628 _ -> lex_sym cont (incLexeme buf)
629 _ -> lex_sym cont (incLexeme buf)
631 '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
632 -> lex_cstring cont (setCurrentPos# buf 2#)
634 -> cont ITbackquote (incLexeme buf)
636 '{'# -> -- look for "{-##" special iface pragma -- for Emacs: -}
637 case lookAhead# buf 1# of
638 '|'# | glaExtsEnabled exts
639 -> cont ITocurlybar (setCurrentPos# buf 2#)
640 '-'# -> case lookAhead# buf 2# of
641 '#'# -> case lookAhead# buf 3# of
645 (\ cont lexeme buf' -> cont (ITpragma lexeme) buf')
647 (stepOnBy# (stepOverLexeme buf) 4#)
648 _ -> lex_prag cont (setCurrentPos# buf 3#)
649 _ -> cont ITocurly (incLexeme buf)
650 _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf)
652 -- strings/characters -------------------------------------------------
653 '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf)
654 '\''# -> lex_char (char_end cont) exts (incLexeme buf)
656 -- strictness and cpr pragmas and __scc treated specially.
657 '_'# | glaExtsEnabled exts ->
658 case lookAhead# buf 1# of
659 '_'# -> case lookAhead# buf 2# of
661 lex_demand cont (stepOnUntil (not . isSpace)
662 (stepOnBy# buf 3#)) -- past __S
664 cont ITcprinfo (stepOnBy# buf 3#) -- past __M
667 case prefixMatch (stepOnBy# buf 3#) "cc" of
668 Just buf' -> lex_scc cont (stepOverLexeme buf')
669 Nothing -> lex_id cont exts buf
670 _ -> lex_id cont exts buf
671 _ -> lex_id cont exts buf
673 -- Hexadecimal and octal constants
674 '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
675 -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
676 | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
677 -> readNum (after_lexnum cont exts) buf' is_octdigit 8 oct_or_dec
678 where ch = lookAhead# buf 1#
679 ch2 = lookAhead# buf 2#
680 buf' = setCurrentPos# buf 2#
683 if bufferExhausted (stepOn buf) then
686 trace "lexIface: misplaced NUL?" $
687 cont (ITunknown "\NUL") (stepOn buf)
689 '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
690 lex_ip ITdupipvarid cont (incLexeme buf)
691 '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
692 lex_ip ITsplitipvarid cont (incLexeme buf)
693 c | is_digit c -> lex_num cont exts 0 buf
694 | is_symbol c -> lex_sym cont buf
695 | is_upper c -> lex_con cont exts buf
696 | is_ident c -> lex_id cont exts buf
697 | otherwise -> lexError "illegal character" buf
699 -- Int# is unlifted, and therefore faster than Bool for flags.
705 -------------------------------------------------------------------------------
709 = case expandWhile# is_space buf of { buf1 ->
710 case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
711 let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
712 case lookupUFM pragmaKeywordsFM lexeme of
713 Just kw -> cont kw (mergeLexemes buf buf2)
714 Nothing -> panic "lex_prag"
717 -------------------------------------------------------------------------------
720 lex_string cont exts s buf
721 = case currentChar# buf of
723 let buf' = incLexeme buf
724 s' = mkFastStringNarrow (map chr (reverse s))
725 in case currentChar# buf' of
726 '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s
727 then cont (ITprimstring s') (incLexeme buf')
728 else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
729 _ -> cont (ITstring s') buf'
731 -- ignore \& in a string, deal with string gaps
732 '\\'# | next_ch `eqChar#` '&'#
733 -> lex_string cont exts s buf'
735 -> lex_stringgap cont exts s (incLexeme buf)
737 where next_ch = lookAhead# buf 1#
738 buf' = setCurrentPos# buf 2#
740 _ -> lex_char (lex_next_string cont s) exts buf
742 lex_stringgap cont exts s buf
743 = let buf' = incLexeme buf in
744 case currentChar# buf of
745 '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
746 st{loc = incSrcLine loc}
747 '\\'# -> lex_string cont exts s buf'
748 c | is_space c -> lex_stringgap cont exts s buf'
749 other -> charError buf'
751 lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
753 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
754 lex_char cont exts buf
755 = case currentChar# buf of
756 '\\'# -> lex_escape (cont exts) (incLexeme buf)
757 c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf)
758 other -> charError buf
760 char_end cont exts c buf
761 = case currentChar# buf of
762 '\''# -> let buf' = incLexeme buf in
763 case currentChar# buf' of
764 '#'# | glaExtsEnabled exts
765 -> cont (ITprimchar c) (incLexeme buf')
766 _ -> cont (ITchar c) buf'
770 = let buf' = incLexeme buf in
771 case currentChar# buf of
772 'a'# -> cont (ord '\a') buf'
773 'b'# -> cont (ord '\b') buf'
774 'f'# -> cont (ord '\f') buf'
775 'n'# -> cont (ord '\n') buf'
776 'r'# -> cont (ord '\r') buf'
777 't'# -> cont (ord '\t') buf'
778 'v'# -> cont (ord '\v') buf'
779 '\\'# -> cont (ord '\\') buf'
780 '"'# -> cont (ord '\"') buf'
781 '\''# -> cont (ord '\'') buf'
782 '^'# -> let c = currentChar# buf' in
783 if c `geChar#` '@'# && c `leChar#` '_'#
784 then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
787 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
788 'o'# -> readNum (after_charnum cont) buf' is_octdigit 8 oct_or_dec
790 -> readNum (after_charnum cont) buf is_digit 10 oct_or_dec
792 _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
793 Just buf2 <- [prefixMatch buf p] ] of
794 (c,buf2):_ -> cont (ord c) buf2
797 after_charnum cont i buf
798 = if i >= 0 && i <= 0x10FFFF
799 then cont (fromInteger i) buf
802 readNum cont buf is_digit base conv = read buf 0
804 = case currentChar# buf of { c ->
806 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
812 || (c `geChar#` 'a'# && c `leChar#` 'f'#)
813 || (c `geChar#` 'A'# && c `leChar#` 'F'#)
815 hex c | is_digit c = ord# c -# ord# '0'#
816 | otherwise = ord# (to_lower c) -# ord# 'a'# +# 10#
817 oct_or_dec c = ord# c -# ord# '0'#
819 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
822 | c `geChar#` 'A'# && c `leChar#` 'Z'#
823 = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
826 charError buf = lexError "error in character literal" buf
828 silly_escape_chars = [
865 -------------------------------------------------------------------------------
867 lex_demand cont buf =
868 case read_em [] buf of { (ls,buf') ->
869 case currentChar# buf' of
870 'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf')
871 'm'# -> cont (ITstrict (StrictSig (mkTopDmdType ls RetCPR))) (incLexeme buf')
872 _ -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf'
876 case currentChar# buf of
877 'T'# -> read_em (Top : acc) (stepOn buf)
878 'L'# -> read_em (lazyDmd : acc) (stepOn buf)
879 'A'# -> read_em (Abs : acc) (stepOn buf)
880 'V'# -> read_em (evalDmd : acc) (stepOn buf) -- Temporary, until
881 -- we've recompiled prelude etc
882 'C'# -> do_unary Call acc (stepOnBy# buf 2#) -- Skip 'C('
884 'U'# -> do_seq1 Eval acc (stepOnBy# buf 1#)
885 'D'# -> do_seq1 Defer acc (stepOnBy# buf 1#)
886 'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#)
888 _ -> (reverse acc, buf)
891 = case currentChar# buf of
892 '('# -> do_seq2 fn acc (stepOnBy# buf 1#)
893 _ -> read_em (fn (Poly Abs) : acc) buf
896 = case read_em [] buf of { (dmds, buf) ->
897 case currentChar# buf of
898 ')'# -> read_em (fn (Prod dmds) : acc)
900 '*'# -> ASSERT( length dmds == 1 )
901 read_em (fn (Poly (head dmds)) : acc)
902 (stepOnBy# buf 2#) -- Skip '*)'
906 = case read_em [] buf of
907 ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest) -- Skip ')'
911 case currentChar# buf of
912 'C'# -> cont ITsccAllCafs (incLexeme buf)
913 other -> cont ITscc buf
915 -----------------------------------------------------------------------------
918 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
919 lex_num cont exts acc buf =
920 case scanNumLit acc buf of
922 case currentChar# buf' of
923 '.'# | is_digit (lookAhead# buf' 1#) ->
924 -- this case is not optimised at all, as the
925 -- presence of floating point numbers in interface
926 -- files is not that common. (ToDo)
927 case expandWhile# is_digit (incLexeme buf') of
928 buf2 -> -- points to first non digit char
930 let l = case currentChar# buf2 of
936 = let buf3 = incLexeme buf2 in
937 case currentChar# buf3 of
938 '-'# | is_digit (lookAhead# buf3 1#)
939 -> expandWhile# is_digit (incLexeme buf3)
940 '+'# | is_digit (lookAhead# buf3 1#)
941 -> expandWhile# is_digit (incLexeme buf3)
942 x | is_digit x -> expandWhile# is_digit buf3
945 v = readRational__ (lexemeToString l)
947 in case currentChar# l of -- glasgow exts only
948 '#'# | glaExtsEnabled exts -> let l' = incLexeme l in
949 case currentChar# l' of
950 '#'# -> cont (ITprimdouble v) (incLexeme l')
951 _ -> cont (ITprimfloat v) l'
952 _ -> cont (ITrational v) l
954 _ -> after_lexnum cont exts acc' buf'
956 after_lexnum cont exts i buf
957 = case currentChar# buf of
958 '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf)
959 _ -> cont (ITinteger i) buf
961 -----------------------------------------------------------------------------
962 -- C "literal literal"s (i.e. things like ``NULL'', ``stdout'' etc.)
964 -- we lexemeToFastString on the bit between the ``''s, but include the
965 -- quotes in the full lexeme.
967 lex_cstring cont buf =
968 case expandUntilMatch (stepOverLexeme buf) "\'\'" of
969 Just buf' -> cont (ITlitlit (lexemeToFastString
970 (setCurrentPos# buf' (negateInt# 2#))))
971 (mergeLexemes buf buf')
972 Nothing -> lexError "unterminated ``" buf
974 -----------------------------------------------------------------------------
975 -- identifiers, symbols etc.
977 lex_ip ip_constr cont buf =
978 case expandWhile# is_ident buf of
979 buf' -> cont (ip_constr (tailFS lexeme)) buf'
980 where lexeme = lexemeToFastString buf'
982 lex_id cont exts buf =
983 let buf1 = expandWhile# is_ident buf in
986 case (if glaExtsEnabled exts
987 then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
988 else buf1) of { buf' ->
990 let lexeme = lexemeToFastString buf' in
992 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
993 Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
997 let var_token = cont (ITvarid lexeme) buf' in
999 if not (glaExtsEnabled exts)
1003 case lookupUFM ghcExtensionKeywordsFM lexeme of {
1004 Just kwd_token -> cont kwd_token buf';
1005 Nothing -> var_token
1010 -- trace "lex_sym" $
1011 case expandWhile# is_symbol buf of
1012 buf' -> case lookupUFM haskellKeySymsFM lexeme of {
1013 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
1014 cont kwd_token buf' ;
1015 Nothing -> --trace ("sym: "++unpackFS lexeme) $
1016 cont (mk_var_token lexeme) buf'
1018 where lexeme = lexemeToFastString buf'
1021 -- lex_con recursively collects components of a qualified identifer.
1022 -- The argument buf is the StringBuffer representing the lexeme
1023 -- identified so far, where the next character is upper-case.
1025 lex_con cont exts buf =
1026 -- trace ("con: "{-++unpackFS lexeme-}) $
1027 let empty_buf = stepOverLexeme buf in
1028 case expandWhile# is_ident empty_buf of { buf1 ->
1029 case slurp_trailing_hashes buf1 exts of { con_buf ->
1031 let all_buf = mergeLexemes buf con_buf
1033 con_lexeme = lexemeToFastString con_buf
1034 mod_lexeme = lexemeToFastString (decLexeme buf)
1035 all_lexeme = lexemeToFastString all_buf
1038 | emptyLexeme buf = cont (ITconid con_lexeme) all_buf
1039 | otherwise = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
1042 case currentChar# all_buf of
1043 '.'# -> maybe_qualified cont exts all_lexeme
1044 (incLexeme all_buf) just_a_conid
1049 maybe_qualified cont exts mod buf just_a_conid =
1050 -- trace ("qid: "{-++unpackFS lexeme-}) $
1051 case currentChar# buf of
1052 '['# -> -- Special case for []
1053 case lookAhead# buf 1# of
1054 ']'# -> cont (ITqconid (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
1057 '('# -> -- Special case for (,,,)
1058 -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
1059 case lookAhead# buf 1# of
1060 '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
1061 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
1064 ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
1065 ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
1068 '-'# -> case lookAhead# buf 1# of
1069 '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
1070 _ -> lex_id3 cont exts mod buf just_a_conid
1072 _ -> lex_id3 cont exts mod buf just_a_conid
1075 lex_id3 cont exts mod buf just_a_conid
1076 | is_upper (currentChar# buf) =
1077 lex_con cont exts buf
1079 | is_symbol (currentChar# buf) =
1081 start_new_lexeme = stepOverLexeme buf
1083 -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
1084 case expandWhile# is_symbol start_new_lexeme of { buf' ->
1086 lexeme = lexemeToFastString buf'
1087 -- real lexeme is M.<sym>
1088 new_buf = mergeLexemes buf buf'
1090 cont (mk_qvar_token mod lexeme) new_buf
1091 -- wrong, but arguably morally right: M... is now a qvarsym
1096 start_new_lexeme = stepOverLexeme buf
1098 -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1099 case expandWhile# is_ident start_new_lexeme of { buf1 ->
1104 case slurp_trailing_hashes buf1 exts of { buf' ->
1107 lexeme = lexemeToFastString buf'
1108 new_buf = mergeLexemes buf buf'
1109 is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1111 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1112 Nothing -> is_a_qvarid ;
1114 Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be
1115 -> is_a_qvarid -- recognised as keywords here.
1117 -> just_a_conid -- avoid M.where etc.
1120 slurp_trailing_hashes buf exts
1121 | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
1126 | is_upper f = ITconid pk_str
1127 | is_ident f = ITvarid pk_str
1128 | f `eqChar#` ':'# = ITconsym pk_str
1129 | otherwise = ITvarsym pk_str
1131 (C# f) = _HEAD_ pk_str
1132 -- tl = _TAIL_ pk_str
1134 mk_qvar_token m token =
1135 -- trace ("mk_qvar ") $
1136 case mk_var_token token of
1137 ITconid n -> ITqconid (m,n)
1138 ITvarid n -> ITqvarid (m,n)
1139 ITconsym n -> ITqconsym (m,n)
1140 ITvarsym n -> ITqvarsym (m,n)
1141 _ -> ITunknown (show token)
1144 ----------------------------------------------------------------------------
1145 Horrible stuff for dealing with M.(,,,)
1148 lex_tuple cont mod buf back_off =
1152 case currentChar# buf of
1153 ','# -> go (n+1) (stepOn buf)
1154 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1157 lex_ubx_tuple cont mod buf back_off =
1161 case currentChar# buf of
1162 ','# -> go (n+1) (stepOn buf)
1163 '#'# -> case lookAhead# buf 1# of
1164 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1170 -----------------------------------------------------------------------------
1171 'lexPragma' rips along really fast, looking for a '##-}',
1172 indicating the end of the pragma we're skipping
1175 lexPragma cont contf inStr buf =
1176 case currentChar# buf of
1177 '#'# | inStr ==# 0# ->
1178 case lookAhead# buf 1# of { '#'# ->
1179 case lookAhead# buf 2# of { '-'# ->
1180 case lookAhead# buf 3# of { '}'# ->
1181 contf cont (lexemeToBuffer buf)
1182 (stepOverLexeme (setCurrentPos# buf 4#));
1183 _ -> lexPragma cont contf inStr (incLexeme buf) };
1184 _ -> lexPragma cont contf inStr (incLexeme buf) };
1185 _ -> lexPragma cont contf inStr (incLexeme buf) }
1189 odd_slashes buf flg i# =
1190 case lookAhead# buf i# of
1191 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1194 not_inStr = if inStr ==# 0# then 1# else 0#
1196 case lookAhead# buf (negateInt# 1#) of --backwards, actually
1197 '\\'# -> -- escaping something..
1198 if odd_slashes buf True (negateInt# 2#)
1199 then -- odd number of slashes, " is escaped.
1200 lexPragma cont contf inStr (incLexeme buf)
1201 else -- even number of slashes, \ is escaped.
1202 lexPragma cont contf not_inStr (incLexeme buf)
1203 _ -> lexPragma cont contf not_inStr (incLexeme buf)
1205 '\''# | inStr ==# 0# ->
1206 case lookAhead# buf 1# of { '"'# ->
1207 case lookAhead# buf 2# of { '\''# ->
1208 lexPragma cont contf inStr (setCurrentPos# buf 3#);
1209 _ -> lexPragma cont contf inStr (incLexeme buf) };
1210 _ -> lexPragma cont contf inStr (incLexeme buf) }
1212 -- a sign that the input is ill-formed, since pragmas are
1213 -- assumed to always be properly closed (in .hi files).
1214 '\NUL'# -> trace "lexPragma: unexpected end-of-file" $
1215 cont (ITunknown "\NUL") buf
1217 _ -> lexPragma cont contf inStr (incLexeme buf)
1221 -----------------------------------------------------------------------------
1232 data PState = PState {
1234 extsBitmap :: Int#, -- bitmap that determines permitted extensions
1237 context :: [LayoutContext]
1240 type P a = StringBuffer -- Input string
1245 returnP a buf s = POk s a
1247 thenP :: P a -> (a -> P b) -> P b
1248 m `thenP` k = \ buf s ->
1250 POk s1 a -> k a buf s1
1251 PFailed err -> PFailed err
1253 thenP_ :: P a -> P b -> P b
1254 m `thenP_` k = m `thenP` \_ -> k
1256 mapP :: (a -> P b) -> [a] -> P [b]
1257 mapP f [] = returnP []
1260 mapP f as `thenP` \bs ->
1263 failP :: String -> P a
1264 failP msg buf s = PFailed (text msg)
1266 failMsgP :: Message -> P a
1267 failMsgP msg buf s = PFailed msg
1269 lexError :: String -> P a
1270 lexError str buf s@PState{ loc = loc }
1271 = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1273 getSrcLocP :: P SrcLoc
1274 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1276 -- use a temporary SrcLoc for the duration of the argument
1277 setSrcLocP :: SrcLoc -> P a -> P a
1278 setSrcLocP new_loc p buf s =
1279 case p buf s{ loc=new_loc } of
1281 PFailed e -> PFailed e
1283 getSrcFile :: P FAST_STRING
1284 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1286 pushContext :: LayoutContext -> P ()
1287 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1291 This special case in layoutOn is to handle layout contexts with are
1292 indented the same or less than the current context. This is illegal
1293 according to the Haskell spec, so we have to arrange to close the
1294 current context. eg.
1299 after the first 'where', the sequence of events is:
1301 - layout system inserts a ';' (column 0)
1302 - parser begins a new context at column 0
1303 - parser shifts ';' (legal empty declaration)
1304 - parser sees 'class': parse error (we're still in the inner context)
1306 trouble is, by the time we know we need a new context, the lexer has
1307 already generated the ';'. Hacky solution is as follows: since we
1308 know the column of the next token (it's the column number of the new
1309 context), we set the ACTUAL column number of the new context to this
1310 numer plus one. Hence the next time the lexer is called, a '}' will
1311 be generated to close the new context straight away. Furthermore, we
1312 have to set the atbol flag so that the ';' that the parser shifted as
1313 part of the new context is re-generated.
1315 when the new context is *less* indented than the current one:
1317 f = f where g = g where
1320 - current context: column 12.
1321 - on seeing 'h' (column 0), the layout system inserts '}'
1322 - parser starts a new context, column 0
1323 - parser sees '}', uses it to close new context
1324 - we still need to insert another '}' followed by a ';',
1325 hence the atbol trick.
1327 There's also a special hack in here to deal with
1334 i.e. the inner context is at the same indentation level as the outer
1335 context. This is strictly illegal according to Haskell 98, but
1336 there's a lot of existing code using this style and it doesn't make
1337 any sense to disallow it, since empty 'do' lists don't make sense.
1340 layoutOn :: Bool -> P ()
1341 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1342 let offset = lexemeIndex buf -# bol in
1345 | if strict then prev_off >=# offset else prev_off ># offset ->
1346 --trace ("layout on, column: " ++ show (I# offset)) $
1347 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1349 --trace ("layout on, column: " ++ show (I# offset)) $
1350 POk s{ context = Layout offset : ctx } ()
1353 layoutOff buf s@(PState{ context = ctx }) =
1354 POk s{ context = NoLayout:ctx } ()
1357 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1359 (_:tl) -> POk s{ context = tl } ()
1360 [] -> PFailed (srcParseErr buf loc)
1363 Note that if the name of the file we're processing ends
1364 with `hi-boot', we accept it on faith as having the right
1365 version. This is done so that .hi-boot files that comes
1366 with hsc don't have to be updated before every release,
1367 *and* it allows us to share .hi-boot files with versions
1368 of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1370 If the version number is 0, the checking is also turned off.
1371 (needed to deal with GHC.hi only!)
1373 Once we can assume we're compiling with a version of ghc that
1374 supports interface file checking, we can drop the special
1377 checkVersion :: Maybe Integer -> P ()
1378 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1379 | (v==0) || (v == fromIntegral opt_HiVersion) || opt_NoHiCheck = POk s ()
1380 | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1381 checkVersion mb@Nothing buf s@(PState{loc = loc})
1382 | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1383 | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1386 -- for reasons of efficiency, flags indicating language extensions (eg,
1387 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1390 glaExtsBit, ffiBit, parrBit :: Int
1392 ffiBit = 1 -- FIXME: not used yet; still part of `glaExtsBit'
1395 glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
1396 glaExtsEnabled flags = testBit (toInt32 flags) glaExtsBit
1397 ffiEnabled flags = testBit (toInt32 flags) ffiBit
1398 parrEnabled flags = testBit (toInt32 flags) parrBit
1400 toInt32 :: Int# -> Int32
1401 toInt32 x# = fromIntegral (I# x#)
1403 -- convenient record-based bitmap for the interface to the rest of the world
1405 data ExtFlags = ExtFlags {
1406 glasgowExtsEF :: Bool,
1407 -- ffiEF :: Bool, -- commented out to avoid warnings
1408 parrEF :: Bool -- while not used yet
1411 -- create a parse state
1413 mkPState :: SrcLoc -> ExtFlags -> PState
1414 mkPState loc exts = PState {
1416 extsBitmap = case (fromIntegral bitmap) of {I# bits -> bits},
1422 bitmap = glaExtsBit `setBitIf` glasgowExtsEF exts
1423 -- .|. ffiBit `setBitIf` ffiEF exts
1424 .|. parrBit `setBitIf` parrEF exts
1426 setBitIf :: Int -> Bool -> Int32
1427 b `setBitIf` cond | cond = bit b
1431 -----------------------------------------------------------------
1433 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1435 = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1436 text (lexemeToString s), char '\'']
1438 ifaceVersionErr hi_vers l toks
1439 = hsep [ppr l, ptext SLIT("Interface file version error;"),
1440 ptext SLIT("Expected"), int opt_HiVersion,
1441 ptext SLIT("found "), pp_version]
1445 Nothing -> ptext SLIT("pre ghc-3.02 version")
1446 Just v -> ptext SLIT("version") <+> integer v
1448 -----------------------------------------------------------------------------
1450 srcParseErr :: StringBuffer -> SrcLoc -> Message
1454 then ptext SLIT(": parse error (possibly incorrect indentation)")
1455 else hcat [ptext SLIT(": parse error on input "),
1456 char '`', text token, char '\'']
1459 token = lexemeToString s