ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( chr )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | IT__export
 127   | ITdepends
 128   | IT__forall
 129   | ITletrec
 130   | ITcoerce
 131   | ITinlineMe
 132   | ITinlineCall
 133   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 134   | ITdefaultbranch
 135   | ITbottom
 136   | ITinteger_lit
 137   | ITfloat_lit
 138   | ITword_lit
 139   | ITword64_lit
 140   | ITint64_lit
 141   | ITrational_lit
 142   | ITaddr_lit
 143   | ITlit_lit
 144   | ITstring_lit
 145   | ITtypeapp
 146   | ITusage
 147   | ITfuall
 148   | ITarity
 149   | ITspecialise
 150   | ITnocaf
 151   | ITunfold InlinePragInfo
 152   | ITstrict ([Demand], Bool)
 153   | ITrules
 154   | ITcprinfo
 155   | ITdeprecated
 156   | IT__scc
 157   | ITsccAllCafs
 158
 159   | ITspecialise_prag           -- Pragmas
 160   | ITsource_prag
 161   | ITinline_prag
 162   | ITnoinline_prag
 163   | ITrules_prag
 164   | ITdeprecated_prag
 165   | ITline_prag
 166   | ITclose_prag
 167
 168   | ITdotdot                    -- reserved symbols
 169   | ITdcolon
 170   | ITequal
 171   | ITlam
 172   | ITvbar
 173   | ITlarrow
 174   | ITrarrow
 175   | ITat
 176   | ITtilde
 177   | ITdarrow
 178   | ITminus
 179   | ITbang
 180   | ITdot
 181
 182   | ITbiglam                    -- GHC-extension symbols
 183
 184   | ITocurly                    -- special symbols
 185   | ITccurly
 186   | ITvccurly
 187   | ITobrack
 188   | ITcbrack
 189   | IToparen
 190   | ITcparen
 191   | IToubxparen
 192   | ITcubxparen
 193   | ITsemi
 194   | ITcomma
 195   | ITunderscore
 196   | ITbackquote
 197
 198   | ITvarid   FAST_STRING       -- identifiers
 199   | ITconid   FAST_STRING
 200   | ITvarsym  FAST_STRING
 201   | ITconsym  FAST_STRING
 202   | ITqvarid  (FAST_STRING,FAST_STRING)
 203   | ITqconid  (FAST_STRING,FAST_STRING)
 204   | ITqvarsym (FAST_STRING,FAST_STRING)
 205   | ITqconsym (FAST_STRING,FAST_STRING)
 206
 207   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 208
 209   | ITpragma StringBuffer
 210
 211   | ITchar       Char
 212   | ITstring     FAST_STRING
 213   | ITinteger    Integer
 214   | ITrational   Rational
 215
 216   | ITprimchar   Char
 217   | ITprimstring FAST_STRING
 218   | ITprimint    Integer
 219   | ITprimfloat  Rational
 220   | ITprimdouble Rational
 221   | ITlitlit     FAST_STRING
 222
 223   | ITunknown String            -- Used when the lexer can't make sense of it
 224   | ITeof                       -- end of file token
 225   deriving Show -- debugging
 226 \end{code}
 227
 228 -----------------------------------------------------------------------------
 229 Keyword Lists
 230
 231 \begin{code}
 232 pragmaKeywordsFM = listToUFM $
 233       map (\ (x,y) -> (_PK_ x,y))
 234        [( "SPECIALISE", ITspecialise_prag ),
 235         ( "SPECIALIZE", ITspecialise_prag ),
 236         ( "SOURCE",     ITsource_prag ),
 237         ( "INLINE",     ITinline_prag ),
 238         ( "NOINLINE",   ITnoinline_prag ),
 239         ( "NOTINLINE",  ITnoinline_prag ),
 240         ( "LINE",       ITline_prag ),
 241         ( "RULES",      ITrules_prag ),
 242         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 243         ( "DEPRECATED", ITdeprecated_prag )
 244         ]
 245
 246 haskellKeywordsFM = listToUFM $
 247       map (\ (x,y) -> (_PK_ x,y))
 248        [( "_",          ITunderscore ),
 249         ( "as",         ITas ),
 250         ( "case",       ITcase ),
 251         ( "class",      ITclass ),
 252         ( "data",       ITdata ),
 253         ( "default",    ITdefault ),
 254         ( "deriving",   ITderiving ),
 255         ( "do",         ITdo ),
 256         ( "else",       ITelse ),
 257         ( "hiding",     IThiding ),
 258         ( "if",         ITif ),
 259         ( "import",     ITimport ),
 260         ( "in",         ITin ),
 261         ( "infix",      ITinfix ),
 262         ( "infixl",     ITinfixl ),
 263         ( "infixr",     ITinfixr ),
 264         ( "instance",   ITinstance ),
 265         ( "let",        ITlet ),
 266         ( "module",     ITmodule ),
 267         ( "newtype",    ITnewtype ),
 268         ( "of",         ITof ),
 269         ( "qualified",  ITqualified ),
 270         ( "then",       ITthen ),
 271         ( "type",       ITtype ),
 272         ( "where",      ITwhere ),
 273         ( "_scc_",      ITscc )
 274      ]
 275
 276 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 277 ghcExtensionKeywordsFM = listToUFM $
 278         map (\ (x,y) -> (_PK_ x,y))
 279      [  ( "forall",     ITforall ),
 280         ( "foreign",    ITforeign ),
 281         ( "export",     ITexport ),
 282         ( "label",      ITlabel ),
 283         ( "dynamic",    ITdynamic ),
 284         ( "unsafe",     ITunsafe ),
 285         ( "with",       ITwith ),
 286         ( "stdcall",    ITstdcallconv),
 287         ( "ccall",      ITccallconv),
 288         ("_ccall_",     ITccall (False, False, False)),
 289         ("_ccall_GC_",  ITccall (False, False, True)),
 290         ("_casm_",      ITccall (False, True,  False)),
 291         ("_casm_GC_",   ITccall (False, True,  True)),
 292
 293         -- interface keywords
 294         ("__interface",         ITinterface),
 295         ("__export",            IT__export),
 296         ("__depends",           ITdepends),
 297         ("__forall",            IT__forall),
 298         ("__letrec",            ITletrec),
 299         ("__coerce",            ITcoerce),
 300         ("__inline_me",         ITinlineMe),
 301         ("__inline_call",       ITinlineCall),
 302         ("__depends",           ITdepends),
 303         ("__DEFAULT",           ITdefaultbranch),
 304         ("__bot",               ITbottom),
 305         ("__integer",           ITinteger_lit),
 306         ("__float",             ITfloat_lit),
 307         ("__int64",             ITint64_lit),
 308         ("__word",              ITword_lit),
 309         ("__word64",            ITword64_lit),
 310         ("__rational",          ITrational_lit),
 311         ("__addr",              ITaddr_lit),
 312         ("__litlit",            ITlit_lit),
 313         ("__string",            ITstring_lit),
 314         ("__a",                 ITtypeapp),
 315         ("__u",                 ITusage),
 316         ("__fuall",             ITfuall),
 317         ("__A",                 ITarity),
 318         ("__P",                 ITspecialise),
 319         ("__C",                 ITnocaf),
 320         ("__R",                 ITrules),
 321         ("__D",                 ITdeprecated),
 322         ("__U",                 ITunfold NoInlinePragInfo),
 323
 324         ("__ccall",             ITccall (False, False, False)),
 325         ("__ccall_GC",          ITccall (False, False, True)),
 326         ("__dyn_ccall",         ITccall (True,  False, False)),
 327         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 328         ("__casm",              ITccall (False, True,  False)),
 329         ("__dyn_casm",          ITccall (True,  True,  False)),
 330         ("__casm_GC",           ITccall (False, True,  True)),
 331         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 332
 333         ("/\\",                 ITbiglam)
 334      ]
 335
 336
 337 haskellKeySymsFM = listToUFM $
 338         map (\ (x,y) -> (_PK_ x,y))
 339       [ ("..",          ITdotdot)
 340        ,("::",          ITdcolon)
 341        ,("=",           ITequal)
 342        ,("\\",          ITlam)
 343        ,("|",           ITvbar)
 344        ,("<-",          ITlarrow)
 345        ,("->",          ITrarrow)
 346        ,("@",           ITat)
 347        ,("~",           ITtilde)
 348        ,("=>",          ITdarrow)
 349        ,("-",           ITminus)
 350        ,("!",           ITbang)
 351        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 352        ]
 353 \end{code}
 354
 355 -----------------------------------------------------------------------------
 356 The lexical analyser
 357
 358 Lexer state:
 359
 360         - (glaexts) lexing an interface file or -fglasgow-exts
 361         - (bol)   pointer to beginning of line (for column calculations)
 362         - (buf)   pointer to beginning of token
 363         - (buf)   pointer to current char
 364         - (atbol) flag indicating whether we're at the beginning of a line
 365
 366 \begin{code}
 367 lexer :: (Token -> P a) -> P a
 368 lexer cont buf s@(PState{
 369                     loc = loc,
 370                     glasgow_exts = glaexts,
 371                     bol = bol,
 372                     atbol = atbol,
 373                     context = ctx
 374                 })
 375
 376         -- first, start a new lexeme and lose all the whitespace
 377   =  _scc_ "Lexer"
 378   tab line bol atbol (stepOverLexeme buf)
 379   where
 380         line = srcLocLine loc
 381
 382         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 383           case currentChar# buf of
 384
 385             '\NUL'# ->
 386                    if bufferExhausted (stepOn buf)
 387                         then cont ITeof buf s'
 388                         else trace "lexer: misplaced NUL?" $
 389                              tab y bol atbol (stepOn buf)
 390
 391             '\n'# -> let buf' = stepOn buf
 392                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 393
 394                 -- find comments.  This got harder in Haskell 98.
 395             '-'# ->  let trundle n =
 396                           let next = lookAhead# buf n in
 397                           if next `eqChar#` '-'# then trundle (n +# 1#)
 398                           else if is_symbol next || n <# 2#
 399                                 then is_a_token
 400                                 else tab y bol atbol
 401                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 402                     in trundle 1#
 403
 404                 -- comments and pragmas.  We deal with LINE pragmas here,
 405                 -- and throw out any unrecognised pragmas as comments.  Any
 406                 -- pragmas we know about are dealt with later (after any layout
 407                 -- processing if necessary).
 408
 409             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 410                 if lookAhead# buf 2# `eqChar#` '#'# then
 411                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 412                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 413                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 414                   let lexeme = mkFastString -- ToDo: too slow
 415                                   (map toUpper (lexemeToString buf2)) in
 416                   case lookupUFM pragmaKeywordsFM lexeme of
 417                         Just ITline_prag ->
 418                            line_prag skip_to_end buf2 s'
 419                         Just other -> is_a_token
 420                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 421                   }}
 422
 423                 else skip_to_end (stepOnBy# buf 2#) s'
 424                 where
 425                     skip_to_end = nested_comment (lexer cont)
 426
 427                 -- special GHC extension: we grok cpp-style #line pragmas
 428             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 429                 line_prag next_line (stepOn buf) s'
 430                 where
 431                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 432
 433                 -- tabs have been expanded beforehand
 434             c | is_space c -> tab y bol atbol (stepOn buf)
 435               | otherwise  -> is_a_token
 436
 437            where s' = s{loc = replaceSrcLine loc y,
 438                         bol = bol,
 439                        atbol = atbol}
 440
 441                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 442                             | otherwise    = lexToken cont glaexts buf s'
 443
 444 -- {-# LINE .. #-} pragmas.  yeuch.
 445 line_prag cont buf s@PState{loc=loc} =
 446   case expandWhile# is_space buf                of { buf1 ->
 447   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 448   -- subtract one: the line number refers to the *following* line.
 449   let real_line = line - 1 in
 450   case fromInteger real_line                    of { i@(I# l) ->
 451         -- ToDo, if no filename then we skip the newline.... d'oh
 452   case expandWhile# is_space buf2               of { buf3 ->
 453   case currentChar# buf3                        of
 454      '\"'#{-"-} ->
 455         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 456         let
 457             file = lexemeToFastString buf4
 458             new_buf = stepOn (stepOverLexeme buf4)
 459         in
 460         if nullFastString file
 461                 then cont new_buf s{loc = replaceSrcLine loc l}
 462                 else cont new_buf s{loc = mkSrcLoc file i}
 463         }
 464      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 465   }}}}
 466
 467 nested_comment :: P a -> P a
 468 nested_comment cont buf = loop buf
 469  where
 470    loop buf =
 471      case currentChar# buf of
 472         '\NUL'# | bufferExhausted (stepOn buf) ->
 473                 lexError "unterminated `{-'" buf
 474
 475         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 476                 cont (stepOnBy# buf 2#)
 477
 478         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 479               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 480
 481         '\n'# -> \ s@PState{loc=loc} ->
 482                  let buf' = stepOn buf in
 483                  nested_comment cont buf'
 484                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 485                           atbol = 1#}
 486
 487         _   -> nested_comment cont (stepOn buf)
 488
 489 -- When we are lexing the first token of a line, check whether we need to
 490 -- insert virtual semicolons or close braces due to layout.
 491
 492 lexBOL :: (Token -> P a) -> P a
 493 lexBOL cont buf s@(PState{
 494                     loc = loc,
 495                     glasgow_exts = glaexts,
 496                     bol = bol,
 497                     atbol = atbol,
 498                     context = ctx
 499                   }) =
 500         if need_close_curly then
 501                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 502                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 503         else if need_semi_colon then
 504                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 505                 cont ITsemi buf s{atbol = 0#}
 506         else
 507                 lexToken cont glaexts buf s{atbol = 0#}
 508   where
 509         col = currentIndex# buf -# bol
 510
 511         need_close_curly =
 512                 case ctx of
 513                         [] -> False
 514                         (i:_) -> case i of
 515                                     NoLayout -> False
 516                                     Layout n -> col <# n
 517         need_semi_colon =
 518                 case ctx of
 519                         [] -> False
 520                         (i:_) -> case i of
 521                                     NoLayout -> False
 522                                     Layout n -> col ==# n
 523
 524
 525 lexToken :: (Token -> P a) -> Int# -> P a
 526 lexToken cont glaexts buf =
 527  --trace "lexToken" $
 528   case currentChar# buf of
 529
 530     -- special symbols ----------------------------------------------------
 531     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 532                 -> cont IToubxparen (setCurrentPos# buf 2#)
 533          | otherwise
 534                 -> cont IToparen (incLexeme buf)
 535
 536     ')'# -> cont ITcparen    (incLexeme buf)
 537     '['# -> cont ITobrack    (incLexeme buf)
 538     ']'# -> cont ITcbrack    (incLexeme buf)
 539     ','# -> cont ITcomma     (incLexeme buf)
 540     ';'# -> cont ITsemi      (incLexeme buf)
 541
 542     '}'# -> \ s@PState{context = ctx} ->
 543             case ctx of
 544                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 545                 _        -> lexError "too many '}'s" buf s
 546
 547     '#'# -> case lookAhead# buf 1# of
 548                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 549                 '-'# -> case lookAhead# buf 2# of
 550                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 551                            _    -> lex_sym cont (incLexeme buf)
 552                 _    -> lex_sym cont (incLexeme buf)
 553
 554     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 555                 -> lex_cstring cont (setCurrentPos# buf 2#)
 556          | otherwise
 557                 -> cont ITbackquote (incLexeme buf)
 558
 559     '{'# ->     -- look for "{-##" special iface pragma
 560         case lookAhead# buf 1# of
 561            '-'# -> case lookAhead# buf 2# of
 562                     '#'# -> case lookAhead# buf 3# of
 563                                 '#'# ->
 564                                    let (lexeme, buf')
 565                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 566                                    cont (ITpragma lexeme) buf'
 567                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 568                     _    -> cont ITocurly (incLexeme buf)
 569            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 570
 571     -- strings/characters -------------------------------------------------
 572     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 573     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 574
 575     -- strictness and cpr pragmas and __scc treated specially.
 576     '_'# | flag glaexts ->
 577          case lookAhead# buf 1# of
 578            '_'# -> case lookAhead# buf 2# of
 579                     'S'# ->
 580                         lex_demand cont (stepOnUntil (not . isSpace)
 581                                         (stepOnBy# buf 3#)) -- past __S
 582                     'M'# ->
 583                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 584
 585                     's'# ->
 586                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 587                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 588                                Nothing   -> lex_id cont glaexts buf
 589                     _ -> lex_id cont glaexts buf
 590            _    -> lex_id cont glaexts buf
 591
 592         -- Hexadecimal and octal constants
 593     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 594                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 595          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 596                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 597         where ch   = lookAhead# buf 1#
 598               ch2  = lookAhead# buf 2#
 599               buf' = setCurrentPos# buf 2#
 600
 601     '\NUL'# ->
 602             if bufferExhausted (stepOn buf) then
 603                cont ITeof buf
 604             else
 605                trace "lexIface: misplaced NUL?" $
 606                cont (ITunknown "\NUL") (stepOn buf)
 607
 608     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 609             lex_ip cont (incLexeme buf)
 610     c | is_digit  c -> lex_num cont glaexts 0 buf
 611       | is_symbol c -> lex_sym cont buf
 612       | is_upper  c -> lex_con cont glaexts buf
 613       | is_ident  c -> lex_id  cont glaexts buf
 614       | otherwise   -> lexError "illegal character" buf
 615
 616 -- Int# is unlifted, and therefore faster than Bool for flags.
 617 {-# INLINE flag #-}
 618 flag :: Int# -> Bool
 619 flag 0# = False
 620 flag _  = True
 621
 622 -------------------------------------------------------------------------------
 623 -- Pragmas
 624
 625 lex_prag cont buf
 626   = case expandWhile# is_space buf of { buf1 ->
 627     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 628     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 629     case lookupUFM pragmaKeywordsFM lexeme of
 630         Just kw -> cont kw (mergeLexemes buf buf2)
 631         Nothing -> panic "lex_prag"
 632   }}
 633
 634 -------------------------------------------------------------------------------
 635 -- Strings & Chars
 636
 637 lex_string cont glaexts s buf
 638   = case currentChar# buf of
 639         '"'#{-"-} ->
 640            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 641            case currentChar# buf' of
 642                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 643                 _                   -> cont (ITstring s') buf'
 644
 645         -- ignore \& in a string, deal with string gaps
 646         '\\'# | next_ch `eqChar#` '&'#
 647                 -> lex_string cont glaexts s buf'
 648               | is_space next_ch
 649                 -> lex_stringgap cont glaexts s (incLexeme buf)
 650
 651             where next_ch = lookAhead# buf 1#
 652                   buf' = setCurrentPos# buf 2#
 653
 654         _ -> lex_char (lex_next_string cont s) glaexts buf
 655
 656 lex_stringgap cont glaexts s buf
 657   = let buf' = incLexeme buf in
 658     case currentChar# buf of
 659         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 660                   st{loc = incSrcLine loc}
 661         '\\'# -> lex_string cont glaexts s buf'
 662         c | is_space c -> lex_stringgap cont glaexts s buf'
 663         other -> charError buf'
 664
 665 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 666
 667 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 668 lex_char cont glaexts buf
 669   = case currentChar# buf of
 670         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 671         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 672         other -> charError buf
 673
 674 char_end cont glaexts c buf
 675   = case currentChar# buf of
 676         '\''# -> let buf' = incLexeme buf in
 677                  case currentChar# buf' of
 678                         '#'# | flag glaexts
 679                                 -> cont (ITprimchar c) (incLexeme buf')
 680                         _       -> cont (ITchar c) buf'
 681         _     -> charError buf
 682
 683 lex_escape cont buf
 684   = let buf' = incLexeme buf in
 685     case currentChar# buf of
 686         'a'#       -> cont '\a' buf'
 687         'b'#       -> cont '\b' buf'
 688         'f'#       -> cont '\f' buf'
 689         'n'#       -> cont '\n' buf'
 690         'r'#       -> cont '\r' buf'
 691         't'#       -> cont '\t' buf'
 692         'v'#       -> cont '\v' buf'
 693         '\\'#      -> cont '\\' buf'
 694         '"'#       -> cont '\"' buf'
 695         '\''#      -> cont '\'' buf'
 696         '^'#       -> let c = currentChar# buf' in
 697                       if c `geChar#` '@'# && c `leChar#` '_'#
 698                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 699                         else charError buf'
 700
 701         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 702         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 703         x | is_digit x
 704                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 705
 706         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 707                                        Just buf2 <- [prefixMatch buf p] ] of
 708                             (c,buf2):_ -> cont c buf2
 709                             [] -> charError buf'
 710
 711 after_charnum cont i buf
 712   = let int = fromInteger i in
 713     if i >= 0 && i <= 255
 714         then cont (chr int) buf
 715         else charError buf
 716
 717 readNum cont buf is_digit base conv = read buf 0
 718   where read buf i
 719           = case currentChar# buf of { c ->
 720             if is_digit c
 721                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 722                 else cont i buf
 723             }
 724
 725 is_hexdigit c
 726         =  is_digit c
 727         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 728         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 729
 730 hex c | is_digit c = ord# c -# ord# '0'#
 731       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 732 oct_or_dec c = ord# c -# ord# '0'#
 733
 734 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 735
 736 to_lower c
 737   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 738         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 739   | otherwise = c
 740
 741 charError buf = lexError "error in character literal" buf
 742
 743 silly_escape_chars = [
 744         ("NUL", '\NUL'),
 745         ("SOH", '\SOH'),
 746         ("STX", '\STX'),
 747         ("ETX", '\ETX'),
 748         ("EOT", '\EOT'),
 749         ("ENQ", '\ENQ'),
 750         ("ACK", '\ACK'),
 751         ("BEL", '\BEL'),
 752         ("BS", '\BS'),
 753         ("HT", '\HT'),
 754         ("LF", '\LF'),
 755         ("VT", '\VT'),
 756         ("FF", '\FF'),
 757         ("CR", '\CR'),
 758         ("SO", '\SO'),
 759         ("SI", '\SI'),
 760         ("DLE", '\DLE'),
 761         ("DC1", '\DC1'),
 762         ("DC2", '\DC2'),
 763         ("DC3", '\DC3'),
 764         ("DC4", '\DC4'),
 765         ("NAK", '\NAK'),
 766         ("SYN", '\SYN'),
 767         ("ETB", '\ETB'),
 768         ("CAN", '\CAN'),
 769         ("EM", '\EM'),
 770         ("SUB", '\SUB'),
 771         ("ESC", '\ESC'),
 772         ("FS", '\FS'),
 773         ("GS", '\GS'),
 774         ("RS", '\RS'),
 775         ("US", '\US'),
 776         ("SP", '\SP'),
 777         ("DEL", '\DEL')
 778         ]
 779
 780 -------------------------------------------------------------------------------
 781
 782 lex_demand cont buf =
 783  case read_em [] buf of { (ls,buf') ->
 784  case currentChar# buf' of
 785    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 786    _    -> cont (ITstrict (ls, False)) buf'
 787  }
 788  where
 789    -- code snatched from Demand.lhs
 790   read_em acc buf =
 791    case currentChar# buf of
 792     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 793     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 794     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 795     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 796     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 797     ')'# -> (reverse acc, stepOn buf)
 798     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 799     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 800     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 801     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 802     _    -> (reverse acc, buf)
 803
 804   do_unpack new_or_data wrapper_unpacks acc buf
 805    = case read_em [] buf of
 806       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 807
 808
 809 ------------------
 810 lex_scc cont buf =
 811  case currentChar# buf of
 812   'C'# -> cont ITsccAllCafs (incLexeme buf)
 813   other -> cont ITscc buf
 814
 815 -----------------------------------------------------------------------------
 816 -- Numbers
 817
 818 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 819 lex_num cont glaexts acc buf =
 820  case scanNumLit acc buf of
 821      (acc',buf') ->
 822        case currentChar# buf' of
 823          '.'# | is_digit (lookAhead# buf' 1#) ->
 824              -- this case is not optimised at all, as the
 825              -- presence of floating point numbers in interface
 826              -- files is not that common. (ToDo)
 827             case expandWhile# is_digit (incLexeme buf') of
 828               buf2 -> -- points to first non digit char
 829
 830                 let l = case currentChar# buf2 of
 831                           'E'# -> do_exponent
 832                           'e'# -> do_exponent
 833                           _ -> buf2
 834
 835                     do_exponent
 836                         = let buf3 = incLexeme buf2 in
 837                           case currentChar# buf3 of
 838                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 839                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 840                                 x | is_digit x -> expandWhile# is_digit buf3
 841                                 _ -> buf2
 842
 843                     v = readRational__ (lexemeToString l)
 844
 845                 in case currentChar# l of -- glasgow exts only
 846                       '#'# | flag glaexts -> let l' = incLexeme l in
 847                               case currentChar# l' of
 848                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 849                                 _    -> cont (ITprimfloat  v) l'
 850                       _ -> cont (ITrational v) l
 851
 852          _ -> after_lexnum cont glaexts acc' buf'
 853
 854 after_lexnum cont glaexts i buf
 855   = case currentChar# buf of
 856         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 857         _    -> cont (ITinteger i) buf
 858
 859 -----------------------------------------------------------------------------
 860 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 861
 862 -- we lexemeToFastString on the bit between the ``''s, but include the
 863 -- quotes in the full lexeme.
 864
 865 lex_cstring cont buf =
 866  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 867    Just buf' -> cont (ITlitlit (lexemeToFastString
 868                                 (setCurrentPos# buf' (negateInt# 2#))))
 869                    (mergeLexemes buf buf')
 870    Nothing   -> lexError "unterminated ``" buf
 871
 872 -----------------------------------------------------------------------------
 873 -- identifiers, symbols etc.
 874
 875 lex_ip cont buf =
 876  case expandWhile# is_ident buf of
 877    buf' -> cont (ITipvarid lexeme) buf'
 878            where lexeme = lexemeToFastString buf'
 879
 880 lex_id cont glaexts buf =
 881  let buf1 = expandWhile# is_ident buf in
 882  seq buf1 $
 883
 884  case (if flag glaexts
 885         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 886         else buf1)                              of { buf' ->
 887
 888  let lexeme  = lexemeToFastString buf' in
 889
 890  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 891         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 892                           cont kwd_token buf';
 893         Nothing        ->
 894
 895  let var_token = cont (ITvarid lexeme) buf' in
 896
 897  if not (flag glaexts)
 898    then var_token
 899    else
 900
 901  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 902         Just kwd_token -> cont kwd_token buf';
 903         Nothing        -> var_token
 904
 905  }}}
 906
 907 lex_sym cont buf =
 908  case expandWhile# is_symbol buf of
 909    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 910                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 911                                   cont kwd_token buf' ;
 912                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 913                                   cont (mk_var_token lexeme) buf'
 914            }
 915         where lexeme = lexemeToFastString buf'
 916
 917
 918 lex_con cont glaexts buf =
 919  case expandWhile# is_ident buf          of { buf1 ->
 920  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 921
 922  case currentChar# buf' of
 923      '.'# -> munch
 924      _    -> just_a_conid
 925
 926    where
 927     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 928                    cont (ITconid lexeme) buf'
 929     lexeme = lexemeToFastString buf'
 930     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 931  }}
 932
 933 lex_qid cont glaexts mod buf just_a_conid =
 934  case currentChar# buf of
 935   '['# ->       -- Special case for []
 936     case lookAhead# buf 1# of
 937      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 938      _    -> just_a_conid
 939
 940   '('# ->  -- Special case for (,,,)
 941            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 942     case lookAhead# buf 1# of
 943      '#'# | flag glaexts -> case lookAhead# buf 2# of
 944                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 945                                 just_a_conid
 946                 _    -> just_a_conid
 947      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 948      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 949      _    -> just_a_conid
 950
 951   '-'# -> case lookAhead# buf 1# of
 952             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 953             _    -> lex_id3 cont glaexts mod buf just_a_conid
 954   _    -> lex_id3 cont glaexts mod buf just_a_conid
 955
 956 lex_id3 cont glaexts mod buf just_a_conid
 957   | is_symbol (currentChar# buf) =
 958      let
 959         start_new_lexeme = stepOverLexeme buf
 960      in
 961      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 962      let
 963        lexeme  = lexemeToFastString buf'
 964         -- real lexeme is M.<sym>
 965        new_buf = mergeLexemes buf buf'
 966      in
 967      cont (mk_qvar_token mod lexeme) new_buf
 968         -- wrong, but arguably morally right: M... is now a qvarsym
 969      }
 970
 971   | otherwise   =
 972      let
 973         start_new_lexeme = stepOverLexeme buf
 974      in
 975      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 976      if emptyLexeme buf1
 977             then just_a_conid
 978             else
 979
 980      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 981
 982      let
 983       lexeme  = lexemeToFastString buf'
 984       new_buf = mergeLexemes buf buf'
 985       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
 986      in
 987      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 988             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 989             Nothing        -> is_a_qvarid
 990         -- TODO: special ids (as, qualified, hiding) shouldn't be
 991         -- recognised as keywords here.  ie.  M.as is a qualified varid.
 992      }}}
 993
 994
 995 slurp_trailing_hashes buf glaexts
 996   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
 997   | otherwise    = buf
 998
 999
1000 mk_var_token pk_str
1001   | is_upper f          = ITconid pk_str
1002   | is_ident f          = ITvarid pk_str
1003   | f `eqChar#` ':'#    = ITconsym pk_str
1004   | otherwise           = ITvarsym pk_str
1005   where
1006       (C# f) = _HEAD_ pk_str
1007       tl     = _TAIL_ pk_str
1008
1009 mk_qvar_token m token =
1010  case mk_var_token token of
1011    ITconid n  -> ITqconid  (m,n)
1012    ITvarid n  -> ITqvarid  (m,n)
1013    ITconsym n -> ITqconsym (m,n)
1014    ITvarsym n -> ITqvarsym (m,n)
1015    _          -> ITunknown (show token)
1016 \end{code}
1017
1018 ----------------------------------------------------------------------------
1019 Horrible stuff for dealing with M.(,,,)
1020
1021 \begin{code}
1022 lex_tuple cont mod buf back_off =
1023   go 2 buf
1024   where
1025    go n buf =
1026     case currentChar# buf of
1027       ','# -> go (n+1) (stepOn buf)
1028       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1029       _    -> back_off
1030
1031 lex_ubx_tuple cont mod buf back_off =
1032   go 2 buf
1033   where
1034    go n buf =
1035     case currentChar# buf of
1036       ','# -> go (n+1) (stepOn buf)
1037       '#'# -> case lookAhead# buf 1# of
1038                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1039                                  (stepOnBy# buf 2#)
1040                 _    -> back_off
1041       _    -> back_off
1042 \end{code}
1043
1044 -----------------------------------------------------------------------------
1045 doDiscard rips along really fast, looking for a '#-}',
1046 indicating the end of the pragma we're skipping
1047
1048 \begin{code}
1049 doDiscard inStr buf =
1050  case currentChar# buf of
1051    '#'# | not inStr ->
1052        case lookAhead# buf 1# of { '#'# ->
1053        case lookAhead# buf 2# of { '-'# ->
1054        case lookAhead# buf 3# of { '}'# ->
1055            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1056         _    -> doDiscard inStr (incLexeme buf) };
1057         _    -> doDiscard inStr (incLexeme buf) };
1058         _    -> doDiscard inStr (incLexeme buf) }
1059    '"'# ->
1060        let
1061         odd_slashes buf flg i# =
1062           case lookAhead# buf i# of
1063            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1064            _     -> flg
1065        in
1066        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1067          '\\'# -> -- escaping something..
1068            if odd_slashes buf True (negateInt# 2#) then
1069                -- odd number of slashes, " is escaped.
1070               doDiscard inStr (incLexeme buf)
1071            else
1072                -- even number of slashes, \ is escaped.
1073               doDiscard (not inStr) (incLexeme buf)
1074          _ -> case inStr of -- forced to avoid build-up
1075                True  -> doDiscard False (incLexeme buf)
1076                False -> doDiscard True  (incLexeme buf)
1077    _ -> doDiscard inStr (incLexeme buf)
1078
1079 \end{code}
1080
1081 -----------------------------------------------------------------------------
1082
1083 \begin{code}
1084 data LayoutContext
1085   = NoLayout
1086   | Layout Int#
1087
1088 data ParseResult a
1089   = POk PState a
1090   | PFailed Message
1091
1092 data PState = PState {
1093         loc           :: SrcLoc,
1094         glasgow_exts  :: Int#,
1095         bol           :: Int#,
1096         atbol         :: Int#,
1097         context       :: [LayoutContext]
1098      }
1099
1100 type P a = StringBuffer         -- Input string
1101         -> PState
1102         -> ParseResult a
1103
1104 returnP   :: a -> P a
1105 returnP a buf s = POk s a
1106
1107 thenP      :: P a -> (a -> P b) -> P b
1108 m `thenP` k = \ buf s ->
1109         case m buf s of
1110                 POk s1 a -> k a buf s1
1111                 PFailed err  -> PFailed err
1112
1113 thenP_     :: P a -> P b -> P b
1114 m `thenP_` k = m `thenP` \_ -> k
1115
1116 mapP :: (a -> P b) -> [a] -> P [b]
1117 mapP f [] = returnP []
1118 mapP f (a:as) =
1119      f a `thenP` \b ->
1120      mapP f as `thenP` \bs ->
1121      returnP (b:bs)
1122
1123 failP :: String -> P a
1124 failP msg buf s = PFailed (text msg)
1125
1126 failMsgP :: Message -> P a
1127 failMsgP msg buf s = PFailed msg
1128
1129 lexError :: String -> P a
1130 lexError str buf s@PState{ loc = loc }
1131   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1132
1133 getSrcLocP :: P SrcLoc
1134 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1135
1136 getSrcFile :: P FAST_STRING
1137 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1138
1139 getContext :: P [LayoutContext]
1140 getContext buf s@(PState{ context = ctx }) = POk s ctx
1141
1142 pushContext :: LayoutContext -> P ()
1143 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1144
1145 {-
1146
1147 This special case in layoutOn is to handle layout contexts with are
1148 indented the same or less than the current context.  This is illegal
1149 according to the Haskell spec, so we have to arrange to close the
1150 current context.  eg.
1151
1152 class Foo a where
1153 class Bar a
1154
1155 after the first 'where', the sequence of events is:
1156
1157         - layout system inserts a ';' (column 0)
1158         - parser begins a new context at column 0
1159         - parser shifts ';' (legal empty declaration)
1160         - parser sees 'class': parse error (we're still in the inner context)
1161
1162 trouble is, by the time we know we need a new context, the lexer has
1163 already generated the ';'.  Hacky solution is as follows: since we
1164 know the column of the next token (it's the column number of the new
1165 context), we set the ACTUAL column number of the new context to this
1166 numer plus one.  Hence the next time the lexer is called, a '}' will
1167 be generated to close the new context straight away.  Furthermore, we
1168 have to set the atbol flag so that the ';' that the parser shifted as
1169 part of the new context is re-generated.
1170
1171 when the new context is *less* indented than the current one:
1172
1173 f = f where g = g where
1174 h = h
1175
1176         - current context: column 12.
1177         - on seeing 'h' (column 0), the layout system inserts '}'
1178         - parser starts a new context, column 0
1179         - parser sees '}', uses it to close new context
1180         - we still need to insert another '}' followed by a ';',
1181           hence the atbol trick.
1182
1183 There's also a special hack in here to deal with
1184
1185         do
1186            ....
1187            e $ do
1188            blah
1189
1190 i.e. the inner context is at the same indentation level as the outer
1191 context.  This is strictly illegal according to Haskell 98, but
1192 there's a lot of existing code using this style and it doesn't make
1193 any sense to disallow it, since empty 'do' lists don't make sense.
1194 -}
1195
1196 layoutOn :: Bool -> P ()
1197 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1198     let offset = lexemeIndex buf -# bol in
1199     case ctx of
1200         Layout prev_off : _
1201            | if strict then prev_off >=# offset else prev_off ># offset ->
1202                 --trace ("layout on, column: " ++  show (I# offset)) $
1203                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1204         other ->
1205                 --trace ("layout on, column: " ++  show (I# offset)) $
1206                 POk s{ context = Layout offset : ctx } ()
1207
1208 layoutOff :: P ()
1209 layoutOff buf s@(PState{ context = ctx }) =
1210     POk s{ context = NoLayout:ctx } ()
1211
1212 popContext :: P ()
1213 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1214   case ctx of
1215         (_:tl) -> POk s{ context = tl } ()
1216         []     -> PFailed (srcParseErr buf loc)
1217
1218 {-
1219  Note that if the name of the file we're processing ends
1220  with `hi-boot', we accept it on faith as having the right
1221  version. This is done so that .hi-boot files that comes
1222  with hsc don't have to be updated before every release,
1223  *and* it allows us to share .hi-boot files with versions
1224  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1225
1226  If the version number is 0, the checking is also turned off.
1227  (needed to deal with GHC.hi only!)
1228
1229  Once we can assume we're compiling with a version of ghc that
1230  supports interface file checking, we can drop the special
1231  pleading
1232 -}
1233 checkVersion :: Maybe Integer -> P ()
1234 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1235  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1236  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1237 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1238  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1239  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1240
1241 -----------------------------------------------------------------
1242
1243 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1244 ifaceParseErr s l
1245   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1246           text (lexemeToString s), char '\'']
1247
1248 ifaceVersionErr hi_vers l toks
1249   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1250           ptext SLIT("Expected"), int opt_HiVersion,
1251           ptext SLIT("found "), pp_version]
1252     where
1253      pp_version =
1254       case hi_vers of
1255         Nothing -> ptext SLIT("pre ghc-3.02 version")
1256         Just v  -> ptext SLIT("version") <+> integer v
1257
1258 -----------------------------------------------------------------------------
1259
1260 srcParseErr :: StringBuffer -> SrcLoc -> Message
1261 srcParseErr s l
1262   = hcat [ppr l,
1263           if null token
1264              then ptext SLIT(": parse error (possibly incorrect indentation)")
1265              else hcat [ptext SLIT(": parse error on input "),
1266                         char '`', text token, char '\'']
1267     ]
1268   where
1269         token = lexemeToString s
1270
1271 \end{code}