ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( chr )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | IT__export
 127   | ITdepends
 128   | IT__forall
 129   | ITletrec
 130   | ITcoerce
 131   | ITinlineMe
 132   | ITinlineCall
 133   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 134   | ITdefaultbranch
 135   | ITbottom
 136   | ITinteger_lit
 137   | ITfloat_lit
 138   | ITword_lit
 139   | ITword64_lit
 140   | ITint64_lit
 141   | ITrational_lit
 142   | ITaddr_lit
 143   | ITlabel_lit
 144   | ITlit_lit
 145   | ITstring_lit
 146   | ITtypeapp
 147   | ITusage
 148   | ITfuall
 149   | ITarity
 150   | ITspecialise
 151   | ITnocaf
 152   | ITunfold InlinePragInfo
 153   | ITstrict ([Demand], Bool)
 154   | ITrules
 155   | ITcprinfo
 156   | ITdeprecated
 157   | IT__scc
 158   | ITsccAllCafs
 159
 160   | ITspecialise_prag           -- Pragmas
 161   | ITsource_prag
 162   | ITinline_prag
 163   | ITnoinline_prag
 164   | ITrules_prag
 165   | ITdeprecated_prag
 166   | ITline_prag
 167   | ITclose_prag
 168
 169   | ITdotdot                    -- reserved symbols
 170   | ITdcolon
 171   | ITequal
 172   | ITlam
 173   | ITvbar
 174   | ITlarrow
 175   | ITrarrow
 176   | ITat
 177   | ITtilde
 178   | ITdarrow
 179   | ITminus
 180   | ITbang
 181   | ITdot
 182
 183   | ITbiglam                    -- GHC-extension symbols
 184
 185   | ITocurly                    -- special symbols
 186   | ITccurly
 187   | ITvccurly
 188   | ITobrack
 189   | ITcbrack
 190   | IToparen
 191   | ITcparen
 192   | IToubxparen
 193   | ITcubxparen
 194   | ITsemi
 195   | ITcomma
 196   | ITunderscore
 197   | ITbackquote
 198
 199   | ITvarid   FAST_STRING       -- identifiers
 200   | ITconid   FAST_STRING
 201   | ITvarsym  FAST_STRING
 202   | ITconsym  FAST_STRING
 203   | ITqvarid  (FAST_STRING,FAST_STRING)
 204   | ITqconid  (FAST_STRING,FAST_STRING)
 205   | ITqvarsym (FAST_STRING,FAST_STRING)
 206   | ITqconsym (FAST_STRING,FAST_STRING)
 207
 208   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 209
 210   | ITpragma StringBuffer
 211
 212   | ITchar       Char
 213   | ITstring     FAST_STRING
 214   | ITinteger    Integer
 215   | ITrational   Rational
 216
 217   | ITprimchar   Char
 218   | ITprimstring FAST_STRING
 219   | ITprimint    Integer
 220   | ITprimfloat  Rational
 221   | ITprimdouble Rational
 222   | ITlitlit     FAST_STRING
 223
 224   | ITunknown String            -- Used when the lexer can't make sense of it
 225   | ITeof                       -- end of file token
 226   deriving Show -- debugging
 227 \end{code}
 228
 229 -----------------------------------------------------------------------------
 230 Keyword Lists
 231
 232 \begin{code}
 233 pragmaKeywordsFM = listToUFM $
 234       map (\ (x,y) -> (_PK_ x,y))
 235        [( "SPECIALISE", ITspecialise_prag ),
 236         ( "SPECIALIZE", ITspecialise_prag ),
 237         ( "SOURCE",     ITsource_prag ),
 238         ( "INLINE",     ITinline_prag ),
 239         ( "NOINLINE",   ITnoinline_prag ),
 240         ( "NOTINLINE",  ITnoinline_prag ),
 241         ( "LINE",       ITline_prag ),
 242         ( "RULES",      ITrules_prag ),
 243         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 244         ( "DEPRECATED", ITdeprecated_prag )
 245         ]
 246
 247 haskellKeywordsFM = listToUFM $
 248       map (\ (x,y) -> (_PK_ x,y))
 249        [( "_",          ITunderscore ),
 250         ( "as",         ITas ),
 251         ( "case",       ITcase ),
 252         ( "class",      ITclass ),
 253         ( "data",       ITdata ),
 254         ( "default",    ITdefault ),
 255         ( "deriving",   ITderiving ),
 256         ( "do",         ITdo ),
 257         ( "else",       ITelse ),
 258         ( "hiding",     IThiding ),
 259         ( "if",         ITif ),
 260         ( "import",     ITimport ),
 261         ( "in",         ITin ),
 262         ( "infix",      ITinfix ),
 263         ( "infixl",     ITinfixl ),
 264         ( "infixr",     ITinfixr ),
 265         ( "instance",   ITinstance ),
 266         ( "let",        ITlet ),
 267         ( "module",     ITmodule ),
 268         ( "newtype",    ITnewtype ),
 269         ( "of",         ITof ),
 270         ( "qualified",  ITqualified ),
 271         ( "then",       ITthen ),
 272         ( "type",       ITtype ),
 273         ( "where",      ITwhere ),
 274         ( "_scc_",      ITscc )
 275      ]
 276
 277 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 278 ghcExtensionKeywordsFM = listToUFM $
 279         map (\ (x,y) -> (_PK_ x,y))
 280      [  ( "forall",     ITforall ),
 281         ( "foreign",    ITforeign ),
 282         ( "export",     ITexport ),
 283         ( "label",      ITlabel ),
 284         ( "dynamic",    ITdynamic ),
 285         ( "unsafe",     ITunsafe ),
 286         ( "with",       ITwith ),
 287         ( "stdcall",    ITstdcallconv),
 288         ( "ccall",      ITccallconv),
 289         ("_ccall_",     ITccall (False, False, False)),
 290         ("_ccall_GC_",  ITccall (False, False, True)),
 291         ("_casm_",      ITccall (False, True,  False)),
 292         ("_casm_GC_",   ITccall (False, True,  True)),
 293
 294         -- interface keywords
 295         ("__interface",         ITinterface),
 296         ("__export",            IT__export),
 297         ("__depends",           ITdepends),
 298         ("__forall",            IT__forall),
 299         ("__letrec",            ITletrec),
 300         ("__coerce",            ITcoerce),
 301         ("__inline_me",         ITinlineMe),
 302         ("__inline_call",       ITinlineCall),
 303         ("__depends",           ITdepends),
 304         ("__DEFAULT",           ITdefaultbranch),
 305         ("__bot",               ITbottom),
 306         ("__integer",           ITinteger_lit),
 307         ("__float",             ITfloat_lit),
 308         ("__int64",             ITint64_lit),
 309         ("__word",              ITword_lit),
 310         ("__word64",            ITword64_lit),
 311         ("__rational",          ITrational_lit),
 312         ("__addr",              ITaddr_lit),
 313         ("__label",             ITlabel_lit),
 314         ("__litlit",            ITlit_lit),
 315         ("__string",            ITstring_lit),
 316         ("__a",                 ITtypeapp),
 317         ("__u",                 ITusage),
 318         ("__fuall",             ITfuall),
 319         ("__A",                 ITarity),
 320         ("__P",                 ITspecialise),
 321         ("__C",                 ITnocaf),
 322         ("__R",                 ITrules),
 323         ("__D",                 ITdeprecated),
 324         ("__U",                 ITunfold NoInlinePragInfo),
 325
 326         ("__ccall",             ITccall (False, False, False)),
 327         ("__ccall_GC",          ITccall (False, False, True)),
 328         ("__dyn_ccall",         ITccall (True,  False, False)),
 329         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 330         ("__casm",              ITccall (False, True,  False)),
 331         ("__dyn_casm",          ITccall (True,  True,  False)),
 332         ("__casm_GC",           ITccall (False, True,  True)),
 333         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 334
 335         ("/\\",                 ITbiglam)
 336      ]
 337
 338
 339 haskellKeySymsFM = listToUFM $
 340         map (\ (x,y) -> (_PK_ x,y))
 341       [ ("..",          ITdotdot)
 342        ,("::",          ITdcolon)
 343        ,("=",           ITequal)
 344        ,("\\",          ITlam)
 345        ,("|",           ITvbar)
 346        ,("<-",          ITlarrow)
 347        ,("->",          ITrarrow)
 348        ,("@",           ITat)
 349        ,("~",           ITtilde)
 350        ,("=>",          ITdarrow)
 351        ,("-",           ITminus)
 352        ,("!",           ITbang)
 353        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 354        ]
 355 \end{code}
 356
 357 -----------------------------------------------------------------------------
 358 The lexical analyser
 359
 360 Lexer state:
 361
 362         - (glaexts) lexing an interface file or -fglasgow-exts
 363         - (bol)   pointer to beginning of line (for column calculations)
 364         - (buf)   pointer to beginning of token
 365         - (buf)   pointer to current char
 366         - (atbol) flag indicating whether we're at the beginning of a line
 367
 368 \begin{code}
 369 lexer :: (Token -> P a) -> P a
 370 lexer cont buf s@(PState{
 371                     loc = loc,
 372                     glasgow_exts = glaexts,
 373                     bol = bol,
 374                     atbol = atbol,
 375                     context = ctx
 376                 })
 377
 378         -- first, start a new lexeme and lose all the whitespace
 379   =  _scc_ "Lexer"
 380   tab line bol atbol (stepOverLexeme buf)
 381   where
 382         line = srcLocLine loc
 383
 384         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 385           case currentChar# buf of
 386
 387             '\NUL'# ->
 388                    if bufferExhausted (stepOn buf)
 389                         then cont ITeof buf s'
 390                         else trace "lexer: misplaced NUL?" $
 391                              tab y bol atbol (stepOn buf)
 392
 393             '\n'# -> let buf' = stepOn buf
 394                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 395
 396                 -- find comments.  This got harder in Haskell 98.
 397             '-'# ->  let trundle n =
 398                           let next = lookAhead# buf n in
 399                           if next `eqChar#` '-'# then trundle (n +# 1#)
 400                           else if is_symbol next || n <# 2#
 401                                 then is_a_token
 402                                 else tab y bol atbol
 403                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 404                     in trundle 1#
 405
 406                 -- comments and pragmas.  We deal with LINE pragmas here,
 407                 -- and throw out any unrecognised pragmas as comments.  Any
 408                 -- pragmas we know about are dealt with later (after any layout
 409                 -- processing if necessary).
 410
 411             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 412                 if lookAhead# buf 2# `eqChar#` '#'# then
 413                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 414                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 415                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 416                   let lexeme = mkFastString -- ToDo: too slow
 417                                   (map toUpper (lexemeToString buf2)) in
 418                   case lookupUFM pragmaKeywordsFM lexeme of
 419                         Just ITline_prag ->
 420                            line_prag skip_to_end buf2 s'
 421                         Just other -> is_a_token
 422                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 423                   }}
 424
 425                 else skip_to_end (stepOnBy# buf 2#) s'
 426                 where
 427                     skip_to_end = nested_comment (lexer cont)
 428
 429                 -- special GHC extension: we grok cpp-style #line pragmas
 430             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 431                 line_prag next_line (stepOn buf) s'
 432                 where
 433                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 434
 435                 -- tabs have been expanded beforehand
 436             c | is_space c -> tab y bol atbol (stepOn buf)
 437               | otherwise  -> is_a_token
 438
 439            where s' = s{loc = replaceSrcLine loc y,
 440                         bol = bol,
 441                        atbol = atbol}
 442
 443                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 444                             | otherwise    = lexToken cont glaexts buf s'
 445
 446 -- {-# LINE .. #-} pragmas.  yeuch.
 447 line_prag cont buf s@PState{loc=loc} =
 448   case expandWhile# is_space buf                of { buf1 ->
 449   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 450   -- subtract one: the line number refers to the *following* line.
 451   let real_line = line - 1 in
 452   case fromInteger real_line                    of { i@(I# l) ->
 453         -- ToDo, if no filename then we skip the newline.... d'oh
 454   case expandWhile# is_space buf2               of { buf3 ->
 455   case currentChar# buf3                        of
 456      '\"'#{-"-} ->
 457         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 458         let
 459             file = lexemeToFastString buf4
 460             new_buf = stepOn (stepOverLexeme buf4)
 461         in
 462         if nullFastString file
 463                 then cont new_buf s{loc = replaceSrcLine loc l}
 464                 else cont new_buf s{loc = mkSrcLoc file i}
 465         }
 466      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 467   }}}}
 468
 469 nested_comment :: P a -> P a
 470 nested_comment cont buf = loop buf
 471  where
 472    loop buf =
 473      case currentChar# buf of
 474         '\NUL'# | bufferExhausted (stepOn buf) ->
 475                 lexError "unterminated `{-'" buf
 476
 477         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 478                 cont (stepOnBy# buf 2#)
 479
 480         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 481               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 482
 483         '\n'# -> \ s@PState{loc=loc} ->
 484                  let buf' = stepOn buf in
 485                  nested_comment cont buf'
 486                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 487                           atbol = 1#}
 488
 489         _   -> nested_comment cont (stepOn buf)
 490
 491 -- When we are lexing the first token of a line, check whether we need to
 492 -- insert virtual semicolons or close braces due to layout.
 493
 494 lexBOL :: (Token -> P a) -> P a
 495 lexBOL cont buf s@(PState{
 496                     loc = loc,
 497                     glasgow_exts = glaexts,
 498                     bol = bol,
 499                     atbol = atbol,
 500                     context = ctx
 501                   }) =
 502         if need_close_curly then
 503                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 504                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 505         else if need_semi_colon then
 506                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 507                 cont ITsemi buf s{atbol = 0#}
 508         else
 509                 lexToken cont glaexts buf s{atbol = 0#}
 510   where
 511         col = currentIndex# buf -# bol
 512
 513         need_close_curly =
 514                 case ctx of
 515                         [] -> False
 516                         (i:_) -> case i of
 517                                     NoLayout -> False
 518                                     Layout n -> col <# n
 519         need_semi_colon =
 520                 case ctx of
 521                         [] -> False
 522                         (i:_) -> case i of
 523                                     NoLayout -> False
 524                                     Layout n -> col ==# n
 525
 526
 527 lexToken :: (Token -> P a) -> Int# -> P a
 528 lexToken cont glaexts buf =
 529  --trace "lexToken" $
 530   case currentChar# buf of
 531
 532     -- special symbols ----------------------------------------------------
 533     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 534                 -> cont IToubxparen (setCurrentPos# buf 2#)
 535          | otherwise
 536                 -> cont IToparen (incLexeme buf)
 537
 538     ')'# -> cont ITcparen    (incLexeme buf)
 539     '['# -> cont ITobrack    (incLexeme buf)
 540     ']'# -> cont ITcbrack    (incLexeme buf)
 541     ','# -> cont ITcomma     (incLexeme buf)
 542     ';'# -> cont ITsemi      (incLexeme buf)
 543
 544     '}'# -> \ s@PState{context = ctx} ->
 545             case ctx of
 546                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 547                 _        -> lexError "too many '}'s" buf s
 548
 549     '#'# -> case lookAhead# buf 1# of
 550                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 551                 '-'# -> case lookAhead# buf 2# of
 552                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 553                            _    -> lex_sym cont (incLexeme buf)
 554                 _    -> lex_sym cont (incLexeme buf)
 555
 556     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 557                 -> lex_cstring cont (setCurrentPos# buf 2#)
 558          | otherwise
 559                 -> cont ITbackquote (incLexeme buf)
 560
 561     '{'# ->     -- look for "{-##" special iface pragma
 562         case lookAhead# buf 1# of
 563            '-'# -> case lookAhead# buf 2# of
 564                     '#'# -> case lookAhead# buf 3# of
 565                                 '#'# ->
 566                                    let (lexeme, buf')
 567                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 568                                    cont (ITpragma lexeme) buf'
 569                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 570                     _    -> cont ITocurly (incLexeme buf)
 571            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 572
 573     -- strings/characters -------------------------------------------------
 574     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 575     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 576
 577     -- strictness and cpr pragmas and __scc treated specially.
 578     '_'# | flag glaexts ->
 579          case lookAhead# buf 1# of
 580            '_'# -> case lookAhead# buf 2# of
 581                     'S'# ->
 582                         lex_demand cont (stepOnUntil (not . isSpace)
 583                                         (stepOnBy# buf 3#)) -- past __S
 584                     'M'# ->
 585                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 586
 587                     's'# ->
 588                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 589                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 590                                Nothing   -> lex_id cont glaexts buf
 591                     _ -> lex_id cont glaexts buf
 592            _    -> lex_id cont glaexts buf
 593
 594         -- Hexadecimal and octal constants
 595     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 596                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 597          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 598                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 599         where ch   = lookAhead# buf 1#
 600               ch2  = lookAhead# buf 2#
 601               buf' = setCurrentPos# buf 2#
 602
 603     '\NUL'# ->
 604             if bufferExhausted (stepOn buf) then
 605                cont ITeof buf
 606             else
 607                trace "lexIface: misplaced NUL?" $
 608                cont (ITunknown "\NUL") (stepOn buf)
 609
 610     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 611             lex_ip cont (incLexeme buf)
 612     c | is_digit  c -> lex_num cont glaexts 0 buf
 613       | is_symbol c -> lex_sym cont buf
 614       | is_upper  c -> lex_con cont glaexts buf
 615       | is_ident  c -> lex_id  cont glaexts buf
 616       | otherwise   -> lexError "illegal character" buf
 617
 618 -- Int# is unlifted, and therefore faster than Bool for flags.
 619 {-# INLINE flag #-}
 620 flag :: Int# -> Bool
 621 flag 0# = False
 622 flag _  = True
 623
 624 -------------------------------------------------------------------------------
 625 -- Pragmas
 626
 627 lex_prag cont buf
 628   = case expandWhile# is_space buf of { buf1 ->
 629     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 630     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 631     case lookupUFM pragmaKeywordsFM lexeme of
 632         Just kw -> cont kw (mergeLexemes buf buf2)
 633         Nothing -> panic "lex_prag"
 634   }}
 635
 636 -------------------------------------------------------------------------------
 637 -- Strings & Chars
 638
 639 lex_string cont glaexts s buf
 640   = case currentChar# buf of
 641         '"'#{-"-} ->
 642            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 643            case currentChar# buf' of
 644                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 645                 _                   -> cont (ITstring s') buf'
 646
 647         -- ignore \& in a string, deal with string gaps
 648         '\\'# | next_ch `eqChar#` '&'#
 649                 -> lex_string cont glaexts s buf'
 650               | is_space next_ch
 651                 -> lex_stringgap cont glaexts s (incLexeme buf)
 652
 653             where next_ch = lookAhead# buf 1#
 654                   buf' = setCurrentPos# buf 2#
 655
 656         _ -> lex_char (lex_next_string cont s) glaexts buf
 657
 658 lex_stringgap cont glaexts s buf
 659   = let buf' = incLexeme buf in
 660     case currentChar# buf of
 661         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 662                   st{loc = incSrcLine loc}
 663         '\\'# -> lex_string cont glaexts s buf'
 664         c | is_space c -> lex_stringgap cont glaexts s buf'
 665         other -> charError buf'
 666
 667 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 668
 669 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 670 lex_char cont glaexts buf
 671   = case currentChar# buf of
 672         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 673         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 674         other -> charError buf
 675
 676 char_end cont glaexts c buf
 677   = case currentChar# buf of
 678         '\''# -> let buf' = incLexeme buf in
 679                  case currentChar# buf' of
 680                         '#'# | flag glaexts
 681                                 -> cont (ITprimchar c) (incLexeme buf')
 682                         _       -> cont (ITchar c) buf'
 683         _     -> charError buf
 684
 685 lex_escape cont buf
 686   = let buf' = incLexeme buf in
 687     case currentChar# buf of
 688         'a'#       -> cont '\a' buf'
 689         'b'#       -> cont '\b' buf'
 690         'f'#       -> cont '\f' buf'
 691         'n'#       -> cont '\n' buf'
 692         'r'#       -> cont '\r' buf'
 693         't'#       -> cont '\t' buf'
 694         'v'#       -> cont '\v' buf'
 695         '\\'#      -> cont '\\' buf'
 696         '"'#       -> cont '\"' buf'
 697         '\''#      -> cont '\'' buf'
 698         '^'#       -> let c = currentChar# buf' in
 699                       if c `geChar#` '@'# && c `leChar#` '_'#
 700                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 701                         else charError buf'
 702
 703         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 704         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 705         x | is_digit x
 706                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 707
 708         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 709                                        Just buf2 <- [prefixMatch buf p] ] of
 710                             (c,buf2):_ -> cont c buf2
 711                             [] -> charError buf'
 712
 713 after_charnum cont i buf
 714   = let int = fromInteger i in
 715     if i >= 0 && i <= 255
 716         then cont (chr int) buf
 717         else charError buf
 718
 719 readNum cont buf is_digit base conv = read buf 0
 720   where read buf i
 721           = case currentChar# buf of { c ->
 722             if is_digit c
 723                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 724                 else cont i buf
 725             }
 726
 727 is_hexdigit c
 728         =  is_digit c
 729         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 730         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 731
 732 hex c | is_digit c = ord# c -# ord# '0'#
 733       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 734 oct_or_dec c = ord# c -# ord# '0'#
 735
 736 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 737
 738 to_lower c
 739   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 740         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 741   | otherwise = c
 742
 743 charError buf = lexError "error in character literal" buf
 744
 745 silly_escape_chars = [
 746         ("NUL", '\NUL'),
 747         ("SOH", '\SOH'),
 748         ("STX", '\STX'),
 749         ("ETX", '\ETX'),
 750         ("EOT", '\EOT'),
 751         ("ENQ", '\ENQ'),
 752         ("ACK", '\ACK'),
 753         ("BEL", '\BEL'),
 754         ("BS", '\BS'),
 755         ("HT", '\HT'),
 756         ("LF", '\LF'),
 757         ("VT", '\VT'),
 758         ("FF", '\FF'),
 759         ("CR", '\CR'),
 760         ("SO", '\SO'),
 761         ("SI", '\SI'),
 762         ("DLE", '\DLE'),
 763         ("DC1", '\DC1'),
 764         ("DC2", '\DC2'),
 765         ("DC3", '\DC3'),
 766         ("DC4", '\DC4'),
 767         ("NAK", '\NAK'),
 768         ("SYN", '\SYN'),
 769         ("ETB", '\ETB'),
 770         ("CAN", '\CAN'),
 771         ("EM", '\EM'),
 772         ("SUB", '\SUB'),
 773         ("ESC", '\ESC'),
 774         ("FS", '\FS'),
 775         ("GS", '\GS'),
 776         ("RS", '\RS'),
 777         ("US", '\US'),
 778         ("SP", '\SP'),
 779         ("DEL", '\DEL')
 780         ]
 781
 782 -------------------------------------------------------------------------------
 783
 784 lex_demand cont buf =
 785  case read_em [] buf of { (ls,buf') ->
 786  case currentChar# buf' of
 787    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 788    _    -> cont (ITstrict (ls, False)) buf'
 789  }
 790  where
 791    -- code snatched from Demand.lhs
 792   read_em acc buf =
 793    case currentChar# buf of
 794     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 795     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 796     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 797     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 798     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 799     ')'# -> (reverse acc, stepOn buf)
 800     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 801     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 802     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 803     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 804     _    -> (reverse acc, buf)
 805
 806   do_unpack new_or_data wrapper_unpacks acc buf
 807    = case read_em [] buf of
 808       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 809
 810
 811 ------------------
 812 lex_scc cont buf =
 813  case currentChar# buf of
 814   'C'# -> cont ITsccAllCafs (incLexeme buf)
 815   other -> cont ITscc buf
 816
 817 -----------------------------------------------------------------------------
 818 -- Numbers
 819
 820 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 821 lex_num cont glaexts acc buf =
 822  case scanNumLit acc buf of
 823      (acc',buf') ->
 824        case currentChar# buf' of
 825          '.'# | is_digit (lookAhead# buf' 1#) ->
 826              -- this case is not optimised at all, as the
 827              -- presence of floating point numbers in interface
 828              -- files is not that common. (ToDo)
 829             case expandWhile# is_digit (incLexeme buf') of
 830               buf2 -> -- points to first non digit char
 831
 832                 let l = case currentChar# buf2 of
 833                           'E'# -> do_exponent
 834                           'e'# -> do_exponent
 835                           _ -> buf2
 836
 837                     do_exponent
 838                         = let buf3 = incLexeme buf2 in
 839                           case currentChar# buf3 of
 840                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 841                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 842                                 x | is_digit x -> expandWhile# is_digit buf3
 843                                 _ -> buf2
 844
 845                     v = readRational__ (lexemeToString l)
 846
 847                 in case currentChar# l of -- glasgow exts only
 848                       '#'# | flag glaexts -> let l' = incLexeme l in
 849                               case currentChar# l' of
 850                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 851                                 _    -> cont (ITprimfloat  v) l'
 852                       _ -> cont (ITrational v) l
 853
 854          _ -> after_lexnum cont glaexts acc' buf'
 855
 856 after_lexnum cont glaexts i buf
 857   = case currentChar# buf of
 858         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 859         _    -> cont (ITinteger i) buf
 860
 861 -----------------------------------------------------------------------------
 862 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 863
 864 -- we lexemeToFastString on the bit between the ``''s, but include the
 865 -- quotes in the full lexeme.
 866
 867 lex_cstring cont buf =
 868  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 869    Just buf' -> cont (ITlitlit (lexemeToFastString
 870                                 (setCurrentPos# buf' (negateInt# 2#))))
 871                    (mergeLexemes buf buf')
 872    Nothing   -> lexError "unterminated ``" buf
 873
 874 -----------------------------------------------------------------------------
 875 -- identifiers, symbols etc.
 876
 877 lex_ip cont buf =
 878  case expandWhile# is_ident buf of
 879    buf' -> cont (ITipvarid lexeme) buf'
 880            where lexeme = lexemeToFastString buf'
 881
 882 lex_id cont glaexts buf =
 883  let buf1 = expandWhile# is_ident buf in
 884  seq buf1 $
 885
 886  case (if flag glaexts
 887         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 888         else buf1)                              of { buf' ->
 889
 890  let lexeme  = lexemeToFastString buf' in
 891
 892  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 893         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 894                           cont kwd_token buf';
 895         Nothing        ->
 896
 897  let var_token = cont (ITvarid lexeme) buf' in
 898
 899  if not (flag glaexts)
 900    then var_token
 901    else
 902
 903  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 904         Just kwd_token -> cont kwd_token buf';
 905         Nothing        -> var_token
 906
 907  }}}
 908
 909 lex_sym cont buf =
 910  case expandWhile# is_symbol buf of
 911    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 912                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 913                                   cont kwd_token buf' ;
 914                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 915                                   cont (mk_var_token lexeme) buf'
 916            }
 917         where lexeme = lexemeToFastString buf'
 918
 919
 920 lex_con cont glaexts buf =
 921  case expandWhile# is_ident buf          of { buf1 ->
 922  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 923
 924  case currentChar# buf' of
 925      '.'# -> munch
 926      _    -> just_a_conid
 927
 928    where
 929     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 930                    cont (ITconid lexeme) buf'
 931     lexeme = lexemeToFastString buf'
 932     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 933  }}
 934
 935 lex_qid cont glaexts mod buf just_a_conid =
 936  case currentChar# buf of
 937   '['# ->       -- Special case for []
 938     case lookAhead# buf 1# of
 939      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 940      _    -> just_a_conid
 941
 942   '('# ->  -- Special case for (,,,)
 943            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 944     case lookAhead# buf 1# of
 945      '#'# | flag glaexts -> case lookAhead# buf 2# of
 946                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 947                                 just_a_conid
 948                 _    -> just_a_conid
 949      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 950      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 951      _    -> just_a_conid
 952
 953   '-'# -> case lookAhead# buf 1# of
 954             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 955             _    -> lex_id3 cont glaexts mod buf just_a_conid
 956   _    -> lex_id3 cont glaexts mod buf just_a_conid
 957
 958 lex_id3 cont glaexts mod buf just_a_conid
 959   | is_symbol (currentChar# buf) =
 960      let
 961         start_new_lexeme = stepOverLexeme buf
 962      in
 963      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 964      let
 965        lexeme  = lexemeToFastString buf'
 966         -- real lexeme is M.<sym>
 967        new_buf = mergeLexemes buf buf'
 968      in
 969      cont (mk_qvar_token mod lexeme) new_buf
 970         -- wrong, but arguably morally right: M... is now a qvarsym
 971      }
 972
 973   | otherwise   =
 974      let
 975         start_new_lexeme = stepOverLexeme buf
 976      in
 977      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 978      if emptyLexeme buf1
 979             then just_a_conid
 980             else
 981
 982      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 983
 984      let
 985       lexeme  = lexemeToFastString buf'
 986       new_buf = mergeLexemes buf buf'
 987       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
 988      in
 989      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 990             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 991             Nothing        -> is_a_qvarid
 992         -- TODO: special ids (as, qualified, hiding) shouldn't be
 993         -- recognised as keywords here.  ie.  M.as is a qualified varid.
 994      }}}
 995
 996
 997 slurp_trailing_hashes buf glaexts
 998   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
 999   | otherwise    = buf
1000
1001
1002 mk_var_token pk_str
1003   | is_upper f          = ITconid pk_str
1004   | is_ident f          = ITvarid pk_str
1005   | f `eqChar#` ':'#    = ITconsym pk_str
1006   | otherwise           = ITvarsym pk_str
1007   where
1008       (C# f) = _HEAD_ pk_str
1009
1010 mk_qvar_token m token =
1011  case mk_var_token token of
1012    ITconid n  -> ITqconid  (m,n)
1013    ITvarid n  -> ITqvarid  (m,n)
1014    ITconsym n -> ITqconsym (m,n)
1015    ITvarsym n -> ITqvarsym (m,n)
1016    _          -> ITunknown (show token)
1017 \end{code}
1018
1019 ----------------------------------------------------------------------------
1020 Horrible stuff for dealing with M.(,,,)
1021
1022 \begin{code}
1023 lex_tuple cont mod buf back_off =
1024   go 2 buf
1025   where
1026    go n buf =
1027     case currentChar# buf of
1028       ','# -> go (n+1) (stepOn buf)
1029       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1030       _    -> back_off
1031
1032 lex_ubx_tuple cont mod buf back_off =
1033   go 2 buf
1034   where
1035    go n buf =
1036     case currentChar# buf of
1037       ','# -> go (n+1) (stepOn buf)
1038       '#'# -> case lookAhead# buf 1# of
1039                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1040                                  (stepOnBy# buf 2#)
1041                 _    -> back_off
1042       _    -> back_off
1043 \end{code}
1044
1045 -----------------------------------------------------------------------------
1046 doDiscard rips along really fast, looking for a '#-}',
1047 indicating the end of the pragma we're skipping
1048
1049 \begin{code}
1050 doDiscard inStr buf =
1051  case currentChar# buf of
1052    '#'# | not inStr ->
1053        case lookAhead# buf 1# of { '#'# ->
1054        case lookAhead# buf 2# of { '-'# ->
1055        case lookAhead# buf 3# of { '}'# ->
1056            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1057         _    -> doDiscard inStr (incLexeme buf) };
1058         _    -> doDiscard inStr (incLexeme buf) };
1059         _    -> doDiscard inStr (incLexeme buf) }
1060    '"'# ->
1061        let
1062         odd_slashes buf flg i# =
1063           case lookAhead# buf i# of
1064            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1065            _     -> flg
1066        in
1067        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1068          '\\'# -> -- escaping something..
1069            if odd_slashes buf True (negateInt# 2#) then
1070                -- odd number of slashes, " is escaped.
1071               doDiscard inStr (incLexeme buf)
1072            else
1073                -- even number of slashes, \ is escaped.
1074               doDiscard (not inStr) (incLexeme buf)
1075          _ -> case inStr of -- forced to avoid build-up
1076                True  -> doDiscard False (incLexeme buf)
1077                False -> doDiscard True  (incLexeme buf)
1078    _ -> doDiscard inStr (incLexeme buf)
1079
1080 \end{code}
1081
1082 -----------------------------------------------------------------------------
1083
1084 \begin{code}
1085 data LayoutContext
1086   = NoLayout
1087   | Layout Int#
1088
1089 data ParseResult a
1090   = POk PState a
1091   | PFailed Message
1092
1093 data PState = PState {
1094         loc           :: SrcLoc,
1095         glasgow_exts  :: Int#,
1096         bol           :: Int#,
1097         atbol         :: Int#,
1098         context       :: [LayoutContext]
1099      }
1100
1101 type P a = StringBuffer         -- Input string
1102         -> PState
1103         -> ParseResult a
1104
1105 returnP   :: a -> P a
1106 returnP a buf s = POk s a
1107
1108 thenP      :: P a -> (a -> P b) -> P b
1109 m `thenP` k = \ buf s ->
1110         case m buf s of
1111                 POk s1 a -> k a buf s1
1112                 PFailed err  -> PFailed err
1113
1114 thenP_     :: P a -> P b -> P b
1115 m `thenP_` k = m `thenP` \_ -> k
1116
1117 mapP :: (a -> P b) -> [a] -> P [b]
1118 mapP f [] = returnP []
1119 mapP f (a:as) =
1120      f a `thenP` \b ->
1121      mapP f as `thenP` \bs ->
1122      returnP (b:bs)
1123
1124 failP :: String -> P a
1125 failP msg buf s = PFailed (text msg)
1126
1127 failMsgP :: Message -> P a
1128 failMsgP msg buf s = PFailed msg
1129
1130 lexError :: String -> P a
1131 lexError str buf s@PState{ loc = loc }
1132   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1133
1134 getSrcLocP :: P SrcLoc
1135 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1136
1137 getSrcFile :: P FAST_STRING
1138 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1139
1140 getContext :: P [LayoutContext]
1141 getContext buf s@(PState{ context = ctx }) = POk s ctx
1142
1143 pushContext :: LayoutContext -> P ()
1144 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1145
1146 {-
1147
1148 This special case in layoutOn is to handle layout contexts with are
1149 indented the same or less than the current context.  This is illegal
1150 according to the Haskell spec, so we have to arrange to close the
1151 current context.  eg.
1152
1153 class Foo a where
1154 class Bar a
1155
1156 after the first 'where', the sequence of events is:
1157
1158         - layout system inserts a ';' (column 0)
1159         - parser begins a new context at column 0
1160         - parser shifts ';' (legal empty declaration)
1161         - parser sees 'class': parse error (we're still in the inner context)
1162
1163 trouble is, by the time we know we need a new context, the lexer has
1164 already generated the ';'.  Hacky solution is as follows: since we
1165 know the column of the next token (it's the column number of the new
1166 context), we set the ACTUAL column number of the new context to this
1167 numer plus one.  Hence the next time the lexer is called, a '}' will
1168 be generated to close the new context straight away.  Furthermore, we
1169 have to set the atbol flag so that the ';' that the parser shifted as
1170 part of the new context is re-generated.
1171
1172 when the new context is *less* indented than the current one:
1173
1174 f = f where g = g where
1175 h = h
1176
1177         - current context: column 12.
1178         - on seeing 'h' (column 0), the layout system inserts '}'
1179         - parser starts a new context, column 0
1180         - parser sees '}', uses it to close new context
1181         - we still need to insert another '}' followed by a ';',
1182           hence the atbol trick.
1183
1184 There's also a special hack in here to deal with
1185
1186         do
1187            ....
1188            e $ do
1189            blah
1190
1191 i.e. the inner context is at the same indentation level as the outer
1192 context.  This is strictly illegal according to Haskell 98, but
1193 there's a lot of existing code using this style and it doesn't make
1194 any sense to disallow it, since empty 'do' lists don't make sense.
1195 -}
1196
1197 layoutOn :: Bool -> P ()
1198 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1199     let offset = lexemeIndex buf -# bol in
1200     case ctx of
1201         Layout prev_off : _
1202            | if strict then prev_off >=# offset else prev_off ># offset ->
1203                 --trace ("layout on, column: " ++  show (I# offset)) $
1204                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1205         other ->
1206                 --trace ("layout on, column: " ++  show (I# offset)) $
1207                 POk s{ context = Layout offset : ctx } ()
1208
1209 layoutOff :: P ()
1210 layoutOff buf s@(PState{ context = ctx }) =
1211     POk s{ context = NoLayout:ctx } ()
1212
1213 popContext :: P ()
1214 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1215   case ctx of
1216         (_:tl) -> POk s{ context = tl } ()
1217         []     -> PFailed (srcParseErr buf loc)
1218
1219 {-
1220  Note that if the name of the file we're processing ends
1221  with `hi-boot', we accept it on faith as having the right
1222  version. This is done so that .hi-boot files that comes
1223  with hsc don't have to be updated before every release,
1224  *and* it allows us to share .hi-boot files with versions
1225  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1226
1227  If the version number is 0, the checking is also turned off.
1228  (needed to deal with GHC.hi only!)
1229
1230  Once we can assume we're compiling with a version of ghc that
1231  supports interface file checking, we can drop the special
1232  pleading
1233 -}
1234 checkVersion :: Maybe Integer -> P ()
1235 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1236  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1237  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1238 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1239  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1240  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1241
1242 -----------------------------------------------------------------
1243
1244 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1245 ifaceParseErr s l
1246   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1247           text (lexemeToString s), char '\'']
1248
1249 ifaceVersionErr hi_vers l toks
1250   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1251           ptext SLIT("Expected"), int opt_HiVersion,
1252           ptext SLIT("found "), pp_version]
1253     where
1254      pp_version =
1255       case hi_vers of
1256         Nothing -> ptext SLIT("pre ghc-3.02 version")
1257         Just v  -> ptext SLIT("version") <+> integer v
1258
1259 -----------------------------------------------------------------------------
1260
1261 srcParseErr :: StringBuffer -> SrcLoc -> Message
1262 srcParseErr s l
1263   = hcat [ppr l,
1264           if null token
1265              then ptext SLIT(": parse error (possibly incorrect indentation)")
1266              else hcat [ptext SLIT(": parse error on input "),
1267                         char '`', text token, char '\'']
1268     ]
1269   where
1270         token = lexemeToString s
1271
1272 \end{code}