ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         Token(..), lexer, ParseResult(..), PState(..),
  27         checkVersion,
  28         StringBuffer,
  29
  30         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  31         getSrcLocP, getSrcFile,
  32         layoutOn, layoutOff, pushContext, popContext
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import Char             ( ord, isSpace, toUpper )
  38 import List             ( isSuffixOf )
  39
  40 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  41 import Name             ( isLowerISO, isUpperISO )
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  48                           replaceSrcLine, mkSrcLoc )
  49
  50 import Maybes           ( MaybeErr(..) )
  51 import ErrUtils         ( Message )
  52 import Outputable
  53
  54 import FastString
  55 import StringBuffer
  56 import GlaExts
  57 import ST               ( runST )
  58
  59 #if __GLASGOW_HASKELL__ >= 303
  60 import Bits
  61 import Word
  62 #endif
  63
  64 import Char             ( chr )
  65 import Addr
  66 import PrelRead         ( readRational__ ) -- Glasgow non-std
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Data types}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 The token data type, fairly un-interesting except from one
  76 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  77 strictness, unfolding etc).
  78
  79 The Idea/Observation here is that the renamer needs to scan through
  80 all of an interface file before it can continue. But only a fraction
  81 of the information contained in the file turns out to be useful, so
  82 delaying as much as possible of the scanning and parsing of an
  83 interface file Makes Sense (Heap profiles of the compiler
  84 show a reduction in heap usage by at least a factor of two,
  85 post-renamer).
  86
  87 Hence, the interface file lexer spots when value declarations are
  88 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  89 for the type and any other id info for that binding (unfolding, strictness
  90 etc). These constructors are applied to the result of lexing these sub-chunks.
  91
  92 The lexing of the type and id info is all done lazily, of course, so
  93 the scanning (and subsequent parsing) will be done *only* on the ids the
  94 renamer finds out that it is interested in. The rest will just be junked.
  95 Laziness, you know it makes sense :-)
  96
  97 \begin{code}
  98 data Token
  99   = ITas                        -- Haskell keywords
 100   | ITcase
 101   | ITclass
 102   | ITdata
 103   | ITdefault
 104   | ITderiving
 105   | ITdo
 106   | ITelse
 107   | IThiding
 108   | ITif
 109   | ITimport
 110   | ITin
 111   | ITinfix
 112   | ITinfixl
 113   | ITinfixr
 114   | ITinstance
 115   | ITlet
 116   | ITmodule
 117   | ITnewtype
 118   | ITof
 119   | ITqualified
 120   | ITthen
 121   | ITtype
 122   | ITwhere
 123   | ITscc
 124
 125   | ITforall                    -- GHC extension keywords
 126   | ITforeign
 127   | ITexport
 128   | ITlabel
 129   | ITdynamic
 130   | ITunsafe
 131   | ITwith
 132   | ITstdcallconv
 133   | ITccallconv
 134
 135   | ITinterface                 -- interface keywords
 136   | IT__export
 137   | ITdepends
 138   | IT__forall
 139   | ITletrec
 140   | ITcoerce
 141   | ITinlineMe
 142   | ITinlineCall
 143   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 144   | ITdefaultbranch
 145   | ITbottom
 146   | ITinteger_lit
 147   | ITfloat_lit
 148   | ITrational_lit
 149   | ITaddr_lit
 150   | ITlit_lit
 151   | ITstring_lit
 152   | ITtypeapp
 153   | ITusage
 154   | ITfuall
 155   | ITarity
 156   | ITspecialise
 157   | ITnocaf
 158   | ITunfold InlinePragInfo
 159   | ITstrict ([Demand], Bool)
 160   | ITrules
 161   | ITcprinfo (CprInfo)
 162   | IT__scc
 163   | ITsccAllCafs
 164
 165   | ITspecialise_prag           -- Pragmas
 166   | ITsource_prag
 167   | ITinline_prag
 168   | ITnoinline_prag
 169   | ITrules_prag
 170   | ITline_prag
 171   | ITclose_prag
 172
 173   | ITdotdot                    -- reserved symbols
 174   | ITdcolon
 175   | ITequal
 176   | ITlam
 177   | ITvbar
 178   | ITlarrow
 179   | ITrarrow
 180   | ITat
 181   | ITtilde
 182   | ITdarrow
 183   | ITminus
 184   | ITbang
 185   | ITdot
 186
 187   | ITbiglam                    -- GHC-extension symbols
 188
 189   | ITocurly                    -- special symbols
 190   | ITccurly
 191   | ITvccurly
 192   | ITobrack
 193   | ITcbrack
 194   | IToparen
 195   | ITcparen
 196   | IToubxparen
 197   | ITcubxparen
 198   | ITsemi
 199   | ITcomma
 200   | ITunderscore
 201   | ITbackquote
 202
 203   | ITvarid   FAST_STRING       -- identifiers
 204   | ITconid   FAST_STRING
 205   | ITvarsym  FAST_STRING
 206   | ITconsym  FAST_STRING
 207   | ITqvarid  (FAST_STRING,FAST_STRING)
 208   | ITqconid  (FAST_STRING,FAST_STRING)
 209   | ITqvarsym (FAST_STRING,FAST_STRING)
 210   | ITqconsym (FAST_STRING,FAST_STRING)
 211
 212   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 213
 214   | ITpragma StringBuffer
 215
 216   | ITchar       Char
 217   | ITstring     FAST_STRING
 218   | ITinteger    Integer
 219   | ITrational   Rational
 220
 221   | ITprimchar   Char
 222   | ITprimstring FAST_STRING
 223   | ITprimint    Integer
 224   | ITprimfloat  Rational
 225   | ITprimdouble Rational
 226   | ITlitlit     FAST_STRING
 227
 228   | ITunknown String            -- Used when the lexer can't make sense of it
 229   | ITeof                       -- end of file token
 230   deriving Text -- debugging
 231 \end{code}
 232
 233 -----------------------------------------------------------------------------
 234 Keyword Lists
 235
 236 \begin{code}
 237 pragmaKeywordsFM = listToUFM $
 238       map (\ (x,y) -> (_PK_ x,y))
 239        [( "SPECIALISE", ITspecialise_prag ),
 240         ( "SPECIALIZE", ITspecialise_prag ),
 241         ( "SOURCE",     ITsource_prag ),
 242         ( "INLINE",     ITinline_prag ),
 243         ( "NOINLINE",   ITnoinline_prag ),
 244         ( "NOTINLINE",  ITnoinline_prag ),
 245         ( "LINE",       ITline_prag ),
 246         ( "RULES",      ITrules_prag ),
 247         ( "RULEZ",      ITrules_prag )  -- american spelling :-)
 248         ]
 249
 250 haskellKeywordsFM = listToUFM $
 251       map (\ (x,y) -> (_PK_ x,y))
 252        [( "_",          ITunderscore ),
 253         ( "as",         ITas ),
 254         ( "case",       ITcase ),
 255         ( "class",      ITclass ),
 256         ( "data",       ITdata ),
 257         ( "default",    ITdefault ),
 258         ( "deriving",   ITderiving ),
 259         ( "do",         ITdo ),
 260         ( "else",       ITelse ),
 261         ( "hiding",     IThiding ),
 262         ( "if",         ITif ),
 263         ( "import",     ITimport ),
 264         ( "in",         ITin ),
 265         ( "infix",      ITinfix ),
 266         ( "infixl",     ITinfixl ),
 267         ( "infixr",     ITinfixr ),
 268         ( "instance",   ITinstance ),
 269         ( "let",        ITlet ),
 270         ( "module",     ITmodule ),
 271         ( "newtype",    ITnewtype ),
 272         ( "of",         ITof ),
 273         ( "qualified",  ITqualified ),
 274         ( "then",       ITthen ),
 275         ( "type",       ITtype ),
 276         ( "where",      ITwhere ),
 277         ( "_scc_",      ITscc )
 278      ]
 279
 280 ghcExtensionKeywordsFM = listToUFM $
 281         map (\ (x,y) -> (_PK_ x,y))
 282      [  ( "forall",     ITforall ),
 283         ( "foreign",    ITforeign ),
 284         ( "export",     ITexport ),
 285         ( "label",      ITlabel ),
 286         ( "dynamic",    ITdynamic ),
 287         ( "unsafe",     ITunsafe ),
 288         ( "with",       ITwith ),
 289         ( "stdcall",    ITstdcallconv),
 290         ( "ccall",      ITccallconv),
 291         ("_ccall_",     ITccall (False, False, False)),
 292         ("_ccall_GC_",  ITccall (False, False, True)),
 293         ("_casm_",      ITccall (False, True,  False)),
 294         ("_casm_GC_",   ITccall (False, True,  True)),
 295
 296         -- interface keywords
 297         ("__interface",         ITinterface),
 298         ("__export",            IT__export),
 299         ("__depends",           ITdepends),
 300         ("__forall",            IT__forall),
 301         ("__letrec",            ITletrec),
 302         ("__coerce",            ITcoerce),
 303         ("__inline_me",         ITinlineMe),
 304         ("__inline_call",       ITinlineCall),
 305         ("__depends",           ITdepends),
 306         ("__DEFAULT",           ITdefaultbranch),
 307         ("__bot",               ITbottom),
 308         ("__integer",           ITinteger_lit),
 309         ("__float",             ITfloat_lit),
 310         ("__rational",          ITrational_lit),
 311         ("__addr",              ITaddr_lit),
 312         ("__litlit",            ITlit_lit),
 313         ("__string",            ITstring_lit),
 314         ("__a",                 ITtypeapp),
 315         ("__u",                 ITusage),
 316         ("__fuall",             ITfuall),
 317         ("__A",                 ITarity),
 318         ("__P",                 ITspecialise),
 319         ("__C",                 ITnocaf),
 320         ("__R",                 ITrules),
 321         ("__U",                 ITunfold NoInlinePragInfo),
 322
 323         ("__ccall",             ITccall (False, False, False)),
 324         ("__ccall_GC",          ITccall (False, False, True)),
 325         ("__dyn_ccall",         ITccall (True,  False, False)),
 326         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 327         ("__casm",              ITccall (False, True,  False)),
 328         ("__dyn_casm",          ITccall (True,  True,  False)),
 329         ("__casm_GC",           ITccall (False, True,  True)),
 330         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 331
 332         ("/\\",                 ITbiglam)
 333      ]
 334
 335
 336 haskellKeySymsFM = listToUFM $
 337         map (\ (x,y) -> (_PK_ x,y))
 338       [ ("..",          ITdotdot)
 339        ,("::",          ITdcolon)
 340        ,("=",           ITequal)
 341        ,("\\",          ITlam)
 342        ,("|",           ITvbar)
 343        ,("<-",          ITlarrow)
 344        ,("->",          ITrarrow)
 345        ,("@",           ITat)
 346        ,("~",           ITtilde)
 347        ,("=>",          ITdarrow)
 348        ,("-",           ITminus)
 349        ,("!",           ITbang)
 350        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 351        ]
 352 \end{code}
 353
 354 -----------------------------------------------------------------------------
 355 The lexical analyser
 356
 357 Lexer state:
 358
 359         - (glaexts) lexing an interface file or -fglasgow-exts
 360         - (bol)   pointer to beginning of line (for column calculations)
 361         - (buf)   pointer to beginning of token
 362         - (buf)   pointer to current char
 363         - (atbol) flag indicating whether we're at the beginning of a line
 364
 365 \begin{code}
 366 lexer :: (Token -> P a) -> P a
 367 lexer cont buf s@(PState{
 368                     loc = loc,
 369                     glasgow_exts = glaexts,
 370                     bol = bol,
 371                     atbol = atbol,
 372                     context = ctx
 373                 })
 374
 375         -- first, start a new lexeme and lose all the whitespace
 376   =  _scc_ "Lexer"
 377   tab line bol atbol (stepOverLexeme buf)
 378   where
 379         line = srcLocLine loc
 380
 381         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 382           case currentChar# buf of
 383
 384             '\NUL'# ->
 385                    if bufferExhausted (stepOn buf)
 386                         then cont ITeof buf s'
 387                         else trace "lexer: misplaced NUL?" $
 388                              tab y bol atbol (stepOn buf)
 389
 390             '\n'# -> let buf' = stepOn buf
 391                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 392
 393                 -- find comments.  This got harder in Haskell 98.
 394             '-'# ->  let trundle n =
 395                           let next = lookAhead# buf n in
 396                           if next `eqChar#` '-'# then trundle (n +# 1#)
 397                           else if is_symbol next || n <# 2#
 398                                 then is_a_token
 399                                 else case untilChar# (stepOnBy# buf n) '\n'# of
 400                                     { buf' -> tab y bol atbol (stepOverLexeme buf')
 401                                     }
 402                     in trundle 1#
 403
 404                 -- comments and pragmas.  We deal with LINE pragmas here,
 405                 -- and throw out any unrecognised pragmas as comments.  Any
 406                 -- pragmas we know about are dealt with later (after any layout
 407                 -- processing if necessary).
 408
 409             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 410                 if lookAhead# buf 2# `eqChar#` '#'# then
 411                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 412                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 413                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 414                   let lexeme = mkFastString -- ToDo: too slow
 415                                   (map toUpper (lexemeToString buf2)) in
 416                   case lookupUFM pragmaKeywordsFM lexeme of
 417                         Just ITline_prag -> line_prag (lexer cont) buf2 s'
 418                         Just other -> is_a_token
 419                         Nothing -> skip_to_end (stepOnBy# buf 2#)
 420                   }}
 421
 422                 else skip_to_end (stepOnBy# buf 2#)
 423                 where
 424                     skip_to_end buf = nested_comment (lexer cont) buf s'
 425
 426                 -- tabs have been expanded beforehand
 427             c | is_space c -> tab y bol atbol (stepOn buf)
 428               | otherwise  -> is_a_token
 429
 430            where s' = s{loc = replaceSrcLine loc y,
 431                         bol = bol,
 432                        atbol = atbol}
 433
 434                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 435                             | otherwise    = lexToken cont glaexts buf s'
 436
 437 -- {-# LINE .. #-} pragmas.  yeuch.
 438 line_prag cont buf =
 439   case expandWhile# is_space buf                of { buf1 ->
 440   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 441   -- subtract one: the line number refers to the *following* line.
 442   let real_line = line - 1 in
 443   case fromInteger real_line                    of { i@(I# l) ->
 444   case expandWhile# is_space buf2               of { buf3 ->
 445   case currentChar# buf3                        of
 446      '\"'#{-"-} ->
 447         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 448         let file = lexemeToFastString buf4 in
 449         \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
 450         }
 451      other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
 452   }}}}
 453   where
 454         skipToEnd buf = nested_comment cont buf
 455
 456 nested_comment :: P a -> P a
 457 nested_comment cont buf = loop buf
 458  where
 459    loop buf =
 460      case currentChar# buf of
 461         '\NUL'# | bufferExhausted (stepOn buf) ->
 462                 lexError "unterminated `{-'" buf
 463
 464         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 465                 cont (stepOnBy# buf 2#)
 466
 467         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 468               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 469
 470         '\n'# -> \ s@PState{loc=loc} ->
 471                  let buf' = stepOn buf in
 472                  nested_comment cont buf'
 473                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 474                           atbol = 1#}
 475
 476         _   -> nested_comment cont (stepOn buf)
 477
 478 -- When we are lexing the first token of a line, check whether we need to
 479 -- insert virtual semicolons or close braces due to layout.
 480
 481 lexBOL :: (Token -> P a) -> P a
 482 lexBOL cont buf s@(PState{
 483                     loc = loc,
 484                     glasgow_exts = glaexts,
 485                     bol = bol,
 486                     atbol = atbol,
 487                     context = ctx
 488                   }) =
 489         if need_close_curly then
 490                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 491                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 492         else if need_semi_colon then
 493                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 494                 cont ITsemi buf s{atbol = 0#}
 495         else
 496                 lexToken cont glaexts buf s{atbol = 0#}
 497   where
 498         col = currentIndex# buf -# bol
 499
 500         need_close_curly =
 501                 case ctx of
 502                         [] -> False
 503                         (i:_) -> case i of
 504                                     NoLayout -> False
 505                                     Layout n -> col <# n
 506         need_semi_colon =
 507                 case ctx of
 508                         [] -> False
 509                         (i:_) -> case i of
 510                                     NoLayout -> False
 511                                     Layout n -> col ==# n
 512
 513
 514 lexToken :: (Token -> P a) -> Int# -> P a
 515 lexToken cont glaexts buf =
 516  --trace "lexToken" $
 517   case currentChar# buf of
 518
 519     -- special symbols ----------------------------------------------------
 520     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 521                 -> cont IToubxparen (setCurrentPos# buf 2#)
 522          | otherwise
 523                 -> cont IToparen (incLexeme buf)
 524
 525     ')'# -> cont ITcparen    (incLexeme buf)
 526     '['# -> cont ITobrack    (incLexeme buf)
 527     ']'# -> cont ITcbrack    (incLexeme buf)
 528     ','# -> cont ITcomma     (incLexeme buf)
 529     ';'# -> cont ITsemi      (incLexeme buf)
 530
 531     '}'# -> \ s@PState{context = ctx} ->
 532             case ctx of
 533                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 534                 _        -> lexError "too many '}'s" buf s
 535
 536     '#'# -> case lookAhead# buf 1# of
 537                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 538                 '-'# -> case lookAhead# buf 2# of
 539                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 540                            _    -> lex_sym cont (incLexeme buf)
 541                 _    -> lex_sym cont (incLexeme buf)
 542
 543     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 544                 -> lex_cstring cont (setCurrentPos# buf 2#)
 545          | otherwise
 546                 -> cont ITbackquote (incLexeme buf)
 547
 548     '{'# ->     -- look for "{-##" special iface pragma
 549         case lookAhead# buf 1# of
 550            '-'# -> case lookAhead# buf 2# of
 551                     '#'# -> case lookAhead# buf 3# of
 552                                 '#'# ->
 553                                    let (lexeme, buf')
 554                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 555                                    cont (ITpragma lexeme) buf'
 556                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 557                     _    -> cont ITocurly (incLexeme buf)
 558            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 559
 560     -- strings/characters -------------------------------------------------
 561     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 562     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 563
 564     -- strictness and cpr pragmas and __scc treated specially.
 565     '_'# | flag glaexts ->
 566          case lookAhead# buf 1# of
 567            '_'# -> case lookAhead# buf 2# of
 568                     'S'# ->
 569                         lex_demand cont (stepOnUntil (not . isSpace)
 570                                         (stepOnBy# buf 3#)) -- past __S
 571                     'M'# ->
 572                         lex_cpr cont (stepOnUntil (not . isSpace)
 573                                      (stepOnBy# buf 3#)) -- past __M
 574                     's'# ->
 575                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 576                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 577                                Nothing   -> lex_id cont glaexts buf
 578                     _ -> lex_id cont glaexts buf
 579            _    -> lex_id cont glaexts buf
 580
 581         -- Hexadecimal and octal constants
 582     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 583                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 584          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 585                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 586         where ch   = lookAhead# buf 1#
 587               ch2  = lookAhead# buf 2#
 588               buf' = setCurrentPos# buf 2#
 589
 590     '\NUL'# ->
 591             if bufferExhausted (stepOn buf) then
 592                cont ITeof buf
 593             else
 594                trace "lexIface: misplaced NUL?" $
 595                cont (ITunknown "\NUL") (stepOn buf)
 596
 597     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 598             lex_ip cont (stepOn buf)
 599     c | is_digit  c -> lex_num cont glaexts 0 buf
 600       | is_symbol c -> lex_sym cont buf
 601       | is_upper  c -> lex_con cont glaexts buf
 602       | is_ident  c -> lex_id  cont glaexts buf
 603       | otherwise   -> lexError "illegal character" buf
 604
 605 -- Int# is unlifted, and therefore faster than Bool for flags.
 606 {-# INLINE flag #-}
 607 flag :: Int# -> Bool
 608 flag 0# = False
 609 flag _  = True
 610
 611 -------------------------------------------------------------------------------
 612 -- Pragmas
 613
 614 lex_prag cont buf
 615   = case expandWhile# is_space buf of { buf1 ->
 616     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 617     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 618     case lookupUFM pragmaKeywordsFM lexeme of
 619         Just kw -> cont kw (mergeLexemes buf buf2)
 620         Nothing -> panic "lex_prag"
 621   }}
 622
 623 -------------------------------------------------------------------------------
 624 -- Strings & Chars
 625
 626 lex_string cont glaexts s buf
 627   = case currentChar# buf of
 628         '"'#{-"-} ->
 629            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 630            case currentChar# buf' of
 631                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 632                 _                   -> cont (ITstring s') buf'
 633
 634         -- ignore \& in a string, deal with string gaps
 635         '\\'# | next_ch `eqChar#` '&'#
 636                 -> lex_string cont glaexts s buf'
 637               | is_space next_ch
 638                 -> lex_stringgap cont glaexts s (incLexeme buf)
 639
 640             where next_ch = lookAhead# buf 1#
 641                   buf' = setCurrentPos# buf 2#
 642
 643         _ -> lex_char (lex_next_string cont s) glaexts buf
 644
 645 lex_stringgap cont glaexts s buf
 646   = let buf' = incLexeme buf in
 647     case currentChar# buf of
 648         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 649                   st{loc = incSrcLine loc}
 650         '\\'# -> lex_string cont glaexts s buf'
 651         c | is_space c -> lex_stringgap cont glaexts s buf'
 652         other -> charError buf'
 653
 654 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 655
 656 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 657 lex_char cont glaexts buf
 658   = case currentChar# buf of
 659         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 660         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 661         other -> charError buf
 662
 663 char_end cont glaexts c buf
 664   = case currentChar# buf of
 665         '\''# -> let buf' = incLexeme buf in
 666                  case currentChar# buf' of
 667                         '#'# | flag glaexts
 668                                 -> cont (ITprimchar c) (incLexeme buf')
 669                         _       -> cont (ITchar c) buf'
 670         _     -> charError buf
 671
 672 lex_escape cont buf
 673   = let buf' = incLexeme buf in
 674     case currentChar# buf of
 675         'a'#       -> cont '\a' buf'
 676         'b'#       -> cont '\b' buf'
 677         'f'#       -> cont '\f' buf'
 678         'n'#       -> cont '\n' buf'
 679         'r'#       -> cont '\r' buf'
 680         't'#       -> cont '\t' buf'
 681         'v'#       -> cont '\v' buf'
 682         '\\'#      -> cont '\\' buf'
 683         '"'#       -> cont '\"' buf'
 684         '\''#      -> cont '\'' buf'
 685         '^'#       -> let c = currentChar# buf' in
 686                       if c `geChar#` '@'# && c `leChar#` '_'#
 687                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 688                         else charError buf'
 689
 690         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 691         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 692         x | is_digit x
 693                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 694
 695         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 696                                        Just buf2 <- [prefixMatch buf p] ] of
 697                             (c,buf2):_ -> cont c buf2
 698                             [] -> charError buf'
 699
 700 after_charnum cont i buf
 701   = let int = fromInteger i in
 702     if i >= 0 && i <= 255
 703         then cont (chr int) buf
 704         else charError buf
 705
 706 readNum cont buf is_digit base conv = read buf 0
 707   where read buf i
 708           = case currentChar# buf of { c ->
 709             if is_digit c
 710                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 711                 else cont i buf
 712             }
 713
 714 is_hexdigit c
 715         =  is_digit c
 716         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 717         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 718
 719 hex c | is_digit c = ord# c -# ord# '0'#
 720       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 721 oct_or_dec c = ord# c -# ord# '0'#
 722
 723 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 724
 725 to_lower c
 726   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 727         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 728   | otherwise = c
 729
 730 charError buf = lexError "error in character literal" buf
 731
 732 silly_escape_chars = [
 733         ("NUL", '\NUL'),
 734         ("SOH", '\SOH'),
 735         ("STX", '\STX'),
 736         ("ETX", '\ETX'),
 737         ("EOT", '\EOT'),
 738         ("ENQ", '\ENQ'),
 739         ("ACK", '\ACK'),
 740         ("BEL", '\BEL'),
 741         ("BS", '\BS'),
 742         ("HT", '\HT'),
 743         ("LF", '\LF'),
 744         ("VT", '\VT'),
 745         ("FF", '\FF'),
 746         ("CR", '\CR'),
 747         ("SO", '\SO'),
 748         ("SI", '\SI'),
 749         ("DLE", '\DLE'),
 750         ("DC1", '\DC1'),
 751         ("DC2", '\DC2'),
 752         ("DC3", '\DC3'),
 753         ("DC4", '\DC4'),
 754         ("NAK", '\NAK'),
 755         ("SYN", '\SYN'),
 756         ("ETB", '\ETB'),
 757         ("CAN", '\CAN'),
 758         ("EM", '\EM'),
 759         ("SUB", '\SUB'),
 760         ("ESC", '\ESC'),
 761         ("FS", '\FS'),
 762         ("GS", '\GS'),
 763         ("RS", '\RS'),
 764         ("US", '\US'),
 765         ("SP", '\SP'),
 766         ("DEL", '\DEL')
 767         ]
 768
 769 -------------------------------------------------------------------------------
 770
 771 lex_demand cont buf =
 772  case read_em [] buf of { (ls,buf') ->
 773  case currentChar# buf' of
 774    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 775    _    -> cont (ITstrict (ls, False)) buf'
 776  }
 777  where
 778    -- code snatched from Demand.lhs
 779   read_em acc buf =
 780    case currentChar# buf of
 781     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 782     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 783     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 784     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 785     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 786     ')'# -> (reverse acc, stepOn buf)
 787     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 788     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 789     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 790     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 791     _    -> (reverse acc, buf)
 792
 793   do_unpack new_or_data wrapper_unpacks acc buf
 794    = case read_em [] buf of
 795       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 796
 797 lex_cpr cont buf =
 798  case read_em [] buf of { (cpr_inf,buf') ->
 799    ASSERT ( null (tail cpr_inf) )
 800    cont (ITcprinfo $ head cpr_inf) buf'
 801  }
 802  where
 803    -- code snatched from lex_demand above
 804   read_em acc buf =
 805    case currentChar# buf of
 806     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 807     '('# -> do_unpack acc (stepOn buf)
 808     ')'# -> (reverse acc, stepOn buf)
 809     _    -> (reverse acc, buf)
 810
 811   do_unpack acc buf
 812    = case read_em [] buf of
 813       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 814
 815 ------------------
 816 lex_scc cont buf =
 817  case currentChar# buf of
 818   'C'# -> cont ITsccAllCafs (incLexeme buf)
 819   other -> cont ITscc buf
 820
 821 -----------------------------------------------------------------------------
 822 -- Numbers
 823
 824 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 825 lex_num cont glaexts acc buf =
 826  case scanNumLit acc buf of
 827      (acc',buf') ->
 828        case currentChar# buf' of
 829          '.'# | is_digit (lookAhead# buf' 1#) ->
 830              -- this case is not optimised at all, as the
 831              -- presence of floating point numbers in interface
 832              -- files is not that common. (ToDo)
 833             case expandWhile# is_digit (incLexeme buf') of
 834               buf2 -> -- points to first non digit char
 835
 836                 let l = case currentChar# buf2 of
 837                           'E'# -> do_exponent
 838                           'e'# -> do_exponent
 839                           _ -> buf2
 840
 841                     do_exponent
 842                         = let buf3 = incLexeme buf2 in
 843                           case currentChar# buf3 of
 844                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 845                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 846                                 x | is_digit x -> expandWhile# is_digit buf3
 847                                 _ -> buf2
 848
 849                     v = readRational__ (lexemeToString l)
 850
 851                 in case currentChar# l of -- glasgow exts only
 852                       '#'# | flag glaexts -> let l' = incLexeme l in
 853                               case currentChar# l' of
 854                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 855                                 _    -> cont (ITprimfloat  v) l'
 856                       _ -> cont (ITrational v) l
 857
 858          _ -> after_lexnum cont glaexts acc' buf'
 859
 860 after_lexnum cont glaexts i buf
 861   = case currentChar# buf of
 862         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 863         _    -> cont (ITinteger i) buf
 864
 865 -----------------------------------------------------------------------------
 866 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 867
 868 -- we lexemeToFastString on the bit between the ``''s, but include the
 869 -- quotes in the full lexeme.
 870
 871 lex_cstring cont buf =
 872  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 873    Just buf' -> cont (ITlitlit (lexemeToFastString
 874                                 (setCurrentPos# buf' (negateInt# 2#))))
 875                    (mergeLexemes buf buf')
 876    Nothing   -> lexError "unterminated ``" buf
 877
 878 ------------------------------------------------------------------------------
 879 -- Character Classes
 880
 881 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 882
 883 {-# INLINE is_ctype #-}
 884 #if __GLASGOW_HASKELL__ >= 303
 885 is_ctype :: Word8 -> Char# -> Bool
 886 is_ctype mask = \c ->
 887    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 888 #else
 889 is_ctype :: Int -> Char# -> Bool
 890 is_ctype (I# mask) = \c ->
 891     let (A# ctype) = ``char_types'' :: Addr
 892         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 893     in
 894         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 895 #endif
 896
 897 is_ident  = is_ctype 1
 898 is_symbol = is_ctype 2
 899 is_any    = is_ctype 4
 900 is_space  = is_ctype 8
 901 is_lower  = is_ctype 16
 902 is_upper  = is_ctype 32
 903 is_digit  = is_ctype 64
 904
 905 -----------------------------------------------------------------------------
 906 -- identifiers, symbols etc.
 907
 908 lex_ip cont buf =
 909  case expandWhile# is_ident buf of
 910    buf' -> cont (ITipvarid lexeme) buf'
 911            where lexeme = lexemeToFastString buf'
 912
 913 lex_id cont glaexts buf =
 914  case expandWhile# is_ident buf of { buf1 ->
 915
 916  case (if flag glaexts
 917         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 918         else buf1)                              of { buf' ->
 919
 920  let lexeme  = lexemeToFastString buf' in
 921
 922  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 923         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 924                           cont kwd_token buf';
 925         Nothing        ->
 926
 927  let var_token = cont (mk_var_token lexeme) buf' in
 928
 929  if not (flag glaexts)
 930    then var_token
 931    else
 932
 933  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 934         Just kwd_token -> cont kwd_token buf';
 935         Nothing        -> var_token
 936
 937  }}}}
 938
 939 lex_sym cont buf =
 940  case expandWhile# is_symbol buf of
 941    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 942                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 943                                   cont kwd_token buf' ;
 944                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 945                                   cont (mk_var_token lexeme) buf'
 946            }
 947         where lexeme = lexemeToFastString buf'
 948
 949
 950 lex_con cont glaexts buf =
 951  case expandWhile# is_ident buf          of { buf1 ->
 952  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 953
 954  case currentChar# buf' of
 955      '.'# -> munch
 956      _    -> just_a_conid
 957
 958    where
 959     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 960                    cont (ITconid lexeme) buf'
 961     lexeme = lexemeToFastString buf'
 962     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 963  }}
 964
 965 lex_qid cont glaexts mod buf just_a_conid =
 966  case currentChar# buf of
 967   '['# ->       -- Special case for []
 968     case lookAhead# buf 1# of
 969      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 970      _    -> just_a_conid
 971
 972   '('# ->  -- Special case for (,,,)
 973            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 974     case lookAhead# buf 1# of
 975      '#'# | flag glaexts -> case lookAhead# buf 2# of
 976                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 977                                 just_a_conid
 978                 _    -> just_a_conid
 979      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 980      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 981      _    -> just_a_conid
 982
 983   '-'# -> case lookAhead# buf 1# of
 984             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 985             _    -> lex_id3 cont glaexts mod buf just_a_conid
 986   _    -> lex_id3 cont glaexts mod buf just_a_conid
 987
 988 lex_id3 cont glaexts mod buf just_a_conid
 989   | is_symbol (currentChar# buf) =
 990      let
 991         start_new_lexeme = stepOverLexeme buf
 992      in
 993      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 994      let
 995        lexeme  = lexemeToFastString buf'
 996         -- real lexeme is M.<sym>
 997        new_buf = mergeLexemes buf buf'
 998      in
 999      cont (mk_qvar_token mod lexeme) new_buf
1000         -- wrong, but arguably morally right: M... is now a qvarsym
1001      }
1002
1003   | otherwise   =
1004      let
1005         start_new_lexeme = stepOverLexeme buf
1006      in
1007      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1008      if emptyLexeme buf1
1009             then just_a_conid
1010             else
1011
1012      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1013
1014      let
1015       lexeme  = lexemeToFastString buf'
1016       new_buf = mergeLexemes buf buf'
1017       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1018      in
1019      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1020             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1021             Nothing        -> is_a_qvarid
1022         -- TODO: special ids (as, qualified, hiding) shouldn't be
1023         -- recognised as keywords here.  ie.  M.as is a qualified varid.
1024      }}}
1025
1026
1027 slurp_trailing_hashes buf glaexts
1028   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1029   | otherwise    = buf
1030
1031
1032 mk_var_token pk_str
1033   | is_upper f          = ITconid pk_str
1034         -- _[A-Z] is treated as a constructor in interface files.
1035   | f `eqChar#` '_'# && not (_NULL_ tl)
1036         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
1037   | is_ident f          = ITvarid pk_str
1038   | f `eqChar#` ':'#    = ITconsym pk_str
1039   | otherwise           = ITvarsym pk_str
1040   where
1041       (C# f) = _HEAD_ pk_str
1042       tl     = _TAIL_ pk_str
1043
1044 mk_qvar_token m token =
1045  case mk_var_token token of
1046    ITconid n  -> ITqconid  (m,n)
1047    ITvarid n  -> ITqvarid  (m,n)
1048    ITconsym n -> ITqconsym (m,n)
1049    ITvarsym n -> ITqvarsym (m,n)
1050    _          -> ITunknown (show token)
1051 \end{code}
1052
1053 ----------------------------------------------------------------------------
1054 Horrible stuff for dealing with M.(,,,)
1055
1056 \begin{code}
1057 lex_tuple cont mod buf back_off =
1058   go 2 buf
1059   where
1060    go n buf =
1061     case currentChar# buf of
1062       ','# -> go (n+1) (stepOn buf)
1063       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
1064       _    -> back_off
1065
1066 lex_ubx_tuple cont mod buf back_off =
1067   go 2 buf
1068   where
1069    go n buf =
1070     case currentChar# buf of
1071       ','# -> go (n+1) (stepOn buf)
1072       '#'# -> case lookAhead# buf 1# of
1073                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
1074                                  (stepOnBy# buf 2#)
1075                 _    -> back_off
1076       _    -> back_off
1077 \end{code}
1078
1079 -----------------------------------------------------------------------------
1080 doDiscard rips along really fast, looking for a '#-}',
1081 indicating the end of the pragma we're skipping
1082
1083 \begin{code}
1084 doDiscard inStr buf =
1085  case currentChar# buf of
1086    '#'# | not inStr ->
1087        case lookAhead# buf 1# of { '#'# ->
1088        case lookAhead# buf 2# of { '-'# ->
1089        case lookAhead# buf 3# of { '}'# ->
1090            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1091         _    -> doDiscard inStr (incLexeme buf) };
1092         _    -> doDiscard inStr (incLexeme buf) };
1093         _    -> doDiscard inStr (incLexeme buf) }
1094    '"'# ->
1095        let
1096         odd_slashes buf flg i# =
1097           case lookAhead# buf i# of
1098            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1099            _     -> flg
1100        in
1101        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1102          '\\'# -> -- escaping something..
1103            if odd_slashes buf True (negateInt# 2#) then
1104                -- odd number of slashes, " is escaped.
1105               doDiscard inStr (incLexeme buf)
1106            else
1107                -- even number of slashes, \ is escaped.
1108               doDiscard (not inStr) (incLexeme buf)
1109          _ -> case inStr of -- forced to avoid build-up
1110                True  -> doDiscard False (incLexeme buf)
1111                False -> doDiscard True  (incLexeme buf)
1112    _ -> doDiscard inStr (incLexeme buf)
1113
1114 \end{code}
1115
1116 -----------------------------------------------------------------------------
1117
1118 \begin{code}
1119 data LayoutContext
1120   = NoLayout
1121   | Layout Int#
1122
1123 data ParseResult a
1124   = POk PState a
1125   | PFailed Message
1126
1127 data PState = PState {
1128         loc           :: SrcLoc,
1129         glasgow_exts  :: Int#,
1130         bol           :: Int#,
1131         atbol         :: Int#,
1132         context       :: [LayoutContext]
1133      }
1134
1135 type P a = StringBuffer         -- Input string
1136         -> PState
1137         -> ParseResult a
1138
1139 returnP   :: a -> P a
1140 returnP a buf s = POk s a
1141
1142 thenP      :: P a -> (a -> P b) -> P b
1143 m `thenP` k = \ buf s ->
1144         case m buf s of
1145                 POk s1 a -> k a buf s1
1146                 PFailed err  -> PFailed err
1147
1148 thenP_     :: P a -> P b -> P b
1149 m `thenP_` k = m `thenP` \_ -> k
1150
1151 mapP :: (a -> P b) -> [a] -> P [b]
1152 mapP f [] = returnP []
1153 mapP f (a:as) =
1154      f a `thenP` \b ->
1155      mapP f as `thenP` \bs ->
1156      returnP (b:bs)
1157
1158 failP :: String -> P a
1159 failP msg buf s = PFailed (text msg)
1160
1161 failMsgP :: Message -> P a
1162 failMsgP msg buf s = PFailed msg
1163
1164 lexError :: String -> P a
1165 lexError str buf s@PState{ loc = loc }
1166   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1167
1168 getSrcLocP :: P SrcLoc
1169 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1170
1171 getSrcFile :: P FAST_STRING
1172 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1173
1174 getContext :: P [LayoutContext]
1175 getContext buf s@(PState{ context = ctx }) = POk s ctx
1176
1177 pushContext :: LayoutContext -> P ()
1178 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1179
1180 {-
1181
1182 This special case in layoutOn is to handle layout contexts with are
1183 indented the same or less than the current context.  This is illegal
1184 according to the Haskell spec, so we have to arrange to close the
1185 current context.  eg.
1186
1187 class Foo a where
1188 class Bar a
1189
1190 after the first 'where', the sequence of events is:
1191
1192         - layout system inserts a ';' (column 0)
1193         - parser begins a new context at column 0
1194         - parser shifts ';' (legal empty declaration)
1195         - parser sees 'class': parse error (we're still in the inner context)
1196
1197 trouble is, by the time we know we need a new context, the lexer has
1198 already generated the ';'.  Hacky solution is as follows: since we
1199 know the column of the next token (it's the column number of the new
1200 context), we set the ACTUAL column number of the new context to this
1201 numer plus one.  Hence the next time the lexer is called, a '}' will
1202 be generated to close the new context straight away.  Furthermore, we
1203 have to set the atbol flag so that the ';' that the parser shifted as
1204 part of the new context is re-generated.
1205
1206 when the new context is *less* indented than the current one:
1207
1208 f = f where g = g where
1209 h = h
1210
1211         - current context: column 12.
1212         - on seeing 'h' (column 0), the layout system inserts '}'
1213         - parser starts a new context, column 0
1214         - parser sees '}', uses it to close new context
1215         - we still need to insert another '}' followed by a ';',
1216           hence the atbol trick.
1217
1218 There's also a special hack in here to deal with
1219
1220         do
1221            ....
1222            e $ do
1223            blah
1224
1225 i.e. the inner context is at the same indentation level as the outer
1226 context.  This is strictly illegal according to Haskell 98, but
1227 there's a lot of existing code using this style and it doesn't make
1228 any sense to disallow it, since empty 'do' lists don't make sense.
1229 -}
1230
1231 layoutOn :: Bool -> P ()
1232 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1233     let offset = lexemeIndex buf -# bol in
1234     case ctx of
1235         Layout prev_off : _
1236            | if strict then prev_off >=# offset else prev_off ># offset ->
1237                 --trace ("layout on, column: " ++  show (I# offset)) $
1238                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1239         other ->
1240                 --trace ("layout on, column: " ++  show (I# offset)) $
1241                 POk s{ context = Layout offset : ctx } ()
1242
1243 layoutOff :: P ()
1244 layoutOff buf s@(PState{ context = ctx }) =
1245     POk s{ context = NoLayout:ctx } ()
1246
1247 popContext :: P ()
1248 popContext = \ buf s@(PState{ context = ctx }) ->
1249   case ctx of
1250         (_:tl) -> POk s{ context = tl } ()
1251         []    -> panic "Lex.popContext: empty context"
1252
1253 {-
1254  Note that if the name of the file we're processing ends
1255  with `hi-boot', we accept it on faith as having the right
1256  version. This is done so that .hi-boot files that comes
1257  with hsc don't have to be updated before every release,
1258  *and* it allows us to share .hi-boot files with versions
1259  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1260
1261  If the version number is 0, the checking is also turned off.
1262  (needed to deal with GHC.hi only!)
1263
1264  Once we can assume we're compiling with a version of ghc that
1265  supports interface file checking, we can drop the special
1266  pleading
1267 -}
1268 checkVersion :: Maybe Integer -> P ()
1269 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1270  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1271  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1272 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1273  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1274  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1275
1276 -----------------------------------------------------------------
1277
1278 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1279 ifaceParseErr s l
1280   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1281           text (lexemeToString s), char '\'']
1282
1283 ifaceVersionErr hi_vers l toks
1284   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1285           ptext SLIT("Expected"), int opt_HiVersion,
1286           ptext SLIT("found "), pp_version]
1287     where
1288      pp_version =
1289       case hi_vers of
1290         Nothing -> ptext SLIT("pre ghc-3.02 version")
1291         Just v  -> ptext SLIT("version") <+> integer v
1292
1293 \end{code}