ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( ord )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | ITexpr
 127   | IT__export
 128   | ITdepends
 129   | IT__forall
 130   | ITletrec
 131   | ITcoerce
 132   | ITinlineMe
 133   | ITinlineCall
 134   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 135   | ITdefaultbranch
 136   | ITbottom
 137   | ITinteger_lit
 138   | ITfloat_lit
 139   | ITword_lit
 140   | ITword64_lit
 141   | ITint64_lit
 142   | ITrational_lit
 143   | ITaddr_lit
 144   | ITlabel_lit
 145   | ITlit_lit
 146   | ITstring_lit
 147   | ITtypeapp
 148   | ITusage
 149   | ITfuall
 150   | ITarity
 151   | ITspecialise
 152   | ITnocaf
 153   | ITunfold InlinePragInfo
 154   | ITstrict ([Demand], Bool)
 155   | ITrules
 156   | ITcprinfo
 157   | ITdeprecated
 158   | IT__scc
 159   | ITsccAllCafs
 160
 161   | ITspecialise_prag           -- Pragmas
 162   | ITsource_prag
 163   | ITinline_prag
 164   | ITnoinline_prag
 165   | ITrules_prag
 166   | ITdeprecated_prag
 167   | ITline_prag
 168   | ITclose_prag
 169
 170   | ITdotdot                    -- reserved symbols
 171   | ITdcolon
 172   | ITequal
 173   | ITlam
 174   | ITvbar
 175   | ITlarrow
 176   | ITrarrow
 177   | ITat
 178   | ITtilde
 179   | ITdarrow
 180   | ITminus
 181   | ITbang
 182   | ITdot
 183
 184   | ITbiglam                    -- GHC-extension symbols
 185
 186   | ITocurly                    -- special symbols
 187   | ITccurly
 188   | ITocurlybar                 -- {|, for type applications
 189   | ITccurlybar                 -- |}, for type applications
 190   | ITvccurly
 191   | ITobrack
 192   | ITcbrack
 193   | IToparen
 194   | ITcparen
 195   | IToubxparen
 196   | ITcubxparen
 197   | ITsemi
 198   | ITcomma
 199   | ITunderscore
 200   | ITbackquote
 201
 202   | ITvarid   FAST_STRING       -- identifiers
 203   | ITconid   FAST_STRING
 204   | ITvarsym  FAST_STRING
 205   | ITconsym  FAST_STRING
 206   | ITqvarid  (FAST_STRING,FAST_STRING)
 207   | ITqconid  (FAST_STRING,FAST_STRING)
 208   | ITqvarsym (FAST_STRING,FAST_STRING)
 209   | ITqconsym (FAST_STRING,FAST_STRING)
 210
 211   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 212
 213   | ITpragma StringBuffer
 214
 215   | ITchar       Int
 216   | ITstring     FAST_STRING
 217   | ITinteger    Integer
 218   | ITrational   Rational
 219
 220   | ITprimchar   Int
 221   | ITprimstring FAST_STRING
 222   | ITprimint    Integer
 223   | ITprimfloat  Rational
 224   | ITprimdouble Rational
 225   | ITlitlit     FAST_STRING
 226
 227   | ITunknown String            -- Used when the lexer can't make sense of it
 228   | ITeof                       -- end of file token
 229   deriving Show -- debugging
 230 \end{code}
 231
 232 -----------------------------------------------------------------------------
 233 Keyword Lists
 234
 235 \begin{code}
 236 pragmaKeywordsFM = listToUFM $
 237       map (\ (x,y) -> (_PK_ x,y))
 238        [( "SPECIALISE", ITspecialise_prag ),
 239         ( "SPECIALIZE", ITspecialise_prag ),
 240         ( "SOURCE",     ITsource_prag ),
 241         ( "INLINE",     ITinline_prag ),
 242         ( "NOINLINE",   ITnoinline_prag ),
 243         ( "NOTINLINE",  ITnoinline_prag ),
 244         ( "LINE",       ITline_prag ),
 245         ( "RULES",      ITrules_prag ),
 246         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 247         ( "DEPRECATED", ITdeprecated_prag )
 248         ]
 249
 250 haskellKeywordsFM = listToUFM $
 251       map (\ (x,y) -> (_PK_ x,y))
 252        [( "_",          ITunderscore ),
 253         ( "as",         ITas ),
 254         ( "case",       ITcase ),
 255         ( "class",      ITclass ),
 256         ( "data",       ITdata ),
 257         ( "default",    ITdefault ),
 258         ( "deriving",   ITderiving ),
 259         ( "do",         ITdo ),
 260         ( "else",       ITelse ),
 261         ( "hiding",     IThiding ),
 262         ( "if",         ITif ),
 263         ( "import",     ITimport ),
 264         ( "in",         ITin ),
 265         ( "infix",      ITinfix ),
 266         ( "infixl",     ITinfixl ),
 267         ( "infixr",     ITinfixr ),
 268         ( "instance",   ITinstance ),
 269         ( "let",        ITlet ),
 270         ( "module",     ITmodule ),
 271         ( "newtype",    ITnewtype ),
 272         ( "of",         ITof ),
 273         ( "qualified",  ITqualified ),
 274         ( "then",       ITthen ),
 275         ( "type",       ITtype ),
 276         ( "where",      ITwhere ),
 277         ( "_scc_",      ITscc )
 278      ]
 279
 280 isSpecial :: Token -> Bool
 281 -- If we see M.x, where x is a keyword, but
 282 -- is special, we treat is as just plain M.x,
 283 -- not as a keyword.
 284 isSpecial ITas          = True
 285 isSpecial IThiding      = True
 286 isSpecial ITqualified   = True
 287 isSpecial ITforall      = True
 288 isSpecial ITexport      = True
 289 isSpecial ITlabel       = True
 290 isSpecial ITdynamic     = True
 291 isSpecial ITunsafe      = True
 292 isSpecial ITwith        = True
 293 isSpecial ITccallconv   = True
 294 isSpecial ITstdcallconv = True
 295 isSpecial _             = False
 296
 297 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 298 ghcExtensionKeywordsFM = listToUFM $
 299         map (\ (x,y) -> (_PK_ x,y))
 300      [  ( "forall",     ITforall ),
 301         ( "foreign",    ITforeign ),
 302         ( "export",     ITexport ),
 303         ( "label",      ITlabel ),
 304         ( "dynamic",    ITdynamic ),
 305         ( "unsafe",     ITunsafe ),
 306         ( "with",       ITwith ),
 307         ( "stdcall",    ITstdcallconv),
 308         ( "ccall",      ITccallconv),
 309         ("_ccall_",     ITccall (False, False, False)),
 310         ("_ccall_GC_",  ITccall (False, False, True)),
 311         ("_casm_",      ITccall (False, True,  False)),
 312         ("_casm_GC_",   ITccall (False, True,  True)),
 313
 314         -- interface keywords
 315         ("__interface",         ITinterface),
 316         ("__expr",              ITexpr),
 317         ("__export",            IT__export),
 318         ("__depends",           ITdepends),
 319         ("__forall",            IT__forall),
 320         ("__letrec",            ITletrec),
 321         ("__coerce",            ITcoerce),
 322         ("__inline_me",         ITinlineMe),
 323         ("__inline_call",       ITinlineCall),
 324         ("__depends",           ITdepends),
 325         ("__DEFAULT",           ITdefaultbranch),
 326         ("__bot",               ITbottom),
 327         ("__integer",           ITinteger_lit),
 328         ("__float",             ITfloat_lit),
 329         ("__int64",             ITint64_lit),
 330         ("__word",              ITword_lit),
 331         ("__word64",            ITword64_lit),
 332         ("__rational",          ITrational_lit),
 333         ("__addr",              ITaddr_lit),
 334         ("__label",             ITlabel_lit),
 335         ("__litlit",            ITlit_lit),
 336         ("__string",            ITstring_lit),
 337         ("__a",                 ITtypeapp),
 338         ("__u",                 ITusage),
 339         ("__fuall",             ITfuall),
 340         ("__A",                 ITarity),
 341         ("__P",                 ITspecialise),
 342         ("__C",                 ITnocaf),
 343         ("__R",                 ITrules),
 344         ("__D",                 ITdeprecated),
 345         ("__U",                 ITunfold NoInlinePragInfo),
 346
 347         ("__ccall",             ITccall (False, False, False)),
 348         ("__ccall_GC",          ITccall (False, False, True)),
 349         ("__dyn_ccall",         ITccall (True,  False, False)),
 350         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 351         ("__casm",              ITccall (False, True,  False)),
 352         ("__dyn_casm",          ITccall (True,  True,  False)),
 353         ("__casm_GC",           ITccall (False, True,  True)),
 354         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 355
 356         ("/\\",                 ITbiglam)
 357      ]
 358
 359
 360 haskellKeySymsFM = listToUFM $
 361         map (\ (x,y) -> (_PK_ x,y))
 362       [ ("..",          ITdotdot)
 363        ,("::",          ITdcolon)
 364        ,("=",           ITequal)
 365        ,("\\",          ITlam)
 366        ,("|",           ITvbar)
 367        ,("<-",          ITlarrow)
 368        ,("->",          ITrarrow)
 369        ,("@",           ITat)
 370        ,("~",           ITtilde)
 371        ,("=>",          ITdarrow)
 372        ,("-",           ITminus)
 373        ,("!",           ITbang)
 374        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 375        ]
 376 \end{code}
 377
 378 -----------------------------------------------------------------------------
 379 The lexical analyser
 380
 381 Lexer state:
 382
 383         - (glaexts) lexing an interface file or -fglasgow-exts
 384         - (bol)   pointer to beginning of line (for column calculations)
 385         - (buf)   pointer to beginning of token
 386         - (buf)   pointer to current char
 387         - (atbol) flag indicating whether we're at the beginning of a line
 388
 389 \begin{code}
 390 lexer :: (Token -> P a) -> P a
 391 lexer cont buf s@(PState{
 392                     loc = loc,
 393                     glasgow_exts = glaexts,
 394                     bol = bol,
 395                     atbol = atbol,
 396                     context = ctx
 397                 })
 398
 399         -- first, start a new lexeme and lose all the whitespace
 400   =  _scc_ "Lexer"
 401   tab line bol atbol (stepOverLexeme buf)
 402   where
 403         line = srcLocLine loc
 404
 405         tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 406           case currentChar# buf of
 407
 408             '\NUL'# ->
 409                    if bufferExhausted (stepOn buf)
 410                         then cont ITeof buf s'
 411                         else trace "lexer: misplaced NUL?" $
 412                              tab y bol atbol (stepOn buf)
 413
 414             '\n'# -> let buf' = stepOn buf
 415                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 416
 417                 -- find comments.  This got harder in Haskell 98.
 418             '-'# ->  let trundle n =
 419                           let next = lookAhead# buf n in
 420                           if next `eqChar#` '-'# then trundle (n +# 1#)
 421                           else if is_symbol next || n <# 2#
 422                                 then is_a_token
 423                                 else tab y bol atbol
 424                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 425                     in trundle 1#
 426
 427                 -- comments and pragmas.  We deal with LINE pragmas here,
 428                 -- and throw out any unrecognised pragmas as comments.  Any
 429                 -- pragmas we know about are dealt with later (after any layout
 430                 -- processing if necessary).
 431             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 432                 if lookAhead# buf 2# `eqChar#` '#'# then
 433                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 434                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 435                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 436                   let lexeme = mkFastString -- ToDo: too slow
 437                                   (map toUpper (lexemeToString buf2)) in
 438                   case lookupUFM pragmaKeywordsFM lexeme of
 439                         Just ITline_prag ->
 440                            line_prag skip_to_end buf2 s'
 441                         Just other -> is_a_token
 442                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 443                   }}
 444
 445                 else skip_to_end (stepOnBy# buf 2#) s'
 446                 where
 447                     skip_to_end = nested_comment (lexer cont)
 448
 449                 -- special GHC extension: we grok cpp-style #line pragmas
 450             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 451                 case expandWhile# is_space (stepOn buf) of { buf1 ->
 452                 if is_digit (currentChar# buf1)
 453                         then line_prag next_line buf1 s'
 454                         else is_a_token
 455                 }
 456                 where
 457                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 458
 459                 -- tabs have been expanded beforehand
 460             c | is_space c -> tab y bol atbol (stepOn buf)
 461               | otherwise  -> is_a_token
 462
 463            where s' = s{loc = replaceSrcLine loc y,
 464                         bol = bol,
 465                        atbol = atbol}
 466
 467                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 468                             | otherwise    = lexToken cont glaexts buf s'
 469
 470 -- {-# LINE .. #-} pragmas.  yeuch.
 471 line_prag cont buf s@PState{loc=loc} =
 472   case expandWhile# is_space buf                of { buf1 ->
 473   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 474   -- subtract one: the line number refers to the *following* line.
 475   let real_line = line - 1 in
 476   case fromInteger real_line                    of { i@(I# l) ->
 477         -- ToDo, if no filename then we skip the newline.... d'oh
 478   case expandWhile# is_space buf2               of { buf3 ->
 479   case currentChar# buf3                        of
 480      '\"'#{-"-} ->
 481         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 482         let
 483             file = lexemeToFastString buf4
 484             new_buf = stepOn (stepOverLexeme buf4)
 485         in
 486         if nullFastString file
 487                 then cont new_buf s{loc = replaceSrcLine loc l}
 488                 else cont new_buf s{loc = mkSrcLoc file i}
 489         }
 490      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 491   }}}}
 492
 493 nested_comment :: P a -> P a
 494 nested_comment cont buf = loop buf
 495  where
 496    loop buf =
 497      case currentChar# buf of
 498         '\NUL'# | bufferExhausted (stepOn buf) ->
 499                 lexError "unterminated `{-'" buf -- -}
 500         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 501                 cont (stepOnBy# buf 2#)
 502
 503         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 504               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 505
 506         '\n'# -> \ s@PState{loc=loc} ->
 507                  let buf' = stepOn buf in
 508                  nested_comment cont buf'
 509                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 510                           atbol = 1#}
 511
 512         _   -> nested_comment cont (stepOn buf)
 513
 514 -- When we are lexing the first token of a line, check whether we need to
 515 -- insert virtual semicolons or close braces due to layout.
 516
 517 lexBOL :: (Token -> P a) -> P a
 518 lexBOL cont buf s@(PState{
 519                     loc = loc,
 520                     glasgow_exts = glaexts,
 521                     bol = bol,
 522                     atbol = atbol,
 523                     context = ctx
 524                   }) =
 525         if need_close_curly then
 526                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 527                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 528         else if need_semi_colon then
 529                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 530                 cont ITsemi buf s{atbol = 0#}
 531         else
 532                 lexToken cont glaexts buf s{atbol = 0#}
 533   where
 534         col = currentIndex# buf -# bol
 535
 536         need_close_curly =
 537                 case ctx of
 538                         [] -> False
 539                         (i:_) -> case i of
 540                                     NoLayout -> False
 541                                     Layout n -> col <# n
 542         need_semi_colon =
 543                 case ctx of
 544                         [] -> False
 545                         (i:_) -> case i of
 546                                     NoLayout -> False
 547                                     Layout n -> col ==# n
 548
 549
 550 lexToken :: (Token -> P a) -> Int# -> P a
 551 lexToken cont glaexts buf =
 552  -- trace "lexToken" $
 553   case currentChar# buf of
 554
 555     -- special symbols ----------------------------------------------------
 556     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 557                 -> cont IToubxparen (setCurrentPos# buf 2#)
 558          | otherwise
 559                 -> cont IToparen (incLexeme buf)
 560
 561     ')'# -> cont ITcparen    (incLexeme buf)
 562     '['# -> cont ITobrack    (incLexeme buf)
 563     ']'# -> cont ITcbrack    (incLexeme buf)
 564     ','# -> cont ITcomma     (incLexeme buf)
 565     ';'# -> cont ITsemi      (incLexeme buf)
 566     '}'# -> \ s@PState{context = ctx} ->
 567             case ctx of
 568                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 569                 _        -> lexError "too many '}'s" buf s
 570     '|'# -> case lookAhead# buf 1# of
 571                  '}'#  | flag glaexts -> cont ITccurlybar
 572                                               (setCurrentPos# buf 2#)
 573                  _                    -> lex_sym cont (incLexeme buf)
 574
 575
 576     '#'# -> case lookAhead# buf 1# of
 577                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 578                 '-'# -> case lookAhead# buf 2# of
 579                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 580                            _    -> lex_sym cont (incLexeme buf)
 581                 _    -> lex_sym cont (incLexeme buf)
 582
 583     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 584                 -> lex_cstring cont (setCurrentPos# buf 2#)
 585          | otherwise
 586                 -> cont ITbackquote (incLexeme buf)
 587
 588     '{'# ->     -- look for "{-##" special iface pragma
 589             case lookAhead# buf 1# of
 590            '|'# | flag glaexts
 591                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 592            '-'# -> case lookAhead# buf 2# of
 593                     '#'# -> case lookAhead# buf 3# of
 594                                 '#'# ->
 595                                    let (lexeme, buf')
 596                                           = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in
 597                                             cont (ITpragma lexeme) buf'
 598                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 599                     _    -> cont ITocurly (incLexeme buf)
 600            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 601
 602     -- strings/characters -------------------------------------------------
 603     '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
 604     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 605
 606     -- strictness and cpr pragmas and __scc treated specially.
 607     '_'# | flag glaexts ->
 608          case lookAhead# buf 1# of
 609            '_'# -> case lookAhead# buf 2# of
 610                     'S'# ->
 611                         lex_demand cont (stepOnUntil (not . isSpace)
 612                                         (stepOnBy# buf 3#)) -- past __S
 613                     'M'# ->
 614                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 615
 616                     's'# ->
 617                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 618                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 619                                Nothing   -> lex_id cont glaexts buf
 620                     _ -> lex_id cont glaexts buf
 621            _    -> lex_id cont glaexts buf
 622
 623         -- Hexadecimal and octal constants
 624     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 625                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 626          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 627                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 628         where ch   = lookAhead# buf 1#
 629               ch2  = lookAhead# buf 2#
 630               buf' = setCurrentPos# buf 2#
 631
 632     '\NUL'# ->
 633             if bufferExhausted (stepOn buf) then
 634                cont ITeof buf
 635             else
 636                trace "lexIface: misplaced NUL?" $
 637                cont (ITunknown "\NUL") (stepOn buf)
 638
 639     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 640             lex_ip cont (incLexeme buf)
 641     c | is_digit  c -> lex_num cont glaexts 0 buf
 642       | is_symbol c -> lex_sym cont buf
 643       | is_upper  c -> lex_con cont glaexts buf
 644       | is_ident  c -> lex_id  cont glaexts buf
 645       | otherwise   -> lexError "illegal character" buf
 646
 647 -- Int# is unlifted, and therefore faster than Bool for flags.
 648 {-# INLINE flag #-}
 649 flag :: Int# -> Bool
 650 flag 0# = False
 651 flag _  = True
 652
 653 -------------------------------------------------------------------------------
 654 -- Pragmas
 655
 656 lex_prag cont buf
 657   = case expandWhile# is_space buf of { buf1 ->
 658     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 659     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 660     case lookupUFM pragmaKeywordsFM lexeme of
 661         Just kw -> cont kw (mergeLexemes buf buf2)
 662         Nothing -> panic "lex_prag"
 663   }}
 664
 665 -------------------------------------------------------------------------------
 666 -- Strings & Chars
 667
 668 lex_string cont glaexts s buf
 669   = case currentChar# buf of
 670         '"'#{-"-} ->
 671            let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in
 672            case currentChar# buf' of
 673                 '#'# | flag glaexts -> if all (<= 0xFF) s
 674                     then cont (ITprimstring s') (incLexeme buf')
 675                     else lexError "primitive string literal must contain only characters <= '\xFF'" buf'
 676                 _                   -> cont (ITstring s') buf'
 677
 678         -- ignore \& in a string, deal with string gaps
 679         '\\'# | next_ch `eqChar#` '&'#
 680                 -> lex_string cont glaexts s buf'
 681               | is_space next_ch
 682                 -> lex_stringgap cont glaexts s (incLexeme buf)
 683
 684             where next_ch = lookAhead# buf 1#
 685                   buf' = setCurrentPos# buf 2#
 686
 687         _ -> lex_char (lex_next_string cont s) glaexts buf
 688
 689 lex_stringgap cont glaexts s buf
 690   = let buf' = incLexeme buf in
 691     case currentChar# buf of
 692         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 693                   st{loc = incSrcLine loc}
 694         '\\'# -> lex_string cont glaexts s buf'
 695         c | is_space c -> lex_stringgap cont glaexts s buf'
 696         other -> charError buf'
 697
 698 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 699
 700 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 701 lex_char cont glaexts buf
 702   = case currentChar# buf of
 703         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 704         c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
 705         other -> charError buf
 706
 707 char_end cont glaexts c buf
 708   = case currentChar# buf of
 709         '\''# -> let buf' = incLexeme buf in
 710                  case currentChar# buf' of
 711                         '#'# | flag glaexts
 712                                 -> cont (ITprimchar c) (incLexeme buf')
 713                         _       -> cont (ITchar c) buf'
 714         _     -> charError buf
 715
 716 lex_escape cont buf
 717   = let buf' = incLexeme buf in
 718     case currentChar# buf of
 719         'a'#       -> cont (ord '\a') buf'
 720         'b'#       -> cont (ord '\b') buf'
 721         'f'#       -> cont (ord '\f') buf'
 722         'n'#       -> cont (ord '\n') buf'
 723         'r'#       -> cont (ord '\r') buf'
 724         't'#       -> cont (ord '\t') buf'
 725         'v'#       -> cont (ord '\v') buf'
 726         '\\'#      -> cont (ord '\\') buf'
 727         '"'#       -> cont (ord '\"') buf'
 728         '\''#      -> cont (ord '\'') buf'
 729         '^'#       -> let c = currentChar# buf' in
 730                       if c `geChar#` '@'# && c `leChar#` '_'#
 731                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 732                         else charError buf'
 733
 734         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 735         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 736         x | is_digit x
 737                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 738
 739         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 740                                        Just buf2 <- [prefixMatch buf p] ] of
 741                             (c,buf2):_ -> cont (ord c) buf2
 742                             [] -> charError buf'
 743
 744 after_charnum cont i buf
 745   = if i >= 0 && i <= 0x7FFFFFFF
 746         then cont (fromInteger i) buf
 747         else charError buf
 748
 749 readNum cont buf is_digit base conv = read buf 0
 750   where read buf i
 751           = case currentChar# buf of { c ->
 752             if is_digit c
 753                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 754                 else cont i buf
 755             }
 756
 757 is_hexdigit c
 758         =  is_digit c
 759         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 760         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 761
 762 hex c | is_digit c = ord# c -# ord# '0'#
 763       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 764 oct_or_dec c = ord# c -# ord# '0'#
 765
 766 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 767
 768 to_lower c
 769   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 770         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 771   | otherwise = c
 772
 773 charError buf = lexError "error in character literal" buf
 774
 775 silly_escape_chars = [
 776         ("NUL", '\NUL'),
 777         ("SOH", '\SOH'),
 778         ("STX", '\STX'),
 779         ("ETX", '\ETX'),
 780         ("EOT", '\EOT'),
 781         ("ENQ", '\ENQ'),
 782         ("ACK", '\ACK'),
 783         ("BEL", '\BEL'),
 784         ("BS", '\BS'),
 785         ("HT", '\HT'),
 786         ("LF", '\LF'),
 787         ("VT", '\VT'),
 788         ("FF", '\FF'),
 789         ("CR", '\CR'),
 790         ("SO", '\SO'),
 791         ("SI", '\SI'),
 792         ("DLE", '\DLE'),
 793         ("DC1", '\DC1'),
 794         ("DC2", '\DC2'),
 795         ("DC3", '\DC3'),
 796         ("DC4", '\DC4'),
 797         ("NAK", '\NAK'),
 798         ("SYN", '\SYN'),
 799         ("ETB", '\ETB'),
 800         ("CAN", '\CAN'),
 801         ("EM", '\EM'),
 802         ("SUB", '\SUB'),
 803         ("ESC", '\ESC'),
 804         ("FS", '\FS'),
 805         ("GS", '\GS'),
 806         ("RS", '\RS'),
 807         ("US", '\US'),
 808         ("SP", '\SP'),
 809         ("DEL", '\DEL')
 810         ]
 811
 812 -------------------------------------------------------------------------------
 813
 814 lex_demand cont buf =
 815  case read_em [] buf of { (ls,buf') ->
 816  case currentChar# buf' of
 817    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 818    _    -> cont (ITstrict (ls, False)) buf'
 819  }
 820  where
 821    -- code snatched from Demand.lhs
 822   read_em acc buf =
 823    case currentChar# buf of
 824     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 825     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 826     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 827     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 828     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 829     ')'# -> (reverse acc, stepOn buf)
 830     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 831     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 832     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 833     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 834     _    -> (reverse acc, buf)
 835
 836   do_unpack new_or_data wrapper_unpacks acc buf
 837    = case read_em [] buf of
 838       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 839
 840
 841 ------------------
 842 lex_scc cont buf =
 843  case currentChar# buf of
 844   'C'# -> cont ITsccAllCafs (incLexeme buf)
 845   other -> cont ITscc buf
 846
 847 -----------------------------------------------------------------------------
 848 -- Numbers
 849
 850 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 851 lex_num cont glaexts acc buf =
 852  case scanNumLit acc buf of
 853      (acc',buf') ->
 854        case currentChar# buf' of
 855          '.'# | is_digit (lookAhead# buf' 1#) ->
 856              -- this case is not optimised at all, as the
 857              -- presence of floating point numbers in interface
 858              -- files is not that common. (ToDo)
 859             case expandWhile# is_digit (incLexeme buf') of
 860               buf2 -> -- points to first non digit char
 861
 862                 let l = case currentChar# buf2 of
 863                           'E'# -> do_exponent
 864                           'e'# -> do_exponent
 865                           _ -> buf2
 866
 867                     do_exponent
 868                         = let buf3 = incLexeme buf2 in
 869                           case currentChar# buf3 of
 870                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 871                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 872                                 x | is_digit x -> expandWhile# is_digit buf3
 873                                 _ -> buf2
 874
 875                     v = readRational__ (lexemeToString l)
 876
 877                 in case currentChar# l of -- glasgow exts only
 878                       '#'# | flag glaexts -> let l' = incLexeme l in
 879                               case currentChar# l' of
 880                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 881                                 _    -> cont (ITprimfloat  v) l'
 882                       _ -> cont (ITrational v) l
 883
 884          _ -> after_lexnum cont glaexts acc' buf'
 885
 886 after_lexnum cont glaexts i buf
 887   = case currentChar# buf of
 888         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 889         _    -> cont (ITinteger i) buf
 890
 891 -----------------------------------------------------------------------------
 892 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 893
 894 -- we lexemeToFastString on the bit between the ``''s, but include the
 895 -- quotes in the full lexeme.
 896
 897 lex_cstring cont buf =
 898  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 899    Just buf' -> cont (ITlitlit (lexemeToFastString
 900                                 (setCurrentPos# buf' (negateInt# 2#))))
 901                    (mergeLexemes buf buf')
 902    Nothing   -> lexError "unterminated ``" buf
 903
 904 -----------------------------------------------------------------------------
 905 -- identifiers, symbols etc.
 906
 907 lex_ip cont buf =
 908  case expandWhile# is_ident buf of
 909    buf' -> cont (ITipvarid lexeme) buf'
 910            where lexeme = lexemeToFastString buf'
 911
 912 lex_id cont glaexts buf =
 913  let buf1 = expandWhile# is_ident buf in
 914  seq buf1 $
 915
 916  case (if flag glaexts
 917         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 918         else buf1)                              of { buf' ->
 919
 920  let lexeme  = lexemeToFastString buf' in
 921
 922  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 923         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 924                           cont kwd_token buf';
 925         Nothing        ->
 926
 927  let var_token = cont (ITvarid lexeme) buf' in
 928
 929  if not (flag glaexts)
 930    then var_token
 931    else
 932
 933  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 934         Just kwd_token -> cont kwd_token buf';
 935         Nothing        -> var_token
 936
 937  }}}
 938
 939 lex_sym cont buf =
 940  -- trace "lex_sym" $
 941  case expandWhile# is_symbol buf of
 942    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 943                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 944                                   cont kwd_token buf' ;
 945                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 946                                   cont (mk_var_token lexeme) buf'
 947            }
 948         where lexeme = lexemeToFastString buf'
 949
 950
 951 lex_con cont glaexts buf =
 952  -- trace ("con: "{-++unpackFS lexeme-}) $
 953  case expandWhile# is_ident buf          of { buf1 ->
 954  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 955
 956  case currentChar# buf' of
 957      '.'# -> munch
 958      _    -> just_a_conid
 959
 960    where
 961     just_a_conid = cont (ITconid lexeme) buf'
 962     lexeme = lexemeToFastString buf'
 963     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 964  }}
 965
 966 lex_qid cont glaexts mod buf just_a_conid =
 967  -- trace ("quid: "{-++unpackFS lexeme-}) $
 968  case currentChar# buf of
 969   '['# ->       -- Special case for []
 970     case lookAhead# buf 1# of
 971      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 972      _    -> just_a_conid
 973
 974   '('# ->  -- Special case for (,,,)
 975            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 976     case lookAhead# buf 1# of
 977      '#'# | flag glaexts -> case lookAhead# buf 2# of
 978                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 979                                 just_a_conid
 980                 _    -> just_a_conid
 981      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 982      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 983      _    -> just_a_conid
 984
 985   '-'# -> case lookAhead# buf 1# of
 986             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 987             _    -> lex_id3 cont glaexts mod buf just_a_conid
 988   _    -> lex_id3 cont glaexts mod buf just_a_conid
 989
 990 lex_id3 cont glaexts mod buf just_a_conid
 991   | is_symbol (currentChar# buf) =
 992      let
 993         start_new_lexeme = stepOverLexeme buf
 994      in
 995      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 996      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 997      let
 998        lexeme  = lexemeToFastString buf'
 999         -- real lexeme is M.<sym>
1000        new_buf = mergeLexemes buf buf'
1001      in
1002      cont (mk_qvar_token mod lexeme) new_buf
1003         -- wrong, but arguably morally right: M... is now a qvarsym
1004      }
1005
1006   | otherwise   =
1007      let
1008         start_new_lexeme = stepOverLexeme buf
1009      in
1010      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1011      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1012      if emptyLexeme buf1
1013             then just_a_conid
1014             else
1015
1016      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1017
1018      let
1019       lexeme      = lexemeToFastString buf'
1020       new_buf     = mergeLexemes buf buf'
1021       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1022      in
1023      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1024             Nothing          -> is_a_qvarid ;
1025
1026             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1027                            -> is_a_qvarid          --  recognised as keywords here.
1028                            | otherwise
1029                            -> just_a_conid         -- avoid M.where etc.
1030      }}}
1031
1032 slurp_trailing_hashes buf glaexts
1033   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1034   | otherwise    = buf
1035
1036
1037 mk_var_token pk_str
1038   | is_upper f          = ITconid pk_str
1039   | is_ident f          = ITvarid pk_str
1040   | f `eqChar#` ':'#    = ITconsym pk_str
1041   | otherwise           = ITvarsym pk_str
1042   where
1043       (C# f) = _HEAD_ pk_str
1044       -- tl     = _TAIL_ pk_str
1045
1046 mk_qvar_token m token =
1047 -- trace ("mk_qvar ") $
1048  case mk_var_token token of
1049    ITconid n  -> ITqconid  (m,n)
1050    ITvarid n  -> ITqvarid  (m,n)
1051    ITconsym n -> ITqconsym (m,n)
1052    ITvarsym n -> ITqvarsym (m,n)
1053    _          -> ITunknown (show token)
1054 \end{code}
1055
1056 ----------------------------------------------------------------------------
1057 Horrible stuff for dealing with M.(,,,)
1058
1059 \begin{code}
1060 lex_tuple cont mod buf back_off =
1061   go 2 buf
1062   where
1063    go n buf =
1064     case currentChar# buf of
1065       ','# -> go (n+1) (stepOn buf)
1066       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1067       _    -> back_off
1068
1069 lex_ubx_tuple cont mod buf back_off =
1070   go 2 buf
1071   where
1072    go n buf =
1073     case currentChar# buf of
1074       ','# -> go (n+1) (stepOn buf)
1075       '#'# -> case lookAhead# buf 1# of
1076                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1077                                  (stepOnBy# buf 2#)
1078                 _    -> back_off
1079       _    -> back_off
1080 \end{code}
1081
1082 -----------------------------------------------------------------------------
1083 doDiscard rips along really fast, looking for a '##-}',
1084 indicating the end of the pragma we're skipping
1085
1086 \begin{code}
1087 doDiscard inStr buf =
1088  case currentChar# buf of
1089    '#'# | inStr ==# 0# ->
1090        case lookAhead# buf 1# of { '#'# ->
1091        case lookAhead# buf 2# of { '-'# ->
1092        case lookAhead# buf 3# of { '}'# ->
1093            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1094         _    -> doDiscard inStr (incLexeme buf) };
1095         _    -> doDiscard inStr (incLexeme buf) };
1096         _    -> doDiscard inStr (incLexeme buf) }
1097
1098    '"'# ->
1099        let
1100         odd_slashes buf flg i# =
1101           case lookAhead# buf i# of
1102            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1103            _     -> flg
1104
1105         not_inStr = if inStr ==# 0# then 1# else 0#
1106        in
1107        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1108          '\\'# -> -- escaping something..
1109            if odd_slashes buf True (negateInt# 2#)
1110                 then  -- odd number of slashes, " is escaped.
1111                       doDiscard inStr (incLexeme buf)
1112                 else  -- even number of slashes, \ is escaped.
1113                       doDiscard not_inStr (incLexeme buf)
1114          _ -> doDiscard not_inStr (incLexeme buf)
1115
1116    '\''# | inStr ==# 0# ->
1117         case lookAhead# buf 1# of { '"'# ->
1118         case lookAhead# buf 2# of { '\''# ->
1119            doDiscard inStr (setCurrentPos# buf 3#);
1120         _ -> doDiscard inStr (incLexeme buf) };
1121         _ -> doDiscard inStr (incLexeme buf) }
1122
1123    _ -> doDiscard inStr (incLexeme buf)
1124
1125 \end{code}
1126
1127 -----------------------------------------------------------------------------
1128
1129 \begin{code}
1130 data LayoutContext
1131   = NoLayout
1132   | Layout Int#
1133
1134 data ParseResult a
1135   = POk PState a
1136   | PFailed Message
1137
1138 data PState = PState {
1139         loc           :: SrcLoc,
1140         glasgow_exts  :: Int#,
1141         bol           :: Int#,
1142         atbol         :: Int#,
1143         context       :: [LayoutContext]
1144      }
1145
1146 type P a = StringBuffer         -- Input string
1147         -> PState
1148         -> ParseResult a
1149
1150 returnP   :: a -> P a
1151 returnP a buf s = POk s a
1152
1153 thenP      :: P a -> (a -> P b) -> P b
1154 m `thenP` k = \ buf s ->
1155         case m buf s of
1156                 POk s1 a -> k a buf s1
1157                 PFailed err  -> PFailed err
1158
1159 thenP_     :: P a -> P b -> P b
1160 m `thenP_` k = m `thenP` \_ -> k
1161
1162 mapP :: (a -> P b) -> [a] -> P [b]
1163 mapP f [] = returnP []
1164 mapP f (a:as) =
1165      f a `thenP` \b ->
1166      mapP f as `thenP` \bs ->
1167      returnP (b:bs)
1168
1169 failP :: String -> P a
1170 failP msg buf s = PFailed (text msg)
1171
1172 failMsgP :: Message -> P a
1173 failMsgP msg buf s = PFailed msg
1174
1175 lexError :: String -> P a
1176 lexError str buf s@PState{ loc = loc }
1177   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1178
1179 getSrcLocP :: P SrcLoc
1180 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1181
1182 getSrcFile :: P FAST_STRING
1183 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1184
1185 getContext :: P [LayoutContext]
1186 getContext buf s@(PState{ context = ctx }) = POk s ctx
1187
1188 pushContext :: LayoutContext -> P ()
1189 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1190
1191 {-
1192
1193 This special case in layoutOn is to handle layout contexts with are
1194 indented the same or less than the current context.  This is illegal
1195 according to the Haskell spec, so we have to arrange to close the
1196 current context.  eg.
1197
1198 class Foo a where
1199 class Bar a
1200
1201 after the first 'where', the sequence of events is:
1202
1203         - layout system inserts a ';' (column 0)
1204         - parser begins a new context at column 0
1205         - parser shifts ';' (legal empty declaration)
1206         - parser sees 'class': parse error (we're still in the inner context)
1207
1208 trouble is, by the time we know we need a new context, the lexer has
1209 already generated the ';'.  Hacky solution is as follows: since we
1210 know the column of the next token (it's the column number of the new
1211 context), we set the ACTUAL column number of the new context to this
1212 numer plus one.  Hence the next time the lexer is called, a '}' will
1213 be generated to close the new context straight away.  Furthermore, we
1214 have to set the atbol flag so that the ';' that the parser shifted as
1215 part of the new context is re-generated.
1216
1217 when the new context is *less* indented than the current one:
1218
1219 f = f where g = g where
1220 h = h
1221
1222         - current context: column 12.
1223         - on seeing 'h' (column 0), the layout system inserts '}'
1224         - parser starts a new context, column 0
1225         - parser sees '}', uses it to close new context
1226         - we still need to insert another '}' followed by a ';',
1227           hence the atbol trick.
1228
1229 There's also a special hack in here to deal with
1230
1231         do
1232            ....
1233            e $ do
1234            blah
1235
1236 i.e. the inner context is at the same indentation level as the outer
1237 context.  This is strictly illegal according to Haskell 98, but
1238 there's a lot of existing code using this style and it doesn't make
1239 any sense to disallow it, since empty 'do' lists don't make sense.
1240 -}
1241
1242 layoutOn :: Bool -> P ()
1243 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1244     let offset = lexemeIndex buf -# bol in
1245     case ctx of
1246         Layout prev_off : _
1247            | if strict then prev_off >=# offset else prev_off ># offset ->
1248                 --trace ("layout on, column: " ++  show (I# offset)) $
1249                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1250         other ->
1251                 --trace ("layout on, column: " ++  show (I# offset)) $
1252                 POk s{ context = Layout offset : ctx } ()
1253
1254 layoutOff :: P ()
1255 layoutOff buf s@(PState{ context = ctx }) =
1256     POk s{ context = NoLayout:ctx } ()
1257
1258 popContext :: P ()
1259 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1260   case ctx of
1261         (_:tl) -> POk s{ context = tl } ()
1262         []     -> PFailed (srcParseErr buf loc)
1263
1264 {-
1265  Note that if the name of the file we're processing ends
1266  with `hi-boot', we accept it on faith as having the right
1267  version. This is done so that .hi-boot files that comes
1268  with hsc don't have to be updated before every release,
1269  *and* it allows us to share .hi-boot files with versions
1270  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1271
1272  If the version number is 0, the checking is also turned off.
1273  (needed to deal with GHC.hi only!)
1274
1275  Once we can assume we're compiling with a version of ghc that
1276  supports interface file checking, we can drop the special
1277  pleading
1278 -}
1279 checkVersion :: Maybe Integer -> P ()
1280 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1281  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1282  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1283 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1284  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1285  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1286
1287 -----------------------------------------------------------------
1288
1289 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1290 ifaceParseErr s l
1291   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1292           text (lexemeToString s), char '\'']
1293
1294 ifaceVersionErr hi_vers l toks
1295   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1296           ptext SLIT("Expected"), int opt_HiVersion,
1297           ptext SLIT("found "), pp_version]
1298     where
1299      pp_version =
1300       case hi_vers of
1301         Nothing -> ptext SLIT("pre ghc-3.02 version")
1302         Just v  -> ptext SLIT("version") <+> integer v
1303
1304 -----------------------------------------------------------------------------
1305
1306 srcParseErr :: StringBuffer -> SrcLoc -> Message
1307 srcParseErr s l
1308   = hcat [ppr l,
1309           if null token
1310              then ptext SLIT(": parse error (possibly incorrect indentation)")
1311              else hcat [ptext SLIT(": parse error on input "),
1312                         char '`', text token, char '\'']
1313     ]
1314   where
1315         token = lexemeToString s
1316
1317 \end{code}