ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( ord )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | ITexpr
 127   | IT__export
 128   | ITdepends
 129   | IT__forall
 130   | ITletrec
 131   | ITcoerce
 132   | ITinlineMe
 133   | ITinlineCall
 134   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 135   | ITdefaultbranch
 136   | ITbottom
 137   | ITinteger_lit
 138   | ITfloat_lit
 139   | ITword_lit
 140   | ITword64_lit
 141   | ITint64_lit
 142   | ITrational_lit
 143   | ITaddr_lit
 144   | ITlabel_lit
 145   | ITlit_lit
 146   | ITstring_lit
 147   | ITtypeapp
 148   | ITusage
 149   | ITfuall
 150   | ITarity
 151   | ITspecialise
 152   | ITnocaf
 153   | ITunfold InlinePragInfo
 154   | ITstrict ([Demand], Bool)
 155   | ITrules
 156   | ITcprinfo
 157   | ITdeprecated
 158   | IT__scc
 159   | ITsccAllCafs
 160
 161   | ITspecialise_prag           -- Pragmas
 162   | ITsource_prag
 163   | ITinline_prag
 164   | ITnoinline_prag
 165   | ITrules_prag
 166   | ITdeprecated_prag
 167   | ITline_prag
 168   | ITclose_prag
 169
 170   | ITdotdot                    -- reserved symbols
 171   | ITdcolon
 172   | ITequal
 173   | ITlam
 174   | ITvbar
 175   | ITlarrow
 176   | ITrarrow
 177   | ITat
 178   | ITtilde
 179   | ITdarrow
 180   | ITminus
 181   | ITbang
 182   | ITdot
 183
 184   | ITbiglam                    -- GHC-extension symbols
 185
 186   | ITocurly                    -- special symbols
 187   | ITccurly
 188   | ITocurlybar                 -- {|, for type applications
 189   | ITccurlybar                 -- |}, for type applications
 190   | ITvccurly
 191   | ITobrack
 192   | ITcbrack
 193   | IToparen
 194   | ITcparen
 195   | IToubxparen
 196   | ITcubxparen
 197   | ITsemi
 198   | ITcomma
 199   | ITunderscore
 200   | ITbackquote
 201
 202   | ITvarid   FAST_STRING       -- identifiers
 203   | ITconid   FAST_STRING
 204   | ITvarsym  FAST_STRING
 205   | ITconsym  FAST_STRING
 206   | ITqvarid  (FAST_STRING,FAST_STRING)
 207   | ITqconid  (FAST_STRING,FAST_STRING)
 208   | ITqvarsym (FAST_STRING,FAST_STRING)
 209   | ITqconsym (FAST_STRING,FAST_STRING)
 210
 211   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 212
 213   | ITpragma StringBuffer
 214
 215   | ITchar       Int
 216   | ITstring     FAST_STRING
 217   | ITinteger    Integer
 218   | ITrational   Rational
 219
 220   | ITprimchar   Int
 221   | ITprimstring FAST_STRING
 222   | ITprimint    Integer
 223   | ITprimfloat  Rational
 224   | ITprimdouble Rational
 225   | ITlitlit     FAST_STRING
 226
 227   | ITunknown String            -- Used when the lexer can't make sense of it
 228   | ITeof                       -- end of file token
 229   deriving Show -- debugging
 230 \end{code}
 231
 232 -----------------------------------------------------------------------------
 233 Keyword Lists
 234
 235 \begin{code}
 236 pragmaKeywordsFM = listToUFM $
 237       map (\ (x,y) -> (_PK_ x,y))
 238        [( "SPECIALISE", ITspecialise_prag ),
 239         ( "SPECIALIZE", ITspecialise_prag ),
 240         ( "SOURCE",     ITsource_prag ),
 241         ( "INLINE",     ITinline_prag ),
 242         ( "NOINLINE",   ITnoinline_prag ),
 243         ( "NOTINLINE",  ITnoinline_prag ),
 244         ( "LINE",       ITline_prag ),
 245         ( "RULES",      ITrules_prag ),
 246         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 247         ( "DEPRECATED", ITdeprecated_prag )
 248         ]
 249
 250 haskellKeywordsFM = listToUFM $
 251       map (\ (x,y) -> (_PK_ x,y))
 252        [( "_",          ITunderscore ),
 253         ( "as",         ITas ),
 254         ( "case",       ITcase ),
 255         ( "class",      ITclass ),
 256         ( "data",       ITdata ),
 257         ( "default",    ITdefault ),
 258         ( "deriving",   ITderiving ),
 259         ( "do",         ITdo ),
 260         ( "else",       ITelse ),
 261         ( "hiding",     IThiding ),
 262         ( "if",         ITif ),
 263         ( "import",     ITimport ),
 264         ( "in",         ITin ),
 265         ( "infix",      ITinfix ),
 266         ( "infixl",     ITinfixl ),
 267         ( "infixr",     ITinfixr ),
 268         ( "instance",   ITinstance ),
 269         ( "let",        ITlet ),
 270         ( "module",     ITmodule ),
 271         ( "newtype",    ITnewtype ),
 272         ( "of",         ITof ),
 273         ( "qualified",  ITqualified ),
 274         ( "then",       ITthen ),
 275         ( "type",       ITtype ),
 276         ( "where",      ITwhere ),
 277         ( "_scc_",      ITscc )
 278      ]
 279
 280 isSpecial :: Token -> Bool
 281 -- If we see M.x, where x is a keyword, but
 282 -- is special, we treat is as just plain M.x,
 283 -- not as a keyword.
 284 isSpecial ITas          = True
 285 isSpecial IThiding      = True
 286 isSpecial ITqualified   = True
 287 isSpecial ITforall      = True
 288 isSpecial ITexport      = True
 289 isSpecial ITlabel       = True
 290 isSpecial ITdynamic     = True
 291 isSpecial ITunsafe      = True
 292 isSpecial ITwith        = True
 293 isSpecial ITccallconv   = True
 294 isSpecial ITstdcallconv = True
 295
 296 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 297 ghcExtensionKeywordsFM = listToUFM $
 298         map (\ (x,y) -> (_PK_ x,y))
 299      [  ( "forall",     ITforall ),
 300         ( "foreign",    ITforeign ),
 301         ( "export",     ITexport ),
 302         ( "label",      ITlabel ),
 303         ( "dynamic",    ITdynamic ),
 304         ( "unsafe",     ITunsafe ),
 305         ( "with",       ITwith ),
 306         ( "stdcall",    ITstdcallconv),
 307         ( "ccall",      ITccallconv),
 308         ("_ccall_",     ITccall (False, False, False)),
 309         ("_ccall_GC_",  ITccall (False, False, True)),
 310         ("_casm_",      ITccall (False, True,  False)),
 311         ("_casm_GC_",   ITccall (False, True,  True)),
 312
 313         -- interface keywords
 314         ("__interface",         ITinterface),
 315         ("__expr",              ITexpr),
 316         ("__export",            IT__export),
 317         ("__depends",           ITdepends),
 318         ("__forall",            IT__forall),
 319         ("__letrec",            ITletrec),
 320         ("__coerce",            ITcoerce),
 321         ("__inline_me",         ITinlineMe),
 322         ("__inline_call",       ITinlineCall),
 323         ("__depends",           ITdepends),
 324         ("__DEFAULT",           ITdefaultbranch),
 325         ("__bot",               ITbottom),
 326         ("__integer",           ITinteger_lit),
 327         ("__float",             ITfloat_lit),
 328         ("__int64",             ITint64_lit),
 329         ("__word",              ITword_lit),
 330         ("__word64",            ITword64_lit),
 331         ("__rational",          ITrational_lit),
 332         ("__addr",              ITaddr_lit),
 333         ("__label",             ITlabel_lit),
 334         ("__litlit",            ITlit_lit),
 335         ("__string",            ITstring_lit),
 336         ("__a",                 ITtypeapp),
 337         ("__u",                 ITusage),
 338         ("__fuall",             ITfuall),
 339         ("__A",                 ITarity),
 340         ("__P",                 ITspecialise),
 341         ("__C",                 ITnocaf),
 342         ("__R",                 ITrules),
 343         ("__D",                 ITdeprecated),
 344         ("__U",                 ITunfold NoInlinePragInfo),
 345
 346         ("__ccall",             ITccall (False, False, False)),
 347         ("__ccall_GC",          ITccall (False, False, True)),
 348         ("__dyn_ccall",         ITccall (True,  False, False)),
 349         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 350         ("__casm",              ITccall (False, True,  False)),
 351         ("__dyn_casm",          ITccall (True,  True,  False)),
 352         ("__casm_GC",           ITccall (False, True,  True)),
 353         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 354
 355         ("/\\",                 ITbiglam)
 356      ]
 357
 358
 359 haskellKeySymsFM = listToUFM $
 360         map (\ (x,y) -> (_PK_ x,y))
 361       [ ("..",          ITdotdot)
 362        ,("::",          ITdcolon)
 363        ,("=",           ITequal)
 364        ,("\\",          ITlam)
 365        ,("|",           ITvbar)
 366        ,("<-",          ITlarrow)
 367        ,("->",          ITrarrow)
 368        ,("@",           ITat)
 369        ,("~",           ITtilde)
 370        ,("=>",          ITdarrow)
 371        ,("-",           ITminus)
 372        ,("!",           ITbang)
 373        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 374        ]
 375 \end{code}
 376
 377 -----------------------------------------------------------------------------
 378 The lexical analyser
 379
 380 Lexer state:
 381
 382         - (glaexts) lexing an interface file or -fglasgow-exts
 383         - (bol)   pointer to beginning of line (for column calculations)
 384         - (buf)   pointer to beginning of token
 385         - (buf)   pointer to current char
 386         - (atbol) flag indicating whether we're at the beginning of a line
 387
 388 \begin{code}
 389 lexer :: (Token -> P a) -> P a
 390 lexer cont buf s@(PState{
 391                     loc = loc,
 392                     glasgow_exts = glaexts,
 393                     bol = bol,
 394                     atbol = atbol,
 395                     context = ctx
 396                 })
 397
 398         -- first, start a new lexeme and lose all the whitespace
 399   =  _scc_ "Lexer"
 400   tab line bol atbol (stepOverLexeme buf)
 401   where
 402         line = srcLocLine loc
 403
 404         tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 405           case currentChar# buf of
 406
 407             '\NUL'# ->
 408                    if bufferExhausted (stepOn buf)
 409                         then cont ITeof buf s'
 410                         else trace "lexer: misplaced NUL?" $
 411                              tab y bol atbol (stepOn buf)
 412
 413             '\n'# -> let buf' = stepOn buf
 414                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 415
 416                 -- find comments.  This got harder in Haskell 98.
 417             '-'# ->  let trundle n =
 418                           let next = lookAhead# buf n in
 419                           if next `eqChar#` '-'# then trundle (n +# 1#)
 420                           else if is_symbol next || n <# 2#
 421                                 then is_a_token
 422                                 else tab y bol atbol
 423                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 424                     in trundle 1#
 425
 426                 -- comments and pragmas.  We deal with LINE pragmas here,
 427                 -- and throw out any unrecognised pragmas as comments.  Any
 428                 -- pragmas we know about are dealt with later (after any layout
 429                 -- processing if necessary).
 430             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 431                 if lookAhead# buf 2# `eqChar#` '#'# then
 432                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 433                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 434                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 435                   let lexeme = mkFastString -- ToDo: too slow
 436                                   (map toUpper (lexemeToString buf2)) in
 437                   case lookupUFM pragmaKeywordsFM lexeme of
 438                         Just ITline_prag ->
 439                            line_prag skip_to_end buf2 s'
 440                         Just other -> is_a_token
 441                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 442                   }}
 443
 444                 else skip_to_end (stepOnBy# buf 2#) s'
 445                 where
 446                     skip_to_end = nested_comment (lexer cont)
 447
 448                 -- special GHC extension: we grok cpp-style #line pragmas
 449             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 450                 line_prag next_line (stepOn buf) s'
 451                 where
 452                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 453
 454                 -- tabs have been expanded beforehand
 455             c | is_space c -> tab y bol atbol (stepOn buf)
 456               | otherwise  -> is_a_token
 457
 458            where s' = s{loc = replaceSrcLine loc y,
 459                         bol = bol,
 460                        atbol = atbol}
 461
 462                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 463                             | otherwise    = lexToken cont glaexts buf s'
 464
 465 -- {-# LINE .. #-} pragmas.  yeuch.
 466 line_prag cont buf s@PState{loc=loc} =
 467   case expandWhile# is_space buf                of { buf1 ->
 468   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 469   -- subtract one: the line number refers to the *following* line.
 470   let real_line = line - 1 in
 471   case fromInteger real_line                    of { i@(I# l) ->
 472         -- ToDo, if no filename then we skip the newline.... d'oh
 473   case expandWhile# is_space buf2               of { buf3 ->
 474   case currentChar# buf3                        of
 475      '\"'#{-"-} ->
 476         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 477         let
 478             file = lexemeToFastString buf4
 479             new_buf = stepOn (stepOverLexeme buf4)
 480         in
 481         if nullFastString file
 482                 then cont new_buf s{loc = replaceSrcLine loc l}
 483                 else cont new_buf s{loc = mkSrcLoc file i}
 484         }
 485      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 486   }}}}
 487
 488 nested_comment :: P a -> P a
 489 nested_comment cont buf = loop buf
 490  where
 491    loop buf =
 492      case currentChar# buf of
 493         '\NUL'# | bufferExhausted (stepOn buf) ->
 494                 lexError "unterminated `{-'" buf -- -}
 495         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 496                 cont (stepOnBy# buf 2#)
 497
 498         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 499               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 500
 501         '\n'# -> \ s@PState{loc=loc} ->
 502                  let buf' = stepOn buf in
 503                  nested_comment cont buf'
 504                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 505                           atbol = 1#}
 506
 507         _   -> nested_comment cont (stepOn buf)
 508
 509 -- When we are lexing the first token of a line, check whether we need to
 510 -- insert virtual semicolons or close braces due to layout.
 511
 512 lexBOL :: (Token -> P a) -> P a
 513 lexBOL cont buf s@(PState{
 514                     loc = loc,
 515                     glasgow_exts = glaexts,
 516                     bol = bol,
 517                     atbol = atbol,
 518                     context = ctx
 519                   }) =
 520         if need_close_curly then
 521                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 522                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 523         else if need_semi_colon then
 524                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 525                 cont ITsemi buf s{atbol = 0#}
 526         else
 527                 lexToken cont glaexts buf s{atbol = 0#}
 528   where
 529         col = currentIndex# buf -# bol
 530
 531         need_close_curly =
 532                 case ctx of
 533                         [] -> False
 534                         (i:_) -> case i of
 535                                     NoLayout -> False
 536                                     Layout n -> col <# n
 537         need_semi_colon =
 538                 case ctx of
 539                         [] -> False
 540                         (i:_) -> case i of
 541                                     NoLayout -> False
 542                                     Layout n -> col ==# n
 543
 544
 545 lexToken :: (Token -> P a) -> Int# -> P a
 546 lexToken cont glaexts buf =
 547  -- trace "lexToken" $
 548   case currentChar# buf of
 549
 550     -- special symbols ----------------------------------------------------
 551     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 552                 -> cont IToubxparen (setCurrentPos# buf 2#)
 553          | otherwise
 554                 -> cont IToparen (incLexeme buf)
 555
 556     ')'# -> cont ITcparen    (incLexeme buf)
 557     '['# -> cont ITobrack    (incLexeme buf)
 558     ']'# -> cont ITcbrack    (incLexeme buf)
 559     ','# -> cont ITcomma     (incLexeme buf)
 560     ';'# -> cont ITsemi      (incLexeme buf)
 561     '}'# -> \ s@PState{context = ctx} ->
 562             case ctx of
 563                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 564                 _        -> lexError "too many '}'s" buf s
 565     '|'# -> case lookAhead# buf 1# of
 566                  '}'#  | flag glaexts -> cont ITccurlybar
 567                                               (setCurrentPos# buf 2#)
 568                  _                    -> lex_sym cont (incLexeme buf)
 569
 570
 571     '#'# -> case lookAhead# buf 1# of
 572                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 573                 '-'# -> case lookAhead# buf 2# of
 574                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 575                            _    -> lex_sym cont (incLexeme buf)
 576                 _    -> lex_sym cont (incLexeme buf)
 577
 578     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 579                 -> lex_cstring cont (setCurrentPos# buf 2#)
 580          | otherwise
 581                 -> cont ITbackquote (incLexeme buf)
 582
 583     '{'# ->     -- look for "{-##" special iface pragma
 584             case lookAhead# buf 1# of
 585            '|'# | flag glaexts
 586                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 587            '-'# -> case lookAhead# buf 2# of
 588                     '#'# -> case lookAhead# buf 3# of
 589                                 '#'# ->
 590                                    let (lexeme, buf')
 591                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 592                                             cont (ITpragma lexeme) buf'
 593                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 594                     _    -> cont ITocurly (incLexeme buf)
 595            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 596
 597     -- strings/characters -------------------------------------------------
 598     '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
 599     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 600
 601     -- strictness and cpr pragmas and __scc treated specially.
 602     '_'# | flag glaexts ->
 603          case lookAhead# buf 1# of
 604            '_'# -> case lookAhead# buf 2# of
 605                     'S'# ->
 606                         lex_demand cont (stepOnUntil (not . isSpace)
 607                                         (stepOnBy# buf 3#)) -- past __S
 608                     'M'# ->
 609                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 610
 611                     's'# ->
 612                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 613                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 614                                Nothing   -> lex_id cont glaexts buf
 615                     _ -> lex_id cont glaexts buf
 616            _    -> lex_id cont glaexts buf
 617
 618         -- Hexadecimal and octal constants
 619     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 620                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 621          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 622                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 623         where ch   = lookAhead# buf 1#
 624               ch2  = lookAhead# buf 2#
 625               buf' = setCurrentPos# buf 2#
 626
 627     '\NUL'# ->
 628             if bufferExhausted (stepOn buf) then
 629                cont ITeof buf
 630             else
 631                trace "lexIface: misplaced NUL?" $
 632                cont (ITunknown "\NUL") (stepOn buf)
 633
 634     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 635             lex_ip cont (incLexeme buf)
 636     c | is_digit  c -> lex_num cont glaexts 0 buf
 637       | is_symbol c -> lex_sym cont buf
 638       | is_upper  c -> lex_con cont glaexts buf
 639       | is_ident  c -> lex_id  cont glaexts buf
 640       | otherwise   -> lexError "illegal character" buf
 641
 642 -- Int# is unlifted, and therefore faster than Bool for flags.
 643 {-# INLINE flag #-}
 644 flag :: Int# -> Bool
 645 flag 0# = False
 646 flag _  = True
 647
 648 -------------------------------------------------------------------------------
 649 -- Pragmas
 650
 651 lex_prag cont buf
 652   = case expandWhile# is_space buf of { buf1 ->
 653     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 654     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 655     case lookupUFM pragmaKeywordsFM lexeme of
 656         Just kw -> cont kw (mergeLexemes buf buf2)
 657         Nothing -> panic "lex_prag"
 658   }}
 659
 660 -------------------------------------------------------------------------------
 661 -- Strings & Chars
 662
 663 lex_string cont glaexts s buf
 664   = case currentChar# buf of
 665         '"'#{-"-} ->
 666            let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in
 667            case currentChar# buf' of
 668                 '#'# | flag glaexts -> if all (<= 0xFF) s
 669                     then cont (ITprimstring s') (incLexeme buf')
 670                     else lexError "primitive string literal must contain only characters <= '\xFF'" buf'
 671                 _                   -> cont (ITstring s') buf'
 672
 673         -- ignore \& in a string, deal with string gaps
 674         '\\'# | next_ch `eqChar#` '&'#
 675                 -> lex_string cont glaexts s buf'
 676               | is_space next_ch
 677                 -> lex_stringgap cont glaexts s (incLexeme buf)
 678
 679             where next_ch = lookAhead# buf 1#
 680                   buf' = setCurrentPos# buf 2#
 681
 682         _ -> lex_char (lex_next_string cont s) glaexts buf
 683
 684 lex_stringgap cont glaexts s buf
 685   = let buf' = incLexeme buf in
 686     case currentChar# buf of
 687         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 688                   st{loc = incSrcLine loc}
 689         '\\'# -> lex_string cont glaexts s buf'
 690         c | is_space c -> lex_stringgap cont glaexts s buf'
 691         other -> charError buf'
 692
 693 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 694
 695 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 696 lex_char cont glaexts buf
 697   = case currentChar# buf of
 698         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 699         c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
 700         other -> charError buf
 701
 702 char_end cont glaexts c buf
 703   = case currentChar# buf of
 704         '\''# -> let buf' = incLexeme buf in
 705                  case currentChar# buf' of
 706                         '#'# | flag glaexts
 707                                 -> cont (ITprimchar c) (incLexeme buf')
 708                         _       -> cont (ITchar c) buf'
 709         _     -> charError buf
 710
 711 lex_escape cont buf
 712   = let buf' = incLexeme buf in
 713     case currentChar# buf of
 714         'a'#       -> cont (ord '\a') buf'
 715         'b'#       -> cont (ord '\b') buf'
 716         'f'#       -> cont (ord '\f') buf'
 717         'n'#       -> cont (ord '\n') buf'
 718         'r'#       -> cont (ord '\r') buf'
 719         't'#       -> cont (ord '\t') buf'
 720         'v'#       -> cont (ord '\v') buf'
 721         '\\'#      -> cont (ord '\\') buf'
 722         '"'#       -> cont (ord '\"') buf'
 723         '\''#      -> cont (ord '\'') buf'
 724         '^'#       -> let c = currentChar# buf' in
 725                       if c `geChar#` '@'# && c `leChar#` '_'#
 726                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 727                         else charError buf'
 728
 729         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 730         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 731         x | is_digit x
 732                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 733
 734         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 735                                        Just buf2 <- [prefixMatch buf p] ] of
 736                             (c,buf2):_ -> cont (ord c) buf2
 737                             [] -> charError buf'
 738
 739 after_charnum cont i buf
 740   = if i >= 0 && i <= 0x7FFFFFFF
 741         then cont (fromInteger i) buf
 742         else charError buf
 743
 744 readNum cont buf is_digit base conv = read buf 0
 745   where read buf i
 746           = case currentChar# buf of { c ->
 747             if is_digit c
 748                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 749                 else cont i buf
 750             }
 751
 752 is_hexdigit c
 753         =  is_digit c
 754         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 755         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 756
 757 hex c | is_digit c = ord# c -# ord# '0'#
 758       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 759 oct_or_dec c = ord# c -# ord# '0'#
 760
 761 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 762
 763 to_lower c
 764   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 765         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 766   | otherwise = c
 767
 768 charError buf = lexError "error in character literal" buf
 769
 770 silly_escape_chars = [
 771         ("NUL", '\NUL'),
 772         ("SOH", '\SOH'),
 773         ("STX", '\STX'),
 774         ("ETX", '\ETX'),
 775         ("EOT", '\EOT'),
 776         ("ENQ", '\ENQ'),
 777         ("ACK", '\ACK'),
 778         ("BEL", '\BEL'),
 779         ("BS", '\BS'),
 780         ("HT", '\HT'),
 781         ("LF", '\LF'),
 782         ("VT", '\VT'),
 783         ("FF", '\FF'),
 784         ("CR", '\CR'),
 785         ("SO", '\SO'),
 786         ("SI", '\SI'),
 787         ("DLE", '\DLE'),
 788         ("DC1", '\DC1'),
 789         ("DC2", '\DC2'),
 790         ("DC3", '\DC3'),
 791         ("DC4", '\DC4'),
 792         ("NAK", '\NAK'),
 793         ("SYN", '\SYN'),
 794         ("ETB", '\ETB'),
 795         ("CAN", '\CAN'),
 796         ("EM", '\EM'),
 797         ("SUB", '\SUB'),
 798         ("ESC", '\ESC'),
 799         ("FS", '\FS'),
 800         ("GS", '\GS'),
 801         ("RS", '\RS'),
 802         ("US", '\US'),
 803         ("SP", '\SP'),
 804         ("DEL", '\DEL')
 805         ]
 806
 807 -------------------------------------------------------------------------------
 808
 809 lex_demand cont buf =
 810  case read_em [] buf of { (ls,buf') ->
 811  case currentChar# buf' of
 812    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 813    _    -> cont (ITstrict (ls, False)) buf'
 814  }
 815  where
 816    -- code snatched from Demand.lhs
 817   read_em acc buf =
 818    case currentChar# buf of
 819     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 820     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 821     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 822     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 823     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 824     ')'# -> (reverse acc, stepOn buf)
 825     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 826     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 827     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 828     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 829     _    -> (reverse acc, buf)
 830
 831   do_unpack new_or_data wrapper_unpacks acc buf
 832    = case read_em [] buf of
 833       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 834
 835
 836 ------------------
 837 lex_scc cont buf =
 838  case currentChar# buf of
 839   'C'# -> cont ITsccAllCafs (incLexeme buf)
 840   other -> cont ITscc buf
 841
 842 -----------------------------------------------------------------------------
 843 -- Numbers
 844
 845 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 846 lex_num cont glaexts acc buf =
 847  case scanNumLit acc buf of
 848      (acc',buf') ->
 849        case currentChar# buf' of
 850          '.'# | is_digit (lookAhead# buf' 1#) ->
 851              -- this case is not optimised at all, as the
 852              -- presence of floating point numbers in interface
 853              -- files is not that common. (ToDo)
 854             case expandWhile# is_digit (incLexeme buf') of
 855               buf2 -> -- points to first non digit char
 856
 857                 let l = case currentChar# buf2 of
 858                           'E'# -> do_exponent
 859                           'e'# -> do_exponent
 860                           _ -> buf2
 861
 862                     do_exponent
 863                         = let buf3 = incLexeme buf2 in
 864                           case currentChar# buf3 of
 865                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 866                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 867                                 x | is_digit x -> expandWhile# is_digit buf3
 868                                 _ -> buf2
 869
 870                     v = readRational__ (lexemeToString l)
 871
 872                 in case currentChar# l of -- glasgow exts only
 873                       '#'# | flag glaexts -> let l' = incLexeme l in
 874                               case currentChar# l' of
 875                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 876                                 _    -> cont (ITprimfloat  v) l'
 877                       _ -> cont (ITrational v) l
 878
 879          _ -> after_lexnum cont glaexts acc' buf'
 880
 881 after_lexnum cont glaexts i buf
 882   = case currentChar# buf of
 883         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 884         _    -> cont (ITinteger i) buf
 885
 886 -----------------------------------------------------------------------------
 887 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 888
 889 -- we lexemeToFastString on the bit between the ``''s, but include the
 890 -- quotes in the full lexeme.
 891
 892 lex_cstring cont buf =
 893  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 894    Just buf' -> cont (ITlitlit (lexemeToFastString
 895                                 (setCurrentPos# buf' (negateInt# 2#))))
 896                    (mergeLexemes buf buf')
 897    Nothing   -> lexError "unterminated ``" buf
 898
 899 -----------------------------------------------------------------------------
 900 -- identifiers, symbols etc.
 901
 902 lex_ip cont buf =
 903  case expandWhile# is_ident buf of
 904    buf' -> cont (ITipvarid lexeme) buf'
 905            where lexeme = lexemeToFastString buf'
 906
 907 lex_id cont glaexts buf =
 908  let buf1 = expandWhile# is_ident buf in
 909  seq buf1 $
 910
 911  case (if flag glaexts
 912         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 913         else buf1)                              of { buf' ->
 914
 915  let lexeme  = lexemeToFastString buf' in
 916
 917  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 918         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 919                           cont kwd_token buf';
 920         Nothing        ->
 921
 922  let var_token = cont (ITvarid lexeme) buf' in
 923
 924  if not (flag glaexts)
 925    then var_token
 926    else
 927
 928  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 929         Just kwd_token -> cont kwd_token buf';
 930         Nothing        -> var_token
 931
 932  }}}
 933
 934 lex_sym cont buf =
 935  -- trace "lex_sym" $
 936  case expandWhile# is_symbol buf of
 937    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 938                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 939                                   cont kwd_token buf' ;
 940                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 941                                   cont (mk_var_token lexeme) buf'
 942            }
 943         where lexeme = lexemeToFastString buf'
 944
 945
 946 lex_con cont glaexts buf =
 947  -- trace ("con: "{-++unpackFS lexeme-}) $
 948  case expandWhile# is_ident buf          of { buf1 ->
 949  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 950
 951  case currentChar# buf' of
 952      '.'# -> munch
 953      _    -> just_a_conid
 954
 955    where
 956     just_a_conid = cont (ITconid lexeme) buf'
 957     lexeme = lexemeToFastString buf'
 958     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 959  }}
 960
 961 lex_qid cont glaexts mod buf just_a_conid =
 962  -- trace ("quid: "{-++unpackFS lexeme-}) $
 963  case currentChar# buf of
 964   '['# ->       -- Special case for []
 965     case lookAhead# buf 1# of
 966      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 967      _    -> just_a_conid
 968
 969   '('# ->  -- Special case for (,,,)
 970            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 971     case lookAhead# buf 1# of
 972      '#'# | flag glaexts -> case lookAhead# buf 2# of
 973                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 974                                 just_a_conid
 975                 _    -> just_a_conid
 976      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 977      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 978      _    -> just_a_conid
 979
 980   '-'# -> case lookAhead# buf 1# of
 981             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 982             _    -> lex_id3 cont glaexts mod buf just_a_conid
 983   _    -> lex_id3 cont glaexts mod buf just_a_conid
 984
 985 lex_id3 cont glaexts mod buf just_a_conid
 986   | is_symbol (currentChar# buf) =
 987      let
 988         start_new_lexeme = stepOverLexeme buf
 989      in
 990      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 991      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 992      let
 993        lexeme  = lexemeToFastString buf'
 994         -- real lexeme is M.<sym>
 995        new_buf = mergeLexemes buf buf'
 996      in
 997      cont (mk_qvar_token mod lexeme) new_buf
 998         -- wrong, but arguably morally right: M... is now a qvarsym
 999      }
1000
1001   | otherwise   =
1002      let
1003         start_new_lexeme = stepOverLexeme buf
1004      in
1005      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1006      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1007      if emptyLexeme buf1
1008             then just_a_conid
1009             else
1010
1011      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1012
1013      let
1014       lexeme      = lexemeToFastString buf'
1015       new_buf     = mergeLexemes buf buf'
1016       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1017      in
1018      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1019             Nothing          -> is_a_qvarid ;
1020
1021             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1022                            -> is_a_qvarid          --  recognised as keywords here.
1023                            | otherwise
1024                            -> just_a_conid         -- avoid M.where etc.
1025      }}}
1026
1027 slurp_trailing_hashes buf glaexts
1028   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1029   | otherwise    = buf
1030
1031
1032 mk_var_token pk_str
1033   | is_upper f          = ITconid pk_str
1034   | is_ident f          = ITvarid pk_str
1035   | f `eqChar#` ':'#    = ITconsym pk_str
1036   | otherwise           = ITvarsym pk_str
1037   where
1038       (C# f) = _HEAD_ pk_str
1039       -- tl     = _TAIL_ pk_str
1040
1041 mk_qvar_token m token =
1042 -- trace ("mk_qvar ") $
1043  case mk_var_token token of
1044    ITconid n  -> ITqconid  (m,n)
1045    ITvarid n  -> ITqvarid  (m,n)
1046    ITconsym n -> ITqconsym (m,n)
1047    ITvarsym n -> ITqvarsym (m,n)
1048    _          -> ITunknown (show token)
1049 \end{code}
1050
1051 ----------------------------------------------------------------------------
1052 Horrible stuff for dealing with M.(,,,)
1053
1054 \begin{code}
1055 lex_tuple cont mod buf back_off =
1056   go 2 buf
1057   where
1058    go n buf =
1059     case currentChar# buf of
1060       ','# -> go (n+1) (stepOn buf)
1061       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1062       _    -> back_off
1063
1064 lex_ubx_tuple cont mod buf back_off =
1065   go 2 buf
1066   where
1067    go n buf =
1068     case currentChar# buf of
1069       ','# -> go (n+1) (stepOn buf)
1070       '#'# -> case lookAhead# buf 1# of
1071                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1072                                  (stepOnBy# buf 2#)
1073                 _    -> back_off
1074       _    -> back_off
1075 \end{code}
1076
1077 -----------------------------------------------------------------------------
1078 doDiscard rips along really fast, looking for a '#-}',
1079 indicating the end of the pragma we're skipping
1080
1081 \begin{code}
1082 doDiscard inStr buf =
1083  case currentChar# buf of
1084    '#'# | not inStr ->
1085        case lookAhead# buf 1# of { '#'# ->
1086        case lookAhead# buf 2# of { '-'# ->
1087        case lookAhead# buf 3# of { '}'# ->
1088            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1089         _    -> doDiscard inStr (incLexeme buf) };
1090         _    -> doDiscard inStr (incLexeme buf) };
1091         _    -> doDiscard inStr (incLexeme buf) }
1092    '"'# ->
1093        let
1094         odd_slashes buf flg i# =
1095           case lookAhead# buf i# of
1096            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1097            _     -> flg
1098        in
1099        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1100          '\\'# -> -- escaping something..
1101            if odd_slashes buf True (negateInt# 2#) then
1102                -- odd number of slashes, " is escaped.
1103               doDiscard inStr (incLexeme buf)
1104            else
1105                -- even number of slashes, \ is escaped.
1106               doDiscard (not inStr) (incLexeme buf)
1107          _ -> case inStr of -- forced to avoid build-up
1108                True  -> doDiscard False (incLexeme buf)
1109                False -> doDiscard True  (incLexeme buf)
1110    _ -> doDiscard inStr (incLexeme buf)
1111
1112 \end{code}
1113
1114 -----------------------------------------------------------------------------
1115
1116 \begin{code}
1117 data LayoutContext
1118   = NoLayout
1119   | Layout Int#
1120
1121 data ParseResult a
1122   = POk PState a
1123   | PFailed Message
1124
1125 data PState = PState {
1126         loc           :: SrcLoc,
1127         glasgow_exts  :: Int#,
1128         bol           :: Int#,
1129         atbol         :: Int#,
1130         context       :: [LayoutContext]
1131      }
1132
1133 type P a = StringBuffer         -- Input string
1134         -> PState
1135         -> ParseResult a
1136
1137 returnP   :: a -> P a
1138 returnP a buf s = POk s a
1139
1140 thenP      :: P a -> (a -> P b) -> P b
1141 m `thenP` k = \ buf s ->
1142         case m buf s of
1143                 POk s1 a -> k a buf s1
1144                 PFailed err  -> PFailed err
1145
1146 thenP_     :: P a -> P b -> P b
1147 m `thenP_` k = m `thenP` \_ -> k
1148
1149 mapP :: (a -> P b) -> [a] -> P [b]
1150 mapP f [] = returnP []
1151 mapP f (a:as) =
1152      f a `thenP` \b ->
1153      mapP f as `thenP` \bs ->
1154      returnP (b:bs)
1155
1156 failP :: String -> P a
1157 failP msg buf s = PFailed (text msg)
1158
1159 failMsgP :: Message -> P a
1160 failMsgP msg buf s = PFailed msg
1161
1162 lexError :: String -> P a
1163 lexError str buf s@PState{ loc = loc }
1164   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1165
1166 getSrcLocP :: P SrcLoc
1167 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1168
1169 getSrcFile :: P FAST_STRING
1170 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1171
1172 getContext :: P [LayoutContext]
1173 getContext buf s@(PState{ context = ctx }) = POk s ctx
1174
1175 pushContext :: LayoutContext -> P ()
1176 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1177
1178 {-
1179
1180 This special case in layoutOn is to handle layout contexts with are
1181 indented the same or less than the current context.  This is illegal
1182 according to the Haskell spec, so we have to arrange to close the
1183 current context.  eg.
1184
1185 class Foo a where
1186 class Bar a
1187
1188 after the first 'where', the sequence of events is:
1189
1190         - layout system inserts a ';' (column 0)
1191         - parser begins a new context at column 0
1192         - parser shifts ';' (legal empty declaration)
1193         - parser sees 'class': parse error (we're still in the inner context)
1194
1195 trouble is, by the time we know we need a new context, the lexer has
1196 already generated the ';'.  Hacky solution is as follows: since we
1197 know the column of the next token (it's the column number of the new
1198 context), we set the ACTUAL column number of the new context to this
1199 numer plus one.  Hence the next time the lexer is called, a '}' will
1200 be generated to close the new context straight away.  Furthermore, we
1201 have to set the atbol flag so that the ';' that the parser shifted as
1202 part of the new context is re-generated.
1203
1204 when the new context is *less* indented than the current one:
1205
1206 f = f where g = g where
1207 h = h
1208
1209         - current context: column 12.
1210         - on seeing 'h' (column 0), the layout system inserts '}'
1211         - parser starts a new context, column 0
1212         - parser sees '}', uses it to close new context
1213         - we still need to insert another '}' followed by a ';',
1214           hence the atbol trick.
1215
1216 There's also a special hack in here to deal with
1217
1218         do
1219            ....
1220            e $ do
1221            blah
1222
1223 i.e. the inner context is at the same indentation level as the outer
1224 context.  This is strictly illegal according to Haskell 98, but
1225 there's a lot of existing code using this style and it doesn't make
1226 any sense to disallow it, since empty 'do' lists don't make sense.
1227 -}
1228
1229 layoutOn :: Bool -> P ()
1230 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1231     let offset = lexemeIndex buf -# bol in
1232     case ctx of
1233         Layout prev_off : _
1234            | if strict then prev_off >=# offset else prev_off ># offset ->
1235                 --trace ("layout on, column: " ++  show (I# offset)) $
1236                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1237         other ->
1238                 --trace ("layout on, column: " ++  show (I# offset)) $
1239                 POk s{ context = Layout offset : ctx } ()
1240
1241 layoutOff :: P ()
1242 layoutOff buf s@(PState{ context = ctx }) =
1243     POk s{ context = NoLayout:ctx } ()
1244
1245 popContext :: P ()
1246 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1247   case ctx of
1248         (_:tl) -> POk s{ context = tl } ()
1249         []     -> PFailed (srcParseErr buf loc)
1250
1251 {-
1252  Note that if the name of the file we're processing ends
1253  with `hi-boot', we accept it on faith as having the right
1254  version. This is done so that .hi-boot files that comes
1255  with hsc don't have to be updated before every release,
1256  *and* it allows us to share .hi-boot files with versions
1257  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1258
1259  If the version number is 0, the checking is also turned off.
1260  (needed to deal with GHC.hi only!)
1261
1262  Once we can assume we're compiling with a version of ghc that
1263  supports interface file checking, we can drop the special
1264  pleading
1265 -}
1266 checkVersion :: Maybe Integer -> P ()
1267 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1268  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1269  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1270 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1271  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1272  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1273
1274 -----------------------------------------------------------------
1275
1276 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1277 ifaceParseErr s l
1278   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1279           text (lexemeToString s), char '\'']
1280
1281 ifaceVersionErr hi_vers l toks
1282   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1283           ptext SLIT("Expected"), int opt_HiVersion,
1284           ptext SLIT("found "), pp_version]
1285     where
1286      pp_version =
1287       case hi_vers of
1288         Nothing -> ptext SLIT("pre ghc-3.02 version")
1289         Just v  -> ptext SLIT("version") <+> integer v
1290
1291 -----------------------------------------------------------------------------
1292
1293 srcParseErr :: StringBuffer -> SrcLoc -> Message
1294 srcParseErr s l
1295   = hcat [ppr l,
1296           if null token
1297              then ptext SLIT(": parse error (possibly incorrect indentation)")
1298              else hcat [ptext SLIT(": parse error on input "),
1299                         char '`', text token, char '\'']
1300     ]
1301   where
1302         token = lexemeToString s
1303
1304 \end{code}