ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( ord )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | IT__export
 127   | ITdepends
 128   | IT__forall
 129   | ITletrec
 130   | ITcoerce
 131   | ITinlineMe
 132   | ITinlineCall
 133   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 134   | ITdefaultbranch
 135   | ITbottom
 136   | ITinteger_lit
 137   | ITfloat_lit
 138   | ITword_lit
 139   | ITword64_lit
 140   | ITint64_lit
 141   | ITrational_lit
 142   | ITaddr_lit
 143   | ITlabel_lit
 144   | ITlit_lit
 145   | ITstring_lit
 146   | ITtypeapp
 147   | ITusage
 148   | ITfuall
 149   | ITarity
 150   | ITspecialise
 151   | ITnocaf
 152   | ITunfold InlinePragInfo
 153   | ITstrict ([Demand], Bool)
 154   | ITrules
 155   | ITcprinfo
 156   | ITdeprecated
 157   | IT__scc
 158   | ITsccAllCafs
 159
 160   | ITspecialise_prag           -- Pragmas
 161   | ITsource_prag
 162   | ITinline_prag
 163   | ITnoinline_prag
 164   | ITrules_prag
 165   | ITdeprecated_prag
 166   | ITline_prag
 167   | ITclose_prag
 168
 169   | ITdotdot                    -- reserved symbols
 170   | ITdcolon
 171   | ITequal
 172   | ITlam
 173   | ITvbar
 174   | ITlarrow
 175   | ITrarrow
 176   | ITat
 177   | ITtilde
 178   | ITdarrow
 179   | ITminus
 180   | ITbang
 181   | ITdot
 182
 183   | ITbiglam                    -- GHC-extension symbols
 184
 185   | ITocurly                    -- special symbols
 186   | ITccurly
 187   | ITocurlybar                 -- {|, for type applications
 188   | ITccurlybar                 -- |}, for type applications
 189   | ITvccurly
 190   | ITobrack
 191   | ITcbrack
 192   | IToparen
 193   | ITcparen
 194   | IToubxparen
 195   | ITcubxparen
 196   | ITsemi
 197   | ITcomma
 198   | ITunderscore
 199   | ITbackquote
 200
 201   | ITvarid   FAST_STRING       -- identifiers
 202   | ITconid   FAST_STRING
 203   | ITvarsym  FAST_STRING
 204   | ITconsym  FAST_STRING
 205   | ITqvarid  (FAST_STRING,FAST_STRING)
 206   | ITqconid  (FAST_STRING,FAST_STRING)
 207   | ITqvarsym (FAST_STRING,FAST_STRING)
 208   | ITqconsym (FAST_STRING,FAST_STRING)
 209
 210   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 211
 212   | ITpragma StringBuffer
 213
 214   | ITchar       Int
 215   | ITstring     FAST_STRING
 216   | ITinteger    Integer
 217   | ITrational   Rational
 218
 219   | ITprimchar   Int
 220   | ITprimstring FAST_STRING
 221   | ITprimint    Integer
 222   | ITprimfloat  Rational
 223   | ITprimdouble Rational
 224   | ITlitlit     FAST_STRING
 225
 226   | ITunknown String            -- Used when the lexer can't make sense of it
 227   | ITeof                       -- end of file token
 228   deriving Show -- debugging
 229 \end{code}
 230
 231 -----------------------------------------------------------------------------
 232 Keyword Lists
 233
 234 \begin{code}
 235 pragmaKeywordsFM = listToUFM $
 236       map (\ (x,y) -> (_PK_ x,y))
 237        [( "SPECIALISE", ITspecialise_prag ),
 238         ( "SPECIALIZE", ITspecialise_prag ),
 239         ( "SOURCE",     ITsource_prag ),
 240         ( "INLINE",     ITinline_prag ),
 241         ( "NOINLINE",   ITnoinline_prag ),
 242         ( "NOTINLINE",  ITnoinline_prag ),
 243         ( "LINE",       ITline_prag ),
 244         ( "RULES",      ITrules_prag ),
 245         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 246         ( "DEPRECATED", ITdeprecated_prag )
 247         ]
 248
 249 haskellKeywordsFM = listToUFM $
 250       map (\ (x,y) -> (_PK_ x,y))
 251        [( "_",          ITunderscore ),
 252         ( "as",         ITas ),
 253         ( "case",       ITcase ),
 254         ( "class",      ITclass ),
 255         ( "data",       ITdata ),
 256         ( "default",    ITdefault ),
 257         ( "deriving",   ITderiving ),
 258         ( "do",         ITdo ),
 259         ( "else",       ITelse ),
 260         ( "hiding",     IThiding ),
 261         ( "if",         ITif ),
 262         ( "import",     ITimport ),
 263         ( "in",         ITin ),
 264         ( "infix",      ITinfix ),
 265         ( "infixl",     ITinfixl ),
 266         ( "infixr",     ITinfixr ),
 267         ( "instance",   ITinstance ),
 268         ( "let",        ITlet ),
 269         ( "module",     ITmodule ),
 270         ( "newtype",    ITnewtype ),
 271         ( "of",         ITof ),
 272         ( "qualified",  ITqualified ),
 273         ( "then",       ITthen ),
 274         ( "type",       ITtype ),
 275         ( "where",      ITwhere ),
 276         ( "_scc_",      ITscc )
 277      ]
 278
 279 isSpecial :: Token -> Bool
 280 -- If we see M.x, where x is a keyword, but
 281 -- is special, we treat is as just plain M.x,
 282 -- not as a keyword.
 283 isSpecial ITas          = True
 284 isSpecial IThiding      = True
 285 isSpecial ITqualified   = True
 286 isSpecial ITforall      = True
 287 isSpecial ITexport      = True
 288 isSpecial ITlabel       = True
 289 isSpecial ITdynamic     = True
 290 isSpecial ITunsafe      = True
 291 isSpecial ITwith        = True
 292 isSpecial ITccallconv   = True
 293 isSpecial ITstdcallconv = True
 294 isSpecial _             = False
 295
 296 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 297 ghcExtensionKeywordsFM = listToUFM $
 298         map (\ (x,y) -> (_PK_ x,y))
 299      [  ( "forall",     ITforall ),
 300         ( "foreign",    ITforeign ),
 301         ( "export",     ITexport ),
 302         ( "label",      ITlabel ),
 303         ( "dynamic",    ITdynamic ),
 304         ( "unsafe",     ITunsafe ),
 305         ( "with",       ITwith ),
 306         ( "stdcall",    ITstdcallconv),
 307         ( "ccall",      ITccallconv),
 308         ("_ccall_",     ITccall (False, False, False)),
 309         ("_ccall_GC_",  ITccall (False, False, True)),
 310         ("_casm_",      ITccall (False, True,  False)),
 311         ("_casm_GC_",   ITccall (False, True,  True)),
 312
 313         -- interface keywords
 314         ("__interface",         ITinterface),
 315         ("__export",            IT__export),
 316         ("__depends",           ITdepends),
 317         ("__forall",            IT__forall),
 318         ("__letrec",            ITletrec),
 319         ("__coerce",            ITcoerce),
 320         ("__inline_me",         ITinlineMe),
 321         ("__inline_call",       ITinlineCall),
 322         ("__depends",           ITdepends),
 323         ("__DEFAULT",           ITdefaultbranch),
 324         ("__bot",               ITbottom),
 325         ("__integer",           ITinteger_lit),
 326         ("__float",             ITfloat_lit),
 327         ("__int64",             ITint64_lit),
 328         ("__word",              ITword_lit),
 329         ("__word64",            ITword64_lit),
 330         ("__rational",          ITrational_lit),
 331         ("__addr",              ITaddr_lit),
 332         ("__label",             ITlabel_lit),
 333         ("__litlit",            ITlit_lit),
 334         ("__string",            ITstring_lit),
 335         ("__a",                 ITtypeapp),
 336         ("__u",                 ITusage),
 337         ("__fuall",             ITfuall),
 338         ("__A",                 ITarity),
 339         ("__P",                 ITspecialise),
 340         ("__C",                 ITnocaf),
 341         ("__R",                 ITrules),
 342         ("__D",                 ITdeprecated),
 343         ("__U",                 ITunfold NoInlinePragInfo),
 344
 345         ("__ccall",             ITccall (False, False, False)),
 346         ("__ccall_GC",          ITccall (False, False, True)),
 347         ("__dyn_ccall",         ITccall (True,  False, False)),
 348         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 349         ("__casm",              ITccall (False, True,  False)),
 350         ("__dyn_casm",          ITccall (True,  True,  False)),
 351         ("__casm_GC",           ITccall (False, True,  True)),
 352         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 353
 354         ("/\\",                 ITbiglam)
 355      ]
 356
 357
 358 haskellKeySymsFM = listToUFM $
 359         map (\ (x,y) -> (_PK_ x,y))
 360       [ ("..",          ITdotdot)
 361        ,("::",          ITdcolon)
 362        ,("=",           ITequal)
 363        ,("\\",          ITlam)
 364        ,("|",           ITvbar)
 365        ,("<-",          ITlarrow)
 366        ,("->",          ITrarrow)
 367        ,("@",           ITat)
 368        ,("~",           ITtilde)
 369        ,("=>",          ITdarrow)
 370        ,("-",           ITminus)
 371        ,("!",           ITbang)
 372        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 373        ]
 374 \end{code}
 375
 376 -----------------------------------------------------------------------------
 377 The lexical analyser
 378
 379 Lexer state:
 380
 381         - (glaexts) lexing an interface file or -fglasgow-exts
 382         - (bol)   pointer to beginning of line (for column calculations)
 383         - (buf)   pointer to beginning of token
 384         - (buf)   pointer to current char
 385         - (atbol) flag indicating whether we're at the beginning of a line
 386
 387 \begin{code}
 388 lexer :: (Token -> P a) -> P a
 389 lexer cont buf s@(PState{
 390                     loc = loc,
 391                     glasgow_exts = glaexts,
 392                     bol = bol,
 393                     atbol = atbol,
 394                     context = ctx
 395                 })
 396
 397         -- first, start a new lexeme and lose all the whitespace
 398   =  _scc_ "Lexer"
 399   tab line bol atbol (stepOverLexeme buf)
 400   where
 401         line = srcLocLine loc
 402
 403         tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 404           case currentChar# buf of
 405
 406             '\NUL'# ->
 407                    if bufferExhausted (stepOn buf)
 408                         then cont ITeof buf s'
 409                         else trace "lexer: misplaced NUL?" $
 410                              tab y bol atbol (stepOn buf)
 411
 412             '\n'# -> let buf' = stepOn buf
 413                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 414
 415                 -- find comments.  This got harder in Haskell 98.
 416             '-'# ->  let trundle n =
 417                           let next = lookAhead# buf n in
 418                           if next `eqChar#` '-'# then trundle (n +# 1#)
 419                           else if is_symbol next || n <# 2#
 420                                 then is_a_token
 421                                 else tab y bol atbol
 422                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 423                     in trundle 1#
 424
 425                 -- comments and pragmas.  We deal with LINE pragmas here,
 426                 -- and throw out any unrecognised pragmas as comments.  Any
 427                 -- pragmas we know about are dealt with later (after any layout
 428                 -- processing if necessary).
 429             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 430                 if lookAhead# buf 2# `eqChar#` '#'# then
 431                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 432                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 433                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 434                   let lexeme = mkFastString -- ToDo: too slow
 435                                   (map toUpper (lexemeToString buf2)) in
 436                   case lookupUFM pragmaKeywordsFM lexeme of
 437                         Just ITline_prag ->
 438                            line_prag skip_to_end buf2 s'
 439                         Just other -> is_a_token
 440                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 441                   }}
 442
 443                 else skip_to_end (stepOnBy# buf 2#) s'
 444                 where
 445                     skip_to_end = nested_comment (lexer cont)
 446
 447                 -- special GHC extension: we grok cpp-style #line pragmas
 448             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 449                 case expandWhile# is_space (stepOn buf) of { buf1 ->
 450                 if is_digit (currentChar# buf1)
 451                         then line_prag next_line buf1 s'
 452                         else is_a_token
 453                 }
 454                 where
 455                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 456
 457                 -- tabs have been expanded beforehand
 458             c | is_space c -> tab y bol atbol (stepOn buf)
 459               | otherwise  -> is_a_token
 460
 461            where s' = s{loc = replaceSrcLine loc y,
 462                         bol = bol,
 463                        atbol = atbol}
 464
 465                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 466                             | otherwise    = lexToken cont glaexts buf s'
 467
 468 -- {-# LINE .. #-} pragmas.  yeuch.
 469 line_prag cont buf s@PState{loc=loc} =
 470   case expandWhile# is_space buf                of { buf1 ->
 471   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 472   -- subtract one: the line number refers to the *following* line.
 473   let real_line = line - 1 in
 474   case fromInteger real_line                    of { i@(I# l) ->
 475         -- ToDo, if no filename then we skip the newline.... d'oh
 476   case expandWhile# is_space buf2               of { buf3 ->
 477   case currentChar# buf3                        of
 478      '\"'#{-"-} ->
 479         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 480         let
 481             file = lexemeToFastString buf4
 482             new_buf = stepOn (stepOverLexeme buf4)
 483         in
 484         if nullFastString file
 485                 then cont new_buf s{loc = replaceSrcLine loc l}
 486                 else cont new_buf s{loc = mkSrcLoc file i}
 487         }
 488      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 489   }}}}
 490
 491 nested_comment :: P a -> P a
 492 nested_comment cont buf = loop buf
 493  where
 494    loop buf =
 495      case currentChar# buf of
 496         '\NUL'# | bufferExhausted (stepOn buf) ->
 497                 lexError "unterminated `{-'" buf -- -}
 498         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 499                 cont (stepOnBy# buf 2#)
 500
 501         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 502               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 503
 504         '\n'# -> \ s@PState{loc=loc} ->
 505                  let buf' = stepOn buf in
 506                  nested_comment cont buf'
 507                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 508                           atbol = 1#}
 509
 510         _   -> nested_comment cont (stepOn buf)
 511
 512 -- When we are lexing the first token of a line, check whether we need to
 513 -- insert virtual semicolons or close braces due to layout.
 514
 515 lexBOL :: (Token -> P a) -> P a
 516 lexBOL cont buf s@(PState{
 517                     loc = loc,
 518                     glasgow_exts = glaexts,
 519                     bol = bol,
 520                     atbol = atbol,
 521                     context = ctx
 522                   }) =
 523         if need_close_curly then
 524                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 525                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 526         else if need_semi_colon then
 527                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 528                 cont ITsemi buf s{atbol = 0#}
 529         else
 530                 lexToken cont glaexts buf s{atbol = 0#}
 531   where
 532         col = currentIndex# buf -# bol
 533
 534         need_close_curly =
 535                 case ctx of
 536                         [] -> False
 537                         (i:_) -> case i of
 538                                     NoLayout -> False
 539                                     Layout n -> col <# n
 540         need_semi_colon =
 541                 case ctx of
 542                         [] -> False
 543                         (i:_) -> case i of
 544                                     NoLayout -> False
 545                                     Layout n -> col ==# n
 546
 547
 548 lexToken :: (Token -> P a) -> Int# -> P a
 549 lexToken cont glaexts buf =
 550  -- trace "lexToken" $
 551   case currentChar# buf of
 552
 553     -- special symbols ----------------------------------------------------
 554     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 555                 -> cont IToubxparen (setCurrentPos# buf 2#)
 556          | otherwise
 557                 -> cont IToparen (incLexeme buf)
 558
 559     ')'# -> cont ITcparen    (incLexeme buf)
 560     '['# -> cont ITobrack    (incLexeme buf)
 561     ']'# -> cont ITcbrack    (incLexeme buf)
 562     ','# -> cont ITcomma     (incLexeme buf)
 563     ';'# -> cont ITsemi      (incLexeme buf)
 564     '}'# -> \ s@PState{context = ctx} ->
 565             case ctx of
 566                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 567                 _        -> lexError "too many '}'s" buf s
 568     '|'# -> case lookAhead# buf 1# of
 569                  '}'#  | flag glaexts -> cont ITccurlybar
 570                                               (setCurrentPos# buf 2#)
 571                  _                    -> lex_sym cont (incLexeme buf)
 572
 573
 574     '#'# -> case lookAhead# buf 1# of
 575                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 576                 '-'# -> case lookAhead# buf 2# of
 577                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 578                            _    -> lex_sym cont (incLexeme buf)
 579                 _    -> lex_sym cont (incLexeme buf)
 580
 581     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 582                 -> lex_cstring cont (setCurrentPos# buf 2#)
 583          | otherwise
 584                 -> cont ITbackquote (incLexeme buf)
 585
 586     '{'# ->     -- look for "{-##" special iface pragma
 587             case lookAhead# buf 1# of
 588            '|'# | flag glaexts
 589                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 590            '-'# -> case lookAhead# buf 2# of
 591                     '#'# -> case lookAhead# buf 3# of
 592                                 '#'# ->
 593                                    let (lexeme, buf')
 594                                           = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in
 595                                             cont (ITpragma lexeme) buf'
 596                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 597                     _    -> cont ITocurly (incLexeme buf)
 598            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 599
 600     -- strings/characters -------------------------------------------------
 601     '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
 602     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 603
 604     -- strictness and cpr pragmas and __scc treated specially.
 605     '_'# | flag glaexts ->
 606          case lookAhead# buf 1# of
 607            '_'# -> case lookAhead# buf 2# of
 608                     'S'# ->
 609                         lex_demand cont (stepOnUntil (not . isSpace)
 610                                         (stepOnBy# buf 3#)) -- past __S
 611                     'M'# ->
 612                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 613
 614                     's'# ->
 615                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 616                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 617                                Nothing   -> lex_id cont glaexts buf
 618                     _ -> lex_id cont glaexts buf
 619            _    -> lex_id cont glaexts buf
 620
 621         -- Hexadecimal and octal constants
 622     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 623                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 624          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 625                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 626         where ch   = lookAhead# buf 1#
 627               ch2  = lookAhead# buf 2#
 628               buf' = setCurrentPos# buf 2#
 629
 630     '\NUL'# ->
 631             if bufferExhausted (stepOn buf) then
 632                cont ITeof buf
 633             else
 634                trace "lexIface: misplaced NUL?" $
 635                cont (ITunknown "\NUL") (stepOn buf)
 636
 637     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 638             lex_ip cont (incLexeme buf)
 639     c | is_digit  c -> lex_num cont glaexts 0 buf
 640       | is_symbol c -> lex_sym cont buf
 641       | is_upper  c -> lex_con cont glaexts buf
 642       | is_ident  c -> lex_id  cont glaexts buf
 643       | otherwise   -> lexError "illegal character" buf
 644
 645 -- Int# is unlifted, and therefore faster than Bool for flags.
 646 {-# INLINE flag #-}
 647 flag :: Int# -> Bool
 648 flag 0# = False
 649 flag _  = True
 650
 651 -------------------------------------------------------------------------------
 652 -- Pragmas
 653
 654 lex_prag cont buf
 655   = case expandWhile# is_space buf of { buf1 ->
 656     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 657     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 658     case lookupUFM pragmaKeywordsFM lexeme of
 659         Just kw -> cont kw (mergeLexemes buf buf2)
 660         Nothing -> panic "lex_prag"
 661   }}
 662
 663 -------------------------------------------------------------------------------
 664 -- Strings & Chars
 665
 666 lex_string cont glaexts s buf
 667   = case currentChar# buf of
 668         '"'#{-"-} ->
 669            let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in
 670            case currentChar# buf' of
 671                 '#'# | flag glaexts -> if all (<= 0xFF) s
 672                     then cont (ITprimstring s') (incLexeme buf')
 673                     else lexError "primitive string literal must contain only characters <= '\xFF'" buf'
 674                 _                   -> cont (ITstring s') buf'
 675
 676         -- ignore \& in a string, deal with string gaps
 677         '\\'# | next_ch `eqChar#` '&'#
 678                 -> lex_string cont glaexts s buf'
 679               | is_space next_ch
 680                 -> lex_stringgap cont glaexts s (incLexeme buf)
 681
 682             where next_ch = lookAhead# buf 1#
 683                   buf' = setCurrentPos# buf 2#
 684
 685         _ -> lex_char (lex_next_string cont s) glaexts buf
 686
 687 lex_stringgap cont glaexts s buf
 688   = let buf' = incLexeme buf in
 689     case currentChar# buf of
 690         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 691                   st{loc = incSrcLine loc}
 692         '\\'# -> lex_string cont glaexts s buf'
 693         c | is_space c -> lex_stringgap cont glaexts s buf'
 694         other -> charError buf'
 695
 696 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 697
 698 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 699 lex_char cont glaexts buf
 700   = case currentChar# buf of
 701         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 702         c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
 703         other -> charError buf
 704
 705 char_end cont glaexts c buf
 706   = case currentChar# buf of
 707         '\''# -> let buf' = incLexeme buf in
 708                  case currentChar# buf' of
 709                         '#'# | flag glaexts
 710                                 -> cont (ITprimchar c) (incLexeme buf')
 711                         _       -> cont (ITchar c) buf'
 712         _     -> charError buf
 713
 714 lex_escape cont buf
 715   = let buf' = incLexeme buf in
 716     case currentChar# buf of
 717         'a'#       -> cont (ord '\a') buf'
 718         'b'#       -> cont (ord '\b') buf'
 719         'f'#       -> cont (ord '\f') buf'
 720         'n'#       -> cont (ord '\n') buf'
 721         'r'#       -> cont (ord '\r') buf'
 722         't'#       -> cont (ord '\t') buf'
 723         'v'#       -> cont (ord '\v') buf'
 724         '\\'#      -> cont (ord '\\') buf'
 725         '"'#       -> cont (ord '\"') buf'
 726         '\''#      -> cont (ord '\'') buf'
 727         '^'#       -> let c = currentChar# buf' in
 728                       if c `geChar#` '@'# && c `leChar#` '_'#
 729                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 730                         else charError buf'
 731
 732         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 733         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 734         x | is_digit x
 735                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 736
 737         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 738                                        Just buf2 <- [prefixMatch buf p] ] of
 739                             (c,buf2):_ -> cont (ord c) buf2
 740                             [] -> charError buf'
 741
 742 after_charnum cont i buf
 743   = if i >= 0 && i <= 0x7FFFFFFF
 744         then cont (fromInteger i) buf
 745         else charError buf
 746
 747 readNum cont buf is_digit base conv = read buf 0
 748   where read buf i
 749           = case currentChar# buf of { c ->
 750             if is_digit c
 751                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 752                 else cont i buf
 753             }
 754
 755 is_hexdigit c
 756         =  is_digit c
 757         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 758         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 759
 760 hex c | is_digit c = ord# c -# ord# '0'#
 761       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 762 oct_or_dec c = ord# c -# ord# '0'#
 763
 764 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 765
 766 to_lower c
 767   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 768         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 769   | otherwise = c
 770
 771 charError buf = lexError "error in character literal" buf
 772
 773 silly_escape_chars = [
 774         ("NUL", '\NUL'),
 775         ("SOH", '\SOH'),
 776         ("STX", '\STX'),
 777         ("ETX", '\ETX'),
 778         ("EOT", '\EOT'),
 779         ("ENQ", '\ENQ'),
 780         ("ACK", '\ACK'),
 781         ("BEL", '\BEL'),
 782         ("BS", '\BS'),
 783         ("HT", '\HT'),
 784         ("LF", '\LF'),
 785         ("VT", '\VT'),
 786         ("FF", '\FF'),
 787         ("CR", '\CR'),
 788         ("SO", '\SO'),
 789         ("SI", '\SI'),
 790         ("DLE", '\DLE'),
 791         ("DC1", '\DC1'),
 792         ("DC2", '\DC2'),
 793         ("DC3", '\DC3'),
 794         ("DC4", '\DC4'),
 795         ("NAK", '\NAK'),
 796         ("SYN", '\SYN'),
 797         ("ETB", '\ETB'),
 798         ("CAN", '\CAN'),
 799         ("EM", '\EM'),
 800         ("SUB", '\SUB'),
 801         ("ESC", '\ESC'),
 802         ("FS", '\FS'),
 803         ("GS", '\GS'),
 804         ("RS", '\RS'),
 805         ("US", '\US'),
 806         ("SP", '\SP'),
 807         ("DEL", '\DEL')
 808         ]
 809
 810 -------------------------------------------------------------------------------
 811
 812 lex_demand cont buf =
 813  case read_em [] buf of { (ls,buf') ->
 814  case currentChar# buf' of
 815    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 816    _    -> cont (ITstrict (ls, False)) buf'
 817  }
 818  where
 819    -- code snatched from Demand.lhs
 820   read_em acc buf =
 821    case currentChar# buf of
 822     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 823     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 824     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 825     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 826     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 827     ')'# -> (reverse acc, stepOn buf)
 828     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 829     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 830     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 831     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 832     _    -> (reverse acc, buf)
 833
 834   do_unpack new_or_data wrapper_unpacks acc buf
 835    = case read_em [] buf of
 836       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 837
 838
 839 ------------------
 840 lex_scc cont buf =
 841  case currentChar# buf of
 842   'C'# -> cont ITsccAllCafs (incLexeme buf)
 843   other -> cont ITscc buf
 844
 845 -----------------------------------------------------------------------------
 846 -- Numbers
 847
 848 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 849 lex_num cont glaexts acc buf =
 850  case scanNumLit acc buf of
 851      (acc',buf') ->
 852        case currentChar# buf' of
 853          '.'# | is_digit (lookAhead# buf' 1#) ->
 854              -- this case is not optimised at all, as the
 855              -- presence of floating point numbers in interface
 856              -- files is not that common. (ToDo)
 857             case expandWhile# is_digit (incLexeme buf') of
 858               buf2 -> -- points to first non digit char
 859
 860                 let l = case currentChar# buf2 of
 861                           'E'# -> do_exponent
 862                           'e'# -> do_exponent
 863                           _ -> buf2
 864
 865                     do_exponent
 866                         = let buf3 = incLexeme buf2 in
 867                           case currentChar# buf3 of
 868                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 869                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 870                                 x | is_digit x -> expandWhile# is_digit buf3
 871                                 _ -> buf2
 872
 873                     v = readRational__ (lexemeToString l)
 874
 875                 in case currentChar# l of -- glasgow exts only
 876                       '#'# | flag glaexts -> let l' = incLexeme l in
 877                               case currentChar# l' of
 878                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 879                                 _    -> cont (ITprimfloat  v) l'
 880                       _ -> cont (ITrational v) l
 881
 882          _ -> after_lexnum cont glaexts acc' buf'
 883
 884 after_lexnum cont glaexts i buf
 885   = case currentChar# buf of
 886         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 887         _    -> cont (ITinteger i) buf
 888
 889 -----------------------------------------------------------------------------
 890 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 891
 892 -- we lexemeToFastString on the bit between the ``''s, but include the
 893 -- quotes in the full lexeme.
 894
 895 lex_cstring cont buf =
 896  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 897    Just buf' -> cont (ITlitlit (lexemeToFastString
 898                                 (setCurrentPos# buf' (negateInt# 2#))))
 899                    (mergeLexemes buf buf')
 900    Nothing   -> lexError "unterminated ``" buf
 901
 902 -----------------------------------------------------------------------------
 903 -- identifiers, symbols etc.
 904
 905 lex_ip cont buf =
 906  case expandWhile# is_ident buf of
 907    buf' -> cont (ITipvarid lexeme) buf'
 908            where lexeme = lexemeToFastString buf'
 909
 910 lex_id cont glaexts buf =
 911  let buf1 = expandWhile# is_ident buf in
 912  seq buf1 $
 913
 914  case (if flag glaexts
 915         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 916         else buf1)                              of { buf' ->
 917
 918  let lexeme  = lexemeToFastString buf' in
 919
 920  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 921         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 922                           cont kwd_token buf';
 923         Nothing        ->
 924
 925  let var_token = cont (ITvarid lexeme) buf' in
 926
 927  if not (flag glaexts)
 928    then var_token
 929    else
 930
 931  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 932         Just kwd_token -> cont kwd_token buf';
 933         Nothing        -> var_token
 934
 935  }}}
 936
 937 lex_sym cont buf =
 938  -- trace "lex_sym" $
 939  case expandWhile# is_symbol buf of
 940    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 941                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 942                                   cont kwd_token buf' ;
 943                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 944                                   cont (mk_var_token lexeme) buf'
 945            }
 946         where lexeme = lexemeToFastString buf'
 947
 948
 949 lex_con cont glaexts buf =
 950  -- trace ("con: "{-++unpackFS lexeme-}) $
 951  case expandWhile# is_ident buf          of { buf1 ->
 952  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 953
 954  case currentChar# buf' of
 955      '.'# -> munch
 956      _    -> just_a_conid
 957
 958    where
 959     just_a_conid = cont (ITconid lexeme) buf'
 960     lexeme = lexemeToFastString buf'
 961     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 962  }}
 963
 964 lex_qid cont glaexts mod buf just_a_conid =
 965  -- trace ("quid: "{-++unpackFS lexeme-}) $
 966  case currentChar# buf of
 967   '['# ->       -- Special case for []
 968     case lookAhead# buf 1# of
 969      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 970      _    -> just_a_conid
 971
 972   '('# ->  -- Special case for (,,,)
 973            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 974     case lookAhead# buf 1# of
 975      '#'# | flag glaexts -> case lookAhead# buf 2# of
 976                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 977                                 just_a_conid
 978                 _    -> just_a_conid
 979      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 980      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 981      _    -> just_a_conid
 982
 983   '-'# -> case lookAhead# buf 1# of
 984             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 985             _    -> lex_id3 cont glaexts mod buf just_a_conid
 986   _    -> lex_id3 cont glaexts mod buf just_a_conid
 987
 988 lex_id3 cont glaexts mod buf just_a_conid
 989   | is_symbol (currentChar# buf) =
 990      let
 991         start_new_lexeme = stepOverLexeme buf
 992      in
 993      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 994      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 995      let
 996        lexeme  = lexemeToFastString buf'
 997         -- real lexeme is M.<sym>
 998        new_buf = mergeLexemes buf buf'
 999      in
1000      cont (mk_qvar_token mod lexeme) new_buf
1001         -- wrong, but arguably morally right: M... is now a qvarsym
1002      }
1003
1004   | otherwise   =
1005      let
1006         start_new_lexeme = stepOverLexeme buf
1007      in
1008      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1009      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1010      if emptyLexeme buf1
1011             then just_a_conid
1012             else
1013
1014      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1015
1016      let
1017       lexeme      = lexemeToFastString buf'
1018       new_buf     = mergeLexemes buf buf'
1019       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1020      in
1021      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1022             Nothing          -> is_a_qvarid ;
1023
1024             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1025                            -> is_a_qvarid          --  recognised as keywords here.
1026                            | otherwise
1027                            -> just_a_conid         -- avoid M.where etc.
1028      }}}
1029
1030 slurp_trailing_hashes buf glaexts
1031   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1032   | otherwise    = buf
1033
1034
1035 mk_var_token pk_str
1036   | is_upper f          = ITconid pk_str
1037   | is_ident f          = ITvarid pk_str
1038   | f `eqChar#` ':'#    = ITconsym pk_str
1039   | otherwise           = ITvarsym pk_str
1040   where
1041       (C# f) = _HEAD_ pk_str
1042       -- tl     = _TAIL_ pk_str
1043
1044 mk_qvar_token m token =
1045 -- trace ("mk_qvar ") $
1046  case mk_var_token token of
1047    ITconid n  -> ITqconid  (m,n)
1048    ITvarid n  -> ITqvarid  (m,n)
1049    ITconsym n -> ITqconsym (m,n)
1050    ITvarsym n -> ITqvarsym (m,n)
1051    _          -> ITunknown (show token)
1052 \end{code}
1053
1054 ----------------------------------------------------------------------------
1055 Horrible stuff for dealing with M.(,,,)
1056
1057 \begin{code}
1058 lex_tuple cont mod buf back_off =
1059   go 2 buf
1060   where
1061    go n buf =
1062     case currentChar# buf of
1063       ','# -> go (n+1) (stepOn buf)
1064       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1065       _    -> back_off
1066
1067 lex_ubx_tuple cont mod buf back_off =
1068   go 2 buf
1069   where
1070    go n buf =
1071     case currentChar# buf of
1072       ','# -> go (n+1) (stepOn buf)
1073       '#'# -> case lookAhead# buf 1# of
1074                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1075                                  (stepOnBy# buf 2#)
1076                 _    -> back_off
1077       _    -> back_off
1078 \end{code}
1079
1080 -----------------------------------------------------------------------------
1081 doDiscard rips along really fast, looking for a '##-}',
1082 indicating the end of the pragma we're skipping
1083
1084 \begin{code}
1085 doDiscard inStr buf =
1086  case currentChar# buf of
1087    '#'# | inStr ==# 0# ->
1088        case lookAhead# buf 1# of { '#'# ->
1089        case lookAhead# buf 2# of { '-'# ->
1090        case lookAhead# buf 3# of { '}'# ->
1091            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1092         _    -> doDiscard inStr (incLexeme buf) };
1093         _    -> doDiscard inStr (incLexeme buf) };
1094         _    -> doDiscard inStr (incLexeme buf) }
1095
1096    '"'# ->
1097        let
1098         odd_slashes buf flg i# =
1099           case lookAhead# buf i# of
1100            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1101            _     -> flg
1102
1103         not_inStr = if inStr ==# 0# then 1# else 0#
1104        in
1105        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1106          '\\'# -> -- escaping something..
1107            if odd_slashes buf True (negateInt# 2#)
1108                 then  -- odd number of slashes, " is escaped.
1109                       doDiscard inStr (incLexeme buf)
1110                 else  -- even number of slashes, \ is escaped.
1111                       doDiscard not_inStr (incLexeme buf)
1112          _ -> doDiscard not_inStr (incLexeme buf)
1113
1114    '\''# | inStr ==# 0# ->
1115         case lookAhead# buf 1# of { '"'# ->
1116         case lookAhead# buf 2# of { '\''# ->
1117            doDiscard inStr (setCurrentPos# buf 3#);
1118         _ -> doDiscard inStr (incLexeme buf) };
1119         _ -> doDiscard inStr (incLexeme buf) }
1120
1121    _ -> doDiscard inStr (incLexeme buf)
1122
1123 \end{code}
1124
1125 -----------------------------------------------------------------------------
1126
1127 \begin{code}
1128 data LayoutContext
1129   = NoLayout
1130   | Layout Int#
1131
1132 data ParseResult a
1133   = POk PState a
1134   | PFailed Message
1135
1136 data PState = PState {
1137         loc           :: SrcLoc,
1138         glasgow_exts  :: Int#,
1139         bol           :: Int#,
1140         atbol         :: Int#,
1141         context       :: [LayoutContext]
1142      }
1143
1144 type P a = StringBuffer         -- Input string
1145         -> PState
1146         -> ParseResult a
1147
1148 returnP   :: a -> P a
1149 returnP a buf s = POk s a
1150
1151 thenP      :: P a -> (a -> P b) -> P b
1152 m `thenP` k = \ buf s ->
1153         case m buf s of
1154                 POk s1 a -> k a buf s1
1155                 PFailed err  -> PFailed err
1156
1157 thenP_     :: P a -> P b -> P b
1158 m `thenP_` k = m `thenP` \_ -> k
1159
1160 mapP :: (a -> P b) -> [a] -> P [b]
1161 mapP f [] = returnP []
1162 mapP f (a:as) =
1163      f a `thenP` \b ->
1164      mapP f as `thenP` \bs ->
1165      returnP (b:bs)
1166
1167 failP :: String -> P a
1168 failP msg buf s = PFailed (text msg)
1169
1170 failMsgP :: Message -> P a
1171 failMsgP msg buf s = PFailed msg
1172
1173 lexError :: String -> P a
1174 lexError str buf s@PState{ loc = loc }
1175   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1176
1177 getSrcLocP :: P SrcLoc
1178 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1179
1180 getSrcFile :: P FAST_STRING
1181 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1182
1183 getContext :: P [LayoutContext]
1184 getContext buf s@(PState{ context = ctx }) = POk s ctx
1185
1186 pushContext :: LayoutContext -> P ()
1187 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1188
1189 {-
1190
1191 This special case in layoutOn is to handle layout contexts with are
1192 indented the same or less than the current context.  This is illegal
1193 according to the Haskell spec, so we have to arrange to close the
1194 current context.  eg.
1195
1196 class Foo a where
1197 class Bar a
1198
1199 after the first 'where', the sequence of events is:
1200
1201         - layout system inserts a ';' (column 0)
1202         - parser begins a new context at column 0
1203         - parser shifts ';' (legal empty declaration)
1204         - parser sees 'class': parse error (we're still in the inner context)
1205
1206 trouble is, by the time we know we need a new context, the lexer has
1207 already generated the ';'.  Hacky solution is as follows: since we
1208 know the column of the next token (it's the column number of the new
1209 context), we set the ACTUAL column number of the new context to this
1210 numer plus one.  Hence the next time the lexer is called, a '}' will
1211 be generated to close the new context straight away.  Furthermore, we
1212 have to set the atbol flag so that the ';' that the parser shifted as
1213 part of the new context is re-generated.
1214
1215 when the new context is *less* indented than the current one:
1216
1217 f = f where g = g where
1218 h = h
1219
1220         - current context: column 12.
1221         - on seeing 'h' (column 0), the layout system inserts '}'
1222         - parser starts a new context, column 0
1223         - parser sees '}', uses it to close new context
1224         - we still need to insert another '}' followed by a ';',
1225           hence the atbol trick.
1226
1227 There's also a special hack in here to deal with
1228
1229         do
1230            ....
1231            e $ do
1232            blah
1233
1234 i.e. the inner context is at the same indentation level as the outer
1235 context.  This is strictly illegal according to Haskell 98, but
1236 there's a lot of existing code using this style and it doesn't make
1237 any sense to disallow it, since empty 'do' lists don't make sense.
1238 -}
1239
1240 layoutOn :: Bool -> P ()
1241 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1242     let offset = lexemeIndex buf -# bol in
1243     case ctx of
1244         Layout prev_off : _
1245            | if strict then prev_off >=# offset else prev_off ># offset ->
1246                 --trace ("layout on, column: " ++  show (I# offset)) $
1247                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1248         other ->
1249                 --trace ("layout on, column: " ++  show (I# offset)) $
1250                 POk s{ context = Layout offset : ctx } ()
1251
1252 layoutOff :: P ()
1253 layoutOff buf s@(PState{ context = ctx }) =
1254     POk s{ context = NoLayout:ctx } ()
1255
1256 popContext :: P ()
1257 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1258   case ctx of
1259         (_:tl) -> POk s{ context = tl } ()
1260         []     -> PFailed (srcParseErr buf loc)
1261
1262 {-
1263  Note that if the name of the file we're processing ends
1264  with `hi-boot', we accept it on faith as having the right
1265  version. This is done so that .hi-boot files that comes
1266  with hsc don't have to be updated before every release,
1267  *and* it allows us to share .hi-boot files with versions
1268  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1269
1270  If the version number is 0, the checking is also turned off.
1271  (needed to deal with GHC.hi only!)
1272
1273  Once we can assume we're compiling with a version of ghc that
1274  supports interface file checking, we can drop the special
1275  pleading
1276 -}
1277 checkVersion :: Maybe Integer -> P ()
1278 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1279  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1280  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1281 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1282  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1283  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1284
1285 -----------------------------------------------------------------
1286
1287 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1288 ifaceParseErr s l
1289   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1290           text (lexemeToString s), char '\'']
1291
1292 ifaceVersionErr hi_vers l toks
1293   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1294           ptext SLIT("Expected"), int opt_HiVersion,
1295           ptext SLIT("found "), pp_version]
1296     where
1297      pp_version =
1298       case hi_vers of
1299         Nothing -> ptext SLIT("pre ghc-3.02 version")
1300         Just v  -> ptext SLIT("version") <+> integer v
1301
1302 -----------------------------------------------------------------------------
1303
1304 srcParseErr :: StringBuffer -> SrcLoc -> Message
1305 srcParseErr s l
1306   = hcat [ppr l,
1307           if null token
1308              then ptext SLIT(": parse error (possibly incorrect indentation)")
1309              else hcat [ptext SLIT(": parse error on input "),
1310                         char '`', text token, char '\'']
1311     ]
1312   where
1313         token = lexemeToString s
1314
1315 \end{code}