ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr, srcParseErr,
  24
  25         -- Monad for parser
  26         Token(..), lexer, ParseResult(..), PState(..),
  27         checkVersion,
  28         StringBuffer,
  29
  30         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  31         getSrcLocP, getSrcFile,
  32         layoutOn, layoutOff, pushContext, popContext
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import Char             ( ord, isSpace, toUpper )
  38 import List             ( isSuffixOf )
  39
  40 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  41 import Name             ( isLowerISO, isUpperISO )
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  48                           replaceSrcLine, mkSrcLoc )
  49
  50 import Maybes           ( MaybeErr(..) )
  51 import ErrUtils         ( Message )
  52 import Outputable
  53
  54 import FastString
  55 import StringBuffer
  56 import GlaExts
  57 import ST               ( runST )
  58
  59 #if __GLASGOW_HASKELL__ >= 303
  60 import Bits
  61 import Word
  62 #endif
  63
  64 import Char             ( chr )
  65 import Addr
  66 import PrelRead         ( readRational__ ) -- Glasgow non-std
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Data types}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 The token data type, fairly un-interesting except from one
  76 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  77 strictness, unfolding etc).
  78
  79 The Idea/Observation here is that the renamer needs to scan through
  80 all of an interface file before it can continue. But only a fraction
  81 of the information contained in the file turns out to be useful, so
  82 delaying as much as possible of the scanning and parsing of an
  83 interface file Makes Sense (Heap profiles of the compiler
  84 show a reduction in heap usage by at least a factor of two,
  85 post-renamer).
  86
  87 Hence, the interface file lexer spots when value declarations are
  88 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  89 for the type and any other id info for that binding (unfolding, strictness
  90 etc). These constructors are applied to the result of lexing these sub-chunks.
  91
  92 The lexing of the type and id info is all done lazily, of course, so
  93 the scanning (and subsequent parsing) will be done *only* on the ids the
  94 renamer finds out that it is interested in. The rest will just be junked.
  95 Laziness, you know it makes sense :-)
  96
  97 \begin{code}
  98 data Token
  99   = ITas                        -- Haskell keywords
 100   | ITcase
 101   | ITclass
 102   | ITdata
 103   | ITdefault
 104   | ITderiving
 105   | ITdo
 106   | ITelse
 107   | IThiding
 108   | ITif
 109   | ITimport
 110   | ITin
 111   | ITinfix
 112   | ITinfixl
 113   | ITinfixr
 114   | ITinstance
 115   | ITlet
 116   | ITmodule
 117   | ITnewtype
 118   | ITof
 119   | ITqualified
 120   | ITthen
 121   | ITtype
 122   | ITwhere
 123   | ITscc
 124
 125   | ITforall                    -- GHC extension keywords
 126   | ITforeign
 127   | ITexport
 128   | ITlabel
 129   | ITdynamic
 130   | ITunsafe
 131   | ITwith
 132   | ITstdcallconv
 133   | ITccallconv
 134
 135   | ITinterface                 -- interface keywords
 136   | IT__export
 137   | ITdepends
 138   | IT__forall
 139   | ITletrec
 140   | ITcoerce
 141   | ITinlineMe
 142   | ITinlineCall
 143   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 144   | ITdefaultbranch
 145   | ITbottom
 146   | ITinteger_lit
 147   | ITfloat_lit
 148   | ITword_lit
 149   | ITword64_lit
 150   | ITint64_lit
 151   | ITrational_lit
 152   | ITaddr_lit
 153   | ITlit_lit
 154   | ITstring_lit
 155   | ITtypeapp
 156   | ITusage
 157   | ITfuall
 158   | ITarity
 159   | ITspecialise
 160   | ITnocaf
 161   | ITunfold InlinePragInfo
 162   | ITstrict ([Demand], Bool)
 163   | ITrules
 164   | ITcprinfo
 165   | ITdeprecated
 166   | IT__scc
 167   | ITsccAllCafs
 168
 169   | ITspecialise_prag           -- Pragmas
 170   | ITsource_prag
 171   | ITinline_prag
 172   | ITnoinline_prag
 173   | ITrules_prag
 174   | ITdeprecated_prag
 175   | ITline_prag
 176   | ITclose_prag
 177
 178   | ITdotdot                    -- reserved symbols
 179   | ITdcolon
 180   | ITequal
 181   | ITlam
 182   | ITvbar
 183   | ITlarrow
 184   | ITrarrow
 185   | ITat
 186   | ITtilde
 187   | ITdarrow
 188   | ITminus
 189   | ITbang
 190   | ITdot
 191
 192   | ITbiglam                    -- GHC-extension symbols
 193
 194   | ITocurly                    -- special symbols
 195   | ITccurly
 196   | ITvccurly
 197   | ITobrack
 198   | ITcbrack
 199   | IToparen
 200   | ITcparen
 201   | IToubxparen
 202   | ITcubxparen
 203   | ITsemi
 204   | ITcomma
 205   | ITunderscore
 206   | ITbackquote
 207
 208   | ITvarid   FAST_STRING       -- identifiers
 209   | ITconid   FAST_STRING
 210   | ITvarsym  FAST_STRING
 211   | ITconsym  FAST_STRING
 212   | ITqvarid  (FAST_STRING,FAST_STRING)
 213   | ITqconid  (FAST_STRING,FAST_STRING)
 214   | ITqvarsym (FAST_STRING,FAST_STRING)
 215   | ITqconsym (FAST_STRING,FAST_STRING)
 216
 217   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 218
 219   | ITpragma StringBuffer
 220
 221   | ITchar       Char
 222   | ITstring     FAST_STRING
 223   | ITinteger    Integer
 224   | ITrational   Rational
 225
 226   | ITprimchar   Char
 227   | ITprimstring FAST_STRING
 228   | ITprimint    Integer
 229   | ITprimfloat  Rational
 230   | ITprimdouble Rational
 231   | ITlitlit     FAST_STRING
 232
 233   | ITunknown String            -- Used when the lexer can't make sense of it
 234   | ITeof                       -- end of file token
 235   deriving Show -- debugging
 236 \end{code}
 237
 238 -----------------------------------------------------------------------------
 239 Keyword Lists
 240
 241 \begin{code}
 242 pragmaKeywordsFM = listToUFM $
 243       map (\ (x,y) -> (_PK_ x,y))
 244        [( "SPECIALISE", ITspecialise_prag ),
 245         ( "SPECIALIZE", ITspecialise_prag ),
 246         ( "SOURCE",     ITsource_prag ),
 247         ( "INLINE",     ITinline_prag ),
 248         ( "NOINLINE",   ITnoinline_prag ),
 249         ( "NOTINLINE",  ITnoinline_prag ),
 250         ( "LINE",       ITline_prag ),
 251         ( "RULES",      ITrules_prag ),
 252         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 253         ( "DEPRECATED", ITdeprecated_prag )
 254         ]
 255
 256 haskellKeywordsFM = listToUFM $
 257       map (\ (x,y) -> (_PK_ x,y))
 258        [( "_",          ITunderscore ),
 259         ( "as",         ITas ),
 260         ( "case",       ITcase ),
 261         ( "class",      ITclass ),
 262         ( "data",       ITdata ),
 263         ( "default",    ITdefault ),
 264         ( "deriving",   ITderiving ),
 265         ( "do",         ITdo ),
 266         ( "else",       ITelse ),
 267         ( "hiding",     IThiding ),
 268         ( "if",         ITif ),
 269         ( "import",     ITimport ),
 270         ( "in",         ITin ),
 271         ( "infix",      ITinfix ),
 272         ( "infixl",     ITinfixl ),
 273         ( "infixr",     ITinfixr ),
 274         ( "instance",   ITinstance ),
 275         ( "let",        ITlet ),
 276         ( "module",     ITmodule ),
 277         ( "newtype",    ITnewtype ),
 278         ( "of",         ITof ),
 279         ( "qualified",  ITqualified ),
 280         ( "then",       ITthen ),
 281         ( "type",       ITtype ),
 282         ( "where",      ITwhere ),
 283         ( "_scc_",      ITscc )
 284      ]
 285
 286 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 287 ghcExtensionKeywordsFM = listToUFM $
 288         map (\ (x,y) -> (_PK_ x,y))
 289      [  ( "forall",     ITforall ),
 290         ( "foreign",    ITforeign ),
 291         ( "export",     ITexport ),
 292         ( "label",      ITlabel ),
 293         ( "dynamic",    ITdynamic ),
 294         ( "unsafe",     ITunsafe ),
 295         ( "with",       ITwith ),
 296         ( "stdcall",    ITstdcallconv),
 297         ( "ccall",      ITccallconv),
 298         ("_ccall_",     ITccall (False, False, False)),
 299         ("_ccall_GC_",  ITccall (False, False, True)),
 300         ("_casm_",      ITccall (False, True,  False)),
 301         ("_casm_GC_",   ITccall (False, True,  True)),
 302
 303         -- interface keywords
 304         ("__interface",         ITinterface),
 305         ("__export",            IT__export),
 306         ("__depends",           ITdepends),
 307         ("__forall",            IT__forall),
 308         ("__letrec",            ITletrec),
 309         ("__coerce",            ITcoerce),
 310         ("__inline_me",         ITinlineMe),
 311         ("__inline_call",       ITinlineCall),
 312         ("__depends",           ITdepends),
 313         ("__DEFAULT",           ITdefaultbranch),
 314         ("__bot",               ITbottom),
 315         ("__integer",           ITinteger_lit),
 316         ("__float",             ITfloat_lit),
 317         ("__int64",             ITint64_lit),
 318         ("__word",              ITword_lit),
 319         ("__word64",            ITword64_lit),
 320         ("__rational",          ITrational_lit),
 321         ("__addr",              ITaddr_lit),
 322         ("__litlit",            ITlit_lit),
 323         ("__string",            ITstring_lit),
 324         ("__a",                 ITtypeapp),
 325         ("__u",                 ITusage),
 326         ("__fuall",             ITfuall),
 327         ("__A",                 ITarity),
 328         ("__P",                 ITspecialise),
 329         ("__C",                 ITnocaf),
 330         ("__R",                 ITrules),
 331         ("__D",                 ITdeprecated),
 332         ("__U",                 ITunfold NoInlinePragInfo),
 333
 334         ("__ccall",             ITccall (False, False, False)),
 335         ("__ccall_GC",          ITccall (False, False, True)),
 336         ("__dyn_ccall",         ITccall (True,  False, False)),
 337         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 338         ("__casm",              ITccall (False, True,  False)),
 339         ("__dyn_casm",          ITccall (True,  True,  False)),
 340         ("__casm_GC",           ITccall (False, True,  True)),
 341         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 342
 343         ("/\\",                 ITbiglam)
 344      ]
 345
 346
 347 haskellKeySymsFM = listToUFM $
 348         map (\ (x,y) -> (_PK_ x,y))
 349       [ ("..",          ITdotdot)
 350        ,("::",          ITdcolon)
 351        ,("=",           ITequal)
 352        ,("\\",          ITlam)
 353        ,("|",           ITvbar)
 354        ,("<-",          ITlarrow)
 355        ,("->",          ITrarrow)
 356        ,("@",           ITat)
 357        ,("~",           ITtilde)
 358        ,("=>",          ITdarrow)
 359        ,("-",           ITminus)
 360        ,("!",           ITbang)
 361        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 362        ]
 363 \end{code}
 364
 365 -----------------------------------------------------------------------------
 366 The lexical analyser
 367
 368 Lexer state:
 369
 370         - (glaexts) lexing an interface file or -fglasgow-exts
 371         - (bol)   pointer to beginning of line (for column calculations)
 372         - (buf)   pointer to beginning of token
 373         - (buf)   pointer to current char
 374         - (atbol) flag indicating whether we're at the beginning of a line
 375
 376 \begin{code}
 377 lexer :: (Token -> P a) -> P a
 378 lexer cont buf s@(PState{
 379                     loc = loc,
 380                     glasgow_exts = glaexts,
 381                     bol = bol,
 382                     atbol = atbol,
 383                     context = ctx
 384                 })
 385
 386         -- first, start a new lexeme and lose all the whitespace
 387   =  _scc_ "Lexer"
 388   tab line bol atbol (stepOverLexeme buf)
 389   where
 390         line = srcLocLine loc
 391
 392         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 393           case currentChar# buf of
 394
 395             '\NUL'# ->
 396                    if bufferExhausted (stepOn buf)
 397                         then cont ITeof buf s'
 398                         else trace "lexer: misplaced NUL?" $
 399                              tab y bol atbol (stepOn buf)
 400
 401             '\n'# -> let buf' = stepOn buf
 402                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 403
 404                 -- find comments.  This got harder in Haskell 98.
 405             '-'# ->  let trundle n =
 406                           let next = lookAhead# buf n in
 407                           if next `eqChar#` '-'# then trundle (n +# 1#)
 408                           else if is_symbol next || n <# 2#
 409                                 then is_a_token
 410                                 else case untilChar# (stepOnBy# buf n) '\n'# of
 411                                     { buf' -> tab y bol atbol (stepOverLexeme buf')
 412                                     }
 413                     in trundle 1#
 414
 415                 -- comments and pragmas.  We deal with LINE pragmas here,
 416                 -- and throw out any unrecognised pragmas as comments.  Any
 417                 -- pragmas we know about are dealt with later (after any layout
 418                 -- processing if necessary).
 419
 420             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 421                 if lookAhead# buf 2# `eqChar#` '#'# then
 422                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 423                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 424                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 425                   let lexeme = mkFastString -- ToDo: too slow
 426                                   (map toUpper (lexemeToString buf2)) in
 427                   case lookupUFM pragmaKeywordsFM lexeme of
 428                         Just ITline_prag -> line_prag (lexer cont) buf2 s'
 429                         Just other -> is_a_token
 430                         Nothing -> skip_to_end (stepOnBy# buf 2#)
 431                   }}
 432
 433                 else skip_to_end (stepOnBy# buf 2#)
 434                 where
 435                     skip_to_end buf = nested_comment (lexer cont) buf s'
 436
 437                 -- tabs have been expanded beforehand
 438             c | is_space c -> tab y bol atbol (stepOn buf)
 439               | otherwise  -> is_a_token
 440
 441            where s' = s{loc = replaceSrcLine loc y,
 442                         bol = bol,
 443                        atbol = atbol}
 444
 445                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 446                             | otherwise    = lexToken cont glaexts buf s'
 447
 448 -- {-# LINE .. #-} pragmas.  yeuch.
 449 line_prag cont buf =
 450   case expandWhile# is_space buf                of { buf1 ->
 451   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 452   -- subtract one: the line number refers to the *following* line.
 453   let real_line = line - 1 in
 454   case fromInteger real_line                    of { i@(I# l) ->
 455   case expandWhile# is_space buf2               of { buf3 ->
 456   case currentChar# buf3                        of
 457      '\"'#{-"-} ->
 458         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 459         let file = lexemeToFastString buf4 in
 460         \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
 461         }
 462      other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
 463   }}}}
 464   where
 465         skipToEnd buf = nested_comment cont buf
 466
 467 nested_comment :: P a -> P a
 468 nested_comment cont buf = loop buf
 469  where
 470    loop buf =
 471      case currentChar# buf of
 472         '\NUL'# | bufferExhausted (stepOn buf) ->
 473                 lexError "unterminated `{-'" buf
 474
 475         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 476                 cont (stepOnBy# buf 2#)
 477
 478         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 479               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 480
 481         '\n'# -> \ s@PState{loc=loc} ->
 482                  let buf' = stepOn buf in
 483                  nested_comment cont buf'
 484                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 485                           atbol = 1#}
 486
 487         _   -> nested_comment cont (stepOn buf)
 488
 489 -- When we are lexing the first token of a line, check whether we need to
 490 -- insert virtual semicolons or close braces due to layout.
 491
 492 lexBOL :: (Token -> P a) -> P a
 493 lexBOL cont buf s@(PState{
 494                     loc = loc,
 495                     glasgow_exts = glaexts,
 496                     bol = bol,
 497                     atbol = atbol,
 498                     context = ctx
 499                   }) =
 500         if need_close_curly then
 501                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 502                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 503         else if need_semi_colon then
 504                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 505                 cont ITsemi buf s{atbol = 0#}
 506         else
 507                 lexToken cont glaexts buf s{atbol = 0#}
 508   where
 509         col = currentIndex# buf -# bol
 510
 511         need_close_curly =
 512                 case ctx of
 513                         [] -> False
 514                         (i:_) -> case i of
 515                                     NoLayout -> False
 516                                     Layout n -> col <# n
 517         need_semi_colon =
 518                 case ctx of
 519                         [] -> False
 520                         (i:_) -> case i of
 521                                     NoLayout -> False
 522                                     Layout n -> col ==# n
 523
 524
 525 lexToken :: (Token -> P a) -> Int# -> P a
 526 lexToken cont glaexts buf =
 527  --trace "lexToken" $
 528   case currentChar# buf of
 529
 530     -- special symbols ----------------------------------------------------
 531     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 532                 -> cont IToubxparen (setCurrentPos# buf 2#)
 533          | otherwise
 534                 -> cont IToparen (incLexeme buf)
 535
 536     ')'# -> cont ITcparen    (incLexeme buf)
 537     '['# -> cont ITobrack    (incLexeme buf)
 538     ']'# -> cont ITcbrack    (incLexeme buf)
 539     ','# -> cont ITcomma     (incLexeme buf)
 540     ';'# -> cont ITsemi      (incLexeme buf)
 541
 542     '}'# -> \ s@PState{context = ctx} ->
 543             case ctx of
 544                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 545                 _        -> lexError "too many '}'s" buf s
 546
 547     '#'# -> case lookAhead# buf 1# of
 548                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 549                 '-'# -> case lookAhead# buf 2# of
 550                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 551                            _    -> lex_sym cont (incLexeme buf)
 552                 _    -> lex_sym cont (incLexeme buf)
 553
 554     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 555                 -> lex_cstring cont (setCurrentPos# buf 2#)
 556          | otherwise
 557                 -> cont ITbackquote (incLexeme buf)
 558
 559     '{'# ->     -- look for "{-##" special iface pragma
 560         case lookAhead# buf 1# of
 561            '-'# -> case lookAhead# buf 2# of
 562                     '#'# -> case lookAhead# buf 3# of
 563                                 '#'# ->
 564                                    let (lexeme, buf')
 565                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 566                                    cont (ITpragma lexeme) buf'
 567                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 568                     _    -> cont ITocurly (incLexeme buf)
 569            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 570
 571     -- strings/characters -------------------------------------------------
 572     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 573     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 574
 575     -- strictness and cpr pragmas and __scc treated specially.
 576     '_'# | flag glaexts ->
 577          case lookAhead# buf 1# of
 578            '_'# -> case lookAhead# buf 2# of
 579                     'S'# ->
 580                         lex_demand cont (stepOnUntil (not . isSpace)
 581                                         (stepOnBy# buf 3#)) -- past __S
 582                     'M'# ->
 583                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 584
 585                     's'# ->
 586                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 587                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 588                                Nothing   -> lex_id cont glaexts buf
 589                     _ -> lex_id cont glaexts buf
 590            _    -> lex_id cont glaexts buf
 591
 592         -- Hexadecimal and octal constants
 593     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 594                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 595          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 596                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 597         where ch   = lookAhead# buf 1#
 598               ch2  = lookAhead# buf 2#
 599               buf' = setCurrentPos# buf 2#
 600
 601     '\NUL'# ->
 602             if bufferExhausted (stepOn buf) then
 603                cont ITeof buf
 604             else
 605                trace "lexIface: misplaced NUL?" $
 606                cont (ITunknown "\NUL") (stepOn buf)
 607
 608     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 609             lex_ip cont (incLexeme buf)
 610     c | is_digit  c -> lex_num cont glaexts 0 buf
 611       | is_symbol c -> lex_sym cont buf
 612       | is_upper  c -> lex_con cont glaexts buf
 613       | is_ident  c -> lex_id  cont glaexts buf
 614       | otherwise   -> lexError "illegal character" buf
 615
 616 -- Int# is unlifted, and therefore faster than Bool for flags.
 617 {-# INLINE flag #-}
 618 flag :: Int# -> Bool
 619 flag 0# = False
 620 flag _  = True
 621
 622 -------------------------------------------------------------------------------
 623 -- Pragmas
 624
 625 lex_prag cont buf
 626   = case expandWhile# is_space buf of { buf1 ->
 627     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 628     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 629     case lookupUFM pragmaKeywordsFM lexeme of
 630         Just kw -> cont kw (mergeLexemes buf buf2)
 631         Nothing -> panic "lex_prag"
 632   }}
 633
 634 -------------------------------------------------------------------------------
 635 -- Strings & Chars
 636
 637 lex_string cont glaexts s buf
 638   = case currentChar# buf of
 639         '"'#{-"-} ->
 640            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 641            case currentChar# buf' of
 642                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 643                 _                   -> cont (ITstring s') buf'
 644
 645         -- ignore \& in a string, deal with string gaps
 646         '\\'# | next_ch `eqChar#` '&'#
 647                 -> lex_string cont glaexts s buf'
 648               | is_space next_ch
 649                 -> lex_stringgap cont glaexts s (incLexeme buf)
 650
 651             where next_ch = lookAhead# buf 1#
 652                   buf' = setCurrentPos# buf 2#
 653
 654         _ -> lex_char (lex_next_string cont s) glaexts buf
 655
 656 lex_stringgap cont glaexts s buf
 657   = let buf' = incLexeme buf in
 658     case currentChar# buf of
 659         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 660                   st{loc = incSrcLine loc}
 661         '\\'# -> lex_string cont glaexts s buf'
 662         c | is_space c -> lex_stringgap cont glaexts s buf'
 663         other -> charError buf'
 664
 665 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 666
 667 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 668 lex_char cont glaexts buf
 669   = case currentChar# buf of
 670         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 671         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 672         other -> charError buf
 673
 674 char_end cont glaexts c buf
 675   = case currentChar# buf of
 676         '\''# -> let buf' = incLexeme buf in
 677                  case currentChar# buf' of
 678                         '#'# | flag glaexts
 679                                 -> cont (ITprimchar c) (incLexeme buf')
 680                         _       -> cont (ITchar c) buf'
 681         _     -> charError buf
 682
 683 lex_escape cont buf
 684   = let buf' = incLexeme buf in
 685     case currentChar# buf of
 686         'a'#       -> cont '\a' buf'
 687         'b'#       -> cont '\b' buf'
 688         'f'#       -> cont '\f' buf'
 689         'n'#       -> cont '\n' buf'
 690         'r'#       -> cont '\r' buf'
 691         't'#       -> cont '\t' buf'
 692         'v'#       -> cont '\v' buf'
 693         '\\'#      -> cont '\\' buf'
 694         '"'#       -> cont '\"' buf'
 695         '\''#      -> cont '\'' buf'
 696         '^'#       -> let c = currentChar# buf' in
 697                       if c `geChar#` '@'# && c `leChar#` '_'#
 698                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 699                         else charError buf'
 700
 701         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 702         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 703         x | is_digit x
 704                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 705
 706         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 707                                        Just buf2 <- [prefixMatch buf p] ] of
 708                             (c,buf2):_ -> cont c buf2
 709                             [] -> charError buf'
 710
 711 after_charnum cont i buf
 712   = let int = fromInteger i in
 713     if i >= 0 && i <= 255
 714         then cont (chr int) buf
 715         else charError buf
 716
 717 readNum cont buf is_digit base conv = read buf 0
 718   where read buf i
 719           = case currentChar# buf of { c ->
 720             if is_digit c
 721                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 722                 else cont i buf
 723             }
 724
 725 is_hexdigit c
 726         =  is_digit c
 727         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 728         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 729
 730 hex c | is_digit c = ord# c -# ord# '0'#
 731       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 732 oct_or_dec c = ord# c -# ord# '0'#
 733
 734 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 735
 736 to_lower c
 737   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 738         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 739   | otherwise = c
 740
 741 charError buf = lexError "error in character literal" buf
 742
 743 silly_escape_chars = [
 744         ("NUL", '\NUL'),
 745         ("SOH", '\SOH'),
 746         ("STX", '\STX'),
 747         ("ETX", '\ETX'),
 748         ("EOT", '\EOT'),
 749         ("ENQ", '\ENQ'),
 750         ("ACK", '\ACK'),
 751         ("BEL", '\BEL'),
 752         ("BS", '\BS'),
 753         ("HT", '\HT'),
 754         ("LF", '\LF'),
 755         ("VT", '\VT'),
 756         ("FF", '\FF'),
 757         ("CR", '\CR'),
 758         ("SO", '\SO'),
 759         ("SI", '\SI'),
 760         ("DLE", '\DLE'),
 761         ("DC1", '\DC1'),
 762         ("DC2", '\DC2'),
 763         ("DC3", '\DC3'),
 764         ("DC4", '\DC4'),
 765         ("NAK", '\NAK'),
 766         ("SYN", '\SYN'),
 767         ("ETB", '\ETB'),
 768         ("CAN", '\CAN'),
 769         ("EM", '\EM'),
 770         ("SUB", '\SUB'),
 771         ("ESC", '\ESC'),
 772         ("FS", '\FS'),
 773         ("GS", '\GS'),
 774         ("RS", '\RS'),
 775         ("US", '\US'),
 776         ("SP", '\SP'),
 777         ("DEL", '\DEL')
 778         ]
 779
 780 -------------------------------------------------------------------------------
 781
 782 lex_demand cont buf =
 783  case read_em [] buf of { (ls,buf') ->
 784  case currentChar# buf' of
 785    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 786    _    -> cont (ITstrict (ls, False)) buf'
 787  }
 788  where
 789    -- code snatched from Demand.lhs
 790   read_em acc buf =
 791    case currentChar# buf of
 792     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 793     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 794     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 795     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 796     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 797     ')'# -> (reverse acc, stepOn buf)
 798     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 799     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 800     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 801     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 802     _    -> (reverse acc, buf)
 803
 804   do_unpack new_or_data wrapper_unpacks acc buf
 805    = case read_em [] buf of
 806       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 807
 808
 809 ------------------
 810 lex_scc cont buf =
 811  case currentChar# buf of
 812   'C'# -> cont ITsccAllCafs (incLexeme buf)
 813   other -> cont ITscc buf
 814
 815 -----------------------------------------------------------------------------
 816 -- Numbers
 817
 818 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 819 lex_num cont glaexts acc buf =
 820  case scanNumLit acc buf of
 821      (acc',buf') ->
 822        case currentChar# buf' of
 823          '.'# | is_digit (lookAhead# buf' 1#) ->
 824              -- this case is not optimised at all, as the
 825              -- presence of floating point numbers in interface
 826              -- files is not that common. (ToDo)
 827             case expandWhile# is_digit (incLexeme buf') of
 828               buf2 -> -- points to first non digit char
 829
 830                 let l = case currentChar# buf2 of
 831                           'E'# -> do_exponent
 832                           'e'# -> do_exponent
 833                           _ -> buf2
 834
 835                     do_exponent
 836                         = let buf3 = incLexeme buf2 in
 837                           case currentChar# buf3 of
 838                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 839                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 840                                 x | is_digit x -> expandWhile# is_digit buf3
 841                                 _ -> buf2
 842
 843                     v = readRational__ (lexemeToString l)
 844
 845                 in case currentChar# l of -- glasgow exts only
 846                       '#'# | flag glaexts -> let l' = incLexeme l in
 847                               case currentChar# l' of
 848                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 849                                 _    -> cont (ITprimfloat  v) l'
 850                       _ -> cont (ITrational v) l
 851
 852          _ -> after_lexnum cont glaexts acc' buf'
 853
 854 after_lexnum cont glaexts i buf
 855   = case currentChar# buf of
 856         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 857         _    -> cont (ITinteger i) buf
 858
 859 -----------------------------------------------------------------------------
 860 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 861
 862 -- we lexemeToFastString on the bit between the ``''s, but include the
 863 -- quotes in the full lexeme.
 864
 865 lex_cstring cont buf =
 866  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 867    Just buf' -> cont (ITlitlit (lexemeToFastString
 868                                 (setCurrentPos# buf' (negateInt# 2#))))
 869                    (mergeLexemes buf buf')
 870    Nothing   -> lexError "unterminated ``" buf
 871
 872 ------------------------------------------------------------------------------
 873 -- Character Classes
 874
 875 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 876
 877 {-# INLINE is_ctype #-}
 878 #if __GLASGOW_HASKELL__ >= 303
 879 is_ctype :: Word8 -> Char# -> Bool
 880 is_ctype mask = \c ->
 881    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 882 #else
 883 is_ctype :: Int -> Char# -> Bool
 884 is_ctype (I# mask) = \c ->
 885     let (A# ctype) = ``char_types'' :: Addr
 886         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 887     in
 888         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 889 #endif
 890
 891 is_ident  = is_ctype 1
 892 is_symbol = is_ctype 2
 893 is_any    = is_ctype 4
 894 is_space  = is_ctype 8
 895 is_lower  = is_ctype 16
 896 is_upper  = is_ctype 32
 897 is_digit  = is_ctype 64
 898
 899 -----------------------------------------------------------------------------
 900 -- identifiers, symbols etc.
 901
 902 lex_ip cont buf =
 903  case expandWhile# is_ident buf of
 904    buf' -> cont (ITipvarid lexeme) buf'
 905            where lexeme = lexemeToFastString buf'
 906
 907 lex_id cont glaexts buf =
 908  case expandWhile# is_ident buf of { buf1 ->
 909
 910  case (if flag glaexts
 911         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 912         else buf1)                              of { buf' ->
 913
 914  let lexeme  = lexemeToFastString buf' in
 915
 916  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 917         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 918                           cont kwd_token buf';
 919         Nothing        ->
 920
 921  let var_token = cont (mk_var_token lexeme) buf' in
 922
 923  if not (flag glaexts)
 924    then var_token
 925    else
 926
 927  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 928         Just kwd_token -> cont kwd_token buf';
 929         Nothing        -> var_token
 930
 931  }}}}
 932
 933 lex_sym cont buf =
 934  case expandWhile# is_symbol buf of
 935    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 936                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 937                                   cont kwd_token buf' ;
 938                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 939                                   cont (mk_var_token lexeme) buf'
 940            }
 941         where lexeme = lexemeToFastString buf'
 942
 943
 944 lex_con cont glaexts buf =
 945  case expandWhile# is_ident buf          of { buf1 ->
 946  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 947
 948  case currentChar# buf' of
 949      '.'# -> munch
 950      _    -> just_a_conid
 951
 952    where
 953     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 954                    cont (ITconid lexeme) buf'
 955     lexeme = lexemeToFastString buf'
 956     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 957  }}
 958
 959 lex_qid cont glaexts mod buf just_a_conid =
 960  case currentChar# buf of
 961   '['# ->       -- Special case for []
 962     case lookAhead# buf 1# of
 963      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 964      _    -> just_a_conid
 965
 966   '('# ->  -- Special case for (,,,)
 967            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 968     case lookAhead# buf 1# of
 969      '#'# | flag glaexts -> case lookAhead# buf 2# of
 970                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 971                                 just_a_conid
 972                 _    -> just_a_conid
 973      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 974      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 975      _    -> just_a_conid
 976
 977   '-'# -> case lookAhead# buf 1# of
 978             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 979             _    -> lex_id3 cont glaexts mod buf just_a_conid
 980   _    -> lex_id3 cont glaexts mod buf just_a_conid
 981
 982 lex_id3 cont glaexts mod buf just_a_conid
 983   | is_symbol (currentChar# buf) =
 984      let
 985         start_new_lexeme = stepOverLexeme buf
 986      in
 987      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 988      let
 989        lexeme  = lexemeToFastString buf'
 990         -- real lexeme is M.<sym>
 991        new_buf = mergeLexemes buf buf'
 992      in
 993      cont (mk_qvar_token mod lexeme) new_buf
 994         -- wrong, but arguably morally right: M... is now a qvarsym
 995      }
 996
 997   | otherwise   =
 998      let
 999         start_new_lexeme = stepOverLexeme buf
1000      in
1001      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1002      if emptyLexeme buf1
1003             then just_a_conid
1004             else
1005
1006      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1007
1008      let
1009       lexeme  = lexemeToFastString buf'
1010       new_buf = mergeLexemes buf buf'
1011       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1012      in
1013      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1014             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1015             Nothing        -> is_a_qvarid
1016         -- TODO: special ids (as, qualified, hiding) shouldn't be
1017         -- recognised as keywords here.  ie.  M.as is a qualified varid.
1018      }}}
1019
1020
1021 slurp_trailing_hashes buf glaexts
1022   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1023   | otherwise    = buf
1024
1025
1026 mk_var_token pk_str
1027   | is_upper f          = ITconid pk_str
1028   | is_ident f          = ITvarid pk_str
1029   | f `eqChar#` ':'#    = ITconsym pk_str
1030   | otherwise           = ITvarsym pk_str
1031   where
1032       (C# f) = _HEAD_ pk_str
1033       tl     = _TAIL_ pk_str
1034
1035 mk_qvar_token m token =
1036  case mk_var_token token of
1037    ITconid n  -> ITqconid  (m,n)
1038    ITvarid n  -> ITqvarid  (m,n)
1039    ITconsym n -> ITqconsym (m,n)
1040    ITvarsym n -> ITqvarsym (m,n)
1041    _          -> ITunknown (show token)
1042 \end{code}
1043
1044 ----------------------------------------------------------------------------
1045 Horrible stuff for dealing with M.(,,,)
1046
1047 \begin{code}
1048 lex_tuple cont mod buf back_off =
1049   go 2 buf
1050   where
1051    go n buf =
1052     case currentChar# buf of
1053       ','# -> go (n+1) (stepOn buf)
1054       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
1055       _    -> back_off
1056
1057 lex_ubx_tuple cont mod buf back_off =
1058   go 2 buf
1059   where
1060    go n buf =
1061     case currentChar# buf of
1062       ','# -> go (n+1) (stepOn buf)
1063       '#'# -> case lookAhead# buf 1# of
1064                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
1065                                  (stepOnBy# buf 2#)
1066                 _    -> back_off
1067       _    -> back_off
1068 \end{code}
1069
1070 -----------------------------------------------------------------------------
1071 doDiscard rips along really fast, looking for a '#-}',
1072 indicating the end of the pragma we're skipping
1073
1074 \begin{code}
1075 doDiscard inStr buf =
1076  case currentChar# buf of
1077    '#'# | not inStr ->
1078        case lookAhead# buf 1# of { '#'# ->
1079        case lookAhead# buf 2# of { '-'# ->
1080        case lookAhead# buf 3# of { '}'# ->
1081            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1082         _    -> doDiscard inStr (incLexeme buf) };
1083         _    -> doDiscard inStr (incLexeme buf) };
1084         _    -> doDiscard inStr (incLexeme buf) }
1085    '"'# ->
1086        let
1087         odd_slashes buf flg i# =
1088           case lookAhead# buf i# of
1089            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1090            _     -> flg
1091        in
1092        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1093          '\\'# -> -- escaping something..
1094            if odd_slashes buf True (negateInt# 2#) then
1095                -- odd number of slashes, " is escaped.
1096               doDiscard inStr (incLexeme buf)
1097            else
1098                -- even number of slashes, \ is escaped.
1099               doDiscard (not inStr) (incLexeme buf)
1100          _ -> case inStr of -- forced to avoid build-up
1101                True  -> doDiscard False (incLexeme buf)
1102                False -> doDiscard True  (incLexeme buf)
1103    _ -> doDiscard inStr (incLexeme buf)
1104
1105 \end{code}
1106
1107 -----------------------------------------------------------------------------
1108
1109 \begin{code}
1110 data LayoutContext
1111   = NoLayout
1112   | Layout Int#
1113
1114 data ParseResult a
1115   = POk PState a
1116   | PFailed Message
1117
1118 data PState = PState {
1119         loc           :: SrcLoc,
1120         glasgow_exts  :: Int#,
1121         bol           :: Int#,
1122         atbol         :: Int#,
1123         context       :: [LayoutContext]
1124      }
1125
1126 type P a = StringBuffer         -- Input string
1127         -> PState
1128         -> ParseResult a
1129
1130 returnP   :: a -> P a
1131 returnP a buf s = POk s a
1132
1133 thenP      :: P a -> (a -> P b) -> P b
1134 m `thenP` k = \ buf s ->
1135         case m buf s of
1136                 POk s1 a -> k a buf s1
1137                 PFailed err  -> PFailed err
1138
1139 thenP_     :: P a -> P b -> P b
1140 m `thenP_` k = m `thenP` \_ -> k
1141
1142 mapP :: (a -> P b) -> [a] -> P [b]
1143 mapP f [] = returnP []
1144 mapP f (a:as) =
1145      f a `thenP` \b ->
1146      mapP f as `thenP` \bs ->
1147      returnP (b:bs)
1148
1149 failP :: String -> P a
1150 failP msg buf s = PFailed (text msg)
1151
1152 failMsgP :: Message -> P a
1153 failMsgP msg buf s = PFailed msg
1154
1155 lexError :: String -> P a
1156 lexError str buf s@PState{ loc = loc }
1157   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1158
1159 getSrcLocP :: P SrcLoc
1160 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1161
1162 getSrcFile :: P FAST_STRING
1163 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1164
1165 getContext :: P [LayoutContext]
1166 getContext buf s@(PState{ context = ctx }) = POk s ctx
1167
1168 pushContext :: LayoutContext -> P ()
1169 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1170
1171 {-
1172
1173 This special case in layoutOn is to handle layout contexts with are
1174 indented the same or less than the current context.  This is illegal
1175 according to the Haskell spec, so we have to arrange to close the
1176 current context.  eg.
1177
1178 class Foo a where
1179 class Bar a
1180
1181 after the first 'where', the sequence of events is:
1182
1183         - layout system inserts a ';' (column 0)
1184         - parser begins a new context at column 0
1185         - parser shifts ';' (legal empty declaration)
1186         - parser sees 'class': parse error (we're still in the inner context)
1187
1188 trouble is, by the time we know we need a new context, the lexer has
1189 already generated the ';'.  Hacky solution is as follows: since we
1190 know the column of the next token (it's the column number of the new
1191 context), we set the ACTUAL column number of the new context to this
1192 numer plus one.  Hence the next time the lexer is called, a '}' will
1193 be generated to close the new context straight away.  Furthermore, we
1194 have to set the atbol flag so that the ';' that the parser shifted as
1195 part of the new context is re-generated.
1196
1197 when the new context is *less* indented than the current one:
1198
1199 f = f where g = g where
1200 h = h
1201
1202         - current context: column 12.
1203         - on seeing 'h' (column 0), the layout system inserts '}'
1204         - parser starts a new context, column 0
1205         - parser sees '}', uses it to close new context
1206         - we still need to insert another '}' followed by a ';',
1207           hence the atbol trick.
1208
1209 There's also a special hack in here to deal with
1210
1211         do
1212            ....
1213            e $ do
1214            blah
1215
1216 i.e. the inner context is at the same indentation level as the outer
1217 context.  This is strictly illegal according to Haskell 98, but
1218 there's a lot of existing code using this style and it doesn't make
1219 any sense to disallow it, since empty 'do' lists don't make sense.
1220 -}
1221
1222 layoutOn :: Bool -> P ()
1223 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1224     let offset = lexemeIndex buf -# bol in
1225     case ctx of
1226         Layout prev_off : _
1227            | if strict then prev_off >=# offset else prev_off ># offset ->
1228                 --trace ("layout on, column: " ++  show (I# offset)) $
1229                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1230         other ->
1231                 --trace ("layout on, column: " ++  show (I# offset)) $
1232                 POk s{ context = Layout offset : ctx } ()
1233
1234 layoutOff :: P ()
1235 layoutOff buf s@(PState{ context = ctx }) =
1236     POk s{ context = NoLayout:ctx } ()
1237
1238 popContext :: P ()
1239 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1240   case ctx of
1241         (_:tl) -> POk s{ context = tl } ()
1242         []     -> PFailed (srcParseErr buf loc)
1243
1244 {-
1245  Note that if the name of the file we're processing ends
1246  with `hi-boot', we accept it on faith as having the right
1247  version. This is done so that .hi-boot files that comes
1248  with hsc don't have to be updated before every release,
1249  *and* it allows us to share .hi-boot files with versions
1250  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1251
1252  If the version number is 0, the checking is also turned off.
1253  (needed to deal with GHC.hi only!)
1254
1255  Once we can assume we're compiling with a version of ghc that
1256  supports interface file checking, we can drop the special
1257  pleading
1258 -}
1259 checkVersion :: Maybe Integer -> P ()
1260 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1261  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1262  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1263 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1264  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1265  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1266
1267 -----------------------------------------------------------------
1268
1269 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1270 ifaceParseErr s l
1271   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1272           text (lexemeToString s), char '\'']
1273
1274 ifaceVersionErr hi_vers l toks
1275   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1276           ptext SLIT("Expected"), int opt_HiVersion,
1277           ptext SLIT("found "), pp_version]
1278     where
1279      pp_version =
1280       case hi_vers of
1281         Nothing -> ptext SLIT("pre ghc-3.02 version")
1282         Just v  -> ptext SLIT("version") <+> integer v
1283
1284 -----------------------------------------------------------------------------
1285
1286 srcParseErr :: StringBuffer -> SrcLoc -> Message
1287 srcParseErr s l
1288   = hcat [ppr l,
1289           if null token
1290              then ptext SLIT(": parse error (possibly incorrect indentation)")
1291              else hcat [ptext SLIT(": parse error on input "),
1292                         char '`', text token, char '\'']
1293     ]
1294   where
1295         token = lexemeToString s
1296
1297 \end{code}