ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         Token(..), lexer, ParseResult(..), PState(..),
  27         checkVersion,
  28         StringBuffer,
  29
  30         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  31         getSrcLocP, getSrcFile,
  32         layoutOn, layoutOff, pushContext, popContext
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import Char             ( ord, isSpace )
  38 import List             ( isSuffixOf )
  39
  40 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  41 import Name             ( isLowerISO, isUpperISO )
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  48                           replaceSrcLine, mkSrcLoc )
  49
  50 import Maybes           ( MaybeErr(..) )
  51 import ErrUtils         ( Message )
  52 import Outputable
  53
  54 import FastString
  55 import StringBuffer
  56 import GlaExts
  57 import ST               ( runST )
  58
  59 #if __GLASGOW_HASKELL__ >= 303
  60 import Bits
  61 import Word
  62 #endif
  63
  64 import Char             ( chr )
  65 import Addr
  66 import PrelRead         ( readRational__ ) -- Glasgow non-std
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Data types}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 The token data type, fairly un-interesting except from one
  76 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  77 strictness, unfolding etc).
  78
  79 The Idea/Observation here is that the renamer needs to scan through
  80 all of an interface file before it can continue. But only a fraction
  81 of the information contained in the file turns out to be useful, so
  82 delaying as much as possible of the scanning and parsing of an
  83 interface file Makes Sense (Heap profiles of the compiler
  84 show a reduction in heap usage by at least a factor of two,
  85 post-renamer).
  86
  87 Hence, the interface file lexer spots when value declarations are
  88 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  89 for the type and any other id info for that binding (unfolding, strictness
  90 etc). These constructors are applied to the result of lexing these sub-chunks.
  91
  92 The lexing of the type and id info is all done lazily, of course, so
  93 the scanning (and subsequent parsing) will be done *only* on the ids the
  94 renamer finds out that it is interested in. The rest will just be junked.
  95 Laziness, you know it makes sense :-)
  96
  97 \begin{code}
  98 data Token
  99   = ITas                        -- Haskell keywords
 100   | ITcase
 101   | ITclass
 102   | ITdata
 103   | ITdefault
 104   | ITderiving
 105   | ITdo
 106   | ITelse
 107   | IThiding
 108   | ITif
 109   | ITimport
 110   | ITin
 111   | ITinfix
 112   | ITinfixl
 113   | ITinfixr
 114   | ITinstance
 115   | ITlet
 116   | ITmodule
 117   | ITnewtype
 118   | ITof
 119   | ITqualified
 120   | ITthen
 121   | ITtype
 122   | ITwhere
 123
 124   | ITforall                    -- GHC extension keywords
 125   | ITforeign
 126   | ITexport
 127   | ITlabel
 128   | ITdynamic
 129   | ITunsafe
 130
 131   | ITinterface                 -- interface keywords
 132   | IT__export
 133   | ITdepends
 134   | IT__forall
 135   | ITletrec
 136   | ITcoerce
 137   | ITinlineMe
 138   | ITinlineCall
 139   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 140   | ITdefaultbranch
 141   | ITbottom
 142   | ITinteger_lit
 143   | ITfloat_lit
 144   | ITrational_lit
 145   | ITaddr_lit
 146   | ITlit_lit
 147   | ITstring_lit
 148   | ITtypeapp
 149   | ITonce
 150   | ITmany
 151   | ITarity
 152   | ITspecialise
 153   | ITnocaf
 154   | ITunfold InlinePragInfo
 155   | ITstrict ([Demand], Bool)
 156   | ITrules
 157   | ITcprinfo (CprInfo)
 158   | ITscc
 159   | ITsccAllCafs
 160
 161   | ITspecialise_prag           -- Pragmas
 162   | ITsource_prag
 163   | ITinline_prag
 164   | ITnoinline_prag
 165   | ITrules_prag
 166   | ITline_prag
 167   | ITclose_prag
 168
 169   | ITdotdot                    -- reserved symbols
 170   | ITdcolon
 171   | ITequal
 172   | ITlam
 173   | ITvbar
 174   | ITlarrow
 175   | ITrarrow
 176   | ITat
 177   | ITtilde
 178   | ITdarrow
 179   | ITminus
 180   | ITbang
 181   | ITdot
 182
 183   | ITbiglam                    -- GHC-extension symbols
 184
 185   | ITocurly                    -- special symbols
 186   | ITccurly
 187   | ITvccurly
 188   | ITobrack
 189   | ITcbrack
 190   | IToparen
 191   | ITcparen
 192   | IToubxparen
 193   | ITcubxparen
 194   | ITsemi
 195   | ITcomma
 196   | ITunderscore
 197   | ITbackquote
 198
 199   | ITvarid   FAST_STRING       -- identifiers
 200   | ITconid   FAST_STRING
 201   | ITvarsym  FAST_STRING
 202   | ITconsym  FAST_STRING
 203   | ITqvarid  (FAST_STRING,FAST_STRING)
 204   | ITqconid  (FAST_STRING,FAST_STRING)
 205   | ITqvarsym (FAST_STRING,FAST_STRING)
 206   | ITqconsym (FAST_STRING,FAST_STRING)
 207
 208   | ITpragma StringBuffer
 209
 210   | ITchar       Char
 211   | ITstring     FAST_STRING
 212   | ITinteger    Integer
 213   | ITrational   Rational
 214
 215   | ITprimchar   Char
 216   | ITprimstring FAST_STRING
 217   | ITprimint    Integer
 218   | ITprimfloat  Rational
 219   | ITprimdouble Rational
 220   | ITlitlit     FAST_STRING
 221
 222   | ITunknown String            -- Used when the lexer can't make sense of it
 223   | ITeof                       -- end of file token
 224   deriving Text -- debugging
 225 \end{code}
 226
 227 -----------------------------------------------------------------------------
 228 Keyword Lists
 229
 230 \begin{code}
 231 pragmaKeywordsFM = listToUFM $
 232       map (\ (x,y) -> (_PK_ x,y))
 233        [( "SPECIALISE", ITspecialise_prag ),
 234         ( "SPECIALIZE", ITspecialise_prag ),
 235         ( "SOURCE",     ITsource_prag ),
 236         ( "INLINE",     ITinline_prag ),
 237         ( "NOINLINE",   ITnoinline_prag ),
 238         ( "LINE",       ITline_prag ),
 239         ( "RULES",      ITrules_prag ),
 240         ( "RULEZ",      ITrules_prag )  -- american spelling :-)
 241         ]
 242
 243 haskellKeywordsFM = listToUFM $
 244       map (\ (x,y) -> (_PK_ x,y))
 245        [( "_",          ITunderscore ),
 246         ( "as",         ITas ),
 247         ( "case",       ITcase ),
 248         ( "class",      ITclass ),
 249         ( "data",       ITdata ),
 250         ( "default",    ITdefault ),
 251         ( "deriving",   ITderiving ),
 252         ( "do",         ITdo ),
 253         ( "else",       ITelse ),
 254         ( "hiding",     IThiding ),
 255         ( "if",         ITif ),
 256         ( "import",     ITimport ),
 257         ( "in",         ITin ),
 258         ( "infix",      ITinfix ),
 259         ( "infixl",     ITinfixl ),
 260         ( "infixr",     ITinfixr ),
 261         ( "instance",   ITinstance ),
 262         ( "let",        ITlet ),
 263         ( "module",     ITmodule ),
 264         ( "newtype",    ITnewtype ),
 265         ( "of",         ITof ),
 266         ( "qualified",  ITqualified ),
 267         ( "then",       ITthen ),
 268         ( "type",       ITtype ),
 269         ( "where",      ITwhere )
 270      ]
 271
 272
 273 ghcExtensionKeywordsFM = listToUFM $
 274         map (\ (x,y) -> (_PK_ x,y))
 275      [  ( "forall",     ITforall ),
 276         ( "foreign",    ITforeign ),
 277         ( "export",     ITexport ),
 278         ( "label",      ITlabel ),
 279         ( "dynamic",    ITdynamic ),
 280         ( "unsafe",     ITunsafe ),
 281         ("_ccall_",     ITccall (False, False, False)),
 282         ("_ccall_GC_",  ITccall (False, False, True)),
 283         ("_casm_",      ITccall (False, True,  False)),
 284         ("_casm_GC_",   ITccall (False, True,  True)),
 285
 286         -- interface keywords
 287         ("__interface",         ITinterface),
 288         ("__export",            IT__export),
 289         ("__depends",           ITdepends),
 290         ("__forall",            IT__forall),
 291         ("__letrec",            ITletrec),
 292         ("__coerce",            ITcoerce),
 293         ("__inline_me",         ITinlineMe),
 294         ("__inline_call",       ITinlineCall),
 295         ("__depends",           ITdepends),
 296         ("__DEFAULT",           ITdefaultbranch),
 297         ("__bot",               ITbottom),
 298         ("__integer",           ITinteger_lit),
 299         ("__float",             ITfloat_lit),
 300         ("__rational",          ITrational_lit),
 301         ("__addr",              ITaddr_lit),
 302         ("__litlit",            ITlit_lit),
 303         ("__string",            ITstring_lit),
 304         ("__a",                 ITtypeapp),
 305         ("__o",                 ITonce),
 306         ("__m",                 ITmany),
 307         ("__A",                 ITarity),
 308         ("__P",                 ITspecialise),
 309         ("__C",                 ITnocaf),
 310         ("__R",                 ITrules),
 311         ("__u",                 ITunfold NoInlinePragInfo),
 312
 313         ("__ccall",             ITccall (False, False, False)),
 314         ("__ccall_GC",          ITccall (False, False, True)),
 315         ("__dyn_ccall",         ITccall (True,  False, False)),
 316         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 317         ("__casm",              ITccall (False, True,  False)),
 318         ("__dyn_casm",          ITccall (True,  True,  False)),
 319         ("__casm_GC",           ITccall (False, True,  True)),
 320         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 321
 322         ("/\\",                 ITbiglam)
 323      ]
 324
 325
 326 haskellKeySymsFM = listToUFM $
 327         map (\ (x,y) -> (_PK_ x,y))
 328       [ ("..",          ITdotdot)
 329        ,("::",          ITdcolon)
 330        ,("=",           ITequal)
 331        ,("\\",          ITlam)
 332        ,("|",           ITvbar)
 333        ,("<-",          ITlarrow)
 334        ,("->",          ITrarrow)
 335        ,("@",           ITat)
 336        ,("~",           ITtilde)
 337        ,("=>",          ITdarrow)
 338        ,("-",           ITminus)
 339        ,("!",           ITbang)
 340        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 341        ]
 342 \end{code}
 343
 344 -----------------------------------------------------------------------------
 345 The lexical analyser
 346
 347 Lexer state:
 348
 349         - (glaexts) lexing an interface file or -fglasgow-exts
 350         - (bol)   pointer to beginning of line (for column calculations)
 351         - (buf)   pointer to beginning of token
 352         - (buf)   pointer to current char
 353         - (atbol) flag indicating whether we're at the beginning of a line
 354
 355 \begin{code}
 356 lexer :: (Token -> P a) -> P a
 357 lexer cont buf s@(PState{
 358                     loc = loc,
 359                     glasgow_exts = glaexts,
 360                     bol = bol,
 361                     atbol = atbol,
 362                     context = ctx
 363                 })
 364
 365         -- first, start a new lexeme and lose all the whitespace
 366   = tab line bol atbol (stepOverLexeme buf)
 367   where
 368         line = srcLocLine loc
 369
 370         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 371           case currentChar# buf of
 372
 373             '\NUL'# ->
 374                    if bufferExhausted (stepOn buf)
 375                         then cont ITeof buf s'
 376                         else trace "lexer: misplaced NUL?" $
 377                              tab y bol atbol (stepOn buf)
 378
 379             '\n'# -> let buf' = stepOn buf
 380                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 381
 382                 -- find comments.  This got harder in Haskell 98.
 383             '-'# ->  let trundle n =
 384                           let next = lookAhead# buf n in
 385                           if next `eqChar#` '-'# then trundle (n +# 1#)
 386                           else if is_symbol next || n <# 2#
 387                                 then is_a_token
 388                                 else case untilChar# (stepOnBy# buf n) '\n'# of
 389                                     { buf' -> tab y bol atbol (stepOverLexeme buf')
 390                                     }
 391                     in trundle 1#
 392
 393                 -- comments and pragmas.  We deal with LINE pragmas here,
 394                 -- and throw out any unrecognised pragmas as comments.  Any
 395                 -- pragmas we know about are dealt with later (after any layout
 396                 -- processing if necessary).
 397
 398             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 399                 if lookAhead# buf 2# `eqChar#` '#'# then
 400                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 401                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 402                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 403                   let lexeme = lexemeToFastString buf2 in
 404                   case lookupUFM pragmaKeywordsFM lexeme of
 405                         Just ITline_prag -> line_prag (lexer cont) buf2 s'
 406                         Just other -> is_a_token
 407                         Nothing -> skip_to_end (stepOnBy# buf 2#)
 408                   }}
 409
 410                 else skip_to_end (stepOnBy# buf 2#)
 411                 where
 412                     skip_to_end buf = nested_comment (lexer cont) buf s'
 413
 414                 -- tabs have been expanded beforehand
 415             c | is_space c -> tab y bol atbol (stepOn buf)
 416               | otherwise  -> is_a_token
 417
 418            where s' = s{loc = replaceSrcLine loc y,
 419                         bol = bol,
 420                        atbol = atbol}
 421
 422                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 423                             | otherwise    = lexToken cont glaexts buf s'
 424
 425 -- {-# LINE .. #-} pragmas.  yeuch.
 426 line_prag cont buf =
 427   case expandWhile# is_space buf                of { buf1 ->
 428   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 429   -- subtract one: the line number refers to the *following* line.
 430   let real_line = line - 1 in
 431   case fromInteger real_line                    of { i@(I# l) ->
 432   case expandWhile# is_space buf2               of { buf3 ->
 433   case currentChar# buf3                        of
 434      '\"'#{-"-} ->
 435         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 436         let file = lexemeToFastString buf4 in
 437         \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
 438         }
 439      other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
 440   }}}}
 441   where
 442         skipToEnd buf = nested_comment cont buf
 443
 444 nested_comment :: P a -> P a
 445 nested_comment cont buf = loop buf
 446  where
 447    loop buf =
 448      case currentChar# buf of
 449         '\NUL'# | bufferExhausted (stepOn buf) ->
 450                 lexError "unterminated `{-'" buf
 451
 452         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 453                 cont (stepOnBy# buf 2#)
 454
 455         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 456               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 457
 458         '\n'# -> \ s@PState{loc=loc} ->
 459                  let buf' = stepOn buf in
 460                  nested_comment cont buf'
 461                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 462                           atbol = 1#}
 463
 464         _   -> nested_comment cont (stepOn buf)
 465
 466 -- When we are lexing the first token of a line, check whether we need to
 467 -- insert virtual semicolons or close braces due to layout.
 468
 469 lexBOL :: (Token -> P a) -> P a
 470 lexBOL cont buf s@(PState{
 471                     loc = loc,
 472                     glasgow_exts = glaexts,
 473                     bol = bol,
 474                     atbol = atbol,
 475                     context = ctx
 476                   }) =
 477         if need_close_curly then
 478                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 479                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 480         else if need_semi_colon then
 481                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 482                 cont ITsemi buf s{atbol = 0#}
 483         else
 484                 lexToken cont glaexts buf s{atbol = 0#}
 485   where
 486         col = currentIndex# buf -# bol
 487
 488         need_close_curly =
 489                 case ctx of
 490                         [] -> False
 491                         (i:_) -> case i of
 492                                     NoLayout -> False
 493                                     Layout n -> col <# n
 494         need_semi_colon =
 495                 case ctx of
 496                         [] -> False
 497                         (i:_) -> case i of
 498                                     NoLayout -> False
 499                                     Layout n -> col ==# n
 500
 501
 502 lexToken :: (Token -> P a) -> Int# -> P a
 503 lexToken cont glaexts buf =
 504  --trace "lexToken" $
 505  _scc_ "Lexer"
 506   case currentChar# buf of
 507
 508     -- special symbols ----------------------------------------------------
 509     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 510                 -> cont IToubxparen (setCurrentPos# buf 2#)
 511          | otherwise
 512                 -> cont IToparen (incLexeme buf)
 513
 514     ')'# -> cont ITcparen    (incLexeme buf)
 515     '['# -> cont ITobrack    (incLexeme buf)
 516     ']'# -> cont ITcbrack    (incLexeme buf)
 517     ','# -> cont ITcomma     (incLexeme buf)
 518     ';'# -> cont ITsemi      (incLexeme buf)
 519
 520     '}'# -> \ s@PState{context = ctx} ->
 521             case ctx of
 522                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 523                 _ -> lexError "too many '}'s" buf s
 524
 525     '#'# | flag glaexts
 526          -> case lookAhead# buf 1# of
 527                 ')'# -> cont ITcubxparen (setCurrentPos# buf 2#)
 528                 '-'# -> case lookAhead# buf 2# of
 529                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 530                            _    -> lex_sym cont (incLexeme buf)
 531                 _    -> lex_sym cont (incLexeme buf)
 532
 533     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 534                 -> lex_cstring cont (setCurrentPos# buf 2#)
 535          | otherwise
 536                 -> cont ITbackquote (incLexeme buf)
 537
 538     '{'# ->     -- look for "{-##" special iface pragma
 539         case lookAhead# buf 1# of
 540            '-'# -> case lookAhead# buf 2# of
 541                     '#'# -> case lookAhead# buf 3# of
 542                                 '#'# ->
 543                                    let (lexeme, buf')
 544                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 545                                    cont (ITpragma lexeme) buf'
 546                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 547                     _    -> cont ITocurly (incLexeme buf)
 548            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 549
 550     -- strings/characters -------------------------------------------------
 551     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 552     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 553
 554     -- strictness and cpr pragmas and __scc treated specially.
 555     '_'# | flag glaexts ->
 556          case lookAhead# buf 1# of
 557            '_'# -> case lookAhead# buf 2# of
 558                     'S'# ->
 559                         lex_demand cont (stepOnUntil (not . isSpace)
 560                                         (stepOnBy# buf 3#)) -- past __S
 561                     'M'# ->
 562                         lex_cpr cont (stepOnUntil (not . isSpace)
 563                                      (stepOnBy# buf 3#)) -- past __M
 564                     's'# ->
 565                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 566                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 567                                Nothing   -> lex_id cont glaexts buf
 568                     _ -> lex_id cont glaexts buf
 569            _    -> lex_id cont glaexts buf
 570
 571         -- Hexadecimal and octal constants
 572     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 573                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 574          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 575                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 576         where ch   = lookAhead# buf 1#
 577               ch2  = lookAhead# buf 2#
 578               buf' = setCurrentPos# buf 2#
 579
 580     '\NUL'# ->
 581             if bufferExhausted (stepOn buf) then
 582                cont ITeof buf
 583             else
 584                trace "lexIface: misplaced NUL?" $
 585                cont (ITunknown "\NUL") (stepOn buf)
 586
 587     c | is_digit  c -> lex_num cont glaexts 0 buf
 588       | is_symbol c -> lex_sym cont buf
 589       | is_upper  c -> lex_con cont glaexts buf
 590       | is_ident  c -> lex_id  cont glaexts buf
 591
 592 -- Int# is unlifted, and therefore faster than Bool for flags.
 593 {-# INLINE flag #-}
 594 flag :: Int# -> Bool
 595 flag 0# = False
 596 flag _  = True
 597
 598 -------------------------------------------------------------------------------
 599 -- Pragmas
 600
 601 lex_prag cont buf
 602   = case expandWhile# is_space buf of { buf1 ->
 603     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 604     let lexeme = lexemeToFastString buf2 in
 605     case lookupUFM pragmaKeywordsFM lexeme of
 606         Just kw -> cont kw (mergeLexemes buf buf2)
 607         Nothing -> panic "lex_prag"
 608   }}
 609
 610 -------------------------------------------------------------------------------
 611 -- Strings & Chars
 612
 613 lex_string cont glaexts s buf
 614   = case currentChar# buf of
 615         '"'#{-"-} ->
 616            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 617            case currentChar# buf' of
 618                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 619                 _                   -> cont (ITstring s') buf'
 620
 621         -- ignore \& in a string, deal with string gaps
 622         '\\'# | next_ch `eqChar#` '&'#
 623                 -> lex_string cont glaexts s (setCurrentPos# buf 2#)
 624               | is_space next_ch
 625                 -> lex_stringgap cont glaexts s buf'
 626
 627             where next_ch = lookAhead# buf 1#
 628                   buf' = setCurrentPos# buf 2#
 629
 630         _ -> lex_char (lex_next_string cont s) glaexts buf
 631
 632 lex_stringgap cont glaexts s buf
 633   = let buf' = incLexeme buf in
 634     case currentChar# buf of
 635         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 636                   st{loc = incSrcLine loc}
 637         '\\'# -> lex_string cont glaexts s buf'
 638         c | is_space c -> lex_stringgap cont glaexts s buf'
 639         other -> charError buf'
 640
 641 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 642
 643 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 644 lex_char cont glaexts buf
 645   = case currentChar# buf of
 646         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 647         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 648         other -> charError buf
 649
 650 char_end cont glaexts c buf
 651   = case currentChar# buf of
 652         '\''# -> let buf' = incLexeme buf in
 653                  case currentChar# buf' of
 654                         '#'# | flag glaexts
 655                                 -> cont (ITprimchar c) (incLexeme buf')
 656                         _       -> cont (ITchar c) buf'
 657         _     -> charError buf
 658
 659 lex_escape cont buf
 660   = let buf' = incLexeme buf in
 661     case currentChar# buf of
 662         'a'#       -> cont '\a' buf'
 663         'b'#       -> cont '\b' buf'
 664         'f'#       -> cont '\f' buf'
 665         'n'#       -> cont '\n' buf'
 666         'r'#       -> cont '\r' buf'
 667         't'#       -> cont '\t' buf'
 668         'v'#       -> cont '\v' buf'
 669         '\\'#      -> cont '\\' buf'
 670         '"'#       -> cont '\"' buf'
 671         '\''#      -> cont '\'' buf'
 672         '^'#       -> let c = currentChar# buf' in
 673                       if c `geChar#` '@'# && c `leChar#` '_'#
 674                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 675                         else charError buf'
 676
 677         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 678         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 679         x | is_digit x
 680                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 681
 682         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 683                                        Just buf2 <- [prefixMatch buf p] ] of
 684                             (c,buf2):_ -> cont c buf2
 685                             [] -> charError buf'
 686
 687 after_charnum cont i buf
 688   = let int = fromInteger i in
 689     if i >= 0 && i <= 255
 690         then cont (chr int) buf
 691         else charError buf
 692
 693 readNum cont buf is_digit base conv = read buf 0
 694   where read buf i
 695           = case currentChar# buf of { c ->
 696             if is_digit c
 697                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 698                 else cont i buf
 699             }
 700
 701 is_hexdigit c
 702         =  is_digit c
 703         || (c `geChar#` 'a'# && c `leChar#` 'h'#)
 704         || (c `geChar#` 'A'# && c `leChar#` 'H'#)
 705
 706 hex c | is_digit c = ord# c -# ord# '0'#
 707       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 708 oct_or_dec c = ord# c -# ord# '0'#
 709
 710 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 711
 712 to_lower c
 713   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 714         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 715   | otherwise = c
 716
 717 charError buf = lexError "error in character literal" buf
 718
 719 silly_escape_chars = [
 720         ("NUL", '\NUL'),
 721         ("SOH", '\SOH'),
 722         ("STX", '\STX'),
 723         ("ETX", '\ETX'),
 724         ("EOT", '\EOT'),
 725         ("ENQ", '\ENQ'),
 726         ("ACK", '\ACK'),
 727         ("BEL", '\BEL'),
 728         ("BS", '\BS'),
 729         ("HT", '\HT'),
 730         ("LF", '\LF'),
 731         ("VT", '\VT'),
 732         ("FF", '\FF'),
 733         ("CR", '\CR'),
 734         ("SO", '\SO'),
 735         ("SI", '\SI'),
 736         ("DLE", '\DLE'),
 737         ("DC1", '\DC1'),
 738         ("DC2", '\DC2'),
 739         ("DC3", '\DC3'),
 740         ("DC4", '\DC4'),
 741         ("NAK", '\NAK'),
 742         ("SYN", '\SYN'),
 743         ("ETB", '\ETB'),
 744         ("CAN", '\CAN'),
 745         ("EM", '\EM'),
 746         ("SUB", '\SUB'),
 747         ("ESC", '\ESC'),
 748         ("FS", '\FS'),
 749         ("GS", '\GS'),
 750         ("RS", '\RS'),
 751         ("US", '\US'),
 752         ("SP", '\SP'),
 753         ("DEL", '\DEL')
 754         ]
 755
 756 -------------------------------------------------------------------------------
 757
 758 lex_demand cont buf =
 759  case read_em [] buf of { (ls,buf') ->
 760  case currentChar# buf' of
 761    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 762    _    -> cont (ITstrict (ls, False)) buf'
 763  }
 764  where
 765    -- code snatched from Demand.lhs
 766   read_em acc buf =
 767    case currentChar# buf of
 768     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 769     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 770     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 771     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 772     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 773     ')'# -> (reverse acc, stepOn buf)
 774     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 775     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 776     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 777     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 778     _    -> (reverse acc, buf)
 779
 780   do_unpack new_or_data wrapper_unpacks acc buf
 781    = case read_em [] buf of
 782       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 783
 784 lex_cpr cont buf =
 785  case read_em [] buf of { (cpr_inf,buf') ->
 786    ASSERT ( null (tail cpr_inf) )
 787    cont (ITcprinfo $ head cpr_inf) buf'
 788  }
 789  where
 790    -- code snatched from lex_demand above
 791   read_em acc buf =
 792    case currentChar# buf of
 793     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 794     '('# -> do_unpack acc (stepOn buf)
 795     ')'# -> (reverse acc, stepOn buf)
 796     _    -> (reverse acc, buf)
 797
 798   do_unpack acc buf
 799    = case read_em [] buf of
 800       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 801
 802 ------------------
 803 lex_scc cont buf =
 804  case currentChar# buf of
 805   'C'# -> cont ITsccAllCafs (incLexeme buf)
 806   other -> cont ITscc buf
 807
 808 -----------------------------------------------------------------------------
 809 -- Numbers
 810
 811 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 812 lex_num cont glaexts acc buf =
 813  case scanNumLit acc buf of
 814      (acc',buf') ->
 815        case currentChar# buf' of
 816          '.'# | is_digit (lookAhead# buf' 1#) ->
 817              -- this case is not optimised at all, as the
 818              -- presence of floating point numbers in interface
 819              -- files is not that common. (ToDo)
 820             case expandWhile# is_digit (incLexeme buf') of
 821               buf2 -> -- points to first non digit char
 822
 823                 let l = case currentChar# buf2 of
 824                           'E'# -> do_exponent
 825                           'e'# -> do_exponent
 826                           _ -> buf2
 827
 828                     do_exponent
 829                         = let buf3 = incLexeme buf2 in
 830                           case currentChar# buf3 of
 831                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 832                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 833                                 x | is_digit x -> expandWhile# is_digit buf3
 834                                 _ -> buf2
 835
 836                     v = readRational__ (lexemeToString l)
 837
 838                 in case currentChar# l of -- glasgow exts only
 839                       '#'# | flag glaexts -> let l' = incLexeme l in
 840                               case currentChar# l' of
 841                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 842                                 _    -> cont (ITprimfloat  v) l'
 843                       _ -> cont (ITrational v) l
 844
 845          _ -> after_lexnum cont glaexts acc' buf'
 846
 847 after_lexnum cont glaexts i buf
 848   = case currentChar# buf of
 849         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 850         _    -> cont (ITinteger i) buf
 851
 852 -----------------------------------------------------------------------------
 853 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 854
 855 -- we lexemeToFastString on the bit between the ``''s, but include the
 856 -- quotes in the full lexeme.
 857
 858 lex_cstring cont buf =
 859  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 860    buf' -> cont (ITlitlit (lexemeToFastString
 861                                 (setCurrentPos# buf' (negateInt# 2#))))
 862                 (mergeLexemes buf buf')
 863
 864 ------------------------------------------------------------------------------
 865 -- Character Classes
 866
 867 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 868
 869 {-# INLINE is_ctype #-}
 870 #if __GLASGOW_HASKELL__ >= 303
 871 is_ctype :: Word8 -> Char# -> Bool
 872 is_ctype mask = \c ->
 873    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 874 #else
 875 is_ctype :: Int -> Char# -> Bool
 876 is_ctype (I# mask) = \c ->
 877     let (A# ctype) = ``char_types'' :: Addr
 878         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 879     in
 880         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 881 #endif
 882
 883 is_ident  = is_ctype 1
 884 is_symbol = is_ctype 2
 885 is_any    = is_ctype 4
 886 is_space  = is_ctype 8
 887 is_upper  = is_ctype 16
 888 is_digit  = is_ctype 32
 889
 890 -----------------------------------------------------------------------------
 891 -- identifiers, symbols etc.
 892
 893 lex_id cont glaexts buf =
 894  case expandWhile# is_ident buf of { buf1 ->
 895
 896  case (if flag glaexts
 897         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 898         else buf1)                              of { buf' ->
 899
 900  let lexeme  = lexemeToFastString buf' in
 901
 902  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 903         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 904                           cont kwd_token buf';
 905         Nothing        ->
 906
 907  let var_token = cont (mk_var_token lexeme) buf' in
 908
 909  if not (flag glaexts)
 910    then var_token
 911    else
 912
 913  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 914         Just kwd_token -> cont kwd_token buf';
 915         Nothing        -> var_token
 916
 917  }}}}
 918
 919 lex_sym cont buf =
 920  case expandWhile# is_symbol buf of
 921    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 922                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 923                                   cont kwd_token buf' ;
 924                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 925                                   cont (mk_var_token lexeme) buf'
 926            }
 927         where lexeme = lexemeToFastString buf'
 928
 929
 930 lex_con cont glaexts buf =
 931  case expandWhile# is_ident buf          of { buf1 ->
 932  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 933
 934  case currentChar# buf' of
 935      '.'# -> munch
 936      _    -> just_a_conid
 937
 938    where
 939     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 940                    cont (ITconid lexeme) buf'
 941     lexeme = lexemeToFastString buf'
 942     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 943  }}
 944
 945 lex_qid cont glaexts mod buf just_a_conid =
 946  case currentChar# buf of
 947   '['# ->       -- Special case for []
 948     case lookAhead# buf 1# of
 949      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 950      _    -> just_a_conid
 951
 952   '('# ->  -- Special case for (,,,)
 953            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 954     case lookAhead# buf 1# of
 955      '#'# | flag glaexts -> case lookAhead# buf 2# of
 956                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 957                                 just_a_conid
 958                 _    -> just_a_conid
 959      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 960      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 961      _    -> just_a_conid
 962
 963   '-'# -> case lookAhead# buf 1# of
 964             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 965             _    -> lex_id3 cont glaexts mod buf just_a_conid
 966   _    -> lex_id3 cont glaexts mod buf just_a_conid
 967
 968 lex_id3 cont glaexts mod buf just_a_conid
 969   | is_symbol (currentChar# buf) =
 970      let
 971         start_new_lexeme = stepOverLexeme buf
 972      in
 973      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 974      let
 975        lexeme  = lexemeToFastString buf'
 976         -- real lexeme is M.<sym>
 977        new_buf = mergeLexemes buf buf'
 978      in
 979      case lookupUFM haskellKeySymsFM lexeme of {
 980         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 981         Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
 982      }}
 983
 984   | otherwise   =
 985      let
 986         start_new_lexeme = stepOverLexeme buf
 987      in
 988      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 989      if emptyLexeme buf1
 990             then just_a_conid
 991             else
 992
 993      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 994
 995      let
 996       lexeme  = lexemeToFastString buf'
 997       new_buf = mergeLexemes buf buf'
 998      in
 999      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1000             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1001             Nothing        ->
1002      if flag glaexts
1003        then case lookupUFM ghcExtensionKeywordsFM lexeme of {
1004             Just kwd_token -> just_a_conid;
1005             Nothing        -> cont (mk_qvar_token mod lexeme) new_buf }
1006        else just_a_conid
1007      }}}
1008
1009
1010 slurp_trailing_hashes buf glaexts
1011   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1012   | otherwise    = buf
1013
1014
1015 mk_var_token pk_str
1016   | is_upper f          = ITconid pk_str
1017         -- _[A-Z] is treated as a constructor in interface files.
1018   | f `eqChar#` '_'# && not (_NULL_ tl)
1019         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
1020   | is_ident f          = ITvarid pk_str
1021   | f `eqChar#` ':'#    = ITconsym pk_str
1022   | otherwise           = ITvarsym pk_str
1023   where
1024       (C# f) = _HEAD_ pk_str
1025       tl     = _TAIL_ pk_str
1026
1027 mk_qvar_token m token =
1028  case mk_var_token token of
1029    ITconid n  -> ITqconid  (m,n)
1030    ITvarid n  -> ITqvarid  (m,n)
1031    ITconsym n -> ITqconsym (m,n)
1032    ITvarsym n -> ITqvarsym (m,n)
1033    _          -> ITunknown (show token)
1034 \end{code}
1035
1036 ----------------------------------------------------------------------------
1037 Horrible stuff for dealing with M.(,,,)
1038
1039 \begin{code}
1040 lex_tuple cont mod buf back_off =
1041   go 2 buf
1042   where
1043    go n buf =
1044     case currentChar# buf of
1045       ','# -> go (n+1) (stepOn buf)
1046       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
1047       _    -> back_off
1048
1049 lex_ubx_tuple cont mod buf back_off =
1050   go 2 buf
1051   where
1052    go n buf =
1053     case currentChar# buf of
1054       ','# -> go (n+1) (stepOn buf)
1055       '#'# -> case lookAhead# buf 1# of
1056                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
1057                                  (stepOnBy# buf 2#)
1058                 _    -> back_off
1059       _    -> back_off
1060 \end{code}
1061
1062 -----------------------------------------------------------------------------
1063 doDiscard rips along really fast, looking for a '#-}',
1064 indicating the end of the pragma we're skipping
1065
1066 \begin{code}
1067 doDiscard inStr buf =
1068  case currentChar# buf of
1069    '#'# | not inStr ->
1070        case lookAhead# buf 1# of { '#'# ->
1071        case lookAhead# buf 2# of { '-'# ->
1072        case lookAhead# buf 3# of { '}'# ->
1073            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1074         _    -> doDiscard inStr (incLexeme buf) };
1075         _    -> doDiscard inStr (incLexeme buf) };
1076         _    -> doDiscard inStr (incLexeme buf) }
1077    '"'# ->
1078        let
1079         odd_slashes buf flg i# =
1080           case lookAhead# buf i# of
1081            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1082            _     -> flg
1083        in
1084        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1085          '\\'# -> -- escaping something..
1086            if odd_slashes buf True (negateInt# 2#) then
1087                -- odd number of slashes, " is escaped.
1088               doDiscard inStr (incLexeme buf)
1089            else
1090                -- even number of slashes, \ is escaped.
1091               doDiscard (not inStr) (incLexeme buf)
1092          _ -> case inStr of -- forced to avoid build-up
1093                True  -> doDiscard False (incLexeme buf)
1094                False -> doDiscard True  (incLexeme buf)
1095    _ -> doDiscard inStr (incLexeme buf)
1096
1097 \end{code}
1098
1099 -----------------------------------------------------------------------------
1100
1101 \begin{code}
1102 data LayoutContext
1103   = NoLayout
1104   | Layout Int#
1105
1106 data ParseResult a
1107   = POk PState a
1108   | PFailed Message
1109
1110 data PState = PState {
1111         loc           :: SrcLoc,
1112         glasgow_exts  :: Int#,
1113         bol           :: Int#,
1114         atbol         :: Int#,
1115         context       :: [LayoutContext]
1116      }
1117
1118 type P a = StringBuffer         -- Input string
1119         -> PState
1120         -> ParseResult a
1121
1122 returnP   :: a -> P a
1123 returnP a buf s = POk s a
1124
1125 thenP      :: P a -> (a -> P b) -> P b
1126 m `thenP` k = \ buf s ->
1127         case m buf s of
1128                 POk s1 a -> k a buf s1
1129                 PFailed err  -> PFailed err
1130
1131 thenP_     :: P a -> P b -> P b
1132 m `thenP_` k = m `thenP` \_ -> k
1133
1134 mapP :: (a -> P b) -> [a] -> P [b]
1135 mapP f [] = returnP []
1136 mapP f (a:as) =
1137      f a `thenP` \b ->
1138      mapP f as `thenP` \bs ->
1139      returnP (b:bs)
1140
1141 failP :: String -> P a
1142 failP msg buf s = PFailed (text msg)
1143
1144 failMsgP :: Message -> P a
1145 failMsgP msg buf s = PFailed msg
1146
1147 lexError :: String -> P a
1148 lexError str buf s@PState{ loc = loc }
1149   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1150
1151 getSrcLocP :: P SrcLoc
1152 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1153
1154 getSrcFile :: P FAST_STRING
1155 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1156
1157 getContext :: P [LayoutContext]
1158 getContext buf s@(PState{ context = ctx }) = POk s ctx
1159
1160 pushContext :: LayoutContext -> P ()
1161 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1162
1163 {-
1164
1165 This special case in layoutOn is to handle layout contexts with are
1166 indented the same or less than the current context.  This is illegal
1167 according to the Haskell spec, so we have to arrange to close the
1168 current context.  eg.
1169
1170 class Foo a where
1171 class Bar a
1172
1173 after the first 'where', the sequence of events is:
1174
1175         - layout system inserts a ';' (column 0)
1176         - parser begins a new context at column 0
1177         - parser shifts ';' (legal empty declaration)
1178         - parser sees 'class': parse error (we're still in the inner context)
1179
1180 trouble is, by the time we know we need a new context, the lexer has
1181 already generated the ';'.  Hacky solution is as follows: since we
1182 know the column of the next token (it's the column number of the new
1183 context), we set the ACTUAL column number of the new context to this
1184 numer plus one.  Hence the next time the lexer is called, a '}' will
1185 be generated to close the new context straight away.  Furthermore, we
1186 have to set the atbol flag so that the ';' that the parser shifted as
1187 part of the new context is re-generated.
1188
1189 when the new context is *less* indented than the current one:
1190
1191 f = f where g = g where
1192 h = h
1193
1194         - current context: column 12.
1195         - on seeing 'h' (column 0), the layout system inserts '}'
1196         - parser starts a new context, column 0
1197         - parser sees '}', uses it to close new context
1198         - we still need to insert another '}' followed by a ';',
1199           hence the atbol trick.
1200
1201 -}
1202
1203 layoutOn :: P ()
1204 layoutOn buf s@(PState{ bol = bol, context = ctx }) =
1205     let offset = lexemeIndex buf -# bol in
1206     case ctx of
1207         Layout prev_off : _ | prev_off >=# offset ->
1208                 --trace ("layout on, column: " ++  show (I# offset)) $
1209                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1210         other ->
1211                 --trace ("layout on, column: " ++  show (I# offset)) $
1212                 POk s{ context = Layout offset : ctx } ()
1213
1214 layoutOff :: P ()
1215 layoutOff buf s@(PState{ context = ctx }) =
1216     POk s{ context = NoLayout:ctx } ()
1217
1218 popContext :: P ()
1219 popContext = \ buf s@(PState{ context = ctx }) ->
1220   case ctx of
1221         (_:tl) -> POk s{ context = tl } ()
1222         []    -> panic "Lex.popContext: empty context"
1223
1224 {-
1225  Note that if the name of the file we're processing ends
1226  with `hi-boot', we accept it on faith as having the right
1227  version. This is done so that .hi-boot files that comes
1228  with hsc don't have to be updated before every release,
1229  *and* it allows us to share .hi-boot files with versions
1230  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1231
1232  If the version number is 0, the checking is also turned off.
1233  (needed to deal with GHC.hi only!)
1234
1235  Once we can assume we're compiling with a version of ghc that
1236  supports interface file checking, we can drop the special
1237  pleading
1238 -}
1239 checkVersion :: Maybe Integer -> P ()
1240 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1241  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1242  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1243 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1244  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1245  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1246
1247 -----------------------------------------------------------------
1248
1249 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1250 ifaceParseErr s l
1251   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1252           text (lexemeToString s), char '\'']
1253
1254 ifaceVersionErr hi_vers l toks
1255   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1256           ptext SLIT("Expected"), int opt_HiVersion,
1257           ptext SLIT("found "), pp_version]
1258     where
1259      pp_version =
1260       case hi_vers of
1261         Nothing -> ptext SLIT("pre ghc-3.02 version")
1262         Just v  -> ptext SLIT("version") <+> integer v
1263
1264 \end{code}