ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         Token(..), lexer, ParseResult(..), PState(..),
  27         checkVersion,
  28         StringBuffer,
  29
  30         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  31         getSrcLocP, getSrcFile,
  32         layoutOn, layoutOff, pushContext, popContext
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import Char             ( ord, isSpace, toUpper )
  38 import List             ( isSuffixOf )
  39
  40 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  41 import Name             ( isLowerISO, isUpperISO )
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  48                           replaceSrcLine, mkSrcLoc )
  49
  50 import Maybes           ( MaybeErr(..) )
  51 import ErrUtils         ( Message )
  52 import Outputable
  53
  54 import FastString
  55 import StringBuffer
  56 import GlaExts
  57 import ST               ( runST )
  58
  59 #if __GLASGOW_HASKELL__ >= 303
  60 import Bits
  61 import Word
  62 #endif
  63
  64 import Char             ( chr )
  65 import Addr
  66 import PrelRead         ( readRational__ ) -- Glasgow non-std
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Data types}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 The token data type, fairly un-interesting except from one
  76 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  77 strictness, unfolding etc).
  78
  79 The Idea/Observation here is that the renamer needs to scan through
  80 all of an interface file before it can continue. But only a fraction
  81 of the information contained in the file turns out to be useful, so
  82 delaying as much as possible of the scanning and parsing of an
  83 interface file Makes Sense (Heap profiles of the compiler
  84 show a reduction in heap usage by at least a factor of two,
  85 post-renamer).
  86
  87 Hence, the interface file lexer spots when value declarations are
  88 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  89 for the type and any other id info for that binding (unfolding, strictness
  90 etc). These constructors are applied to the result of lexing these sub-chunks.
  91
  92 The lexing of the type and id info is all done lazily, of course, so
  93 the scanning (and subsequent parsing) will be done *only* on the ids the
  94 renamer finds out that it is interested in. The rest will just be junked.
  95 Laziness, you know it makes sense :-)
  96
  97 \begin{code}
  98 data Token
  99   = ITcase                      -- Haskell keywords
 100   | ITclass
 101   | ITdata
 102   | ITdefault
 103   | ITderiving
 104   | ITdo
 105   | ITelse
 106   | ITif
 107   | ITimport
 108   | ITin
 109   | ITinfix
 110   | ITinfixl
 111   | ITinfixr
 112   | ITinstance
 113   | ITlet
 114   | ITmodule
 115   | ITnewtype
 116   | ITof
 117   | ITthen
 118   | ITtype
 119   | ITwhere
 120   | ITscc
 121
 122   | ITforall                    -- GHC extension keywords
 123   | ITforeign
 124   | ITexport
 125   | ITlabel
 126   | ITdynamic
 127   | ITunsafe
 128
 129   | ITinterface                 -- interface keywords
 130   | IT__export
 131   | ITdepends
 132   | IT__forall
 133   | ITletrec
 134   | ITcoerce
 135   | ITinlineMe
 136   | ITinlineCall
 137   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 138   | ITdefaultbranch
 139   | ITbottom
 140   | ITinteger_lit
 141   | ITfloat_lit
 142   | ITrational_lit
 143   | ITaddr_lit
 144   | ITlit_lit
 145   | ITstring_lit
 146   | ITtypeapp
 147   | ITonce
 148   | ITmany
 149   | ITarity
 150   | ITspecialise
 151   | ITnocaf
 152   | ITunfold InlinePragInfo
 153   | ITstrict ([Demand], Bool)
 154   | ITrules
 155   | ITcprinfo (CprInfo)
 156   | IT__scc
 157   | ITsccAllCafs
 158
 159   | ITspecialise_prag           -- Pragmas
 160   | ITsource_prag
 161   | ITinline_prag
 162   | ITnoinline_prag
 163   | ITrules_prag
 164   | ITline_prag
 165   | ITclose_prag
 166
 167   | ITdotdot                    -- reserved symbols
 168   | ITdcolon
 169   | ITequal
 170   | ITlam
 171   | ITvbar
 172   | ITlarrow
 173   | ITrarrow
 174   | ITat
 175   | ITtilde
 176   | ITdarrow
 177   | ITminus
 178   | ITbang
 179   | ITdot
 180
 181   | ITbiglam                    -- GHC-extension symbols
 182
 183   | ITocurly                    -- special symbols
 184   | ITccurly
 185   | ITvccurly
 186   | ITobrack
 187   | ITcbrack
 188   | IToparen
 189   | ITcparen
 190   | IToubxparen
 191   | ITcubxparen
 192   | ITsemi
 193   | ITcomma
 194   | ITunderscore
 195   | ITbackquote
 196
 197   | ITvarid   FAST_STRING       -- identifiers
 198   | ITconid   FAST_STRING
 199   | ITvarsym  FAST_STRING
 200   | ITconsym  FAST_STRING
 201   | ITqvarid  (FAST_STRING,FAST_STRING)
 202   | ITqconid  (FAST_STRING,FAST_STRING)
 203   | ITqvarsym (FAST_STRING,FAST_STRING)
 204   | ITqconsym (FAST_STRING,FAST_STRING)
 205
 206   | ITpragma StringBuffer
 207
 208   | ITchar       Char
 209   | ITstring     FAST_STRING
 210   | ITinteger    Integer
 211   | ITrational   Rational
 212
 213   | ITprimchar   Char
 214   | ITprimstring FAST_STRING
 215   | ITprimint    Integer
 216   | ITprimfloat  Rational
 217   | ITprimdouble Rational
 218   | ITlitlit     FAST_STRING
 219
 220   | ITunknown String            -- Used when the lexer can't make sense of it
 221   | ITeof                       -- end of file token
 222   deriving Text -- debugging
 223 \end{code}
 224
 225 -----------------------------------------------------------------------------
 226 Keyword Lists
 227
 228 \begin{code}
 229 pragmaKeywordsFM = listToUFM $
 230       map (\ (x,y) -> (_PK_ x,y))
 231        [( "SPECIALISE", ITspecialise_prag ),
 232         ( "SPECIALIZE", ITspecialise_prag ),
 233         ( "SOURCE",     ITsource_prag ),
 234         ( "INLINE",     ITinline_prag ),
 235         ( "NOINLINE",   ITnoinline_prag ),
 236         ( "NOTINLINE",  ITnoinline_prag ),
 237         ( "LINE",       ITline_prag ),
 238         ( "RULES",      ITrules_prag ),
 239         ( "RULEZ",      ITrules_prag )  -- american spelling :-)
 240         ]
 241
 242 haskellKeywordsFM = listToUFM $
 243       map (\ (x,y) -> (_PK_ x,y))
 244        [( "_",          ITunderscore ),
 245         ( "case",       ITcase ),
 246         ( "class",      ITclass ),
 247         ( "data",       ITdata ),
 248         ( "default",    ITdefault ),
 249         ( "deriving",   ITderiving ),
 250         ( "do",         ITdo ),
 251         ( "else",       ITelse ),
 252         ( "if",         ITif ),
 253         ( "import",     ITimport ),
 254         ( "in",         ITin ),
 255         ( "infix",      ITinfix ),
 256         ( "infixl",     ITinfixl ),
 257         ( "infixr",     ITinfixr ),
 258         ( "instance",   ITinstance ),
 259         ( "let",        ITlet ),
 260         ( "module",     ITmodule ),
 261         ( "newtype",    ITnewtype ),
 262         ( "of",         ITof ),
 263         ( "then",       ITthen ),
 264         ( "type",       ITtype ),
 265         ( "where",      ITwhere ),
 266         ( "_scc_",      ITscc )
 267      ]
 268
 269 ghcExtensionKeywordsFM = listToUFM $
 270         map (\ (x,y) -> (_PK_ x,y))
 271      [  ( "forall",     ITforall ),
 272         ( "foreign",    ITforeign ),
 273         ( "export",     ITexport ),
 274         ( "label",      ITlabel ),
 275         ( "dynamic",    ITdynamic ),
 276         ( "unsafe",     ITunsafe ),
 277         ("_ccall_",     ITccall (False, False, False)),
 278         ("_ccall_GC_",  ITccall (False, False, True)),
 279         ("_casm_",      ITccall (False, True,  False)),
 280         ("_casm_GC_",   ITccall (False, True,  True)),
 281
 282         -- interface keywords
 283         ("__interface",         ITinterface),
 284         ("__export",            IT__export),
 285         ("__depends",           ITdepends),
 286         ("__forall",            IT__forall),
 287         ("__letrec",            ITletrec),
 288         ("__coerce",            ITcoerce),
 289         ("__inline_me",         ITinlineMe),
 290         ("__inline_call",       ITinlineCall),
 291         ("__depends",           ITdepends),
 292         ("__DEFAULT",           ITdefaultbranch),
 293         ("__bot",               ITbottom),
 294         ("__integer",           ITinteger_lit),
 295         ("__float",             ITfloat_lit),
 296         ("__rational",          ITrational_lit),
 297         ("__addr",              ITaddr_lit),
 298         ("__litlit",            ITlit_lit),
 299         ("__string",            ITstring_lit),
 300         ("__a",                 ITtypeapp),
 301         ("__o",                 ITonce),
 302         ("__m",                 ITmany),
 303         ("__A",                 ITarity),
 304         ("__P",                 ITspecialise),
 305         ("__C",                 ITnocaf),
 306         ("__R",                 ITrules),
 307         ("__u",                 ITunfold NoInlinePragInfo),
 308
 309         ("__ccall",             ITccall (False, False, False)),
 310         ("__ccall_GC",          ITccall (False, False, True)),
 311         ("__dyn_ccall",         ITccall (True,  False, False)),
 312         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 313         ("__casm",              ITccall (False, True,  False)),
 314         ("__dyn_casm",          ITccall (True,  True,  False)),
 315         ("__casm_GC",           ITccall (False, True,  True)),
 316         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 317
 318         ("/\\",                 ITbiglam)
 319      ]
 320
 321
 322 haskellKeySymsFM = listToUFM $
 323         map (\ (x,y) -> (_PK_ x,y))
 324       [ ("..",          ITdotdot)
 325        ,("::",          ITdcolon)
 326        ,("=",           ITequal)
 327        ,("\\",          ITlam)
 328        ,("|",           ITvbar)
 329        ,("<-",          ITlarrow)
 330        ,("->",          ITrarrow)
 331        ,("@",           ITat)
 332        ,("~",           ITtilde)
 333        ,("=>",          ITdarrow)
 334        ,("-",           ITminus)
 335        ,("!",           ITbang)
 336        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 337        ]
 338
 339 not_special_op ITminus = False
 340 not_special_op ITbang  = False
 341 not_special_op _ = True
 342 \end{code}
 343
 344 -----------------------------------------------------------------------------
 345 The lexical analyser
 346
 347 Lexer state:
 348
 349         - (glaexts) lexing an interface file or -fglasgow-exts
 350         - (bol)   pointer to beginning of line (for column calculations)
 351         - (buf)   pointer to beginning of token
 352         - (buf)   pointer to current char
 353         - (atbol) flag indicating whether we're at the beginning of a line
 354
 355 \begin{code}
 356 lexer :: (Token -> P a) -> P a
 357 lexer cont buf s@(PState{
 358                     loc = loc,
 359                     glasgow_exts = glaexts,
 360                     bol = bol,
 361                     atbol = atbol,
 362                     context = ctx
 363                 })
 364
 365         -- first, start a new lexeme and lose all the whitespace
 366   = tab line bol atbol (stepOverLexeme buf)
 367   where
 368         line = srcLocLine loc
 369
 370         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 371           case currentChar# buf of
 372
 373             '\NUL'# ->
 374                    if bufferExhausted (stepOn buf)
 375                         then cont ITeof buf s'
 376                         else trace "lexer: misplaced NUL?" $
 377                              tab y bol atbol (stepOn buf)
 378
 379             '\n'# -> let buf' = stepOn buf
 380                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 381
 382                 -- find comments.  This got harder in Haskell 98.
 383             '-'# ->  let trundle n =
 384                           let next = lookAhead# buf n in
 385                           if next `eqChar#` '-'# then trundle (n +# 1#)
 386                           else if is_symbol next || n <# 2#
 387                                 then is_a_token
 388                                 else case untilChar# (stepOnBy# buf n) '\n'# of
 389                                     { buf' -> tab y bol atbol (stepOverLexeme buf')
 390                                     }
 391                     in trundle 1#
 392
 393                 -- comments and pragmas.  We deal with LINE pragmas here,
 394                 -- and throw out any unrecognised pragmas as comments.  Any
 395                 -- pragmas we know about are dealt with later (after any layout
 396                 -- processing if necessary).
 397
 398             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 399                 if lookAhead# buf 2# `eqChar#` '#'# then
 400                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 401                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 402                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 403                   let lexeme = mkFastString -- ToDo: too slow
 404                                   (map toUpper (lexemeToString buf2)) in
 405                   case lookupUFM pragmaKeywordsFM lexeme of
 406                         Just ITline_prag -> line_prag (lexer cont) buf2 s'
 407                         Just other -> is_a_token
 408                         Nothing -> skip_to_end (stepOnBy# buf 2#)
 409                   }}
 410
 411                 else skip_to_end (stepOnBy# buf 2#)
 412                 where
 413                     skip_to_end buf = nested_comment (lexer cont) buf s'
 414
 415                 -- tabs have been expanded beforehand
 416             c | is_space c -> tab y bol atbol (stepOn buf)
 417               | otherwise  -> is_a_token
 418
 419            where s' = s{loc = replaceSrcLine loc y,
 420                         bol = bol,
 421                        atbol = atbol}
 422
 423                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 424                             | otherwise    = lexToken cont glaexts buf s'
 425
 426 -- {-# LINE .. #-} pragmas.  yeuch.
 427 line_prag cont buf =
 428   case expandWhile# is_space buf                of { buf1 ->
 429   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 430   -- subtract one: the line number refers to the *following* line.
 431   let real_line = line - 1 in
 432   case fromInteger real_line                    of { i@(I# l) ->
 433   case expandWhile# is_space buf2               of { buf3 ->
 434   case currentChar# buf3                        of
 435      '\"'#{-"-} ->
 436         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 437         let file = lexemeToFastString buf4 in
 438         \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
 439         }
 440      other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
 441   }}}}
 442   where
 443         skipToEnd buf = nested_comment cont buf
 444
 445 nested_comment :: P a -> P a
 446 nested_comment cont buf = loop buf
 447  where
 448    loop buf =
 449      case currentChar# buf of
 450         '\NUL'# | bufferExhausted (stepOn buf) ->
 451                 lexError "unterminated `{-'" buf
 452
 453         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 454                 cont (stepOnBy# buf 2#)
 455
 456         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 457               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 458
 459         '\n'# -> \ s@PState{loc=loc} ->
 460                  let buf' = stepOn buf in
 461                  nested_comment cont buf'
 462                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 463                           atbol = 1#}
 464
 465         _   -> nested_comment cont (stepOn buf)
 466
 467 -- When we are lexing the first token of a line, check whether we need to
 468 -- insert virtual semicolons or close braces due to layout.
 469
 470 lexBOL :: (Token -> P a) -> P a
 471 lexBOL cont buf s@(PState{
 472                     loc = loc,
 473                     glasgow_exts = glaexts,
 474                     bol = bol,
 475                     atbol = atbol,
 476                     context = ctx
 477                   }) =
 478         if need_close_curly then
 479                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 480                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 481         else if need_semi_colon then
 482                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 483                 cont ITsemi buf s{atbol = 0#}
 484         else
 485                 lexToken cont glaexts buf s{atbol = 0#}
 486   where
 487         col = currentIndex# buf -# bol
 488
 489         need_close_curly =
 490                 case ctx of
 491                         [] -> False
 492                         (i:_) -> case i of
 493                                     NoLayout -> False
 494                                     Layout n -> col <# n
 495         need_semi_colon =
 496                 case ctx of
 497                         [] -> False
 498                         (i:_) -> case i of
 499                                     NoLayout -> False
 500                                     Layout n -> col ==# n
 501
 502
 503 lexToken :: (Token -> P a) -> Int# -> P a
 504 lexToken cont glaexts buf =
 505  --trace "lexToken" $
 506  _scc_ "Lexer"
 507   case currentChar# buf of
 508
 509     -- special symbols ----------------------------------------------------
 510     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 511                 -> cont IToubxparen (setCurrentPos# buf 2#)
 512          | otherwise
 513                 -> cont IToparen (incLexeme buf)
 514
 515     ')'# -> cont ITcparen    (incLexeme buf)
 516     '['# -> cont ITobrack    (incLexeme buf)
 517     ']'# -> cont ITcbrack    (incLexeme buf)
 518     ','# -> cont ITcomma     (incLexeme buf)
 519     ';'# -> cont ITsemi      (incLexeme buf)
 520
 521     '}'# -> \ s@PState{context = ctx} ->
 522             case ctx of
 523                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 524                 _ -> lexError "too many '}'s" buf s
 525
 526     '#'# -> case lookAhead# buf 1# of
 527                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 528                 '-'# -> case lookAhead# buf 2# of
 529                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 530                            _    -> lex_sym cont (incLexeme buf)
 531                 _    -> lex_sym cont (incLexeme buf)
 532
 533     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 534                 -> lex_cstring cont (setCurrentPos# buf 2#)
 535          | otherwise
 536                 -> cont ITbackquote (incLexeme buf)
 537
 538     '{'# ->     -- look for "{-##" special iface pragma
 539         case lookAhead# buf 1# of
 540            '-'# -> case lookAhead# buf 2# of
 541                     '#'# -> case lookAhead# buf 3# of
 542                                 '#'# ->
 543                                    let (lexeme, buf')
 544                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 545                                    cont (ITpragma lexeme) buf'
 546                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 547                     _    -> cont ITocurly (incLexeme buf)
 548            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 549
 550     -- strings/characters -------------------------------------------------
 551     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 552     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 553
 554     -- strictness and cpr pragmas and __scc treated specially.
 555     '_'# | flag glaexts ->
 556          case lookAhead# buf 1# of
 557            '_'# -> case lookAhead# buf 2# of
 558                     'S'# ->
 559                         lex_demand cont (stepOnUntil (not . isSpace)
 560                                         (stepOnBy# buf 3#)) -- past __S
 561                     'M'# ->
 562                         lex_cpr cont (stepOnUntil (not . isSpace)
 563                                      (stepOnBy# buf 3#)) -- past __M
 564                     's'# ->
 565                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 566                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 567                                Nothing   -> lex_id cont glaexts buf
 568                     _ -> lex_id cont glaexts buf
 569            _    -> lex_id cont glaexts buf
 570
 571         -- Hexadecimal and octal constants
 572     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 573                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 574          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 575                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 576         where ch   = lookAhead# buf 1#
 577               ch2  = lookAhead# buf 2#
 578               buf' = setCurrentPos# buf 2#
 579
 580     '\NUL'# ->
 581             if bufferExhausted (stepOn buf) then
 582                cont ITeof buf
 583             else
 584                trace "lexIface: misplaced NUL?" $
 585                cont (ITunknown "\NUL") (stepOn buf)
 586
 587     c | is_digit  c -> lex_num cont glaexts 0 buf
 588       | is_symbol c -> lex_sym cont buf
 589       | is_upper  c -> lex_con cont glaexts buf
 590       | is_ident  c -> lex_id  cont glaexts buf
 591       | otherwise   -> lexError "illegal character" buf
 592
 593 -- Int# is unlifted, and therefore faster than Bool for flags.
 594 {-# INLINE flag #-}
 595 flag :: Int# -> Bool
 596 flag 0# = False
 597 flag _  = True
 598
 599 -------------------------------------------------------------------------------
 600 -- Pragmas
 601
 602 lex_prag cont buf
 603   = case expandWhile# is_space buf of { buf1 ->
 604     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 605     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 606     case lookupUFM pragmaKeywordsFM lexeme of
 607         Just kw -> cont kw (mergeLexemes buf buf2)
 608         Nothing -> panic "lex_prag"
 609   }}
 610
 611 -------------------------------------------------------------------------------
 612 -- Strings & Chars
 613
 614 lex_string cont glaexts s buf
 615   = case currentChar# buf of
 616         '"'#{-"-} ->
 617            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 618            case currentChar# buf' of
 619                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 620                 _                   -> cont (ITstring s') buf'
 621
 622         -- ignore \& in a string, deal with string gaps
 623         '\\'# | next_ch `eqChar#` '&'#
 624                 -> lex_string cont glaexts s (setCurrentPos# buf 2#)
 625               | is_space next_ch
 626                 -> lex_stringgap cont glaexts s buf'
 627
 628             where next_ch = lookAhead# buf 1#
 629                   buf' = setCurrentPos# buf 2#
 630
 631         _ -> lex_char (lex_next_string cont s) glaexts buf
 632
 633 lex_stringgap cont glaexts s buf
 634   = let buf' = incLexeme buf in
 635     case currentChar# buf of
 636         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 637                   st{loc = incSrcLine loc}
 638         '\\'# -> lex_string cont glaexts s buf'
 639         c | is_space c -> lex_stringgap cont glaexts s buf'
 640         other -> charError buf'
 641
 642 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 643
 644 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 645 lex_char cont glaexts buf
 646   = case currentChar# buf of
 647         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 648         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 649         other -> charError buf
 650
 651 char_end cont glaexts c buf
 652   = case currentChar# buf of
 653         '\''# -> let buf' = incLexeme buf in
 654                  case currentChar# buf' of
 655                         '#'# | flag glaexts
 656                                 -> cont (ITprimchar c) (incLexeme buf')
 657                         _       -> cont (ITchar c) buf'
 658         _     -> charError buf
 659
 660 lex_escape cont buf
 661   = let buf' = incLexeme buf in
 662     case currentChar# buf of
 663         'a'#       -> cont '\a' buf'
 664         'b'#       -> cont '\b' buf'
 665         'f'#       -> cont '\f' buf'
 666         'n'#       -> cont '\n' buf'
 667         'r'#       -> cont '\r' buf'
 668         't'#       -> cont '\t' buf'
 669         'v'#       -> cont '\v' buf'
 670         '\\'#      -> cont '\\' buf'
 671         '"'#       -> cont '\"' buf'
 672         '\''#      -> cont '\'' buf'
 673         '^'#       -> let c = currentChar# buf' in
 674                       if c `geChar#` '@'# && c `leChar#` '_'#
 675                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 676                         else charError buf'
 677
 678         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 679         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 680         x | is_digit x
 681                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 682
 683         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 684                                        Just buf2 <- [prefixMatch buf p] ] of
 685                             (c,buf2):_ -> cont c buf2
 686                             [] -> charError buf'
 687
 688 after_charnum cont i buf
 689   = let int = fromInteger i in
 690     if i >= 0 && i <= 255
 691         then cont (chr int) buf
 692         else charError buf
 693
 694 readNum cont buf is_digit base conv = read buf 0
 695   where read buf i
 696           = case currentChar# buf of { c ->
 697             if is_digit c
 698                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 699                 else cont i buf
 700             }
 701
 702 is_hexdigit c
 703         =  is_digit c
 704         || (c `geChar#` 'a'# && c `leChar#` 'h'#)
 705         || (c `geChar#` 'A'# && c `leChar#` 'H'#)
 706
 707 hex c | is_digit c = ord# c -# ord# '0'#
 708       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 709 oct_or_dec c = ord# c -# ord# '0'#
 710
 711 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 712
 713 to_lower c
 714   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 715         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 716   | otherwise = c
 717
 718 charError buf = lexError "error in character literal" buf
 719
 720 silly_escape_chars = [
 721         ("NUL", '\NUL'),
 722         ("SOH", '\SOH'),
 723         ("STX", '\STX'),
 724         ("ETX", '\ETX'),
 725         ("EOT", '\EOT'),
 726         ("ENQ", '\ENQ'),
 727         ("ACK", '\ACK'),
 728         ("BEL", '\BEL'),
 729         ("BS", '\BS'),
 730         ("HT", '\HT'),
 731         ("LF", '\LF'),
 732         ("VT", '\VT'),
 733         ("FF", '\FF'),
 734         ("CR", '\CR'),
 735         ("SO", '\SO'),
 736         ("SI", '\SI'),
 737         ("DLE", '\DLE'),
 738         ("DC1", '\DC1'),
 739         ("DC2", '\DC2'),
 740         ("DC3", '\DC3'),
 741         ("DC4", '\DC4'),
 742         ("NAK", '\NAK'),
 743         ("SYN", '\SYN'),
 744         ("ETB", '\ETB'),
 745         ("CAN", '\CAN'),
 746         ("EM", '\EM'),
 747         ("SUB", '\SUB'),
 748         ("ESC", '\ESC'),
 749         ("FS", '\FS'),
 750         ("GS", '\GS'),
 751         ("RS", '\RS'),
 752         ("US", '\US'),
 753         ("SP", '\SP'),
 754         ("DEL", '\DEL')
 755         ]
 756
 757 -------------------------------------------------------------------------------
 758
 759 lex_demand cont buf =
 760  case read_em [] buf of { (ls,buf') ->
 761  case currentChar# buf' of
 762    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 763    _    -> cont (ITstrict (ls, False)) buf'
 764  }
 765  where
 766    -- code snatched from Demand.lhs
 767   read_em acc buf =
 768    case currentChar# buf of
 769     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 770     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 771     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 772     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 773     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 774     ')'# -> (reverse acc, stepOn buf)
 775     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 776     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 777     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 778     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 779     _    -> (reverse acc, buf)
 780
 781   do_unpack new_or_data wrapper_unpacks acc buf
 782    = case read_em [] buf of
 783       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 784
 785 lex_cpr cont buf =
 786  case read_em [] buf of { (cpr_inf,buf') ->
 787    ASSERT ( null (tail cpr_inf) )
 788    cont (ITcprinfo $ head cpr_inf) buf'
 789  }
 790  where
 791    -- code snatched from lex_demand above
 792   read_em acc buf =
 793    case currentChar# buf of
 794     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 795     '('# -> do_unpack acc (stepOn buf)
 796     ')'# -> (reverse acc, stepOn buf)
 797     _    -> (reverse acc, buf)
 798
 799   do_unpack acc buf
 800    = case read_em [] buf of
 801       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 802
 803 ------------------
 804 lex_scc cont buf =
 805  case currentChar# buf of
 806   'C'# -> cont ITsccAllCafs (incLexeme buf)
 807   other -> cont ITscc buf
 808
 809 -----------------------------------------------------------------------------
 810 -- Numbers
 811
 812 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 813 lex_num cont glaexts acc buf =
 814  case scanNumLit acc buf of
 815      (acc',buf') ->
 816        case currentChar# buf' of
 817          '.'# | is_digit (lookAhead# buf' 1#) ->
 818              -- this case is not optimised at all, as the
 819              -- presence of floating point numbers in interface
 820              -- files is not that common. (ToDo)
 821             case expandWhile# is_digit (incLexeme buf') of
 822               buf2 -> -- points to first non digit char
 823
 824                 let l = case currentChar# buf2 of
 825                           'E'# -> do_exponent
 826                           'e'# -> do_exponent
 827                           _ -> buf2
 828
 829                     do_exponent
 830                         = let buf3 = incLexeme buf2 in
 831                           case currentChar# buf3 of
 832                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 833                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 834                                 x | is_digit x -> expandWhile# is_digit buf3
 835                                 _ -> buf2
 836
 837                     v = readRational__ (lexemeToString l)
 838
 839                 in case currentChar# l of -- glasgow exts only
 840                       '#'# | flag glaexts -> let l' = incLexeme l in
 841                               case currentChar# l' of
 842                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 843                                 _    -> cont (ITprimfloat  v) l'
 844                       _ -> cont (ITrational v) l
 845
 846          _ -> after_lexnum cont glaexts acc' buf'
 847
 848 after_lexnum cont glaexts i buf
 849   = case currentChar# buf of
 850         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 851         _    -> cont (ITinteger i) buf
 852
 853 -----------------------------------------------------------------------------
 854 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 855
 856 -- we lexemeToFastString on the bit between the ``''s, but include the
 857 -- quotes in the full lexeme.
 858
 859 lex_cstring cont buf =
 860  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 861    buf' -> cont (ITlitlit (lexemeToFastString
 862                                 (setCurrentPos# buf' (negateInt# 2#))))
 863                 (mergeLexemes buf buf')
 864
 865 ------------------------------------------------------------------------------
 866 -- Character Classes
 867
 868 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 869
 870 {-# INLINE is_ctype #-}
 871 #if __GLASGOW_HASKELL__ >= 303
 872 is_ctype :: Word8 -> Char# -> Bool
 873 is_ctype mask = \c ->
 874    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 875 #else
 876 is_ctype :: Int -> Char# -> Bool
 877 is_ctype (I# mask) = \c ->
 878     let (A# ctype) = ``char_types'' :: Addr
 879         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 880     in
 881         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 882 #endif
 883
 884 is_ident  = is_ctype 1
 885 is_symbol = is_ctype 2
 886 is_any    = is_ctype 4
 887 is_space  = is_ctype 8
 888 is_upper  = is_ctype 16
 889 is_digit  = is_ctype 32
 890
 891 -----------------------------------------------------------------------------
 892 -- identifiers, symbols etc.
 893
 894 lex_id cont glaexts buf =
 895  case expandWhile# is_ident buf of { buf1 ->
 896
 897  case (if flag glaexts
 898         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 899         else buf1)                              of { buf' ->
 900
 901  let lexeme  = lexemeToFastString buf' in
 902
 903  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 904         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 905                           cont kwd_token buf';
 906         Nothing        ->
 907
 908  let var_token = cont (mk_var_token lexeme) buf' in
 909
 910  if not (flag glaexts)
 911    then var_token
 912    else
 913
 914  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 915         Just kwd_token -> cont kwd_token buf';
 916         Nothing        -> var_token
 917
 918  }}}}
 919
 920 lex_sym cont buf =
 921  case expandWhile# is_symbol buf of
 922    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 923                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 924                                   cont kwd_token buf' ;
 925                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 926                                   cont (mk_var_token lexeme) buf'
 927            }
 928         where lexeme = lexemeToFastString buf'
 929
 930
 931 lex_con cont glaexts buf =
 932  case expandWhile# is_ident buf          of { buf1 ->
 933  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 934
 935  case currentChar# buf' of
 936      '.'# -> munch
 937      _    -> just_a_conid
 938
 939    where
 940     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 941                    cont (ITconid lexeme) buf'
 942     lexeme = lexemeToFastString buf'
 943     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 944  }}
 945
 946 lex_qid cont glaexts mod buf just_a_conid =
 947  case currentChar# buf of
 948   '['# ->       -- Special case for []
 949     case lookAhead# buf 1# of
 950      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 951      _    -> just_a_conid
 952
 953   '('# ->  -- Special case for (,,,)
 954            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 955     case lookAhead# buf 1# of
 956      '#'# | flag glaexts -> case lookAhead# buf 2# of
 957                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 958                                 just_a_conid
 959                 _    -> just_a_conid
 960      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 961      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 962      _    -> just_a_conid
 963
 964   '-'# -> case lookAhead# buf 1# of
 965             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 966             _    -> lex_id3 cont glaexts mod buf just_a_conid
 967   _    -> lex_id3 cont glaexts mod buf just_a_conid
 968
 969 lex_id3 cont glaexts mod buf just_a_conid
 970   | is_symbol (currentChar# buf) =
 971      let
 972         start_new_lexeme = stepOverLexeme buf
 973      in
 974      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 975      let
 976        lexeme  = lexemeToFastString buf'
 977         -- real lexeme is M.<sym>
 978        new_buf = mergeLexemes buf buf'
 979      in
 980      case lookupUFM haskellKeySymsFM lexeme of {
 981         Just kwd_token | not_special_op kwd_token
 982                         -> just_a_conid;        -- avoid M.::, but not M.!
 983         other -> cont (mk_qvar_token mod lexeme) new_buf
 984      }}
 985
 986   | otherwise   =
 987      let
 988         start_new_lexeme = stepOverLexeme buf
 989      in
 990      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 991      if emptyLexeme buf1
 992             then just_a_conid
 993             else
 994
 995      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 996
 997      let
 998       lexeme  = lexemeToFastString buf'
 999       new_buf = mergeLexemes buf buf'
1000       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1001      in
1002      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1003             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1004             Nothing        -> is_a_qvarid
1005      }}}
1006
1007
1008 slurp_trailing_hashes buf glaexts
1009   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1010   | otherwise    = buf
1011
1012
1013 mk_var_token pk_str
1014   | is_upper f          = ITconid pk_str
1015         -- _[A-Z] is treated as a constructor in interface files.
1016   | f `eqChar#` '_'# && not (_NULL_ tl)
1017         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
1018   | is_ident f          = ITvarid pk_str
1019   | f `eqChar#` ':'#    = ITconsym pk_str
1020   | otherwise           = ITvarsym pk_str
1021   where
1022       (C# f) = _HEAD_ pk_str
1023       tl     = _TAIL_ pk_str
1024
1025 mk_qvar_token m token =
1026  case mk_var_token token of
1027    ITconid n  -> ITqconid  (m,n)
1028    ITvarid n  -> ITqvarid  (m,n)
1029    ITconsym n -> ITqconsym (m,n)
1030    ITvarsym n -> ITqvarsym (m,n)
1031    _          -> ITunknown (show token)
1032 \end{code}
1033
1034 ----------------------------------------------------------------------------
1035 Horrible stuff for dealing with M.(,,,)
1036
1037 \begin{code}
1038 lex_tuple cont mod buf back_off =
1039   go 2 buf
1040   where
1041    go n buf =
1042     case currentChar# buf of
1043       ','# -> go (n+1) (stepOn buf)
1044       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
1045       _    -> back_off
1046
1047 lex_ubx_tuple cont mod buf back_off =
1048   go 2 buf
1049   where
1050    go n buf =
1051     case currentChar# buf of
1052       ','# -> go (n+1) (stepOn buf)
1053       '#'# -> case lookAhead# buf 1# of
1054                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
1055                                  (stepOnBy# buf 2#)
1056                 _    -> back_off
1057       _    -> back_off
1058 \end{code}
1059
1060 -----------------------------------------------------------------------------
1061 doDiscard rips along really fast, looking for a '#-}',
1062 indicating the end of the pragma we're skipping
1063
1064 \begin{code}
1065 doDiscard inStr buf =
1066  case currentChar# buf of
1067    '#'# | not inStr ->
1068        case lookAhead# buf 1# of { '#'# ->
1069        case lookAhead# buf 2# of { '-'# ->
1070        case lookAhead# buf 3# of { '}'# ->
1071            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1072         _    -> doDiscard inStr (incLexeme buf) };
1073         _    -> doDiscard inStr (incLexeme buf) };
1074         _    -> doDiscard inStr (incLexeme buf) }
1075    '"'# ->
1076        let
1077         odd_slashes buf flg i# =
1078           case lookAhead# buf i# of
1079            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1080            _     -> flg
1081        in
1082        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1083          '\\'# -> -- escaping something..
1084            if odd_slashes buf True (negateInt# 2#) then
1085                -- odd number of slashes, " is escaped.
1086               doDiscard inStr (incLexeme buf)
1087            else
1088                -- even number of slashes, \ is escaped.
1089               doDiscard (not inStr) (incLexeme buf)
1090          _ -> case inStr of -- forced to avoid build-up
1091                True  -> doDiscard False (incLexeme buf)
1092                False -> doDiscard True  (incLexeme buf)
1093    _ -> doDiscard inStr (incLexeme buf)
1094
1095 \end{code}
1096
1097 -----------------------------------------------------------------------------
1098
1099 \begin{code}
1100 data LayoutContext
1101   = NoLayout
1102   | Layout Int#
1103
1104 data ParseResult a
1105   = POk PState a
1106   | PFailed Message
1107
1108 data PState = PState {
1109         loc           :: SrcLoc,
1110         glasgow_exts  :: Int#,
1111         bol           :: Int#,
1112         atbol         :: Int#,
1113         context       :: [LayoutContext]
1114      }
1115
1116 type P a = StringBuffer         -- Input string
1117         -> PState
1118         -> ParseResult a
1119
1120 returnP   :: a -> P a
1121 returnP a buf s = POk s a
1122
1123 thenP      :: P a -> (a -> P b) -> P b
1124 m `thenP` k = \ buf s ->
1125         case m buf s of
1126                 POk s1 a -> k a buf s1
1127                 PFailed err  -> PFailed err
1128
1129 thenP_     :: P a -> P b -> P b
1130 m `thenP_` k = m `thenP` \_ -> k
1131
1132 mapP :: (a -> P b) -> [a] -> P [b]
1133 mapP f [] = returnP []
1134 mapP f (a:as) =
1135      f a `thenP` \b ->
1136      mapP f as `thenP` \bs ->
1137      returnP (b:bs)
1138
1139 failP :: String -> P a
1140 failP msg buf s = PFailed (text msg)
1141
1142 failMsgP :: Message -> P a
1143 failMsgP msg buf s = PFailed msg
1144
1145 lexError :: String -> P a
1146 lexError str buf s@PState{ loc = loc }
1147   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1148
1149 getSrcLocP :: P SrcLoc
1150 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1151
1152 getSrcFile :: P FAST_STRING
1153 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1154
1155 getContext :: P [LayoutContext]
1156 getContext buf s@(PState{ context = ctx }) = POk s ctx
1157
1158 pushContext :: LayoutContext -> P ()
1159 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1160
1161 {-
1162
1163 This special case in layoutOn is to handle layout contexts with are
1164 indented the same or less than the current context.  This is illegal
1165 according to the Haskell spec, so we have to arrange to close the
1166 current context.  eg.
1167
1168 class Foo a where
1169 class Bar a
1170
1171 after the first 'where', the sequence of events is:
1172
1173         - layout system inserts a ';' (column 0)
1174         - parser begins a new context at column 0
1175         - parser shifts ';' (legal empty declaration)
1176         - parser sees 'class': parse error (we're still in the inner context)
1177
1178 trouble is, by the time we know we need a new context, the lexer has
1179 already generated the ';'.  Hacky solution is as follows: since we
1180 know the column of the next token (it's the column number of the new
1181 context), we set the ACTUAL column number of the new context to this
1182 numer plus one.  Hence the next time the lexer is called, a '}' will
1183 be generated to close the new context straight away.  Furthermore, we
1184 have to set the atbol flag so that the ';' that the parser shifted as
1185 part of the new context is re-generated.
1186
1187 when the new context is *less* indented than the current one:
1188
1189 f = f where g = g where
1190 h = h
1191
1192         - current context: column 12.
1193         - on seeing 'h' (column 0), the layout system inserts '}'
1194         - parser starts a new context, column 0
1195         - parser sees '}', uses it to close new context
1196         - we still need to insert another '}' followed by a ';',
1197           hence the atbol trick.
1198
1199 -}
1200
1201 layoutOn :: P ()
1202 layoutOn buf s@(PState{ bol = bol, context = ctx }) =
1203     let offset = lexemeIndex buf -# bol in
1204     case ctx of
1205         Layout prev_off : _ | prev_off >=# offset ->
1206                 --trace ("layout on, column: " ++  show (I# offset)) $
1207                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1208         other ->
1209                 --trace ("layout on, column: " ++  show (I# offset)) $
1210                 POk s{ context = Layout offset : ctx } ()
1211
1212 layoutOff :: P ()
1213 layoutOff buf s@(PState{ context = ctx }) =
1214     POk s{ context = NoLayout:ctx } ()
1215
1216 popContext :: P ()
1217 popContext = \ buf s@(PState{ context = ctx }) ->
1218   case ctx of
1219         (_:tl) -> POk s{ context = tl } ()
1220         []    -> panic "Lex.popContext: empty context"
1221
1222 {-
1223  Note that if the name of the file we're processing ends
1224  with `hi-boot', we accept it on faith as having the right
1225  version. This is done so that .hi-boot files that comes
1226  with hsc don't have to be updated before every release,
1227  *and* it allows us to share .hi-boot files with versions
1228  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1229
1230  If the version number is 0, the checking is also turned off.
1231  (needed to deal with GHC.hi only!)
1232
1233  Once we can assume we're compiling with a version of ghc that
1234  supports interface file checking, we can drop the special
1235  pleading
1236 -}
1237 checkVersion :: Maybe Integer -> P ()
1238 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1239  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1240  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1241 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1242  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1243  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1244
1245 -----------------------------------------------------------------
1246
1247 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1248 ifaceParseErr s l
1249   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1250           text (lexemeToString s), char '\'']
1251
1252 ifaceVersionErr hi_vers l toks
1253   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1254           ptext SLIT("Expected"), int opt_HiVersion,
1255           ptext SLIT("found "), pp_version]
1256     where
1257      pp_version =
1258       case hi_vers of
1259         Nothing -> ptext SLIT("pre ghc-3.02 version")
1260         Just v  -> ptext SLIT("version") <+> integer v
1261
1262 \end{code}