ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         Token(..), lexer, ParseResult(..), PState(..),
  27         checkVersion,
  28         StringBuffer,
  29
  30         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  31         getSrcLocP, getSrcFile,
  32         layoutOn, layoutOff, pushContext, popContext
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import Char             ( ord, isSpace, toUpper )
  38 import List             ( isSuffixOf )
  39
  40 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  41 import Name             ( isLowerISO, isUpperISO )
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  48                           replaceSrcLine, mkSrcLoc )
  49
  50 import Maybes           ( MaybeErr(..) )
  51 import ErrUtils         ( Message )
  52 import Outputable
  53
  54 import FastString
  55 import StringBuffer
  56 import GlaExts
  57 import ST               ( runST )
  58
  59 #if __GLASGOW_HASKELL__ >= 303
  60 import Bits
  61 import Word
  62 #endif
  63
  64 import Char             ( chr )
  65 import Addr
  66 import PrelRead         ( readRational__ ) -- Glasgow non-std
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Data types}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 The token data type, fairly un-interesting except from one
  76 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  77 strictness, unfolding etc).
  78
  79 The Idea/Observation here is that the renamer needs to scan through
  80 all of an interface file before it can continue. But only a fraction
  81 of the information contained in the file turns out to be useful, so
  82 delaying as much as possible of the scanning and parsing of an
  83 interface file Makes Sense (Heap profiles of the compiler
  84 show a reduction in heap usage by at least a factor of two,
  85 post-renamer).
  86
  87 Hence, the interface file lexer spots when value declarations are
  88 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  89 for the type and any other id info for that binding (unfolding, strictness
  90 etc). These constructors are applied to the result of lexing these sub-chunks.
  91
  92 The lexing of the type and id info is all done lazily, of course, so
  93 the scanning (and subsequent parsing) will be done *only* on the ids the
  94 renamer finds out that it is interested in. The rest will just be junked.
  95 Laziness, you know it makes sense :-)
  96
  97 \begin{code}
  98 data Token
  99   = ITas                        -- Haskell keywords
 100   | ITcase
 101   | ITclass
 102   | ITdata
 103   | ITdefault
 104   | ITderiving
 105   | ITdo
 106   | ITelse
 107   | IThiding
 108   | ITif
 109   | ITimport
 110   | ITin
 111   | ITinfix
 112   | ITinfixl
 113   | ITinfixr
 114   | ITinstance
 115   | ITlet
 116   | ITmodule
 117   | ITnewtype
 118   | ITof
 119   | ITqualified
 120   | ITthen
 121   | ITtype
 122   | ITwhere
 123   | ITscc
 124
 125   | ITforall                    -- GHC extension keywords
 126   | ITforeign
 127   | ITexport
 128   | ITlabel
 129   | ITdynamic
 130   | ITunsafe
 131
 132   | ITinterface                 -- interface keywords
 133   | IT__export
 134   | ITdepends
 135   | IT__forall
 136   | ITletrec
 137   | ITcoerce
 138   | ITinlineMe
 139   | ITinlineCall
 140   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 141   | ITdefaultbranch
 142   | ITbottom
 143   | ITinteger_lit
 144   | ITfloat_lit
 145   | ITrational_lit
 146   | ITaddr_lit
 147   | ITlit_lit
 148   | ITstring_lit
 149   | ITtypeapp
 150   | ITonce
 151   | ITmany
 152   | ITarity
 153   | ITspecialise
 154   | ITnocaf
 155   | ITunfold InlinePragInfo
 156   | ITstrict ([Demand], Bool)
 157   | ITrules
 158   | ITcprinfo (CprInfo)
 159   | IT__scc
 160   | ITsccAllCafs
 161
 162   | ITspecialise_prag           -- Pragmas
 163   | ITsource_prag
 164   | ITinline_prag
 165   | ITnoinline_prag
 166   | ITrules_prag
 167   | ITline_prag
 168   | ITclose_prag
 169
 170   | ITdotdot                    -- reserved symbols
 171   | ITdcolon
 172   | ITequal
 173   | ITlam
 174   | ITvbar
 175   | ITlarrow
 176   | ITrarrow
 177   | ITat
 178   | ITtilde
 179   | ITdarrow
 180   | ITminus
 181   | ITbang
 182   | ITdot
 183
 184   | ITbiglam                    -- GHC-extension symbols
 185
 186   | ITocurly                    -- special symbols
 187   | ITccurly
 188   | ITvccurly
 189   | ITobrack
 190   | ITcbrack
 191   | IToparen
 192   | ITcparen
 193   | IToubxparen
 194   | ITcubxparen
 195   | ITsemi
 196   | ITcomma
 197   | ITunderscore
 198   | ITbackquote
 199
 200   | ITvarid   FAST_STRING       -- identifiers
 201   | ITconid   FAST_STRING
 202   | ITvarsym  FAST_STRING
 203   | ITconsym  FAST_STRING
 204   | ITqvarid  (FAST_STRING,FAST_STRING)
 205   | ITqconid  (FAST_STRING,FAST_STRING)
 206   | ITqvarsym (FAST_STRING,FAST_STRING)
 207   | ITqconsym (FAST_STRING,FAST_STRING)
 208
 209   | ITpragma StringBuffer
 210
 211   | ITchar       Char
 212   | ITstring     FAST_STRING
 213   | ITinteger    Integer
 214   | ITrational   Rational
 215
 216   | ITprimchar   Char
 217   | ITprimstring FAST_STRING
 218   | ITprimint    Integer
 219   | ITprimfloat  Rational
 220   | ITprimdouble Rational
 221   | ITlitlit     FAST_STRING
 222
 223   | ITunknown String            -- Used when the lexer can't make sense of it
 224   | ITeof                       -- end of file token
 225   deriving Text -- debugging
 226 \end{code}
 227
 228 -----------------------------------------------------------------------------
 229 Keyword Lists
 230
 231 \begin{code}
 232 pragmaKeywordsFM = listToUFM $
 233       map (\ (x,y) -> (_PK_ x,y))
 234        [( "SPECIALISE", ITspecialise_prag ),
 235         ( "SPECIALIZE", ITspecialise_prag ),
 236         ( "SOURCE",     ITsource_prag ),
 237         ( "INLINE",     ITinline_prag ),
 238         ( "NOINLINE",   ITnoinline_prag ),
 239         ( "NOTINLINE",  ITnoinline_prag ),
 240         ( "LINE",       ITline_prag ),
 241         ( "RULES",      ITrules_prag ),
 242         ( "RULEZ",      ITrules_prag )  -- american spelling :-)
 243         ]
 244
 245 haskellKeywordsFM = listToUFM $
 246       map (\ (x,y) -> (_PK_ x,y))
 247        [( "_",          ITunderscore ),
 248         ( "as",         ITas ),
 249         ( "case",       ITcase ),
 250         ( "class",      ITclass ),
 251         ( "data",       ITdata ),
 252         ( "default",    ITdefault ),
 253         ( "deriving",   ITderiving ),
 254         ( "do",         ITdo ),
 255         ( "else",       ITelse ),
 256         ( "hiding",     IThiding ),
 257         ( "if",         ITif ),
 258         ( "import",     ITimport ),
 259         ( "in",         ITin ),
 260         ( "infix",      ITinfix ),
 261         ( "infixl",     ITinfixl ),
 262         ( "infixr",     ITinfixr ),
 263         ( "instance",   ITinstance ),
 264         ( "let",        ITlet ),
 265         ( "module",     ITmodule ),
 266         ( "newtype",    ITnewtype ),
 267         ( "of",         ITof ),
 268         ( "qualified",  ITqualified ),
 269         ( "then",       ITthen ),
 270         ( "type",       ITtype ),
 271         ( "where",      ITwhere ),
 272         ( "_scc_",      ITscc )
 273      ]
 274
 275
 276 ghcExtensionKeywordsFM = listToUFM $
 277         map (\ (x,y) -> (_PK_ x,y))
 278      [  ( "forall",     ITforall ),
 279         ( "foreign",    ITforeign ),
 280         ( "export",     ITexport ),
 281         ( "label",      ITlabel ),
 282         ( "dynamic",    ITdynamic ),
 283         ( "unsafe",     ITunsafe ),
 284         ("_ccall_",     ITccall (False, False, False)),
 285         ("_ccall_GC_",  ITccall (False, False, True)),
 286         ("_casm_",      ITccall (False, True,  False)),
 287         ("_casm_GC_",   ITccall (False, True,  True)),
 288
 289         -- interface keywords
 290         ("__interface",         ITinterface),
 291         ("__export",            IT__export),
 292         ("__depends",           ITdepends),
 293         ("__forall",            IT__forall),
 294         ("__letrec",            ITletrec),
 295         ("__coerce",            ITcoerce),
 296         ("__inline_me",         ITinlineMe),
 297         ("__inline_call",       ITinlineCall),
 298         ("__depends",           ITdepends),
 299         ("__DEFAULT",           ITdefaultbranch),
 300         ("__bot",               ITbottom),
 301         ("__integer",           ITinteger_lit),
 302         ("__float",             ITfloat_lit),
 303         ("__rational",          ITrational_lit),
 304         ("__addr",              ITaddr_lit),
 305         ("__litlit",            ITlit_lit),
 306         ("__string",            ITstring_lit),
 307         ("__a",                 ITtypeapp),
 308         ("__o",                 ITonce),
 309         ("__m",                 ITmany),
 310         ("__A",                 ITarity),
 311         ("__P",                 ITspecialise),
 312         ("__C",                 ITnocaf),
 313         ("__R",                 ITrules),
 314         ("__u",                 ITunfold NoInlinePragInfo),
 315
 316         ("__ccall",             ITccall (False, False, False)),
 317         ("__ccall_GC",          ITccall (False, False, True)),
 318         ("__dyn_ccall",         ITccall (True,  False, False)),
 319         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 320         ("__casm",              ITccall (False, True,  False)),
 321         ("__dyn_casm",          ITccall (True,  True,  False)),
 322         ("__casm_GC",           ITccall (False, True,  True)),
 323         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 324
 325         ("/\\",                 ITbiglam)
 326      ]
 327
 328
 329 haskellKeySymsFM = listToUFM $
 330         map (\ (x,y) -> (_PK_ x,y))
 331       [ ("..",          ITdotdot)
 332        ,("::",          ITdcolon)
 333        ,("=",           ITequal)
 334        ,("\\",          ITlam)
 335        ,("|",           ITvbar)
 336        ,("<-",          ITlarrow)
 337        ,("->",          ITrarrow)
 338        ,("@",           ITat)
 339        ,("~",           ITtilde)
 340        ,("=>",          ITdarrow)
 341        ,("-",           ITminus)
 342        ,("!",           ITbang)
 343        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 344        ]
 345 \end{code}
 346
 347 -----------------------------------------------------------------------------
 348 The lexical analyser
 349
 350 Lexer state:
 351
 352         - (glaexts) lexing an interface file or -fglasgow-exts
 353         - (bol)   pointer to beginning of line (for column calculations)
 354         - (buf)   pointer to beginning of token
 355         - (buf)   pointer to current char
 356         - (atbol) flag indicating whether we're at the beginning of a line
 357
 358 \begin{code}
 359 lexer :: (Token -> P a) -> P a
 360 lexer cont buf s@(PState{
 361                     loc = loc,
 362                     glasgow_exts = glaexts,
 363                     bol = bol,
 364                     atbol = atbol,
 365                     context = ctx
 366                 })
 367
 368         -- first, start a new lexeme and lose all the whitespace
 369   = tab line bol atbol (stepOverLexeme buf)
 370   where
 371         line = srcLocLine loc
 372
 373         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 374           case currentChar# buf of
 375
 376             '\NUL'# ->
 377                    if bufferExhausted (stepOn buf)
 378                         then cont ITeof buf s'
 379                         else trace "lexer: misplaced NUL?" $
 380                              tab y bol atbol (stepOn buf)
 381
 382             '\n'# -> let buf' = stepOn buf
 383                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 384
 385                 -- find comments.  This got harder in Haskell 98.
 386             '-'# ->  let trundle n =
 387                           let next = lookAhead# buf n in
 388                           if next `eqChar#` '-'# then trundle (n +# 1#)
 389                           else if is_symbol next || n <# 2#
 390                                 then is_a_token
 391                                 else case untilChar# (stepOnBy# buf n) '\n'# of
 392                                     { buf' -> tab y bol atbol (stepOverLexeme buf')
 393                                     }
 394                     in trundle 1#
 395
 396                 -- comments and pragmas.  We deal with LINE pragmas here,
 397                 -- and throw out any unrecognised pragmas as comments.  Any
 398                 -- pragmas we know about are dealt with later (after any layout
 399                 -- processing if necessary).
 400
 401             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 402                 if lookAhead# buf 2# `eqChar#` '#'# then
 403                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 404                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 405                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 406                   let lexeme = mkFastString -- ToDo: too slow
 407                                   (map toUpper (lexemeToString buf2)) in
 408                   case lookupUFM pragmaKeywordsFM lexeme of
 409                         Just ITline_prag -> line_prag (lexer cont) buf2 s'
 410                         Just other -> is_a_token
 411                         Nothing -> skip_to_end (stepOnBy# buf 2#)
 412                   }}
 413
 414                 else skip_to_end (stepOnBy# buf 2#)
 415                 where
 416                     skip_to_end buf = nested_comment (lexer cont) buf s'
 417
 418                 -- tabs have been expanded beforehand
 419             c | is_space c -> tab y bol atbol (stepOn buf)
 420               | otherwise  -> is_a_token
 421
 422            where s' = s{loc = replaceSrcLine loc y,
 423                         bol = bol,
 424                        atbol = atbol}
 425
 426                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 427                             | otherwise    = lexToken cont glaexts buf s'
 428
 429 -- {-# LINE .. #-} pragmas.  yeuch.
 430 line_prag cont buf =
 431   case expandWhile# is_space buf                of { buf1 ->
 432   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 433   -- subtract one: the line number refers to the *following* line.
 434   let real_line = line - 1 in
 435   case fromInteger real_line                    of { i@(I# l) ->
 436   case expandWhile# is_space buf2               of { buf3 ->
 437   case currentChar# buf3                        of
 438      '\"'#{-"-} ->
 439         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 440         let file = lexemeToFastString buf4 in
 441         \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
 442         }
 443      other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
 444   }}}}
 445   where
 446         skipToEnd buf = nested_comment cont buf
 447
 448 nested_comment :: P a -> P a
 449 nested_comment cont buf = loop buf
 450  where
 451    loop buf =
 452      case currentChar# buf of
 453         '\NUL'# | bufferExhausted (stepOn buf) ->
 454                 lexError "unterminated `{-'" buf
 455
 456         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 457                 cont (stepOnBy# buf 2#)
 458
 459         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 460               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 461
 462         '\n'# -> \ s@PState{loc=loc} ->
 463                  let buf' = stepOn buf in
 464                  nested_comment cont buf'
 465                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 466                           atbol = 1#}
 467
 468         _   -> nested_comment cont (stepOn buf)
 469
 470 -- When we are lexing the first token of a line, check whether we need to
 471 -- insert virtual semicolons or close braces due to layout.
 472
 473 lexBOL :: (Token -> P a) -> P a
 474 lexBOL cont buf s@(PState{
 475                     loc = loc,
 476                     glasgow_exts = glaexts,
 477                     bol = bol,
 478                     atbol = atbol,
 479                     context = ctx
 480                   }) =
 481         if need_close_curly then
 482                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 483                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 484         else if need_semi_colon then
 485                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 486                 cont ITsemi buf s{atbol = 0#}
 487         else
 488                 lexToken cont glaexts buf s{atbol = 0#}
 489   where
 490         col = currentIndex# buf -# bol
 491
 492         need_close_curly =
 493                 case ctx of
 494                         [] -> False
 495                         (i:_) -> case i of
 496                                     NoLayout -> False
 497                                     Layout n -> col <# n
 498         need_semi_colon =
 499                 case ctx of
 500                         [] -> False
 501                         (i:_) -> case i of
 502                                     NoLayout -> False
 503                                     Layout n -> col ==# n
 504
 505
 506 lexToken :: (Token -> P a) -> Int# -> P a
 507 lexToken cont glaexts buf =
 508  --trace "lexToken" $
 509  _scc_ "Lexer"
 510   case currentChar# buf of
 511
 512     -- special symbols ----------------------------------------------------
 513     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 514                 -> cont IToubxparen (setCurrentPos# buf 2#)
 515          | otherwise
 516                 -> cont IToparen (incLexeme buf)
 517
 518     ')'# -> cont ITcparen    (incLexeme buf)
 519     '['# -> cont ITobrack    (incLexeme buf)
 520     ']'# -> cont ITcbrack    (incLexeme buf)
 521     ','# -> cont ITcomma     (incLexeme buf)
 522     ';'# -> cont ITsemi      (incLexeme buf)
 523
 524     '}'# -> \ s@PState{context = ctx} ->
 525             case ctx of
 526                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 527                 _ -> lexError "too many '}'s" buf s
 528
 529     '#'# -> case lookAhead# buf 1# of
 530                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 531                 '-'# -> case lookAhead# buf 2# of
 532                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 533                            _    -> lex_sym cont (incLexeme buf)
 534                 _    -> lex_sym cont (incLexeme buf)
 535
 536     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 537                 -> lex_cstring cont (setCurrentPos# buf 2#)
 538          | otherwise
 539                 -> cont ITbackquote (incLexeme buf)
 540
 541     '{'# ->     -- look for "{-##" special iface pragma
 542         case lookAhead# buf 1# of
 543            '-'# -> case lookAhead# buf 2# of
 544                     '#'# -> case lookAhead# buf 3# of
 545                                 '#'# ->
 546                                    let (lexeme, buf')
 547                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 548                                    cont (ITpragma lexeme) buf'
 549                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 550                     _    -> cont ITocurly (incLexeme buf)
 551            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 552
 553     -- strings/characters -------------------------------------------------
 554     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 555     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 556
 557     -- strictness and cpr pragmas and __scc treated specially.
 558     '_'# | flag glaexts ->
 559          case lookAhead# buf 1# of
 560            '_'# -> case lookAhead# buf 2# of
 561                     'S'# ->
 562                         lex_demand cont (stepOnUntil (not . isSpace)
 563                                         (stepOnBy# buf 3#)) -- past __S
 564                     'M'# ->
 565                         lex_cpr cont (stepOnUntil (not . isSpace)
 566                                      (stepOnBy# buf 3#)) -- past __M
 567                     's'# ->
 568                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 569                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 570                                Nothing   -> lex_id cont glaexts buf
 571                     _ -> lex_id cont glaexts buf
 572            _    -> lex_id cont glaexts buf
 573
 574         -- Hexadecimal and octal constants
 575     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 576                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 577          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 578                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 579         where ch   = lookAhead# buf 1#
 580               ch2  = lookAhead# buf 2#
 581               buf' = setCurrentPos# buf 2#
 582
 583     '\NUL'# ->
 584             if bufferExhausted (stepOn buf) then
 585                cont ITeof buf
 586             else
 587                trace "lexIface: misplaced NUL?" $
 588                cont (ITunknown "\NUL") (stepOn buf)
 589
 590     c | is_digit  c -> lex_num cont glaexts 0 buf
 591       | is_symbol c -> lex_sym cont buf
 592       | is_upper  c -> lex_con cont glaexts buf
 593       | is_ident  c -> lex_id  cont glaexts buf
 594       | otherwise   -> lexError "illegal character" buf
 595
 596 -- Int# is unlifted, and therefore faster than Bool for flags.
 597 {-# INLINE flag #-}
 598 flag :: Int# -> Bool
 599 flag 0# = False
 600 flag _  = True
 601
 602 -------------------------------------------------------------------------------
 603 -- Pragmas
 604
 605 lex_prag cont buf
 606   = case expandWhile# is_space buf of { buf1 ->
 607     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 608     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 609     case lookupUFM pragmaKeywordsFM lexeme of
 610         Just kw -> cont kw (mergeLexemes buf buf2)
 611         Nothing -> panic "lex_prag"
 612   }}
 613
 614 -------------------------------------------------------------------------------
 615 -- Strings & Chars
 616
 617 lex_string cont glaexts s buf
 618   = case currentChar# buf of
 619         '"'#{-"-} ->
 620            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 621            case currentChar# buf' of
 622                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 623                 _                   -> cont (ITstring s') buf'
 624
 625         -- ignore \& in a string, deal with string gaps
 626         '\\'# | next_ch `eqChar#` '&'#
 627                 -> lex_string cont glaexts s (setCurrentPos# buf 2#)
 628               | is_space next_ch
 629                 -> lex_stringgap cont glaexts s buf'
 630
 631             where next_ch = lookAhead# buf 1#
 632                   buf' = setCurrentPos# buf 2#
 633
 634         _ -> lex_char (lex_next_string cont s) glaexts buf
 635
 636 lex_stringgap cont glaexts s buf
 637   = let buf' = incLexeme buf in
 638     case currentChar# buf of
 639         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 640                   st{loc = incSrcLine loc}
 641         '\\'# -> lex_string cont glaexts s buf'
 642         c | is_space c -> lex_stringgap cont glaexts s buf'
 643         other -> charError buf'
 644
 645 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 646
 647 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 648 lex_char cont glaexts buf
 649   = case currentChar# buf of
 650         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 651         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 652         other -> charError buf
 653
 654 char_end cont glaexts c buf
 655   = case currentChar# buf of
 656         '\''# -> let buf' = incLexeme buf in
 657                  case currentChar# buf' of
 658                         '#'# | flag glaexts
 659                                 -> cont (ITprimchar c) (incLexeme buf')
 660                         _       -> cont (ITchar c) buf'
 661         _     -> charError buf
 662
 663 lex_escape cont buf
 664   = let buf' = incLexeme buf in
 665     case currentChar# buf of
 666         'a'#       -> cont '\a' buf'
 667         'b'#       -> cont '\b' buf'
 668         'f'#       -> cont '\f' buf'
 669         'n'#       -> cont '\n' buf'
 670         'r'#       -> cont '\r' buf'
 671         't'#       -> cont '\t' buf'
 672         'v'#       -> cont '\v' buf'
 673         '\\'#      -> cont '\\' buf'
 674         '"'#       -> cont '\"' buf'
 675         '\''#      -> cont '\'' buf'
 676         '^'#       -> let c = currentChar# buf' in
 677                       if c `geChar#` '@'# && c `leChar#` '_'#
 678                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 679                         else charError buf'
 680
 681         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 682         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 683         x | is_digit x
 684                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 685
 686         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 687                                        Just buf2 <- [prefixMatch buf p] ] of
 688                             (c,buf2):_ -> cont c buf2
 689                             [] -> charError buf'
 690
 691 after_charnum cont i buf
 692   = let int = fromInteger i in
 693     if i >= 0 && i <= 255
 694         then cont (chr int) buf
 695         else charError buf
 696
 697 readNum cont buf is_digit base conv = read buf 0
 698   where read buf i
 699           = case currentChar# buf of { c ->
 700             if is_digit c
 701                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 702                 else cont i buf
 703             }
 704
 705 is_hexdigit c
 706         =  is_digit c
 707         || (c `geChar#` 'a'# && c `leChar#` 'h'#)
 708         || (c `geChar#` 'A'# && c `leChar#` 'H'#)
 709
 710 hex c | is_digit c = ord# c -# ord# '0'#
 711       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 712 oct_or_dec c = ord# c -# ord# '0'#
 713
 714 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 715
 716 to_lower c
 717   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 718         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 719   | otherwise = c
 720
 721 charError buf = lexError "error in character literal" buf
 722
 723 silly_escape_chars = [
 724         ("NUL", '\NUL'),
 725         ("SOH", '\SOH'),
 726         ("STX", '\STX'),
 727         ("ETX", '\ETX'),
 728         ("EOT", '\EOT'),
 729         ("ENQ", '\ENQ'),
 730         ("ACK", '\ACK'),
 731         ("BEL", '\BEL'),
 732         ("BS", '\BS'),
 733         ("HT", '\HT'),
 734         ("LF", '\LF'),
 735         ("VT", '\VT'),
 736         ("FF", '\FF'),
 737         ("CR", '\CR'),
 738         ("SO", '\SO'),
 739         ("SI", '\SI'),
 740         ("DLE", '\DLE'),
 741         ("DC1", '\DC1'),
 742         ("DC2", '\DC2'),
 743         ("DC3", '\DC3'),
 744         ("DC4", '\DC4'),
 745         ("NAK", '\NAK'),
 746         ("SYN", '\SYN'),
 747         ("ETB", '\ETB'),
 748         ("CAN", '\CAN'),
 749         ("EM", '\EM'),
 750         ("SUB", '\SUB'),
 751         ("ESC", '\ESC'),
 752         ("FS", '\FS'),
 753         ("GS", '\GS'),
 754         ("RS", '\RS'),
 755         ("US", '\US'),
 756         ("SP", '\SP'),
 757         ("DEL", '\DEL')
 758         ]
 759
 760 -------------------------------------------------------------------------------
 761
 762 lex_demand cont buf =
 763  case read_em [] buf of { (ls,buf') ->
 764  case currentChar# buf' of
 765    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 766    _    -> cont (ITstrict (ls, False)) buf'
 767  }
 768  where
 769    -- code snatched from Demand.lhs
 770   read_em acc buf =
 771    case currentChar# buf of
 772     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 773     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 774     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 775     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 776     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 777     ')'# -> (reverse acc, stepOn buf)
 778     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 779     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 780     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 781     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 782     _    -> (reverse acc, buf)
 783
 784   do_unpack new_or_data wrapper_unpacks acc buf
 785    = case read_em [] buf of
 786       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 787
 788 lex_cpr cont buf =
 789  case read_em [] buf of { (cpr_inf,buf') ->
 790    ASSERT ( null (tail cpr_inf) )
 791    cont (ITcprinfo $ head cpr_inf) buf'
 792  }
 793  where
 794    -- code snatched from lex_demand above
 795   read_em acc buf =
 796    case currentChar# buf of
 797     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 798     '('# -> do_unpack acc (stepOn buf)
 799     ')'# -> (reverse acc, stepOn buf)
 800     _    -> (reverse acc, buf)
 801
 802   do_unpack acc buf
 803    = case read_em [] buf of
 804       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 805
 806 ------------------
 807 lex_scc cont buf =
 808  case currentChar# buf of
 809   'C'# -> cont ITsccAllCafs (incLexeme buf)
 810   other -> cont ITscc buf
 811
 812 -----------------------------------------------------------------------------
 813 -- Numbers
 814
 815 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 816 lex_num cont glaexts acc buf =
 817  case scanNumLit acc buf of
 818      (acc',buf') ->
 819        case currentChar# buf' of
 820          '.'# | is_digit (lookAhead# buf' 1#) ->
 821              -- this case is not optimised at all, as the
 822              -- presence of floating point numbers in interface
 823              -- files is not that common. (ToDo)
 824             case expandWhile# is_digit (incLexeme buf') of
 825               buf2 -> -- points to first non digit char
 826
 827                 let l = case currentChar# buf2 of
 828                           'E'# -> do_exponent
 829                           'e'# -> do_exponent
 830                           _ -> buf2
 831
 832                     do_exponent
 833                         = let buf3 = incLexeme buf2 in
 834                           case currentChar# buf3 of
 835                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 836                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 837                                 x | is_digit x -> expandWhile# is_digit buf3
 838                                 _ -> buf2
 839
 840                     v = readRational__ (lexemeToString l)
 841
 842                 in case currentChar# l of -- glasgow exts only
 843                       '#'# | flag glaexts -> let l' = incLexeme l in
 844                               case currentChar# l' of
 845                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 846                                 _    -> cont (ITprimfloat  v) l'
 847                       _ -> cont (ITrational v) l
 848
 849          _ -> after_lexnum cont glaexts acc' buf'
 850
 851 after_lexnum cont glaexts i buf
 852   = case currentChar# buf of
 853         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 854         _    -> cont (ITinteger i) buf
 855
 856 -----------------------------------------------------------------------------
 857 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 858
 859 -- we lexemeToFastString on the bit between the ``''s, but include the
 860 -- quotes in the full lexeme.
 861
 862 lex_cstring cont buf =
 863  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 864    buf' -> cont (ITlitlit (lexemeToFastString
 865                                 (setCurrentPos# buf' (negateInt# 2#))))
 866                 (mergeLexemes buf buf')
 867
 868 ------------------------------------------------------------------------------
 869 -- Character Classes
 870
 871 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 872
 873 {-# INLINE is_ctype #-}
 874 #if __GLASGOW_HASKELL__ >= 303
 875 is_ctype :: Word8 -> Char# -> Bool
 876 is_ctype mask = \c ->
 877    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 878 #else
 879 is_ctype :: Int -> Char# -> Bool
 880 is_ctype (I# mask) = \c ->
 881     let (A# ctype) = ``char_types'' :: Addr
 882         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 883     in
 884         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 885 #endif
 886
 887 is_ident  = is_ctype 1
 888 is_symbol = is_ctype 2
 889 is_any    = is_ctype 4
 890 is_space  = is_ctype 8
 891 is_upper  = is_ctype 16
 892 is_digit  = is_ctype 32
 893
 894 -----------------------------------------------------------------------------
 895 -- identifiers, symbols etc.
 896
 897 lex_id cont glaexts buf =
 898  case expandWhile# is_ident buf of { buf1 ->
 899
 900  case (if flag glaexts
 901         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 902         else buf1)                              of { buf' ->
 903
 904  let lexeme  = lexemeToFastString buf' in
 905
 906  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 907         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 908                           cont kwd_token buf';
 909         Nothing        ->
 910
 911  let var_token = cont (mk_var_token lexeme) buf' in
 912
 913  if not (flag glaexts)
 914    then var_token
 915    else
 916
 917  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 918         Just kwd_token -> cont kwd_token buf';
 919         Nothing        -> var_token
 920
 921  }}}}
 922
 923 lex_sym cont buf =
 924  case expandWhile# is_symbol buf of
 925    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 926                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 927                                   cont kwd_token buf' ;
 928                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 929                                   cont (mk_var_token lexeme) buf'
 930            }
 931         where lexeme = lexemeToFastString buf'
 932
 933
 934 lex_con cont glaexts buf =
 935  case expandWhile# is_ident buf          of { buf1 ->
 936  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 937
 938  case currentChar# buf' of
 939      '.'# -> munch
 940      _    -> just_a_conid
 941
 942    where
 943     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 944                    cont (ITconid lexeme) buf'
 945     lexeme = lexemeToFastString buf'
 946     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 947  }}
 948
 949 lex_qid cont glaexts mod buf just_a_conid =
 950  case currentChar# buf of
 951   '['# ->       -- Special case for []
 952     case lookAhead# buf 1# of
 953      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 954      _    -> just_a_conid
 955
 956   '('# ->  -- Special case for (,,,)
 957            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 958     case lookAhead# buf 1# of
 959      '#'# | flag glaexts -> case lookAhead# buf 2# of
 960                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 961                                 just_a_conid
 962                 _    -> just_a_conid
 963      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 964      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 965      _    -> just_a_conid
 966
 967   '-'# -> case lookAhead# buf 1# of
 968             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 969             _    -> lex_id3 cont glaexts mod buf just_a_conid
 970   _    -> lex_id3 cont glaexts mod buf just_a_conid
 971
 972 lex_id3 cont glaexts mod buf just_a_conid
 973   | is_symbol (currentChar# buf) =
 974      let
 975         start_new_lexeme = stepOverLexeme buf
 976      in
 977      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 978      let
 979        lexeme  = lexemeToFastString buf'
 980         -- real lexeme is M.<sym>
 981        new_buf = mergeLexemes buf buf'
 982      in
 983      case lookupUFM haskellKeySymsFM lexeme of {
 984         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 985         Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
 986      }}
 987
 988   | otherwise   =
 989      let
 990         start_new_lexeme = stepOverLexeme buf
 991      in
 992      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 993      if emptyLexeme buf1
 994             then just_a_conid
 995             else
 996
 997      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 998
 999      let
1000       lexeme  = lexemeToFastString buf'
1001       new_buf = mergeLexemes buf buf'
1002       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1003      in
1004      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1005             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1006             Nothing        ->
1007      if flag glaexts
1008        then case lookupUFM ghcExtensionKeywordsFM lexeme of {
1009             Just kwd_token -> just_a_conid;
1010             Nothing        -> is_a_qvarid }
1011        else is_a_qvarid
1012      }}}
1013
1014
1015 slurp_trailing_hashes buf glaexts
1016   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1017   | otherwise    = buf
1018
1019
1020 mk_var_token pk_str
1021   | is_upper f          = ITconid pk_str
1022         -- _[A-Z] is treated as a constructor in interface files.
1023   | f `eqChar#` '_'# && not (_NULL_ tl)
1024         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
1025   | is_ident f          = ITvarid pk_str
1026   | f `eqChar#` ':'#    = ITconsym pk_str
1027   | otherwise           = ITvarsym pk_str
1028   where
1029       (C# f) = _HEAD_ pk_str
1030       tl     = _TAIL_ pk_str
1031
1032 mk_qvar_token m token =
1033  case mk_var_token token of
1034    ITconid n  -> ITqconid  (m,n)
1035    ITvarid n  -> ITqvarid  (m,n)
1036    ITconsym n -> ITqconsym (m,n)
1037    ITvarsym n -> ITqvarsym (m,n)
1038    _          -> ITunknown (show token)
1039 \end{code}
1040
1041 ----------------------------------------------------------------------------
1042 Horrible stuff for dealing with M.(,,,)
1043
1044 \begin{code}
1045 lex_tuple cont mod buf back_off =
1046   go 2 buf
1047   where
1048    go n buf =
1049     case currentChar# buf of
1050       ','# -> go (n+1) (stepOn buf)
1051       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
1052       _    -> back_off
1053
1054 lex_ubx_tuple cont mod buf back_off =
1055   go 2 buf
1056   where
1057    go n buf =
1058     case currentChar# buf of
1059       ','# -> go (n+1) (stepOn buf)
1060       '#'# -> case lookAhead# buf 1# of
1061                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
1062                                  (stepOnBy# buf 2#)
1063                 _    -> back_off
1064       _    -> back_off
1065 \end{code}
1066
1067 -----------------------------------------------------------------------------
1068 doDiscard rips along really fast, looking for a '#-}',
1069 indicating the end of the pragma we're skipping
1070
1071 \begin{code}
1072 doDiscard inStr buf =
1073  case currentChar# buf of
1074    '#'# | not inStr ->
1075        case lookAhead# buf 1# of { '#'# ->
1076        case lookAhead# buf 2# of { '-'# ->
1077        case lookAhead# buf 3# of { '}'# ->
1078            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1079         _    -> doDiscard inStr (incLexeme buf) };
1080         _    -> doDiscard inStr (incLexeme buf) };
1081         _    -> doDiscard inStr (incLexeme buf) }
1082    '"'# ->
1083        let
1084         odd_slashes buf flg i# =
1085           case lookAhead# buf i# of
1086            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1087            _     -> flg
1088        in
1089        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1090          '\\'# -> -- escaping something..
1091            if odd_slashes buf True (negateInt# 2#) then
1092                -- odd number of slashes, " is escaped.
1093               doDiscard inStr (incLexeme buf)
1094            else
1095                -- even number of slashes, \ is escaped.
1096               doDiscard (not inStr) (incLexeme buf)
1097          _ -> case inStr of -- forced to avoid build-up
1098                True  -> doDiscard False (incLexeme buf)
1099                False -> doDiscard True  (incLexeme buf)
1100    _ -> doDiscard inStr (incLexeme buf)
1101
1102 \end{code}
1103
1104 -----------------------------------------------------------------------------
1105
1106 \begin{code}
1107 data LayoutContext
1108   = NoLayout
1109   | Layout Int#
1110
1111 data ParseResult a
1112   = POk PState a
1113   | PFailed Message
1114
1115 data PState = PState {
1116         loc           :: SrcLoc,
1117         glasgow_exts  :: Int#,
1118         bol           :: Int#,
1119         atbol         :: Int#,
1120         context       :: [LayoutContext]
1121      }
1122
1123 type P a = StringBuffer         -- Input string
1124         -> PState
1125         -> ParseResult a
1126
1127 returnP   :: a -> P a
1128 returnP a buf s = POk s a
1129
1130 thenP      :: P a -> (a -> P b) -> P b
1131 m `thenP` k = \ buf s ->
1132         case m buf s of
1133                 POk s1 a -> k a buf s1
1134                 PFailed err  -> PFailed err
1135
1136 thenP_     :: P a -> P b -> P b
1137 m `thenP_` k = m `thenP` \_ -> k
1138
1139 mapP :: (a -> P b) -> [a] -> P [b]
1140 mapP f [] = returnP []
1141 mapP f (a:as) =
1142      f a `thenP` \b ->
1143      mapP f as `thenP` \bs ->
1144      returnP (b:bs)
1145
1146 failP :: String -> P a
1147 failP msg buf s = PFailed (text msg)
1148
1149 failMsgP :: Message -> P a
1150 failMsgP msg buf s = PFailed msg
1151
1152 lexError :: String -> P a
1153 lexError str buf s@PState{ loc = loc }
1154   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1155
1156 getSrcLocP :: P SrcLoc
1157 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1158
1159 getSrcFile :: P FAST_STRING
1160 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1161
1162 getContext :: P [LayoutContext]
1163 getContext buf s@(PState{ context = ctx }) = POk s ctx
1164
1165 pushContext :: LayoutContext -> P ()
1166 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1167
1168 {-
1169
1170 This special case in layoutOn is to handle layout contexts with are
1171 indented the same or less than the current context.  This is illegal
1172 according to the Haskell spec, so we have to arrange to close the
1173 current context.  eg.
1174
1175 class Foo a where
1176 class Bar a
1177
1178 after the first 'where', the sequence of events is:
1179
1180         - layout system inserts a ';' (column 0)
1181         - parser begins a new context at column 0
1182         - parser shifts ';' (legal empty declaration)
1183         - parser sees 'class': parse error (we're still in the inner context)
1184
1185 trouble is, by the time we know we need a new context, the lexer has
1186 already generated the ';'.  Hacky solution is as follows: since we
1187 know the column of the next token (it's the column number of the new
1188 context), we set the ACTUAL column number of the new context to this
1189 numer plus one.  Hence the next time the lexer is called, a '}' will
1190 be generated to close the new context straight away.  Furthermore, we
1191 have to set the atbol flag so that the ';' that the parser shifted as
1192 part of the new context is re-generated.
1193
1194 when the new context is *less* indented than the current one:
1195
1196 f = f where g = g where
1197 h = h
1198
1199         - current context: column 12.
1200         - on seeing 'h' (column 0), the layout system inserts '}'
1201         - parser starts a new context, column 0
1202         - parser sees '}', uses it to close new context
1203         - we still need to insert another '}' followed by a ';',
1204           hence the atbol trick.
1205
1206 -}
1207
1208 layoutOn :: P ()
1209 layoutOn buf s@(PState{ bol = bol, context = ctx }) =
1210     let offset = lexemeIndex buf -# bol in
1211     case ctx of
1212         Layout prev_off : _ | prev_off >=# offset ->
1213                 --trace ("layout on, column: " ++  show (I# offset)) $
1214                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1215         other ->
1216                 --trace ("layout on, column: " ++  show (I# offset)) $
1217                 POk s{ context = Layout offset : ctx } ()
1218
1219 layoutOff :: P ()
1220 layoutOff buf s@(PState{ context = ctx }) =
1221     POk s{ context = NoLayout:ctx } ()
1222
1223 popContext :: P ()
1224 popContext = \ buf s@(PState{ context = ctx }) ->
1225   case ctx of
1226         (_:tl) -> POk s{ context = tl } ()
1227         []    -> panic "Lex.popContext: empty context"
1228
1229 {-
1230  Note that if the name of the file we're processing ends
1231  with `hi-boot', we accept it on faith as having the right
1232  version. This is done so that .hi-boot files that comes
1233  with hsc don't have to be updated before every release,
1234  *and* it allows us to share .hi-boot files with versions
1235  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1236
1237  If the version number is 0, the checking is also turned off.
1238  (needed to deal with GHC.hi only!)
1239
1240  Once we can assume we're compiling with a version of ghc that
1241  supports interface file checking, we can drop the special
1242  pleading
1243 -}
1244 checkVersion :: Maybe Integer -> P ()
1245 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1246  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1247  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1248 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1249  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1250  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1251
1252 -----------------------------------------------------------------
1253
1254 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1255 ifaceParseErr s l
1256   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1257           text (lexemeToString s), char '\'']
1258
1259 ifaceVersionErr hi_vers l toks
1260   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1261           ptext SLIT("Expected"), int opt_HiVersion,
1262           ptext SLIT("found "), pp_version]
1263     where
1264      pp_version =
1265       case hi_vers of
1266         Nothing -> ptext SLIT("pre ghc-3.02 version")
1267         Just v  -> ptext SLIT("version") <+> integer v
1268
1269 \end{code}