ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         Token(..), lexer, ParseResult(..), PState(..),
  27         checkVersion,
  28         StringBuffer,
  29
  30         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  31         getSrcLocP, getSrcFile,
  32         layoutOn, layoutOff, pushContext, popContext
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import Char             ( ord, isSpace, toUpper )
  38 import List             ( isSuffixOf )
  39
  40 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  41 import Name             ( isLowerISO, isUpperISO )
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  48                           replaceSrcLine, mkSrcLoc )
  49
  50 import Maybes           ( MaybeErr(..) )
  51 import ErrUtils         ( Message )
  52 import Outputable
  53
  54 import FastString
  55 import StringBuffer
  56 import GlaExts
  57 import ST               ( runST )
  58
  59 #if __GLASGOW_HASKELL__ >= 303
  60 import Bits
  61 import Word
  62 #endif
  63
  64 import Char             ( chr )
  65 import Addr
  66 import PrelRead         ( readRational__ ) -- Glasgow non-std
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Data types}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 The token data type, fairly un-interesting except from one
  76 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  77 strictness, unfolding etc).
  78
  79 The Idea/Observation here is that the renamer needs to scan through
  80 all of an interface file before it can continue. But only a fraction
  81 of the information contained in the file turns out to be useful, so
  82 delaying as much as possible of the scanning and parsing of an
  83 interface file Makes Sense (Heap profiles of the compiler
  84 show a reduction in heap usage by at least a factor of two,
  85 post-renamer).
  86
  87 Hence, the interface file lexer spots when value declarations are
  88 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  89 for the type and any other id info for that binding (unfolding, strictness
  90 etc). These constructors are applied to the result of lexing these sub-chunks.
  91
  92 The lexing of the type and id info is all done lazily, of course, so
  93 the scanning (and subsequent parsing) will be done *only* on the ids the
  94 renamer finds out that it is interested in. The rest will just be junked.
  95 Laziness, you know it makes sense :-)
  96
  97 \begin{code}
  98 data Token
  99   = ITas                        -- Haskell keywords
 100   | ITcase
 101   | ITclass
 102   | ITdata
 103   | ITdefault
 104   | ITderiving
 105   | ITdo
 106   | ITelse
 107   | IThiding
 108   | ITif
 109   | ITimport
 110   | ITin
 111   | ITinfix
 112   | ITinfixl
 113   | ITinfixr
 114   | ITinstance
 115   | ITlet
 116   | ITmodule
 117   | ITnewtype
 118   | ITof
 119   | ITqualified
 120   | ITthen
 121   | ITtype
 122   | ITwhere
 123   | ITscc
 124
 125   | ITforall                    -- GHC extension keywords
 126   | ITforeign
 127   | ITexport
 128   | ITlabel
 129   | ITdynamic
 130   | ITunsafe
 131
 132   | ITinterface                 -- interface keywords
 133   | IT__export
 134   | ITdepends
 135   | IT__forall
 136   | ITletrec
 137   | ITcoerce
 138   | ITinlineMe
 139   | ITinlineCall
 140   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 141   | ITdefaultbranch
 142   | ITbottom
 143   | ITinteger_lit
 144   | ITfloat_lit
 145   | ITrational_lit
 146   | ITaddr_lit
 147   | ITlit_lit
 148   | ITstring_lit
 149   | ITtypeapp
 150   | ITonce
 151   | ITmany
 152   | ITarity
 153   | ITspecialise
 154   | ITnocaf
 155   | ITunfold InlinePragInfo
 156   | ITstrict ([Demand], Bool)
 157   | ITrules
 158   | ITcprinfo (CprInfo)
 159   | IT__scc
 160   | ITsccAllCafs
 161
 162   | ITspecialise_prag           -- Pragmas
 163   | ITsource_prag
 164   | ITinline_prag
 165   | ITnoinline_prag
 166   | ITrules_prag
 167   | ITline_prag
 168   | ITclose_prag
 169
 170   | ITdotdot                    -- reserved symbols
 171   | ITdcolon
 172   | ITequal
 173   | ITlam
 174   | ITvbar
 175   | ITlarrow
 176   | ITrarrow
 177   | ITat
 178   | ITtilde
 179   | ITdarrow
 180   | ITminus
 181   | ITbang
 182   | ITdot
 183
 184   | ITbiglam                    -- GHC-extension symbols
 185
 186   | ITocurly                    -- special symbols
 187   | ITccurly
 188   | ITvccurly
 189   | ITobrack
 190   | ITcbrack
 191   | IToparen
 192   | ITcparen
 193   | IToubxparen
 194   | ITcubxparen
 195   | ITsemi
 196   | ITcomma
 197   | ITunderscore
 198   | ITbackquote
 199
 200   | ITvarid   FAST_STRING       -- identifiers
 201   | ITconid   FAST_STRING
 202   | ITvarsym  FAST_STRING
 203   | ITconsym  FAST_STRING
 204   | ITqvarid  (FAST_STRING,FAST_STRING)
 205   | ITqconid  (FAST_STRING,FAST_STRING)
 206   | ITqvarsym (FAST_STRING,FAST_STRING)
 207   | ITqconsym (FAST_STRING,FAST_STRING)
 208
 209   | ITpragma StringBuffer
 210
 211   | ITchar       Char
 212   | ITstring     FAST_STRING
 213   | ITinteger    Integer
 214   | ITrational   Rational
 215
 216   | ITprimchar   Char
 217   | ITprimstring FAST_STRING
 218   | ITprimint    Integer
 219   | ITprimfloat  Rational
 220   | ITprimdouble Rational
 221   | ITlitlit     FAST_STRING
 222
 223   | ITunknown String            -- Used when the lexer can't make sense of it
 224   | ITeof                       -- end of file token
 225   deriving Text -- debugging
 226 \end{code}
 227
 228 -----------------------------------------------------------------------------
 229 Keyword Lists
 230
 231 \begin{code}
 232 pragmaKeywordsFM = listToUFM $
 233       map (\ (x,y) -> (_PK_ x,y))
 234        [( "SPECIALISE", ITspecialise_prag ),
 235         ( "SPECIALIZE", ITspecialise_prag ),
 236         ( "SOURCE",     ITsource_prag ),
 237         ( "INLINE",     ITinline_prag ),
 238         ( "NOINLINE",   ITnoinline_prag ),
 239         ( "NOTINLINE",  ITnoinline_prag ),
 240         ( "LINE",       ITline_prag ),
 241         ( "RULES",      ITrules_prag ),
 242         ( "RULEZ",      ITrules_prag )  -- american spelling :-)
 243         ]
 244
 245 haskellKeywordsFM = listToUFM $
 246       map (\ (x,y) -> (_PK_ x,y))
 247        [( "_",          ITunderscore ),
 248         ( "as",         ITas ),
 249         ( "case",       ITcase ),
 250         ( "class",      ITclass ),
 251         ( "data",       ITdata ),
 252         ( "default",    ITdefault ),
 253         ( "deriving",   ITderiving ),
 254         ( "do",         ITdo ),
 255         ( "else",       ITelse ),
 256         ( "hiding",     IThiding ),
 257         ( "if",         ITif ),
 258         ( "import",     ITimport ),
 259         ( "in",         ITin ),
 260         ( "infix",      ITinfix ),
 261         ( "infixl",     ITinfixl ),
 262         ( "infixr",     ITinfixr ),
 263         ( "instance",   ITinstance ),
 264         ( "let",        ITlet ),
 265         ( "module",     ITmodule ),
 266         ( "newtype",    ITnewtype ),
 267         ( "of",         ITof ),
 268         ( "qualified",  ITqualified ),
 269         ( "then",       ITthen ),
 270         ( "type",       ITtype ),
 271         ( "where",      ITwhere ),
 272         ( "_scc_",      ITscc )
 273      ]
 274
 275 ghcExtensionKeywordsFM = listToUFM $
 276         map (\ (x,y) -> (_PK_ x,y))
 277      [  ( "forall",     ITforall ),
 278         ( "foreign",    ITforeign ),
 279         ( "export",     ITexport ),
 280         ( "label",      ITlabel ),
 281         ( "dynamic",    ITdynamic ),
 282         ( "unsafe",     ITunsafe ),
 283         ("_ccall_",     ITccall (False, False, False)),
 284         ("_ccall_GC_",  ITccall (False, False, True)),
 285         ("_casm_",      ITccall (False, True,  False)),
 286         ("_casm_GC_",   ITccall (False, True,  True)),
 287
 288         -- interface keywords
 289         ("__interface",         ITinterface),
 290         ("__export",            IT__export),
 291         ("__depends",           ITdepends),
 292         ("__forall",            IT__forall),
 293         ("__letrec",            ITletrec),
 294         ("__coerce",            ITcoerce),
 295         ("__inline_me",         ITinlineMe),
 296         ("__inline_call",       ITinlineCall),
 297         ("__depends",           ITdepends),
 298         ("__DEFAULT",           ITdefaultbranch),
 299         ("__bot",               ITbottom),
 300         ("__integer",           ITinteger_lit),
 301         ("__float",             ITfloat_lit),
 302         ("__rational",          ITrational_lit),
 303         ("__addr",              ITaddr_lit),
 304         ("__litlit",            ITlit_lit),
 305         ("__string",            ITstring_lit),
 306         ("__a",                 ITtypeapp),
 307         ("__o",                 ITonce),
 308         ("__m",                 ITmany),
 309         ("__A",                 ITarity),
 310         ("__P",                 ITspecialise),
 311         ("__C",                 ITnocaf),
 312         ("__R",                 ITrules),
 313         ("__u",                 ITunfold NoInlinePragInfo),
 314
 315         ("__ccall",             ITccall (False, False, False)),
 316         ("__ccall_GC",          ITccall (False, False, True)),
 317         ("__dyn_ccall",         ITccall (True,  False, False)),
 318         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 319         ("__casm",              ITccall (False, True,  False)),
 320         ("__dyn_casm",          ITccall (True,  True,  False)),
 321         ("__casm_GC",           ITccall (False, True,  True)),
 322         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 323
 324         ("/\\",                 ITbiglam)
 325      ]
 326
 327
 328 haskellKeySymsFM = listToUFM $
 329         map (\ (x,y) -> (_PK_ x,y))
 330       [ ("..",          ITdotdot)
 331        ,("::",          ITdcolon)
 332        ,("=",           ITequal)
 333        ,("\\",          ITlam)
 334        ,("|",           ITvbar)
 335        ,("<-",          ITlarrow)
 336        ,("->",          ITrarrow)
 337        ,("@",           ITat)
 338        ,("~",           ITtilde)
 339        ,("=>",          ITdarrow)
 340        ,("-",           ITminus)
 341        ,("!",           ITbang)
 342        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 343        ]
 344 \end{code}
 345
 346 -----------------------------------------------------------------------------
 347 The lexical analyser
 348
 349 Lexer state:
 350
 351         - (glaexts) lexing an interface file or -fglasgow-exts
 352         - (bol)   pointer to beginning of line (for column calculations)
 353         - (buf)   pointer to beginning of token
 354         - (buf)   pointer to current char
 355         - (atbol) flag indicating whether we're at the beginning of a line
 356
 357 \begin{code}
 358 lexer :: (Token -> P a) -> P a
 359 lexer cont buf s@(PState{
 360                     loc = loc,
 361                     glasgow_exts = glaexts,
 362                     bol = bol,
 363                     atbol = atbol,
 364                     context = ctx
 365                 })
 366
 367         -- first, start a new lexeme and lose all the whitespace
 368   = tab line bol atbol (stepOverLexeme buf)
 369   where
 370         line = srcLocLine loc
 371
 372         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 373           case currentChar# buf of
 374
 375             '\NUL'# ->
 376                    if bufferExhausted (stepOn buf)
 377                         then cont ITeof buf s'
 378                         else trace "lexer: misplaced NUL?" $
 379                              tab y bol atbol (stepOn buf)
 380
 381             '\n'# -> let buf' = stepOn buf
 382                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 383
 384                 -- find comments.  This got harder in Haskell 98.
 385             '-'# ->  let trundle n =
 386                           let next = lookAhead# buf n in
 387                           if next `eqChar#` '-'# then trundle (n +# 1#)
 388                           else if is_symbol next || n <# 2#
 389                                 then is_a_token
 390                                 else case untilChar# (stepOnBy# buf n) '\n'# of
 391                                     { buf' -> tab y bol atbol (stepOverLexeme buf')
 392                                     }
 393                     in trundle 1#
 394
 395                 -- comments and pragmas.  We deal with LINE pragmas here,
 396                 -- and throw out any unrecognised pragmas as comments.  Any
 397                 -- pragmas we know about are dealt with later (after any layout
 398                 -- processing if necessary).
 399
 400             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 401                 if lookAhead# buf 2# `eqChar#` '#'# then
 402                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 403                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 404                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 405                   let lexeme = mkFastString -- ToDo: too slow
 406                                   (map toUpper (lexemeToString buf2)) in
 407                   case lookupUFM pragmaKeywordsFM lexeme of
 408                         Just ITline_prag -> line_prag (lexer cont) buf2 s'
 409                         Just other -> is_a_token
 410                         Nothing -> skip_to_end (stepOnBy# buf 2#)
 411                   }}
 412
 413                 else skip_to_end (stepOnBy# buf 2#)
 414                 where
 415                     skip_to_end buf = nested_comment (lexer cont) buf s'
 416
 417                 -- tabs have been expanded beforehand
 418             c | is_space c -> tab y bol atbol (stepOn buf)
 419               | otherwise  -> is_a_token
 420
 421            where s' = s{loc = replaceSrcLine loc y,
 422                         bol = bol,
 423                        atbol = atbol}
 424
 425                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 426                             | otherwise    = lexToken cont glaexts buf s'
 427
 428 -- {-# LINE .. #-} pragmas.  yeuch.
 429 line_prag cont buf =
 430   case expandWhile# is_space buf                of { buf1 ->
 431   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 432   -- subtract one: the line number refers to the *following* line.
 433   let real_line = line - 1 in
 434   case fromInteger real_line                    of { i@(I# l) ->
 435   case expandWhile# is_space buf2               of { buf3 ->
 436   case currentChar# buf3                        of
 437      '\"'#{-"-} ->
 438         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 439         let file = lexemeToFastString buf4 in
 440         \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
 441         }
 442      other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
 443   }}}}
 444   where
 445         skipToEnd buf = nested_comment cont buf
 446
 447 nested_comment :: P a -> P a
 448 nested_comment cont buf = loop buf
 449  where
 450    loop buf =
 451      case currentChar# buf of
 452         '\NUL'# | bufferExhausted (stepOn buf) ->
 453                 lexError "unterminated `{-'" buf
 454
 455         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 456                 cont (stepOnBy# buf 2#)
 457
 458         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 459               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 460
 461         '\n'# -> \ s@PState{loc=loc} ->
 462                  let buf' = stepOn buf in
 463                  nested_comment cont buf'
 464                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 465                           atbol = 1#}
 466
 467         _   -> nested_comment cont (stepOn buf)
 468
 469 -- When we are lexing the first token of a line, check whether we need to
 470 -- insert virtual semicolons or close braces due to layout.
 471
 472 lexBOL :: (Token -> P a) -> P a
 473 lexBOL cont buf s@(PState{
 474                     loc = loc,
 475                     glasgow_exts = glaexts,
 476                     bol = bol,
 477                     atbol = atbol,
 478                     context = ctx
 479                   }) =
 480         if need_close_curly then
 481                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 482                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 483         else if need_semi_colon then
 484                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 485                 cont ITsemi buf s{atbol = 0#}
 486         else
 487                 lexToken cont glaexts buf s{atbol = 0#}
 488   where
 489         col = currentIndex# buf -# bol
 490
 491         need_close_curly =
 492                 case ctx of
 493                         [] -> False
 494                         (i:_) -> case i of
 495                                     NoLayout -> False
 496                                     Layout n -> col <# n
 497         need_semi_colon =
 498                 case ctx of
 499                         [] -> False
 500                         (i:_) -> case i of
 501                                     NoLayout -> False
 502                                     Layout n -> col ==# n
 503
 504
 505 lexToken :: (Token -> P a) -> Int# -> P a
 506 lexToken cont glaexts buf =
 507  --trace "lexToken" $
 508  _scc_ "Lexer"
 509   case currentChar# buf of
 510
 511     -- special symbols ----------------------------------------------------
 512     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 513                 -> cont IToubxparen (setCurrentPos# buf 2#)
 514          | otherwise
 515                 -> cont IToparen (incLexeme buf)
 516
 517     ')'# -> cont ITcparen    (incLexeme buf)
 518     '['# -> cont ITobrack    (incLexeme buf)
 519     ']'# -> cont ITcbrack    (incLexeme buf)
 520     ','# -> cont ITcomma     (incLexeme buf)
 521     ';'# -> cont ITsemi      (incLexeme buf)
 522
 523     '}'# -> \ s@PState{context = ctx} ->
 524             case ctx of
 525                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 526                 _ -> lexError "too many '}'s" buf s
 527
 528     '#'# -> case lookAhead# buf 1# of
 529                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 530                 '-'# -> case lookAhead# buf 2# of
 531                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 532                            _    -> lex_sym cont (incLexeme buf)
 533                 _    -> lex_sym cont (incLexeme buf)
 534
 535     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 536                 -> lex_cstring cont (setCurrentPos# buf 2#)
 537          | otherwise
 538                 -> cont ITbackquote (incLexeme buf)
 539
 540     '{'# ->     -- look for "{-##" special iface pragma
 541         case lookAhead# buf 1# of
 542            '-'# -> case lookAhead# buf 2# of
 543                     '#'# -> case lookAhead# buf 3# of
 544                                 '#'# ->
 545                                    let (lexeme, buf')
 546                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 547                                    cont (ITpragma lexeme) buf'
 548                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 549                     _    -> cont ITocurly (incLexeme buf)
 550            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 551
 552     -- strings/characters -------------------------------------------------
 553     '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
 554     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 555
 556     -- strictness and cpr pragmas and __scc treated specially.
 557     '_'# | flag glaexts ->
 558          case lookAhead# buf 1# of
 559            '_'# -> case lookAhead# buf 2# of
 560                     'S'# ->
 561                         lex_demand cont (stepOnUntil (not . isSpace)
 562                                         (stepOnBy# buf 3#)) -- past __S
 563                     'M'# ->
 564                         lex_cpr cont (stepOnUntil (not . isSpace)
 565                                      (stepOnBy# buf 3#)) -- past __M
 566                     's'# ->
 567                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 568                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 569                                Nothing   -> lex_id cont glaexts buf
 570                     _ -> lex_id cont glaexts buf
 571            _    -> lex_id cont glaexts buf
 572
 573         -- Hexadecimal and octal constants
 574     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 575                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 576          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 577                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 578         where ch   = lookAhead# buf 1#
 579               ch2  = lookAhead# buf 2#
 580               buf' = setCurrentPos# buf 2#
 581
 582     '\NUL'# ->
 583             if bufferExhausted (stepOn buf) then
 584                cont ITeof buf
 585             else
 586                trace "lexIface: misplaced NUL?" $
 587                cont (ITunknown "\NUL") (stepOn buf)
 588
 589     c | is_digit  c -> lex_num cont glaexts 0 buf
 590       | is_symbol c -> lex_sym cont buf
 591       | is_upper  c -> lex_con cont glaexts buf
 592       | is_ident  c -> lex_id  cont glaexts buf
 593       | otherwise   -> lexError "illegal character" buf
 594
 595 -- Int# is unlifted, and therefore faster than Bool for flags.
 596 {-# INLINE flag #-}
 597 flag :: Int# -> Bool
 598 flag 0# = False
 599 flag _  = True
 600
 601 -------------------------------------------------------------------------------
 602 -- Pragmas
 603
 604 lex_prag cont buf
 605   = case expandWhile# is_space buf of { buf1 ->
 606     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 607     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 608     case lookupUFM pragmaKeywordsFM lexeme of
 609         Just kw -> cont kw (mergeLexemes buf buf2)
 610         Nothing -> panic "lex_prag"
 611   }}
 612
 613 -------------------------------------------------------------------------------
 614 -- Strings & Chars
 615
 616 lex_string cont glaexts s buf
 617   = case currentChar# buf of
 618         '"'#{-"-} ->
 619            let buf' = incLexeme buf; s' = mkFastString (reverse s) in
 620            case currentChar# buf' of
 621                 '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
 622                 _                   -> cont (ITstring s') buf'
 623
 624         -- ignore \& in a string, deal with string gaps
 625         '\\'# | next_ch `eqChar#` '&'#
 626                 -> lex_string cont glaexts s (setCurrentPos# buf 2#)
 627               | is_space next_ch
 628                 -> lex_stringgap cont glaexts s buf'
 629
 630             where next_ch = lookAhead# buf 1#
 631                   buf' = setCurrentPos# buf 2#
 632
 633         _ -> lex_char (lex_next_string cont s) glaexts buf
 634
 635 lex_stringgap cont glaexts s buf
 636   = let buf' = incLexeme buf in
 637     case currentChar# buf of
 638         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 639                   st{loc = incSrcLine loc}
 640         '\\'# -> lex_string cont glaexts s buf'
 641         c | is_space c -> lex_stringgap cont glaexts s buf'
 642         other -> charError buf'
 643
 644 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 645
 646 lex_char :: (Int# -> Char -> P a) -> Int# -> P a
 647 lex_char cont glaexts buf
 648   = case currentChar# buf of
 649         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 650         c | is_any c -> cont glaexts (C# c) (incLexeme buf)
 651         other -> charError buf
 652
 653 char_end cont glaexts c buf
 654   = case currentChar# buf of
 655         '\''# -> let buf' = incLexeme buf in
 656                  case currentChar# buf' of
 657                         '#'# | flag glaexts
 658                                 -> cont (ITprimchar c) (incLexeme buf')
 659                         _       -> cont (ITchar c) buf'
 660         _     -> charError buf
 661
 662 lex_escape cont buf
 663   = let buf' = incLexeme buf in
 664     case currentChar# buf of
 665         'a'#       -> cont '\a' buf'
 666         'b'#       -> cont '\b' buf'
 667         'f'#       -> cont '\f' buf'
 668         'n'#       -> cont '\n' buf'
 669         'r'#       -> cont '\r' buf'
 670         't'#       -> cont '\t' buf'
 671         'v'#       -> cont '\v' buf'
 672         '\\'#      -> cont '\\' buf'
 673         '"'#       -> cont '\"' buf'
 674         '\''#      -> cont '\'' buf'
 675         '^'#       -> let c = currentChar# buf' in
 676                       if c `geChar#` '@'# && c `leChar#` '_'#
 677                         then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
 678                         else charError buf'
 679
 680         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 681         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 682         x | is_digit x
 683                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 684
 685         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 686                                        Just buf2 <- [prefixMatch buf p] ] of
 687                             (c,buf2):_ -> cont c buf2
 688                             [] -> charError buf'
 689
 690 after_charnum cont i buf
 691   = let int = fromInteger i in
 692     if i >= 0 && i <= 255
 693         then cont (chr int) buf
 694         else charError buf
 695
 696 readNum cont buf is_digit base conv = read buf 0
 697   where read buf i
 698           = case currentChar# buf of { c ->
 699             if is_digit c
 700                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 701                 else cont i buf
 702             }
 703
 704 is_hexdigit c
 705         =  is_digit c
 706         || (c `geChar#` 'a'# && c `leChar#` 'h'#)
 707         || (c `geChar#` 'A'# && c `leChar#` 'H'#)
 708
 709 hex c | is_digit c = ord# c -# ord# '0'#
 710       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 711 oct_or_dec c = ord# c -# ord# '0'#
 712
 713 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 714
 715 to_lower c
 716   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 717         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 718   | otherwise = c
 719
 720 charError buf = lexError "error in character literal" buf
 721
 722 silly_escape_chars = [
 723         ("NUL", '\NUL'),
 724         ("SOH", '\SOH'),
 725         ("STX", '\STX'),
 726         ("ETX", '\ETX'),
 727         ("EOT", '\EOT'),
 728         ("ENQ", '\ENQ'),
 729         ("ACK", '\ACK'),
 730         ("BEL", '\BEL'),
 731         ("BS", '\BS'),
 732         ("HT", '\HT'),
 733         ("LF", '\LF'),
 734         ("VT", '\VT'),
 735         ("FF", '\FF'),
 736         ("CR", '\CR'),
 737         ("SO", '\SO'),
 738         ("SI", '\SI'),
 739         ("DLE", '\DLE'),
 740         ("DC1", '\DC1'),
 741         ("DC2", '\DC2'),
 742         ("DC3", '\DC3'),
 743         ("DC4", '\DC4'),
 744         ("NAK", '\NAK'),
 745         ("SYN", '\SYN'),
 746         ("ETB", '\ETB'),
 747         ("CAN", '\CAN'),
 748         ("EM", '\EM'),
 749         ("SUB", '\SUB'),
 750         ("ESC", '\ESC'),
 751         ("FS", '\FS'),
 752         ("GS", '\GS'),
 753         ("RS", '\RS'),
 754         ("US", '\US'),
 755         ("SP", '\SP'),
 756         ("DEL", '\DEL')
 757         ]
 758
 759 -------------------------------------------------------------------------------
 760
 761 lex_demand cont buf =
 762  case read_em [] buf of { (ls,buf') ->
 763  case currentChar# buf' of
 764    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 765    _    -> cont (ITstrict (ls, False)) buf'
 766  }
 767  where
 768    -- code snatched from Demand.lhs
 769   read_em acc buf =
 770    case currentChar# buf of
 771     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 772     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 773     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 774     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 775     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 776     ')'# -> (reverse acc, stepOn buf)
 777     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 778     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 779     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 780     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 781     _    -> (reverse acc, buf)
 782
 783   do_unpack new_or_data wrapper_unpacks acc buf
 784    = case read_em [] buf of
 785       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 786
 787 lex_cpr cont buf =
 788  case read_em [] buf of { (cpr_inf,buf') ->
 789    ASSERT ( null (tail cpr_inf) )
 790    cont (ITcprinfo $ head cpr_inf) buf'
 791  }
 792  where
 793    -- code snatched from lex_demand above
 794   read_em acc buf =
 795    case currentChar# buf of
 796     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 797     '('# -> do_unpack acc (stepOn buf)
 798     ')'# -> (reverse acc, stepOn buf)
 799     _    -> (reverse acc, buf)
 800
 801   do_unpack acc buf
 802    = case read_em [] buf of
 803       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 804
 805 ------------------
 806 lex_scc cont buf =
 807  case currentChar# buf of
 808   'C'# -> cont ITsccAllCafs (incLexeme buf)
 809   other -> cont ITscc buf
 810
 811 -----------------------------------------------------------------------------
 812 -- Numbers
 813
 814 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 815 lex_num cont glaexts acc buf =
 816  case scanNumLit acc buf of
 817      (acc',buf') ->
 818        case currentChar# buf' of
 819          '.'# | is_digit (lookAhead# buf' 1#) ->
 820              -- this case is not optimised at all, as the
 821              -- presence of floating point numbers in interface
 822              -- files is not that common. (ToDo)
 823             case expandWhile# is_digit (incLexeme buf') of
 824               buf2 -> -- points to first non digit char
 825
 826                 let l = case currentChar# buf2 of
 827                           'E'# -> do_exponent
 828                           'e'# -> do_exponent
 829                           _ -> buf2
 830
 831                     do_exponent
 832                         = let buf3 = incLexeme buf2 in
 833                           case currentChar# buf3 of
 834                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 835                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 836                                 x | is_digit x -> expandWhile# is_digit buf3
 837                                 _ -> buf2
 838
 839                     v = readRational__ (lexemeToString l)
 840
 841                 in case currentChar# l of -- glasgow exts only
 842                       '#'# | flag glaexts -> let l' = incLexeme l in
 843                               case currentChar# l' of
 844                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 845                                 _    -> cont (ITprimfloat  v) l'
 846                       _ -> cont (ITrational v) l
 847
 848          _ -> after_lexnum cont glaexts acc' buf'
 849
 850 after_lexnum cont glaexts i buf
 851   = case currentChar# buf of
 852         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 853         _    -> cont (ITinteger i) buf
 854
 855 -----------------------------------------------------------------------------
 856 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 857
 858 -- we lexemeToFastString on the bit between the ``''s, but include the
 859 -- quotes in the full lexeme.
 860
 861 lex_cstring cont buf =
 862  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 863    buf' -> cont (ITlitlit (lexemeToFastString
 864                                 (setCurrentPos# buf' (negateInt# 2#))))
 865                 (mergeLexemes buf buf')
 866
 867 ------------------------------------------------------------------------------
 868 -- Character Classes
 869
 870 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 871
 872 {-# INLINE is_ctype #-}
 873 #if __GLASGOW_HASKELL__ >= 303
 874 is_ctype :: Word8 -> Char# -> Bool
 875 is_ctype mask = \c ->
 876    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 877 #else
 878 is_ctype :: Int -> Char# -> Bool
 879 is_ctype (I# mask) = \c ->
 880     let (A# ctype) = ``char_types'' :: Addr
 881         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 882     in
 883         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 884 #endif
 885
 886 is_ident  = is_ctype 1
 887 is_symbol = is_ctype 2
 888 is_any    = is_ctype 4
 889 is_space  = is_ctype 8
 890 is_upper  = is_ctype 16
 891 is_digit  = is_ctype 32
 892
 893 -----------------------------------------------------------------------------
 894 -- identifiers, symbols etc.
 895
 896 lex_id cont glaexts buf =
 897  case expandWhile# is_ident buf of { buf1 ->
 898
 899  case (if flag glaexts
 900         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 901         else buf1)                              of { buf' ->
 902
 903  let lexeme  = lexemeToFastString buf' in
 904
 905  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 906         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 907                           cont kwd_token buf';
 908         Nothing        ->
 909
 910  let var_token = cont (mk_var_token lexeme) buf' in
 911
 912  if not (flag glaexts)
 913    then var_token
 914    else
 915
 916  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 917         Just kwd_token -> cont kwd_token buf';
 918         Nothing        -> var_token
 919
 920  }}}}
 921
 922 lex_sym cont buf =
 923  case expandWhile# is_symbol buf of
 924    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 925                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 926                                   cont kwd_token buf' ;
 927                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 928                                   cont (mk_var_token lexeme) buf'
 929            }
 930         where lexeme = lexemeToFastString buf'
 931
 932
 933 lex_con cont glaexts buf =
 934  case expandWhile# is_ident buf          of { buf1 ->
 935  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 936
 937  case currentChar# buf' of
 938      '.'# -> munch
 939      _    -> just_a_conid
 940
 941    where
 942     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 943                    cont (ITconid lexeme) buf'
 944     lexeme = lexemeToFastString buf'
 945     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 946  }}
 947
 948 lex_qid cont glaexts mod buf just_a_conid =
 949  case currentChar# buf of
 950   '['# ->       -- Special case for []
 951     case lookAhead# buf 1# of
 952      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 953      _    -> just_a_conid
 954
 955   '('# ->  -- Special case for (,,,)
 956            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 957     case lookAhead# buf 1# of
 958      '#'# | flag glaexts -> case lookAhead# buf 2# of
 959                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 960                                 just_a_conid
 961                 _    -> just_a_conid
 962      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 963      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 964      _    -> just_a_conid
 965
 966   '-'# -> case lookAhead# buf 1# of
 967             '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
 968             _    -> lex_id3 cont glaexts mod buf just_a_conid
 969   _    -> lex_id3 cont glaexts mod buf just_a_conid
 970
 971 lex_id3 cont glaexts mod buf just_a_conid
 972   | is_symbol (currentChar# buf) =
 973      let
 974         start_new_lexeme = stepOverLexeme buf
 975      in
 976      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 977      let
 978        lexeme  = lexemeToFastString buf'
 979         -- real lexeme is M.<sym>
 980        new_buf = mergeLexemes buf buf'
 981      in
 982      cont (mk_qvar_token mod lexeme) new_buf
 983         -- wrong, but arguably morally right: M... is now a qvarsym
 984      }
 985
 986   | otherwise   =
 987      let
 988         start_new_lexeme = stepOverLexeme buf
 989      in
 990      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 991      if emptyLexeme buf1
 992             then just_a_conid
 993             else
 994
 995      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 996
 997      let
 998       lexeme  = lexemeToFastString buf'
 999       new_buf = mergeLexemes buf buf'
1000       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1001      in
1002      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1003             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1004             Nothing        -> is_a_qvarid
1005         -- TODO: special ids (as, qualified, hiding) shouldn't be
1006         -- recognised as keywords here.  ie.  M.as is a qualified varid.
1007      }}}
1008
1009
1010 slurp_trailing_hashes buf glaexts
1011   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1012   | otherwise    = buf
1013
1014
1015 mk_var_token pk_str
1016   | is_upper f          = ITconid pk_str
1017         -- _[A-Z] is treated as a constructor in interface files.
1018   | f `eqChar#` '_'# && not (_NULL_ tl)
1019         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
1020   | is_ident f          = ITvarid pk_str
1021   | f `eqChar#` ':'#    = ITconsym pk_str
1022   | otherwise           = ITvarsym pk_str
1023   where
1024       (C# f) = _HEAD_ pk_str
1025       tl     = _TAIL_ pk_str
1026
1027 mk_qvar_token m token =
1028  case mk_var_token token of
1029    ITconid n  -> ITqconid  (m,n)
1030    ITvarid n  -> ITqvarid  (m,n)
1031    ITconsym n -> ITqconsym (m,n)
1032    ITvarsym n -> ITqvarsym (m,n)
1033    _          -> ITunknown (show token)
1034 \end{code}
1035
1036 ----------------------------------------------------------------------------
1037 Horrible stuff for dealing with M.(,,,)
1038
1039 \begin{code}
1040 lex_tuple cont mod buf back_off =
1041   go 2 buf
1042   where
1043    go n buf =
1044     case currentChar# buf of
1045       ','# -> go (n+1) (stepOn buf)
1046       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
1047       _    -> back_off
1048
1049 lex_ubx_tuple cont mod buf back_off =
1050   go 2 buf
1051   where
1052    go n buf =
1053     case currentChar# buf of
1054       ','# -> go (n+1) (stepOn buf)
1055       '#'# -> case lookAhead# buf 1# of
1056                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
1057                                  (stepOnBy# buf 2#)
1058                 _    -> back_off
1059       _    -> back_off
1060 \end{code}
1061
1062 -----------------------------------------------------------------------------
1063 doDiscard rips along really fast, looking for a '#-}',
1064 indicating the end of the pragma we're skipping
1065
1066 \begin{code}
1067 doDiscard inStr buf =
1068  case currentChar# buf of
1069    '#'# | not inStr ->
1070        case lookAhead# buf 1# of { '#'# ->
1071        case lookAhead# buf 2# of { '-'# ->
1072        case lookAhead# buf 3# of { '}'# ->
1073            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1074         _    -> doDiscard inStr (incLexeme buf) };
1075         _    -> doDiscard inStr (incLexeme buf) };
1076         _    -> doDiscard inStr (incLexeme buf) }
1077    '"'# ->
1078        let
1079         odd_slashes buf flg i# =
1080           case lookAhead# buf i# of
1081            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1082            _     -> flg
1083        in
1084        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1085          '\\'# -> -- escaping something..
1086            if odd_slashes buf True (negateInt# 2#) then
1087                -- odd number of slashes, " is escaped.
1088               doDiscard inStr (incLexeme buf)
1089            else
1090                -- even number of slashes, \ is escaped.
1091               doDiscard (not inStr) (incLexeme buf)
1092          _ -> case inStr of -- forced to avoid build-up
1093                True  -> doDiscard False (incLexeme buf)
1094                False -> doDiscard True  (incLexeme buf)
1095    _ -> doDiscard inStr (incLexeme buf)
1096
1097 \end{code}
1098
1099 -----------------------------------------------------------------------------
1100
1101 \begin{code}
1102 data LayoutContext
1103   = NoLayout
1104   | Layout Int#
1105
1106 data ParseResult a
1107   = POk PState a
1108   | PFailed Message
1109
1110 data PState = PState {
1111         loc           :: SrcLoc,
1112         glasgow_exts  :: Int#,
1113         bol           :: Int#,
1114         atbol         :: Int#,
1115         context       :: [LayoutContext]
1116      }
1117
1118 type P a = StringBuffer         -- Input string
1119         -> PState
1120         -> ParseResult a
1121
1122 returnP   :: a -> P a
1123 returnP a buf s = POk s a
1124
1125 thenP      :: P a -> (a -> P b) -> P b
1126 m `thenP` k = \ buf s ->
1127         case m buf s of
1128                 POk s1 a -> k a buf s1
1129                 PFailed err  -> PFailed err
1130
1131 thenP_     :: P a -> P b -> P b
1132 m `thenP_` k = m `thenP` \_ -> k
1133
1134 mapP :: (a -> P b) -> [a] -> P [b]
1135 mapP f [] = returnP []
1136 mapP f (a:as) =
1137      f a `thenP` \b ->
1138      mapP f as `thenP` \bs ->
1139      returnP (b:bs)
1140
1141 failP :: String -> P a
1142 failP msg buf s = PFailed (text msg)
1143
1144 failMsgP :: Message -> P a
1145 failMsgP msg buf s = PFailed msg
1146
1147 lexError :: String -> P a
1148 lexError str buf s@PState{ loc = loc }
1149   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1150
1151 getSrcLocP :: P SrcLoc
1152 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1153
1154 getSrcFile :: P FAST_STRING
1155 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1156
1157 getContext :: P [LayoutContext]
1158 getContext buf s@(PState{ context = ctx }) = POk s ctx
1159
1160 pushContext :: LayoutContext -> P ()
1161 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1162
1163 {-
1164
1165 This special case in layoutOn is to handle layout contexts with are
1166 indented the same or less than the current context.  This is illegal
1167 according to the Haskell spec, so we have to arrange to close the
1168 current context.  eg.
1169
1170 class Foo a where
1171 class Bar a
1172
1173 after the first 'where', the sequence of events is:
1174
1175         - layout system inserts a ';' (column 0)
1176         - parser begins a new context at column 0
1177         - parser shifts ';' (legal empty declaration)
1178         - parser sees 'class': parse error (we're still in the inner context)
1179
1180 trouble is, by the time we know we need a new context, the lexer has
1181 already generated the ';'.  Hacky solution is as follows: since we
1182 know the column of the next token (it's the column number of the new
1183 context), we set the ACTUAL column number of the new context to this
1184 numer plus one.  Hence the next time the lexer is called, a '}' will
1185 be generated to close the new context straight away.  Furthermore, we
1186 have to set the atbol flag so that the ';' that the parser shifted as
1187 part of the new context is re-generated.
1188
1189 when the new context is *less* indented than the current one:
1190
1191 f = f where g = g where
1192 h = h
1193
1194         - current context: column 12.
1195         - on seeing 'h' (column 0), the layout system inserts '}'
1196         - parser starts a new context, column 0
1197         - parser sees '}', uses it to close new context
1198         - we still need to insert another '}' followed by a ';',
1199           hence the atbol trick.
1200
1201 There's also a special hack in here to deal with
1202
1203         do
1204            ....
1205            e $ do
1206            blah
1207
1208 i.e. the inner context is at the same indentation level as the outer
1209 context.  This is strictly illegal according to Haskell 98, but
1210 there's a lot of existing code using this style and it doesn't make
1211 any sense to disallow it, since empty 'do' lists don't make sense.
1212 -}
1213
1214 layoutOn :: Bool -> P ()
1215 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1216     let offset = lexemeIndex buf -# bol in
1217     case ctx of
1218         Layout prev_off : _
1219            | if strict then prev_off >=# offset else prev_off ># offset ->
1220                 --trace ("layout on, column: " ++  show (I# offset)) $
1221                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1222         other ->
1223                 --trace ("layout on, column: " ++  show (I# offset)) $
1224                 POk s{ context = Layout offset : ctx } ()
1225
1226 layoutOff :: P ()
1227 layoutOff buf s@(PState{ context = ctx }) =
1228     POk s{ context = NoLayout:ctx } ()
1229
1230 popContext :: P ()
1231 popContext = \ buf s@(PState{ context = ctx }) ->
1232   case ctx of
1233         (_:tl) -> POk s{ context = tl } ()
1234         []    -> panic "Lex.popContext: empty context"
1235
1236 {-
1237  Note that if the name of the file we're processing ends
1238  with `hi-boot', we accept it on faith as having the right
1239  version. This is done so that .hi-boot files that comes
1240  with hsc don't have to be updated before every release,
1241  *and* it allows us to share .hi-boot files with versions
1242  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1243
1244  If the version number is 0, the checking is also turned off.
1245  (needed to deal with GHC.hi only!)
1246
1247  Once we can assume we're compiling with a version of ghc that
1248  supports interface file checking, we can drop the special
1249  pleading
1250 -}
1251 checkVersion :: Maybe Integer -> P ()
1252 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1253  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1254  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1255 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1256  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1257  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1258
1259 -----------------------------------------------------------------
1260
1261 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1262 ifaceParseErr s l
1263   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1264           text (lexemeToString s), char '\'']
1265
1266 ifaceVersionErr hi_vers l toks
1267   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1268           ptext SLIT("Expected"), int opt_HiVersion,
1269           ptext SLIT("found "), pp_version]
1270     where
1271      pp_version =
1272       case hi_vers of
1273         Nothing -> ptext SLIT("pre ghc-3.02 version")
1274         Just v  -> ptext SLIT("version") <+> integer v
1275
1276 \end{code}