ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( chr, ord )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | IT__export
 127   | ITdepends
 128   | IT__forall
 129   | ITletrec
 130   | ITcoerce
 131   | ITinlineMe
 132   | ITinlineCall
 133   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 134   | ITdefaultbranch
 135   | ITbottom
 136   | ITinteger_lit
 137   | ITfloat_lit
 138   | ITword_lit
 139   | ITword64_lit
 140   | ITint64_lit
 141   | ITrational_lit
 142   | ITaddr_lit
 143   | ITlabel_lit
 144   | ITlit_lit
 145   | ITstring_lit
 146   | ITtypeapp
 147   | ITusage
 148   | ITfuall
 149   | ITarity
 150   | ITspecialise
 151   | ITnocaf
 152   | ITunfold InlinePragInfo
 153   | ITstrict ([Demand], Bool)
 154   | ITrules
 155   | ITcprinfo
 156   | ITdeprecated
 157   | IT__scc
 158   | ITsccAllCafs
 159
 160   | ITspecialise_prag           -- Pragmas
 161   | ITsource_prag
 162   | ITinline_prag
 163   | ITnoinline_prag
 164   | ITrules_prag
 165   | ITdeprecated_prag
 166   | ITline_prag
 167   | ITscc_prag
 168   | ITclose_prag
 169
 170   | ITdotdot                    -- reserved symbols
 171   | ITdcolon
 172   | ITequal
 173   | ITlam
 174   | ITvbar
 175   | ITlarrow
 176   | ITrarrow
 177   | ITat
 178   | ITtilde
 179   | ITdarrow
 180   | ITminus
 181   | ITbang
 182   | ITdot
 183
 184   | ITbiglam                    -- GHC-extension symbols
 185
 186   | ITocurly                    -- special symbols
 187   | ITccurly
 188   | ITocurlybar                 -- {|, for type applications
 189   | ITccurlybar                 -- |}, for type applications
 190   | ITvccurly
 191   | ITobrack
 192   | ITcbrack
 193   | IToparen
 194   | ITcparen
 195   | IToubxparen
 196   | ITcubxparen
 197   | ITsemi
 198   | ITcomma
 199   | ITunderscore
 200   | ITbackquote
 201
 202   | ITvarid   FAST_STRING       -- identifiers
 203   | ITconid   FAST_STRING
 204   | ITvarsym  FAST_STRING
 205   | ITconsym  FAST_STRING
 206   | ITqvarid  (FAST_STRING,FAST_STRING)
 207   | ITqconid  (FAST_STRING,FAST_STRING)
 208   | ITqvarsym (FAST_STRING,FAST_STRING)
 209   | ITqconsym (FAST_STRING,FAST_STRING)
 210
 211   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 212
 213   | ITpragma StringBuffer
 214
 215   | ITchar       Int
 216   | ITstring     FAST_STRING
 217   | ITinteger    Integer
 218   | ITrational   Rational
 219
 220   | ITprimchar   Int
 221   | ITprimstring FAST_STRING
 222   | ITprimint    Integer
 223   | ITprimfloat  Rational
 224   | ITprimdouble Rational
 225   | ITlitlit     FAST_STRING
 226
 227   | ITunknown String            -- Used when the lexer can't make sense of it
 228   | ITeof                       -- end of file token
 229   deriving Show -- debugging
 230 \end{code}
 231
 232 -----------------------------------------------------------------------------
 233 Keyword Lists
 234
 235 \begin{code}
 236 pragmaKeywordsFM = listToUFM $
 237       map (\ (x,y) -> (_PK_ x,y))
 238        [( "SPECIALISE", ITspecialise_prag ),
 239         ( "SPECIALIZE", ITspecialise_prag ),
 240         ( "SOURCE",     ITsource_prag ),
 241         ( "INLINE",     ITinline_prag ),
 242         ( "NOINLINE",   ITnoinline_prag ),
 243         ( "NOTINLINE",  ITnoinline_prag ),
 244         ( "LINE",       ITline_prag ),
 245         ( "RULES",      ITrules_prag ),
 246         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 247         ( "SCC",        ITscc_prag ),
 248         ( "DEPRECATED", ITdeprecated_prag )
 249         ]
 250
 251 haskellKeywordsFM = listToUFM $
 252       map (\ (x,y) -> (_PK_ x,y))
 253        [( "_",          ITunderscore ),
 254         ( "as",         ITas ),
 255         ( "case",       ITcase ),
 256         ( "class",      ITclass ),
 257         ( "data",       ITdata ),
 258         ( "default",    ITdefault ),
 259         ( "deriving",   ITderiving ),
 260         ( "do",         ITdo ),
 261         ( "else",       ITelse ),
 262         ( "hiding",     IThiding ),
 263         ( "if",         ITif ),
 264         ( "import",     ITimport ),
 265         ( "in",         ITin ),
 266         ( "infix",      ITinfix ),
 267         ( "infixl",     ITinfixl ),
 268         ( "infixr",     ITinfixr ),
 269         ( "instance",   ITinstance ),
 270         ( "let",        ITlet ),
 271         ( "module",     ITmodule ),
 272         ( "newtype",    ITnewtype ),
 273         ( "of",         ITof ),
 274         ( "qualified",  ITqualified ),
 275         ( "then",       ITthen ),
 276         ( "type",       ITtype ),
 277         ( "where",      ITwhere ),
 278         ( "_scc_",      ITscc )         -- ToDo: remove
 279      ]
 280
 281 isSpecial :: Token -> Bool
 282 -- If we see M.x, where x is a keyword, but
 283 -- is special, we treat is as just plain M.x,
 284 -- not as a keyword.
 285 isSpecial ITas          = True
 286 isSpecial IThiding      = True
 287 isSpecial ITqualified   = True
 288 isSpecial ITforall      = True
 289 isSpecial ITexport      = True
 290 isSpecial ITlabel       = True
 291 isSpecial ITdynamic     = True
 292 isSpecial ITunsafe      = True
 293 isSpecial ITwith        = True
 294 isSpecial ITccallconv   = True
 295 isSpecial ITstdcallconv = True
 296 isSpecial _             = False
 297
 298 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 299 ghcExtensionKeywordsFM = listToUFM $
 300         map (\ (x,y) -> (_PK_ x,y))
 301      [  ( "forall",     ITforall ),
 302         ( "foreign",    ITforeign ),
 303         ( "export",     ITexport ),
 304         ( "label",      ITlabel ),
 305         ( "dynamic",    ITdynamic ),
 306         ( "unsafe",     ITunsafe ),
 307         ( "with",       ITwith ),
 308         ( "stdcall",    ITstdcallconv),
 309         ( "ccall",      ITccallconv),
 310         ("_ccall_",     ITccall (False, False, False)),
 311         ("_ccall_GC_",  ITccall (False, False, True)),
 312         ("_casm_",      ITccall (False, True,  False)),
 313         ("_casm_GC_",   ITccall (False, True,  True)),
 314
 315         -- interface keywords
 316         ("__interface",         ITinterface),
 317         ("__export",            IT__export),
 318         ("__depends",           ITdepends),
 319         ("__forall",            IT__forall),
 320         ("__letrec",            ITletrec),
 321         ("__coerce",            ITcoerce),
 322         ("__inline_me",         ITinlineMe),
 323         ("__inline_call",       ITinlineCall),
 324         ("__depends",           ITdepends),
 325         ("__DEFAULT",           ITdefaultbranch),
 326         ("__bot",               ITbottom),
 327         ("__integer",           ITinteger_lit),
 328         ("__float",             ITfloat_lit),
 329         ("__int64",             ITint64_lit),
 330         ("__word",              ITword_lit),
 331         ("__word64",            ITword64_lit),
 332         ("__rational",          ITrational_lit),
 333         ("__addr",              ITaddr_lit),
 334         ("__label",             ITlabel_lit),
 335         ("__litlit",            ITlit_lit),
 336         ("__string",            ITstring_lit),
 337         ("__a",                 ITtypeapp),
 338         ("__u",                 ITusage),
 339         ("__fuall",             ITfuall),
 340         ("__A",                 ITarity),
 341         ("__P",                 ITspecialise),
 342         ("__C",                 ITnocaf),
 343         ("__R",                 ITrules),
 344         ("__D",                 ITdeprecated),
 345         ("__U",                 ITunfold NoInlinePragInfo),
 346
 347         ("__ccall",             ITccall (False, False, False)),
 348         ("__ccall_GC",          ITccall (False, False, True)),
 349         ("__dyn_ccall",         ITccall (True,  False, False)),
 350         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 351         ("__casm",              ITccall (False, True,  False)),
 352         ("__dyn_casm",          ITccall (True,  True,  False)),
 353         ("__casm_GC",           ITccall (False, True,  True)),
 354         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 355
 356         ("/\\",                 ITbiglam)
 357      ]
 358
 359
 360 haskellKeySymsFM = listToUFM $
 361         map (\ (x,y) -> (_PK_ x,y))
 362       [ ("..",          ITdotdot)
 363        ,("::",          ITdcolon)
 364        ,("=",           ITequal)
 365        ,("\\",          ITlam)
 366        ,("|",           ITvbar)
 367        ,("<-",          ITlarrow)
 368        ,("->",          ITrarrow)
 369        ,("@",           ITat)
 370        ,("~",           ITtilde)
 371        ,("=>",          ITdarrow)
 372        ,("-",           ITminus)
 373        ,("!",           ITbang)
 374        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 375        ]
 376 \end{code}
 377
 378 -----------------------------------------------------------------------------
 379 The lexical analyser
 380
 381 Lexer state:
 382
 383         - (glaexts) lexing an interface file or -fglasgow-exts
 384         - (bol)   pointer to beginning of line (for column calculations)
 385         - (buf)   pointer to beginning of token
 386         - (buf)   pointer to current char
 387         - (atbol) flag indicating whether we're at the beginning of a line
 388
 389 \begin{code}
 390 lexer :: (Token -> P a) -> P a
 391 lexer cont buf s@(PState{
 392                     loc = loc,
 393                     glasgow_exts = glaexts,
 394                     bol = bol,
 395                     atbol = atbol,
 396                     context = ctx
 397                 })
 398
 399         -- first, start a new lexeme and lose all the whitespace
 400   =  _scc_ "Lexer"
 401   tab line bol atbol (stepOverLexeme buf)
 402   where
 403         line = srcLocLine loc
 404
 405         tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 406           case currentChar# buf of
 407
 408             '\NUL'# ->
 409                    if bufferExhausted (stepOn buf)
 410                         then cont ITeof buf s'
 411                         else trace "lexer: misplaced NUL?" $
 412                              tab y bol atbol (stepOn buf)
 413
 414             '\n'# -> let buf' = stepOn buf
 415                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 416
 417                 -- find comments.  This got harder in Haskell 98.
 418             '-'# ->  let trundle n =
 419                           let next = lookAhead# buf n in
 420                           if next `eqChar#` '-'# then trundle (n +# 1#)
 421                           else if is_symbol next || n <# 2#
 422                                 then is_a_token
 423                                 else tab y bol atbol
 424                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 425                     in trundle 1#
 426
 427                 -- comments and pragmas.  We deal with LINE pragmas here,
 428                 -- and throw out any unrecognised pragmas as comments.  Any
 429                 -- pragmas we know about are dealt with later (after any layout
 430                 -- processing if necessary).
 431             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 432                 if lookAhead# buf 2# `eqChar#` '#'# then
 433                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 434                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 435                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 436                   let lexeme = mkFastString -- ToDo: too slow
 437                                   (map toUpper (lexemeToString buf2)) in
 438                   case lookupUFM pragmaKeywordsFM lexeme of
 439                         Just ITline_prag ->
 440                            line_prag skip_to_end buf2 s'
 441                         Just other -> is_a_token
 442                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 443                   }}
 444
 445                 else skip_to_end (stepOnBy# buf 2#) s'
 446                 where
 447                     skip_to_end = nested_comment (lexer cont)
 448
 449                 -- special GHC extension: we grok cpp-style #line pragmas
 450             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 451                 case expandWhile# is_space (stepOn buf) of { buf1 ->
 452                 if is_digit (currentChar# buf1)
 453                         then line_prag next_line buf1 s'
 454                         else is_a_token
 455                 }
 456                 where
 457                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 458
 459                 -- tabs have been expanded beforehand
 460             c | is_space c -> tab y bol atbol (stepOn buf)
 461               | otherwise  -> is_a_token
 462
 463            where s' = s{loc = replaceSrcLine loc y,
 464                         bol = bol,
 465                        atbol = atbol}
 466
 467                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 468                             | otherwise    = lexToken cont glaexts buf s'
 469
 470 -- {-# LINE .. #-} pragmas.  yeuch.
 471 line_prag cont buf s@PState{loc=loc} =
 472   case expandWhile# is_space buf                of { buf1 ->
 473   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 474   -- subtract one: the line number refers to the *following* line.
 475   let real_line = line - 1 in
 476   case fromInteger real_line                    of { i@(I# l) ->
 477         -- ToDo, if no filename then we skip the newline.... d'oh
 478   case expandWhile# is_space buf2               of { buf3 ->
 479   case currentChar# buf3                        of
 480      '\"'#{-"-} ->
 481         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 482         let
 483             file = lexemeToFastString buf4
 484             new_buf = stepOn (stepOverLexeme buf4)
 485         in
 486         if nullFastString file
 487                 then cont new_buf s{loc = replaceSrcLine loc l}
 488                 else cont new_buf s{loc = mkSrcLoc file i}
 489         }
 490      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 491   }}}}
 492
 493 nested_comment :: P a -> P a
 494 nested_comment cont buf = loop buf
 495  where
 496    loop buf =
 497      case currentChar# buf of
 498         '\NUL'# | bufferExhausted (stepOn buf) ->
 499                 lexError "unterminated `{-'" buf -- -}
 500         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 501                 cont (stepOnBy# buf 2#)
 502
 503         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 504               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 505
 506         '\n'# -> \ s@PState{loc=loc} ->
 507                  let buf' = stepOn buf in
 508                  nested_comment cont buf'
 509                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 510                           atbol = 1#}
 511
 512         _   -> nested_comment cont (stepOn buf)
 513
 514 -- When we are lexing the first token of a line, check whether we need to
 515 -- insert virtual semicolons or close braces due to layout.
 516
 517 lexBOL :: (Token -> P a) -> P a
 518 lexBOL cont buf s@(PState{
 519                     loc = loc,
 520                     glasgow_exts = glaexts,
 521                     bol = bol,
 522                     atbol = atbol,
 523                     context = ctx
 524                   }) =
 525         if need_close_curly then
 526                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 527                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 528         else if need_semi_colon then
 529                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 530                 cont ITsemi buf s{atbol = 0#}
 531         else
 532                 lexToken cont glaexts buf s{atbol = 0#}
 533   where
 534         col = currentIndex# buf -# bol
 535
 536         need_close_curly =
 537                 case ctx of
 538                         [] -> False
 539                         (i:_) -> case i of
 540                                     NoLayout -> False
 541                                     Layout n -> col <# n
 542         need_semi_colon =
 543                 case ctx of
 544                         [] -> False
 545                         (i:_) -> case i of
 546                                     NoLayout -> False
 547                                     Layout n -> col ==# n
 548
 549
 550 lexToken :: (Token -> P a) -> Int# -> P a
 551 lexToken cont glaexts buf =
 552  -- trace "lexToken" $
 553   case currentChar# buf of
 554
 555     -- special symbols ----------------------------------------------------
 556     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 557                 -> cont IToubxparen (setCurrentPos# buf 2#)
 558          | otherwise
 559                 -> cont IToparen (incLexeme buf)
 560
 561     ')'# -> cont ITcparen    (incLexeme buf)
 562     '['# -> cont ITobrack    (incLexeme buf)
 563     ']'# -> cont ITcbrack    (incLexeme buf)
 564     ','# -> cont ITcomma     (incLexeme buf)
 565     ';'# -> cont ITsemi      (incLexeme buf)
 566     '}'# -> \ s@PState{context = ctx} ->
 567             case ctx of
 568                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 569                 _        -> lexError "too many '}'s" buf s
 570     '|'# -> case lookAhead# buf 1# of
 571                  '}'#  | flag glaexts -> cont ITccurlybar
 572                                               (setCurrentPos# buf 2#)
 573                  _                    -> lex_sym cont (incLexeme buf)
 574
 575
 576     '#'# -> case lookAhead# buf 1# of
 577                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 578                 '-'# -> case lookAhead# buf 2# of
 579                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 580                            _    -> lex_sym cont (incLexeme buf)
 581                 _    -> lex_sym cont (incLexeme buf)
 582
 583     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 584                 -> lex_cstring cont (setCurrentPos# buf 2#)
 585          | otherwise
 586                 -> cont ITbackquote (incLexeme buf)
 587
 588     '{'# ->     -- look for "{-##" special iface pragma
 589             case lookAhead# buf 1# of
 590            '|'# | flag glaexts
 591                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 592            '-'# -> case lookAhead# buf 2# of
 593                     '#'# -> case lookAhead# buf 3# of
 594                                 '#'# ->
 595                                    let (lexeme, buf')
 596                                           = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in
 597                                             cont (ITpragma lexeme) buf'
 598                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 599                     _    -> cont ITocurly (incLexeme buf)
 600            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 601
 602     -- strings/characters -------------------------------------------------
 603     '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
 604     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 605
 606     -- strictness and cpr pragmas and __scc treated specially.
 607     '_'# | flag glaexts ->
 608          case lookAhead# buf 1# of
 609            '_'# -> case lookAhead# buf 2# of
 610                     'S'# ->
 611                         lex_demand cont (stepOnUntil (not . isSpace)
 612                                         (stepOnBy# buf 3#)) -- past __S
 613                     'M'# ->
 614                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 615
 616                     's'# ->
 617                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 618                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 619                                Nothing   -> lex_id cont glaexts buf
 620                     _ -> lex_id cont glaexts buf
 621            _    -> lex_id cont glaexts buf
 622
 623         -- Hexadecimal and octal constants
 624     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 625                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 626          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 627                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 628         where ch   = lookAhead# buf 1#
 629               ch2  = lookAhead# buf 2#
 630               buf' = setCurrentPos# buf 2#
 631
 632     '\NUL'# ->
 633             if bufferExhausted (stepOn buf) then
 634                cont ITeof buf
 635             else
 636                trace "lexIface: misplaced NUL?" $
 637                cont (ITunknown "\NUL") (stepOn buf)
 638
 639     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 640             lex_ip cont (incLexeme buf)
 641     c | is_digit  c -> lex_num cont glaexts 0 buf
 642       | is_symbol c -> lex_sym cont buf
 643       | is_upper  c -> lex_con cont glaexts buf
 644       | is_ident  c -> lex_id  cont glaexts buf
 645       | otherwise   -> lexError "illegal character" buf
 646
 647 -- Int# is unlifted, and therefore faster than Bool for flags.
 648 {-# INLINE flag #-}
 649 flag :: Int# -> Bool
 650 flag 0# = False
 651 flag _  = True
 652
 653 -------------------------------------------------------------------------------
 654 -- Pragmas
 655
 656 lex_prag cont buf
 657   = case expandWhile# is_space buf of { buf1 ->
 658     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 659     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 660     case lookupUFM pragmaKeywordsFM lexeme of
 661         Just kw -> cont kw (mergeLexemes buf buf2)
 662         Nothing -> panic "lex_prag"
 663   }}
 664
 665 -------------------------------------------------------------------------------
 666 -- Strings & Chars
 667
 668 lex_string cont glaexts s buf
 669   = case currentChar# buf of
 670         '"'#{-"-} ->
 671            let buf' = incLexeme buf
 672                s' = mkFastStringNarrow (map chr (reverse s))
 673            in case currentChar# buf' of
 674                 '#'# | flag glaexts -> if all (<= 0xFF) s
 675                     then cont (ITprimstring s') (incLexeme buf')
 676                     else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 677                 _                   -> cont (ITstring s') buf'
 678
 679         -- ignore \& in a string, deal with string gaps
 680         '\\'# | next_ch `eqChar#` '&'#
 681                 -> lex_string cont glaexts s buf'
 682               | is_space next_ch
 683                 -> lex_stringgap cont glaexts s (incLexeme buf)
 684
 685             where next_ch = lookAhead# buf 1#
 686                   buf' = setCurrentPos# buf 2#
 687
 688         _ -> lex_char (lex_next_string cont s) glaexts buf
 689
 690 lex_stringgap cont glaexts s buf
 691   = let buf' = incLexeme buf in
 692     case currentChar# buf of
 693         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 694                   st{loc = incSrcLine loc}
 695         '\\'# -> lex_string cont glaexts s buf'
 696         c | is_space c -> lex_stringgap cont glaexts s buf'
 697         other -> charError buf'
 698
 699 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 700
 701 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 702 lex_char cont glaexts buf
 703   = case currentChar# buf of
 704         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 705         c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
 706         other -> charError buf
 707
 708 char_end cont glaexts c buf
 709   = case currentChar# buf of
 710         '\''# -> let buf' = incLexeme buf in
 711                  case currentChar# buf' of
 712                         '#'# | flag glaexts
 713                                 -> cont (ITprimchar c) (incLexeme buf')
 714                         _       -> cont (ITchar c) buf'
 715         _     -> charError buf
 716
 717 lex_escape cont buf
 718   = let buf' = incLexeme buf in
 719     case currentChar# buf of
 720         'a'#       -> cont (ord '\a') buf'
 721         'b'#       -> cont (ord '\b') buf'
 722         'f'#       -> cont (ord '\f') buf'
 723         'n'#       -> cont (ord '\n') buf'
 724         'r'#       -> cont (ord '\r') buf'
 725         't'#       -> cont (ord '\t') buf'
 726         'v'#       -> cont (ord '\v') buf'
 727         '\\'#      -> cont (ord '\\') buf'
 728         '"'#       -> cont (ord '\"') buf'
 729         '\''#      -> cont (ord '\'') buf'
 730         '^'#       -> let c = currentChar# buf' in
 731                       if c `geChar#` '@'# && c `leChar#` '_'#
 732                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 733                         else charError buf'
 734
 735         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 736         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 737         x | is_digit x
 738                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 739
 740         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 741                                        Just buf2 <- [prefixMatch buf p] ] of
 742                             (c,buf2):_ -> cont (ord c) buf2
 743                             [] -> charError buf'
 744
 745 after_charnum cont i buf
 746   = if i >= 0 && i <= 0x10FFFF
 747         then cont (fromInteger i) buf
 748         else charError buf
 749
 750 readNum cont buf is_digit base conv = read buf 0
 751   where read buf i
 752           = case currentChar# buf of { c ->
 753             if is_digit c
 754                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 755                 else cont i buf
 756             }
 757
 758 is_hexdigit c
 759         =  is_digit c
 760         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 761         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 762
 763 hex c | is_digit c = ord# c -# ord# '0'#
 764       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 765 oct_or_dec c = ord# c -# ord# '0'#
 766
 767 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 768
 769 to_lower c
 770   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 771         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 772   | otherwise = c
 773
 774 charError buf = lexError "error in character literal" buf
 775
 776 silly_escape_chars = [
 777         ("NUL", '\NUL'),
 778         ("SOH", '\SOH'),
 779         ("STX", '\STX'),
 780         ("ETX", '\ETX'),
 781         ("EOT", '\EOT'),
 782         ("ENQ", '\ENQ'),
 783         ("ACK", '\ACK'),
 784         ("BEL", '\BEL'),
 785         ("BS", '\BS'),
 786         ("HT", '\HT'),
 787         ("LF", '\LF'),
 788         ("VT", '\VT'),
 789         ("FF", '\FF'),
 790         ("CR", '\CR'),
 791         ("SO", '\SO'),
 792         ("SI", '\SI'),
 793         ("DLE", '\DLE'),
 794         ("DC1", '\DC1'),
 795         ("DC2", '\DC2'),
 796         ("DC3", '\DC3'),
 797         ("DC4", '\DC4'),
 798         ("NAK", '\NAK'),
 799         ("SYN", '\SYN'),
 800         ("ETB", '\ETB'),
 801         ("CAN", '\CAN'),
 802         ("EM", '\EM'),
 803         ("SUB", '\SUB'),
 804         ("ESC", '\ESC'),
 805         ("FS", '\FS'),
 806         ("GS", '\GS'),
 807         ("RS", '\RS'),
 808         ("US", '\US'),
 809         ("SP", '\SP'),
 810         ("DEL", '\DEL')
 811         ]
 812
 813 -------------------------------------------------------------------------------
 814
 815 lex_demand cont buf =
 816  case read_em [] buf of { (ls,buf') ->
 817  case currentChar# buf' of
 818    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 819    _    -> cont (ITstrict (ls, False)) buf'
 820  }
 821  where
 822    -- code snatched from Demand.lhs
 823   read_em acc buf =
 824    case currentChar# buf of
 825     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 826     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 827     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 828     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 829     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 830     ')'# -> (reverse acc, stepOn buf)
 831     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 832     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 833     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 834     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 835     _    -> (reverse acc, buf)
 836
 837   do_unpack new_or_data wrapper_unpacks acc buf
 838    = case read_em [] buf of
 839       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 840
 841
 842 ------------------
 843 lex_scc cont buf =
 844  case currentChar# buf of
 845   'C'# -> cont ITsccAllCafs (incLexeme buf)
 846   other -> cont ITscc buf
 847
 848 -----------------------------------------------------------------------------
 849 -- Numbers
 850
 851 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 852 lex_num cont glaexts acc buf =
 853  case scanNumLit acc buf of
 854      (acc',buf') ->
 855        case currentChar# buf' of
 856          '.'# | is_digit (lookAhead# buf' 1#) ->
 857              -- this case is not optimised at all, as the
 858              -- presence of floating point numbers in interface
 859              -- files is not that common. (ToDo)
 860             case expandWhile# is_digit (incLexeme buf') of
 861               buf2 -> -- points to first non digit char
 862
 863                 let l = case currentChar# buf2 of
 864                           'E'# -> do_exponent
 865                           'e'# -> do_exponent
 866                           _ -> buf2
 867
 868                     do_exponent
 869                         = let buf3 = incLexeme buf2 in
 870                           case currentChar# buf3 of
 871                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 872                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 873                                 x | is_digit x -> expandWhile# is_digit buf3
 874                                 _ -> buf2
 875
 876                     v = readRational__ (lexemeToString l)
 877
 878                 in case currentChar# l of -- glasgow exts only
 879                       '#'# | flag glaexts -> let l' = incLexeme l in
 880                               case currentChar# l' of
 881                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 882                                 _    -> cont (ITprimfloat  v) l'
 883                       _ -> cont (ITrational v) l
 884
 885          _ -> after_lexnum cont glaexts acc' buf'
 886
 887 after_lexnum cont glaexts i buf
 888   = case currentChar# buf of
 889         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 890         _    -> cont (ITinteger i) buf
 891
 892 -----------------------------------------------------------------------------
 893 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 894
 895 -- we lexemeToFastString on the bit between the ``''s, but include the
 896 -- quotes in the full lexeme.
 897
 898 lex_cstring cont buf =
 899  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 900    Just buf' -> cont (ITlitlit (lexemeToFastString
 901                                 (setCurrentPos# buf' (negateInt# 2#))))
 902                    (mergeLexemes buf buf')
 903    Nothing   -> lexError "unterminated ``" buf
 904
 905 -----------------------------------------------------------------------------
 906 -- identifiers, symbols etc.
 907
 908 lex_ip cont buf =
 909  case expandWhile# is_ident buf of
 910    buf' -> cont (ITipvarid lexeme) buf'
 911            where lexeme = lexemeToFastString buf'
 912
 913 lex_id cont glaexts buf =
 914  let buf1 = expandWhile# is_ident buf in
 915  seq buf1 $
 916
 917  case (if flag glaexts
 918         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 919         else buf1)                              of { buf' ->
 920
 921  let lexeme  = lexemeToFastString buf' in
 922
 923  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 924         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 925                           cont kwd_token buf';
 926         Nothing        ->
 927
 928  let var_token = cont (ITvarid lexeme) buf' in
 929
 930  if not (flag glaexts)
 931    then var_token
 932    else
 933
 934  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 935         Just kwd_token -> cont kwd_token buf';
 936         Nothing        -> var_token
 937
 938  }}}
 939
 940 lex_sym cont buf =
 941  -- trace "lex_sym" $
 942  case expandWhile# is_symbol buf of
 943    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 944                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 945                                   cont kwd_token buf' ;
 946                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 947                                   cont (mk_var_token lexeme) buf'
 948            }
 949         where lexeme = lexemeToFastString buf'
 950
 951
 952 lex_con cont glaexts buf =
 953  -- trace ("con: "{-++unpackFS lexeme-}) $
 954  case expandWhile# is_ident buf          of { buf1 ->
 955  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 956
 957  case currentChar# buf' of
 958      '.'# -> munch
 959      _    -> just_a_conid
 960
 961    where
 962     just_a_conid = cont (ITconid lexeme) buf'
 963     lexeme = lexemeToFastString buf'
 964     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 965  }}
 966
 967 lex_qid cont glaexts mod buf just_a_conid =
 968  -- trace ("quid: "{-++unpackFS lexeme-}) $
 969  case currentChar# buf of
 970   '['# ->       -- Special case for []
 971     case lookAhead# buf 1# of
 972      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 973      _    -> just_a_conid
 974
 975   '('# ->  -- Special case for (,,,)
 976            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 977     case lookAhead# buf 1# of
 978      '#'# | flag glaexts -> case lookAhead# buf 2# of
 979                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 980                                 just_a_conid
 981                 _    -> just_a_conid
 982      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 983      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 984      _    -> just_a_conid
 985
 986   '-'# -> case lookAhead# buf 1# of
 987             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 988             _    -> lex_id3 cont glaexts mod buf just_a_conid
 989   _    -> lex_id3 cont glaexts mod buf just_a_conid
 990
 991 lex_id3 cont glaexts mod buf just_a_conid
 992   | is_symbol (currentChar# buf) =
 993      let
 994         start_new_lexeme = stepOverLexeme buf
 995      in
 996      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 997      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 998      let
 999        lexeme  = lexemeToFastString buf'
1000         -- real lexeme is M.<sym>
1001        new_buf = mergeLexemes buf buf'
1002      in
1003      cont (mk_qvar_token mod lexeme) new_buf
1004         -- wrong, but arguably morally right: M... is now a qvarsym
1005      }
1006
1007   | otherwise   =
1008      let
1009         start_new_lexeme = stepOverLexeme buf
1010      in
1011      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1012      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1013      if emptyLexeme buf1
1014             then just_a_conid
1015             else
1016
1017      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1018
1019      let
1020       lexeme      = lexemeToFastString buf'
1021       new_buf     = mergeLexemes buf buf'
1022       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1023      in
1024      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1025             Nothing          -> is_a_qvarid ;
1026
1027             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1028                            -> is_a_qvarid          --  recognised as keywords here.
1029                            | otherwise
1030                            -> just_a_conid         -- avoid M.where etc.
1031      }}}
1032
1033 slurp_trailing_hashes buf glaexts
1034   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1035   | otherwise    = buf
1036
1037
1038 mk_var_token pk_str
1039   | is_upper f          = ITconid pk_str
1040   | is_ident f          = ITvarid pk_str
1041   | f `eqChar#` ':'#    = ITconsym pk_str
1042   | otherwise           = ITvarsym pk_str
1043   where
1044       (C# f) = _HEAD_ pk_str
1045       -- tl     = _TAIL_ pk_str
1046
1047 mk_qvar_token m token =
1048 -- trace ("mk_qvar ") $
1049  case mk_var_token token of
1050    ITconid n  -> ITqconid  (m,n)
1051    ITvarid n  -> ITqvarid  (m,n)
1052    ITconsym n -> ITqconsym (m,n)
1053    ITvarsym n -> ITqvarsym (m,n)
1054    _          -> ITunknown (show token)
1055 \end{code}
1056
1057 ----------------------------------------------------------------------------
1058 Horrible stuff for dealing with M.(,,,)
1059
1060 \begin{code}
1061 lex_tuple cont mod buf back_off =
1062   go 2 buf
1063   where
1064    go n buf =
1065     case currentChar# buf of
1066       ','# -> go (n+1) (stepOn buf)
1067       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1068       _    -> back_off
1069
1070 lex_ubx_tuple cont mod buf back_off =
1071   go 2 buf
1072   where
1073    go n buf =
1074     case currentChar# buf of
1075       ','# -> go (n+1) (stepOn buf)
1076       '#'# -> case lookAhead# buf 1# of
1077                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1078                                  (stepOnBy# buf 2#)
1079                 _    -> back_off
1080       _    -> back_off
1081 \end{code}
1082
1083 -----------------------------------------------------------------------------
1084 doDiscard rips along really fast, looking for a '##-}',
1085 indicating the end of the pragma we're skipping
1086
1087 \begin{code}
1088 doDiscard inStr buf =
1089  case currentChar# buf of
1090    '#'# | inStr ==# 0# ->
1091        case lookAhead# buf 1# of { '#'# ->
1092        case lookAhead# buf 2# of { '-'# ->
1093        case lookAhead# buf 3# of { '}'# ->
1094            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1095         _    -> doDiscard inStr (incLexeme buf) };
1096         _    -> doDiscard inStr (incLexeme buf) };
1097         _    -> doDiscard inStr (incLexeme buf) }
1098
1099    '"'# ->
1100        let
1101         odd_slashes buf flg i# =
1102           case lookAhead# buf i# of
1103            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1104            _     -> flg
1105
1106         not_inStr = if inStr ==# 0# then 1# else 0#
1107        in
1108        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1109          '\\'# -> -- escaping something..
1110            if odd_slashes buf True (negateInt# 2#)
1111                 then  -- odd number of slashes, " is escaped.
1112                       doDiscard inStr (incLexeme buf)
1113                 else  -- even number of slashes, \ is escaped.
1114                       doDiscard not_inStr (incLexeme buf)
1115          _ -> doDiscard not_inStr (incLexeme buf)
1116
1117    '\''# | inStr ==# 0# ->
1118         case lookAhead# buf 1# of { '"'# ->
1119         case lookAhead# buf 2# of { '\''# ->
1120            doDiscard inStr (setCurrentPos# buf 3#);
1121         _ -> doDiscard inStr (incLexeme buf) };
1122         _ -> doDiscard inStr (incLexeme buf) }
1123
1124    _ -> doDiscard inStr (incLexeme buf)
1125
1126 \end{code}
1127
1128 -----------------------------------------------------------------------------
1129
1130 \begin{code}
1131 data LayoutContext
1132   = NoLayout
1133   | Layout Int#
1134
1135 data ParseResult a
1136   = POk PState a
1137   | PFailed Message
1138
1139 data PState = PState {
1140         loc           :: SrcLoc,
1141         glasgow_exts  :: Int#,
1142         bol           :: Int#,
1143         atbol         :: Int#,
1144         context       :: [LayoutContext]
1145      }
1146
1147 type P a = StringBuffer         -- Input string
1148         -> PState
1149         -> ParseResult a
1150
1151 returnP   :: a -> P a
1152 returnP a buf s = POk s a
1153
1154 thenP      :: P a -> (a -> P b) -> P b
1155 m `thenP` k = \ buf s ->
1156         case m buf s of
1157                 POk s1 a -> k a buf s1
1158                 PFailed err  -> PFailed err
1159
1160 thenP_     :: P a -> P b -> P b
1161 m `thenP_` k = m `thenP` \_ -> k
1162
1163 mapP :: (a -> P b) -> [a] -> P [b]
1164 mapP f [] = returnP []
1165 mapP f (a:as) =
1166      f a `thenP` \b ->
1167      mapP f as `thenP` \bs ->
1168      returnP (b:bs)
1169
1170 failP :: String -> P a
1171 failP msg buf s = PFailed (text msg)
1172
1173 failMsgP :: Message -> P a
1174 failMsgP msg buf s = PFailed msg
1175
1176 lexError :: String -> P a
1177 lexError str buf s@PState{ loc = loc }
1178   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1179
1180 getSrcLocP :: P SrcLoc
1181 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1182
1183 getSrcFile :: P FAST_STRING
1184 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1185
1186 getContext :: P [LayoutContext]
1187 getContext buf s@(PState{ context = ctx }) = POk s ctx
1188
1189 pushContext :: LayoutContext -> P ()
1190 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1191
1192 {-
1193
1194 This special case in layoutOn is to handle layout contexts with are
1195 indented the same or less than the current context.  This is illegal
1196 according to the Haskell spec, so we have to arrange to close the
1197 current context.  eg.
1198
1199 class Foo a where
1200 class Bar a
1201
1202 after the first 'where', the sequence of events is:
1203
1204         - layout system inserts a ';' (column 0)
1205         - parser begins a new context at column 0
1206         - parser shifts ';' (legal empty declaration)
1207         - parser sees 'class': parse error (we're still in the inner context)
1208
1209 trouble is, by the time we know we need a new context, the lexer has
1210 already generated the ';'.  Hacky solution is as follows: since we
1211 know the column of the next token (it's the column number of the new
1212 context), we set the ACTUAL column number of the new context to this
1213 numer plus one.  Hence the next time the lexer is called, a '}' will
1214 be generated to close the new context straight away.  Furthermore, we
1215 have to set the atbol flag so that the ';' that the parser shifted as
1216 part of the new context is re-generated.
1217
1218 when the new context is *less* indented than the current one:
1219
1220 f = f where g = g where
1221 h = h
1222
1223         - current context: column 12.
1224         - on seeing 'h' (column 0), the layout system inserts '}'
1225         - parser starts a new context, column 0
1226         - parser sees '}', uses it to close new context
1227         - we still need to insert another '}' followed by a ';',
1228           hence the atbol trick.
1229
1230 There's also a special hack in here to deal with
1231
1232         do
1233            ....
1234            e $ do
1235            blah
1236
1237 i.e. the inner context is at the same indentation level as the outer
1238 context.  This is strictly illegal according to Haskell 98, but
1239 there's a lot of existing code using this style and it doesn't make
1240 any sense to disallow it, since empty 'do' lists don't make sense.
1241 -}
1242
1243 layoutOn :: Bool -> P ()
1244 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1245     let offset = lexemeIndex buf -# bol in
1246     case ctx of
1247         Layout prev_off : _
1248            | if strict then prev_off >=# offset else prev_off ># offset ->
1249                 --trace ("layout on, column: " ++  show (I# offset)) $
1250                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1251         other ->
1252                 --trace ("layout on, column: " ++  show (I# offset)) $
1253                 POk s{ context = Layout offset : ctx } ()
1254
1255 layoutOff :: P ()
1256 layoutOff buf s@(PState{ context = ctx }) =
1257     POk s{ context = NoLayout:ctx } ()
1258
1259 popContext :: P ()
1260 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1261   case ctx of
1262         (_:tl) -> POk s{ context = tl } ()
1263         []     -> PFailed (srcParseErr buf loc)
1264
1265 {-
1266  Note that if the name of the file we're processing ends
1267  with `hi-boot', we accept it on faith as having the right
1268  version. This is done so that .hi-boot files that comes
1269  with hsc don't have to be updated before every release,
1270  *and* it allows us to share .hi-boot files with versions
1271  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1272
1273  If the version number is 0, the checking is also turned off.
1274  (needed to deal with GHC.hi only!)
1275
1276  Once we can assume we're compiling with a version of ghc that
1277  supports interface file checking, we can drop the special
1278  pleading
1279 -}
1280 checkVersion :: Maybe Integer -> P ()
1281 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1282  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1283  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1284 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1285  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1286  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1287
1288 -----------------------------------------------------------------
1289
1290 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1291 ifaceParseErr s l
1292   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1293           text (lexemeToString s), char '\'']
1294
1295 ifaceVersionErr hi_vers l toks
1296   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1297           ptext SLIT("Expected"), int opt_HiVersion,
1298           ptext SLIT("found "), pp_version]
1299     where
1300      pp_version =
1301       case hi_vers of
1302         Nothing -> ptext SLIT("pre ghc-3.02 version")
1303         Just v  -> ptext SLIT("version") <+> integer v
1304
1305 -----------------------------------------------------------------------------
1306
1307 srcParseErr :: StringBuffer -> SrcLoc -> Message
1308 srcParseErr s l
1309   = hcat [ppr l,
1310           if null token
1311              then ptext SLIT(": parse error (possibly incorrect indentation)")
1312              else hcat [ptext SLIT(": parse error on input "),
1313                         char '`', text token, char '\'']
1314     ]
1315   where
1316         token = lexemeToString s
1317
1318 \end{code}