ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( ord )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | IT__export
 127   | ITdepends
 128   | IT__forall
 129   | ITletrec
 130   | ITcoerce
 131   | ITinlineMe
 132   | ITinlineCall
 133   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 134   | ITdefaultbranch
 135   | ITbottom
 136   | ITinteger_lit
 137   | ITfloat_lit
 138   | ITword_lit
 139   | ITword64_lit
 140   | ITint64_lit
 141   | ITrational_lit
 142   | ITaddr_lit
 143   | ITlabel_lit
 144   | ITlit_lit
 145   | ITstring_lit
 146   | ITtypeapp
 147   | ITusage
 148   | ITfuall
 149   | ITarity
 150   | ITspecialise
 151   | ITnocaf
 152   | ITunfold InlinePragInfo
 153   | ITstrict ([Demand], Bool)
 154   | ITrules
 155   | ITcprinfo
 156   | ITdeprecated
 157   | IT__scc
 158   | ITsccAllCafs
 159
 160   | ITspecialise_prag           -- Pragmas
 161   | ITsource_prag
 162   | ITinline_prag
 163   | ITnoinline_prag
 164   | ITrules_prag
 165   | ITdeprecated_prag
 166   | ITline_prag
 167   | ITclose_prag
 168
 169   | ITdotdot                    -- reserved symbols
 170   | ITdcolon
 171   | ITequal
 172   | ITlam
 173   | ITvbar
 174   | ITlarrow
 175   | ITrarrow
 176   | ITat
 177   | ITtilde
 178   | ITdarrow
 179   | ITminus
 180   | ITbang
 181   | ITdot
 182
 183   | ITbiglam                    -- GHC-extension symbols
 184
 185   | ITocurly                    -- special symbols
 186   | ITccurly
 187   | ITocurlybar                 -- {|, for type applications
 188   | ITccurlybar                 -- |}, for type applications
 189   | ITvccurly
 190   | ITobrack
 191   | ITcbrack
 192   | IToparen
 193   | ITcparen
 194   | IToubxparen
 195   | ITcubxparen
 196   | ITsemi
 197   | ITcomma
 198   | ITunderscore
 199   | ITbackquote
 200
 201   | ITvarid   FAST_STRING       -- identifiers
 202   | ITconid   FAST_STRING
 203   | ITvarsym  FAST_STRING
 204   | ITconsym  FAST_STRING
 205   | ITqvarid  (FAST_STRING,FAST_STRING)
 206   | ITqconid  (FAST_STRING,FAST_STRING)
 207   | ITqvarsym (FAST_STRING,FAST_STRING)
 208   | ITqconsym (FAST_STRING,FAST_STRING)
 209
 210   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 211
 212   | ITpragma StringBuffer
 213
 214   | ITchar       Int
 215   | ITstring     FAST_STRING
 216   | ITinteger    Integer
 217   | ITrational   Rational
 218
 219   | ITprimchar   Int
 220   | ITprimstring FAST_STRING
 221   | ITprimint    Integer
 222   | ITprimfloat  Rational
 223   | ITprimdouble Rational
 224   | ITlitlit     FAST_STRING
 225
 226   | ITunknown String            -- Used when the lexer can't make sense of it
 227   | ITeof                       -- end of file token
 228   deriving Show -- debugging
 229 \end{code}
 230
 231 -----------------------------------------------------------------------------
 232 Keyword Lists
 233
 234 \begin{code}
 235 pragmaKeywordsFM = listToUFM $
 236       map (\ (x,y) -> (_PK_ x,y))
 237        [( "SPECIALISE", ITspecialise_prag ),
 238         ( "SPECIALIZE", ITspecialise_prag ),
 239         ( "SOURCE",     ITsource_prag ),
 240         ( "INLINE",     ITinline_prag ),
 241         ( "NOINLINE",   ITnoinline_prag ),
 242         ( "NOTINLINE",  ITnoinline_prag ),
 243         ( "LINE",       ITline_prag ),
 244         ( "RULES",      ITrules_prag ),
 245         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 246         ( "DEPRECATED", ITdeprecated_prag )
 247         ]
 248
 249 haskellKeywordsFM = listToUFM $
 250       map (\ (x,y) -> (_PK_ x,y))
 251        [( "_",          ITunderscore ),
 252         ( "as",         ITas ),
 253         ( "case",       ITcase ),
 254         ( "class",      ITclass ),
 255         ( "data",       ITdata ),
 256         ( "default",    ITdefault ),
 257         ( "deriving",   ITderiving ),
 258         ( "do",         ITdo ),
 259         ( "else",       ITelse ),
 260         ( "hiding",     IThiding ),
 261         ( "if",         ITif ),
 262         ( "import",     ITimport ),
 263         ( "in",         ITin ),
 264         ( "infix",      ITinfix ),
 265         ( "infixl",     ITinfixl ),
 266         ( "infixr",     ITinfixr ),
 267         ( "instance",   ITinstance ),
 268         ( "let",        ITlet ),
 269         ( "module",     ITmodule ),
 270         ( "newtype",    ITnewtype ),
 271         ( "of",         ITof ),
 272         ( "qualified",  ITqualified ),
 273         ( "then",       ITthen ),
 274         ( "type",       ITtype ),
 275         ( "where",      ITwhere ),
 276         ( "_scc_",      ITscc )
 277      ]
 278
 279 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 280 ghcExtensionKeywordsFM = listToUFM $
 281         map (\ (x,y) -> (_PK_ x,y))
 282      [  ( "forall",     ITforall ),
 283         ( "foreign",    ITforeign ),
 284         ( "export",     ITexport ),
 285         ( "label",      ITlabel ),
 286         ( "dynamic",    ITdynamic ),
 287         ( "unsafe",     ITunsafe ),
 288         ( "with",       ITwith ),
 289         ( "stdcall",    ITstdcallconv),
 290         ( "ccall",      ITccallconv),
 291         ("_ccall_",     ITccall (False, False, False)),
 292         ("_ccall_GC_",  ITccall (False, False, True)),
 293         ("_casm_",      ITccall (False, True,  False)),
 294         ("_casm_GC_",   ITccall (False, True,  True)),
 295
 296         -- interface keywords
 297         ("__interface",         ITinterface),
 298         ("__export",            IT__export),
 299         ("__depends",           ITdepends),
 300         ("__forall",            IT__forall),
 301         ("__letrec",            ITletrec),
 302         ("__coerce",            ITcoerce),
 303         ("__inline_me",         ITinlineMe),
 304         ("__inline_call",       ITinlineCall),
 305         ("__depends",           ITdepends),
 306         ("__DEFAULT",           ITdefaultbranch),
 307         ("__bot",               ITbottom),
 308         ("__integer",           ITinteger_lit),
 309         ("__float",             ITfloat_lit),
 310         ("__int64",             ITint64_lit),
 311         ("__word",              ITword_lit),
 312         ("__word64",            ITword64_lit),
 313         ("__rational",          ITrational_lit),
 314         ("__addr",              ITaddr_lit),
 315         ("__label",             ITlabel_lit),
 316         ("__litlit",            ITlit_lit),
 317         ("__string",            ITstring_lit),
 318         ("__a",                 ITtypeapp),
 319         ("__u",                 ITusage),
 320         ("__fuall",             ITfuall),
 321         ("__A",                 ITarity),
 322         ("__P",                 ITspecialise),
 323         ("__C",                 ITnocaf),
 324         ("__R",                 ITrules),
 325         ("__D",                 ITdeprecated),
 326         ("__U",                 ITunfold NoInlinePragInfo),
 327
 328         ("__ccall",             ITccall (False, False, False)),
 329         ("__ccall_GC",          ITccall (False, False, True)),
 330         ("__dyn_ccall",         ITccall (True,  False, False)),
 331         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 332         ("__casm",              ITccall (False, True,  False)),
 333         ("__dyn_casm",          ITccall (True,  True,  False)),
 334         ("__casm_GC",           ITccall (False, True,  True)),
 335         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 336
 337         ("/\\",                 ITbiglam)
 338      ]
 339
 340
 341 haskellKeySymsFM = listToUFM $
 342         map (\ (x,y) -> (_PK_ x,y))
 343       [ ("..",          ITdotdot)
 344        ,("::",          ITdcolon)
 345        ,("=",           ITequal)
 346        ,("\\",          ITlam)
 347        ,("|",           ITvbar)
 348        ,("<-",          ITlarrow)
 349        ,("->",          ITrarrow)
 350        ,("@",           ITat)
 351        ,("~",           ITtilde)
 352        ,("=>",          ITdarrow)
 353        ,("-",           ITminus)
 354        ,("!",           ITbang)
 355        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 356        ]
 357 \end{code}
 358
 359 -----------------------------------------------------------------------------
 360 The lexical analyser
 361
 362 Lexer state:
 363
 364         - (glaexts) lexing an interface file or -fglasgow-exts
 365         - (bol)   pointer to beginning of line (for column calculations)
 366         - (buf)   pointer to beginning of token
 367         - (buf)   pointer to current char
 368         - (atbol) flag indicating whether we're at the beginning of a line
 369
 370 \begin{code}
 371 lexer :: (Token -> P a) -> P a
 372 lexer cont buf s@(PState{
 373                     loc = loc,
 374                     glasgow_exts = glaexts,
 375                     bol = bol,
 376                     atbol = atbol,
 377                     context = ctx
 378                 })
 379
 380         -- first, start a new lexeme and lose all the whitespace
 381   =  _scc_ "Lexer"
 382   tab line bol atbol (stepOverLexeme buf)
 383   where
 384         line = srcLocLine loc
 385
 386         tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 387           case currentChar# buf of
 388
 389             '\NUL'# ->
 390                    if bufferExhausted (stepOn buf)
 391                         then cont ITeof buf s'
 392                         else trace "lexer: misplaced NUL?" $
 393                              tab y bol atbol (stepOn buf)
 394
 395             '\n'# -> let buf' = stepOn buf
 396                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 397
 398                 -- find comments.  This got harder in Haskell 98.
 399             '-'# ->  let trundle n =
 400                           let next = lookAhead# buf n in
 401                           if next `eqChar#` '-'# then trundle (n +# 1#)
 402                           else if is_symbol next || n <# 2#
 403                                 then is_a_token
 404                                 else tab y bol atbol
 405                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 406                     in trundle 1#
 407
 408                 -- comments and pragmas.  We deal with LINE pragmas here,
 409                 -- and throw out any unrecognised pragmas as comments.  Any
 410                 -- pragmas we know about are dealt with later (after any layout
 411                 -- processing if necessary).
 412             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 413                 if lookAhead# buf 2# `eqChar#` '#'# then
 414                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 415                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 416                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 417                   let lexeme = mkFastString -- ToDo: too slow
 418                                   (map toUpper (lexemeToString buf2)) in
 419                   case lookupUFM pragmaKeywordsFM lexeme of
 420                         Just ITline_prag ->
 421                            line_prag skip_to_end buf2 s'
 422                         Just other -> is_a_token
 423                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 424                   }}
 425
 426                 else skip_to_end (stepOnBy# buf 2#) s'
 427                 where
 428                     skip_to_end = nested_comment (lexer cont)
 429
 430                 -- special GHC extension: we grok cpp-style #line pragmas
 431             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 432                 line_prag next_line (stepOn buf) s'
 433                 where
 434                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 435
 436                 -- tabs have been expanded beforehand
 437             c | is_space c -> tab y bol atbol (stepOn buf)
 438               | otherwise  -> is_a_token
 439
 440            where s' = s{loc = replaceSrcLine loc y,
 441                         bol = bol,
 442                        atbol = atbol}
 443
 444                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 445                             | otherwise    = lexToken cont glaexts buf s'
 446
 447 -- {-# LINE .. #-} pragmas.  yeuch.
 448 line_prag cont buf s@PState{loc=loc} =
 449   case expandWhile# is_space buf                of { buf1 ->
 450   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 451   -- subtract one: the line number refers to the *following* line.
 452   let real_line = line - 1 in
 453   case fromInteger real_line                    of { i@(I# l) ->
 454         -- ToDo, if no filename then we skip the newline.... d'oh
 455   case expandWhile# is_space buf2               of { buf3 ->
 456   case currentChar# buf3                        of
 457      '\"'#{-"-} ->
 458         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 459         let
 460             file = lexemeToFastString buf4
 461             new_buf = stepOn (stepOverLexeme buf4)
 462         in
 463         if nullFastString file
 464                 then cont new_buf s{loc = replaceSrcLine loc l}
 465                 else cont new_buf s{loc = mkSrcLoc file i}
 466         }
 467      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 468   }}}}
 469
 470 nested_comment :: P a -> P a
 471 nested_comment cont buf = loop buf
 472  where
 473    loop buf =
 474      case currentChar# buf of
 475         '\NUL'# | bufferExhausted (stepOn buf) ->
 476                 lexError "unterminated `{-'" buf -- -}
 477         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 478                 cont (stepOnBy# buf 2#)
 479
 480         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 481               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 482
 483         '\n'# -> \ s@PState{loc=loc} ->
 484                  let buf' = stepOn buf in
 485                  nested_comment cont buf'
 486                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 487                           atbol = 1#}
 488
 489         _   -> nested_comment cont (stepOn buf)
 490
 491 -- When we are lexing the first token of a line, check whether we need to
 492 -- insert virtual semicolons or close braces due to layout.
 493
 494 lexBOL :: (Token -> P a) -> P a
 495 lexBOL cont buf s@(PState{
 496                     loc = loc,
 497                     glasgow_exts = glaexts,
 498                     bol = bol,
 499                     atbol = atbol,
 500                     context = ctx
 501                   }) =
 502         if need_close_curly then
 503                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 504                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 505         else if need_semi_colon then
 506                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 507                 cont ITsemi buf s{atbol = 0#}
 508         else
 509                 lexToken cont glaexts buf s{atbol = 0#}
 510   where
 511         col = currentIndex# buf -# bol
 512
 513         need_close_curly =
 514                 case ctx of
 515                         [] -> False
 516                         (i:_) -> case i of
 517                                     NoLayout -> False
 518                                     Layout n -> col <# n
 519         need_semi_colon =
 520                 case ctx of
 521                         [] -> False
 522                         (i:_) -> case i of
 523                                     NoLayout -> False
 524                                     Layout n -> col ==# n
 525
 526
 527 lexToken :: (Token -> P a) -> Int# -> P a
 528 lexToken cont glaexts buf =
 529  -- trace "lexToken" $
 530   case currentChar# buf of
 531
 532     -- special symbols ----------------------------------------------------
 533     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 534                 -> cont IToubxparen (setCurrentPos# buf 2#)
 535          | otherwise
 536                 -> cont IToparen (incLexeme buf)
 537
 538     ')'# -> cont ITcparen    (incLexeme buf)
 539     '['# -> cont ITobrack    (incLexeme buf)
 540     ']'# -> cont ITcbrack    (incLexeme buf)
 541     ','# -> cont ITcomma     (incLexeme buf)
 542     ';'# -> cont ITsemi      (incLexeme buf)
 543     '}'# -> \ s@PState{context = ctx} ->
 544             case ctx of
 545                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 546                 _        -> lexError "too many '}'s" buf s
 547     '|'# -> case lookAhead# buf 1# of
 548                  '}'#  | flag glaexts -> cont ITccurlybar
 549                                               (setCurrentPos# buf 2#)
 550                  _                    -> lex_sym cont (incLexeme buf)
 551
 552
 553     '#'# -> case lookAhead# buf 1# of
 554                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 555                 '-'# -> case lookAhead# buf 2# of
 556                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 557                            _    -> lex_sym cont (incLexeme buf)
 558                 _    -> lex_sym cont (incLexeme buf)
 559
 560     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 561                 -> lex_cstring cont (setCurrentPos# buf 2#)
 562          | otherwise
 563                 -> cont ITbackquote (incLexeme buf)
 564
 565     '{'# ->     -- look for "{-##" special iface pragma
 566             case lookAhead# buf 1# of
 567            '|'# | flag glaexts
 568                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 569            '-'# -> case lookAhead# buf 2# of
 570                     '#'# -> case lookAhead# buf 3# of
 571                                 '#'# ->
 572                                    let (lexeme, buf')
 573                                           = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
 574                                             cont (ITpragma lexeme) buf'
 575                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 576                     _    -> cont ITocurly (incLexeme buf)
 577            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 578
 579     -- strings/characters -------------------------------------------------
 580     '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
 581     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 582
 583     -- strictness and cpr pragmas and __scc treated specially.
 584     '_'# | flag glaexts ->
 585          case lookAhead# buf 1# of
 586            '_'# -> case lookAhead# buf 2# of
 587                     'S'# ->
 588                         lex_demand cont (stepOnUntil (not . isSpace)
 589                                         (stepOnBy# buf 3#)) -- past __S
 590                     'M'# ->
 591                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 592
 593                     's'# ->
 594                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 595                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 596                                Nothing   -> lex_id cont glaexts buf
 597                     _ -> lex_id cont glaexts buf
 598            _    -> lex_id cont glaexts buf
 599
 600         -- Hexadecimal and octal constants
 601     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 602                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 603          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 604                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 605         where ch   = lookAhead# buf 1#
 606               ch2  = lookAhead# buf 2#
 607               buf' = setCurrentPos# buf 2#
 608
 609     '\NUL'# ->
 610             if bufferExhausted (stepOn buf) then
 611                cont ITeof buf
 612             else
 613                trace "lexIface: misplaced NUL?" $
 614                cont (ITunknown "\NUL") (stepOn buf)
 615
 616     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 617             lex_ip cont (incLexeme buf)
 618     c | is_digit  c -> lex_num cont glaexts 0 buf
 619       | is_symbol c -> lex_sym cont buf
 620       | is_upper  c -> lex_con cont glaexts buf
 621       | is_ident  c -> lex_id  cont glaexts buf
 622       | otherwise   -> lexError "illegal character" buf
 623
 624 -- Int# is unlifted, and therefore faster than Bool for flags.
 625 {-# INLINE flag #-}
 626 flag :: Int# -> Bool
 627 flag 0# = False
 628 flag _  = True
 629
 630 -------------------------------------------------------------------------------
 631 -- Pragmas
 632
 633 lex_prag cont buf
 634   = case expandWhile# is_space buf of { buf1 ->
 635     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 636     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 637     case lookupUFM pragmaKeywordsFM lexeme of
 638         Just kw -> cont kw (mergeLexemes buf buf2)
 639         Nothing -> panic "lex_prag"
 640   }}
 641
 642 -------------------------------------------------------------------------------
 643 -- Strings & Chars
 644
 645 lex_string cont glaexts s buf
 646   = case currentChar# buf of
 647         '"'#{-"-} ->
 648            let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in
 649            case currentChar# buf' of
 650                 '#'# | flag glaexts -> if all (<= 0xFF) s
 651                     then cont (ITprimstring s') (incLexeme buf')
 652                     else lexError "primitive string literal must contain only characters <= '\xFF'" buf'
 653                 _                   -> cont (ITstring s') buf'
 654
 655         -- ignore \& in a string, deal with string gaps
 656         '\\'# | next_ch `eqChar#` '&'#
 657                 -> lex_string cont glaexts s buf'
 658               | is_space next_ch
 659                 -> lex_stringgap cont glaexts s (incLexeme buf)
 660
 661             where next_ch = lookAhead# buf 1#
 662                   buf' = setCurrentPos# buf 2#
 663
 664         _ -> lex_char (lex_next_string cont s) glaexts buf
 665
 666 lex_stringgap cont glaexts s buf
 667   = let buf' = incLexeme buf in
 668     case currentChar# buf of
 669         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 670                   st{loc = incSrcLine loc}
 671         '\\'# -> lex_string cont glaexts s buf'
 672         c | is_space c -> lex_stringgap cont glaexts s buf'
 673         other -> charError buf'
 674
 675 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 676
 677 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 678 lex_char cont glaexts buf
 679   = case currentChar# buf of
 680         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 681         c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
 682         other -> charError buf
 683
 684 char_end cont glaexts c buf
 685   = case currentChar# buf of
 686         '\''# -> let buf' = incLexeme buf in
 687                  case currentChar# buf' of
 688                         '#'# | flag glaexts
 689                                 -> cont (ITprimchar c) (incLexeme buf')
 690                         _       -> cont (ITchar c) buf'
 691         _     -> charError buf
 692
 693 lex_escape cont buf
 694   = let buf' = incLexeme buf in
 695     case currentChar# buf of
 696         'a'#       -> cont (ord '\a') buf'
 697         'b'#       -> cont (ord '\b') buf'
 698         'f'#       -> cont (ord '\f') buf'
 699         'n'#       -> cont (ord '\n') buf'
 700         'r'#       -> cont (ord '\r') buf'
 701         't'#       -> cont (ord '\t') buf'
 702         'v'#       -> cont (ord '\v') buf'
 703         '\\'#      -> cont (ord '\\') buf'
 704         '"'#       -> cont (ord '\"') buf'
 705         '\''#      -> cont (ord '\'') buf'
 706         '^'#       -> let c = currentChar# buf' in
 707                       if c `geChar#` '@'# && c `leChar#` '_'#
 708                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 709                         else charError buf'
 710
 711         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 712         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 713         x | is_digit x
 714                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 715
 716         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 717                                        Just buf2 <- [prefixMatch buf p] ] of
 718                             (c,buf2):_ -> cont (ord c) buf2
 719                             [] -> charError buf'
 720
 721 after_charnum cont i buf
 722   = if i >= 0 && i <= 0x7FFFFFFF
 723         then cont (fromInteger i) buf
 724         else charError buf
 725
 726 readNum cont buf is_digit base conv = read buf 0
 727   where read buf i
 728           = case currentChar# buf of { c ->
 729             if is_digit c
 730                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 731                 else cont i buf
 732             }
 733
 734 is_hexdigit c
 735         =  is_digit c
 736         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 737         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 738
 739 hex c | is_digit c = ord# c -# ord# '0'#
 740       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 741 oct_or_dec c = ord# c -# ord# '0'#
 742
 743 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 744
 745 to_lower c
 746   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 747         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 748   | otherwise = c
 749
 750 charError buf = lexError "error in character literal" buf
 751
 752 silly_escape_chars = [
 753         ("NUL", '\NUL'),
 754         ("SOH", '\SOH'),
 755         ("STX", '\STX'),
 756         ("ETX", '\ETX'),
 757         ("EOT", '\EOT'),
 758         ("ENQ", '\ENQ'),
 759         ("ACK", '\ACK'),
 760         ("BEL", '\BEL'),
 761         ("BS", '\BS'),
 762         ("HT", '\HT'),
 763         ("LF", '\LF'),
 764         ("VT", '\VT'),
 765         ("FF", '\FF'),
 766         ("CR", '\CR'),
 767         ("SO", '\SO'),
 768         ("SI", '\SI'),
 769         ("DLE", '\DLE'),
 770         ("DC1", '\DC1'),
 771         ("DC2", '\DC2'),
 772         ("DC3", '\DC3'),
 773         ("DC4", '\DC4'),
 774         ("NAK", '\NAK'),
 775         ("SYN", '\SYN'),
 776         ("ETB", '\ETB'),
 777         ("CAN", '\CAN'),
 778         ("EM", '\EM'),
 779         ("SUB", '\SUB'),
 780         ("ESC", '\ESC'),
 781         ("FS", '\FS'),
 782         ("GS", '\GS'),
 783         ("RS", '\RS'),
 784         ("US", '\US'),
 785         ("SP", '\SP'),
 786         ("DEL", '\DEL')
 787         ]
 788
 789 -------------------------------------------------------------------------------
 790
 791 lex_demand cont buf =
 792  case read_em [] buf of { (ls,buf') ->
 793  case currentChar# buf' of
 794    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 795    _    -> cont (ITstrict (ls, False)) buf'
 796  }
 797  where
 798    -- code snatched from Demand.lhs
 799   read_em acc buf =
 800    case currentChar# buf of
 801     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 802     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 803     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 804     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 805     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 806     ')'# -> (reverse acc, stepOn buf)
 807     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 808     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 809     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 810     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 811     _    -> (reverse acc, buf)
 812
 813   do_unpack new_or_data wrapper_unpacks acc buf
 814    = case read_em [] buf of
 815       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 816
 817
 818 ------------------
 819 lex_scc cont buf =
 820  case currentChar# buf of
 821   'C'# -> cont ITsccAllCafs (incLexeme buf)
 822   other -> cont ITscc buf
 823
 824 -----------------------------------------------------------------------------
 825 -- Numbers
 826
 827 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 828 lex_num cont glaexts acc buf =
 829  case scanNumLit acc buf of
 830      (acc',buf') ->
 831        case currentChar# buf' of
 832          '.'# | is_digit (lookAhead# buf' 1#) ->
 833              -- this case is not optimised at all, as the
 834              -- presence of floating point numbers in interface
 835              -- files is not that common. (ToDo)
 836             case expandWhile# is_digit (incLexeme buf') of
 837               buf2 -> -- points to first non digit char
 838
 839                 let l = case currentChar# buf2 of
 840                           'E'# -> do_exponent
 841                           'e'# -> do_exponent
 842                           _ -> buf2
 843
 844                     do_exponent
 845                         = let buf3 = incLexeme buf2 in
 846                           case currentChar# buf3 of
 847                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 848                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 849                                 x | is_digit x -> expandWhile# is_digit buf3
 850                                 _ -> buf2
 851
 852                     v = readRational__ (lexemeToString l)
 853
 854                 in case currentChar# l of -- glasgow exts only
 855                       '#'# | flag glaexts -> let l' = incLexeme l in
 856                               case currentChar# l' of
 857                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 858                                 _    -> cont (ITprimfloat  v) l'
 859                       _ -> cont (ITrational v) l
 860
 861          _ -> after_lexnum cont glaexts acc' buf'
 862
 863 after_lexnum cont glaexts i buf
 864   = case currentChar# buf of
 865         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 866         _    -> cont (ITinteger i) buf
 867
 868 -----------------------------------------------------------------------------
 869 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 870
 871 -- we lexemeToFastString on the bit between the ``''s, but include the
 872 -- quotes in the full lexeme.
 873
 874 lex_cstring cont buf =
 875  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 876    Just buf' -> cont (ITlitlit (lexemeToFastString
 877                                 (setCurrentPos# buf' (negateInt# 2#))))
 878                    (mergeLexemes buf buf')
 879    Nothing   -> lexError "unterminated ``" buf
 880
 881 -----------------------------------------------------------------------------
 882 -- identifiers, symbols etc.
 883
 884 lex_ip cont buf =
 885  case expandWhile# is_ident buf of
 886    buf' -> cont (ITipvarid lexeme) buf'
 887            where lexeme = lexemeToFastString buf'
 888
 889 lex_id cont glaexts buf =
 890  let buf1 = expandWhile# is_ident buf in
 891  seq buf1 $
 892
 893  case (if flag glaexts
 894         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 895         else buf1)                              of { buf' ->
 896
 897  let lexeme  = lexemeToFastString buf' in
 898
 899  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 900         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 901                           cont kwd_token buf';
 902         Nothing        ->
 903
 904  let var_token = cont (ITvarid lexeme) buf' in
 905
 906  if not (flag glaexts)
 907    then var_token
 908    else
 909
 910  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 911         Just kwd_token -> cont kwd_token buf';
 912         Nothing        -> var_token
 913
 914  }}}
 915
 916 lex_sym cont buf =
 917  -- trace "lex_sym" $
 918  case expandWhile# is_symbol buf of
 919    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 920                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 921                                   cont kwd_token buf' ;
 922                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 923                                   cont (mk_var_token lexeme) buf'
 924            }
 925         where lexeme = lexemeToFastString buf'
 926
 927
 928 lex_con cont glaexts buf =
 929  -- trace ("con: "{-++unpackFS lexeme-}) $
 930  case expandWhile# is_ident buf          of { buf1 ->
 931  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 932
 933  case currentChar# buf' of
 934      '.'# -> munch
 935      _    -> just_a_conid
 936
 937    where
 938     just_a_conid = cont (ITconid lexeme) buf'
 939     lexeme = lexemeToFastString buf'
 940     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 941  }}
 942
 943 lex_qid cont glaexts mod buf just_a_conid =
 944  -- trace ("quid: "{-++unpackFS lexeme-}) $
 945  case currentChar# buf of
 946   '['# ->       -- Special case for []
 947     case lookAhead# buf 1# of
 948      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 949      _    -> just_a_conid
 950
 951   '('# ->  -- Special case for (,,,)
 952            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 953     case lookAhead# buf 1# of
 954      '#'# | flag glaexts -> case lookAhead# buf 2# of
 955                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 956                                 just_a_conid
 957                 _    -> just_a_conid
 958      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 959      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 960      _    -> just_a_conid
 961
 962   '-'# -> case lookAhead# buf 1# of
 963             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 964             _    -> lex_id3 cont glaexts mod buf just_a_conid
 965   _    -> lex_id3 cont glaexts mod buf just_a_conid
 966
 967 lex_id3 cont glaexts mod buf just_a_conid
 968   | is_symbol (currentChar# buf) =
 969      let
 970         start_new_lexeme = stepOverLexeme buf
 971      in
 972      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 973      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 974      let
 975        lexeme  = lexemeToFastString buf'
 976         -- real lexeme is M.<sym>
 977        new_buf = mergeLexemes buf buf'
 978      in
 979      cont (mk_qvar_token mod lexeme) new_buf
 980         -- wrong, but arguably morally right: M... is now a qvarsym
 981      }
 982
 983   | otherwise   =
 984      let
 985         start_new_lexeme = stepOverLexeme buf
 986      in
 987      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
 988      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 989      if emptyLexeme buf1
 990             then just_a_conid
 991             else
 992
 993      case slurp_trailing_hashes buf1 glaexts of { buf' ->
 994
 995      let
 996       lexeme  = lexemeToFastString buf'
 997       new_buf = mergeLexemes buf buf'
 998       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
 999      in
1000      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1001             Just kwd_token -> just_a_conid; -- avoid M.where etc.
1002             Nothing        -> is_a_qvarid
1003         -- TODO: special ids (as, qualified, hiding) shouldn't be
1004         -- recognised as keywords here.  ie.  M.as is a qualified varid.
1005      }}}
1006
1007
1008 slurp_trailing_hashes buf glaexts
1009   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1010   | otherwise    = buf
1011
1012
1013 mk_var_token pk_str
1014   | is_upper f          = ITconid pk_str
1015   | is_ident f          = ITvarid pk_str
1016   | f `eqChar#` ':'#    = ITconsym pk_str
1017   | otherwise           = ITvarsym pk_str
1018   where
1019       (C# f) = _HEAD_ pk_str
1020       -- tl     = _TAIL_ pk_str
1021
1022 mk_qvar_token m token =
1023 -- trace ("mk_qvar ") $
1024  case mk_var_token token of
1025    ITconid n  -> ITqconid  (m,n)
1026    ITvarid n  -> ITqvarid  (m,n)
1027    ITconsym n -> ITqconsym (m,n)
1028    ITvarsym n -> ITqvarsym (m,n)
1029    _          -> ITunknown (show token)
1030 \end{code}
1031
1032 ----------------------------------------------------------------------------
1033 Horrible stuff for dealing with M.(,,,)
1034
1035 \begin{code}
1036 lex_tuple cont mod buf back_off =
1037   go 2 buf
1038   where
1039    go n buf =
1040     case currentChar# buf of
1041       ','# -> go (n+1) (stepOn buf)
1042       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1043       _    -> back_off
1044
1045 lex_ubx_tuple cont mod buf back_off =
1046   go 2 buf
1047   where
1048    go n buf =
1049     case currentChar# buf of
1050       ','# -> go (n+1) (stepOn buf)
1051       '#'# -> case lookAhead# buf 1# of
1052                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1053                                  (stepOnBy# buf 2#)
1054                 _    -> back_off
1055       _    -> back_off
1056 \end{code}
1057
1058 -----------------------------------------------------------------------------
1059 doDiscard rips along really fast, looking for a '#-}',
1060 indicating the end of the pragma we're skipping
1061
1062 \begin{code}
1063 doDiscard inStr buf =
1064  case currentChar# buf of
1065    '#'# | not inStr ->
1066        case lookAhead# buf 1# of { '#'# ->
1067        case lookAhead# buf 2# of { '-'# ->
1068        case lookAhead# buf 3# of { '}'# ->
1069            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1070         _    -> doDiscard inStr (incLexeme buf) };
1071         _    -> doDiscard inStr (incLexeme buf) };
1072         _    -> doDiscard inStr (incLexeme buf) }
1073    '"'# ->
1074        let
1075         odd_slashes buf flg i# =
1076           case lookAhead# buf i# of
1077            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1078            _     -> flg
1079        in
1080        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1081          '\\'# -> -- escaping something..
1082            if odd_slashes buf True (negateInt# 2#) then
1083                -- odd number of slashes, " is escaped.
1084               doDiscard inStr (incLexeme buf)
1085            else
1086                -- even number of slashes, \ is escaped.
1087               doDiscard (not inStr) (incLexeme buf)
1088          _ -> case inStr of -- forced to avoid build-up
1089                True  -> doDiscard False (incLexeme buf)
1090                False -> doDiscard True  (incLexeme buf)
1091    _ -> doDiscard inStr (incLexeme buf)
1092
1093 \end{code}
1094
1095 -----------------------------------------------------------------------------
1096
1097 \begin{code}
1098 data LayoutContext
1099   = NoLayout
1100   | Layout Int#
1101
1102 data ParseResult a
1103   = POk PState a
1104   | PFailed Message
1105
1106 data PState = PState {
1107         loc           :: SrcLoc,
1108         glasgow_exts  :: Int#,
1109         bol           :: Int#,
1110         atbol         :: Int#,
1111         context       :: [LayoutContext]
1112      }
1113
1114 type P a = StringBuffer         -- Input string
1115         -> PState
1116         -> ParseResult a
1117
1118 returnP   :: a -> P a
1119 returnP a buf s = POk s a
1120
1121 thenP      :: P a -> (a -> P b) -> P b
1122 m `thenP` k = \ buf s ->
1123         case m buf s of
1124                 POk s1 a -> k a buf s1
1125                 PFailed err  -> PFailed err
1126
1127 thenP_     :: P a -> P b -> P b
1128 m `thenP_` k = m `thenP` \_ -> k
1129
1130 mapP :: (a -> P b) -> [a] -> P [b]
1131 mapP f [] = returnP []
1132 mapP f (a:as) =
1133      f a `thenP` \b ->
1134      mapP f as `thenP` \bs ->
1135      returnP (b:bs)
1136
1137 failP :: String -> P a
1138 failP msg buf s = PFailed (text msg)
1139
1140 failMsgP :: Message -> P a
1141 failMsgP msg buf s = PFailed msg
1142
1143 lexError :: String -> P a
1144 lexError str buf s@PState{ loc = loc }
1145   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1146
1147 getSrcLocP :: P SrcLoc
1148 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1149
1150 getSrcFile :: P FAST_STRING
1151 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1152
1153 getContext :: P [LayoutContext]
1154 getContext buf s@(PState{ context = ctx }) = POk s ctx
1155
1156 pushContext :: LayoutContext -> P ()
1157 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1158
1159 {-
1160
1161 This special case in layoutOn is to handle layout contexts with are
1162 indented the same or less than the current context.  This is illegal
1163 according to the Haskell spec, so we have to arrange to close the
1164 current context.  eg.
1165
1166 class Foo a where
1167 class Bar a
1168
1169 after the first 'where', the sequence of events is:
1170
1171         - layout system inserts a ';' (column 0)
1172         - parser begins a new context at column 0
1173         - parser shifts ';' (legal empty declaration)
1174         - parser sees 'class': parse error (we're still in the inner context)
1175
1176 trouble is, by the time we know we need a new context, the lexer has
1177 already generated the ';'.  Hacky solution is as follows: since we
1178 know the column of the next token (it's the column number of the new
1179 context), we set the ACTUAL column number of the new context to this
1180 numer plus one.  Hence the next time the lexer is called, a '}' will
1181 be generated to close the new context straight away.  Furthermore, we
1182 have to set the atbol flag so that the ';' that the parser shifted as
1183 part of the new context is re-generated.
1184
1185 when the new context is *less* indented than the current one:
1186
1187 f = f where g = g where
1188 h = h
1189
1190         - current context: column 12.
1191         - on seeing 'h' (column 0), the layout system inserts '}'
1192         - parser starts a new context, column 0
1193         - parser sees '}', uses it to close new context
1194         - we still need to insert another '}' followed by a ';',
1195           hence the atbol trick.
1196
1197 There's also a special hack in here to deal with
1198
1199         do
1200            ....
1201            e $ do
1202            blah
1203
1204 i.e. the inner context is at the same indentation level as the outer
1205 context.  This is strictly illegal according to Haskell 98, but
1206 there's a lot of existing code using this style and it doesn't make
1207 any sense to disallow it, since empty 'do' lists don't make sense.
1208 -}
1209
1210 layoutOn :: Bool -> P ()
1211 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1212     let offset = lexemeIndex buf -# bol in
1213     case ctx of
1214         Layout prev_off : _
1215            | if strict then prev_off >=# offset else prev_off ># offset ->
1216                 --trace ("layout on, column: " ++  show (I# offset)) $
1217                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1218         other ->
1219                 --trace ("layout on, column: " ++  show (I# offset)) $
1220                 POk s{ context = Layout offset : ctx } ()
1221
1222 layoutOff :: P ()
1223 layoutOff buf s@(PState{ context = ctx }) =
1224     POk s{ context = NoLayout:ctx } ()
1225
1226 popContext :: P ()
1227 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1228   case ctx of
1229         (_:tl) -> POk s{ context = tl } ()
1230         []     -> PFailed (srcParseErr buf loc)
1231
1232 {-
1233  Note that if the name of the file we're processing ends
1234  with `hi-boot', we accept it on faith as having the right
1235  version. This is done so that .hi-boot files that comes
1236  with hsc don't have to be updated before every release,
1237  *and* it allows us to share .hi-boot files with versions
1238  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1239
1240  If the version number is 0, the checking is also turned off.
1241  (needed to deal with GHC.hi only!)
1242
1243  Once we can assume we're compiling with a version of ghc that
1244  supports interface file checking, we can drop the special
1245  pleading
1246 -}
1247 checkVersion :: Maybe Integer -> P ()
1248 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1249  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1250  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1251 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1252  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1253  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1254
1255 -----------------------------------------------------------------
1256
1257 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1258 ifaceParseErr s l
1259   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1260           text (lexemeToString s), char '\'']
1261
1262 ifaceVersionErr hi_vers l toks
1263   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1264           ptext SLIT("Expected"), int opt_HiVersion,
1265           ptext SLIT("found "), pp_version]
1266     where
1267      pp_version =
1268       case hi_vers of
1269         Nothing -> ptext SLIT("pre ghc-3.02 version")
1270         Just v  -> ptext SLIT("version") <+> integer v
1271
1272 -----------------------------------------------------------------------------
1273
1274 srcParseErr :: StringBuffer -> SrcLoc -> Message
1275 srcParseErr s l
1276   = hcat [ppr l,
1277           if null token
1278              then ptext SLIT(": parse error (possibly incorrect indentation)")
1279              else hcat [ptext SLIT(": parse error on input "),
1280                         char '`', text token, char '\'']
1281     ]
1282   where
1283         token = lexemeToString s
1284
1285 \end{code}