ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "hs_ctype.h" #-}
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, setSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import PrelNames        ( mkTupNameStr )
  41 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( listToUFM, lookupUFM )
  44 import BasicTypes       ( NewOrData(..), Boxity(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  46                           replaceSrcLine, mkSrcLoc )
  47
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import Ctype
  55 import Char             ( chr, ord )
  56 import PrelRead         ( readRational__ ) -- Glasgow non-std
  57 \end{code}
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{Data types}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 The token data type, fairly un-interesting except from one
  66 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  67 strictness, unfolding etc).
  68
  69 The Idea/Observation here is that the renamer needs to scan through
  70 all of an interface file before it can continue. But only a fraction
  71 of the information contained in the file turns out to be useful, so
  72 delaying as much as possible of the scanning and parsing of an
  73 interface file Makes Sense (Heap profiles of the compiler
  74 show a reduction in heap usage by at least a factor of two,
  75 post-renamer).
  76
  77 Hence, the interface file lexer spots when value declarations are
  78 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  79 for the type and any other id info for that binding (unfolding, strictness
  80 etc). These constructors are applied to the result of lexing these sub-chunks.
  81
  82 The lexing of the type and id info is all done lazily, of course, so
  83 the scanning (and subsequent parsing) will be done *only* on the ids the
  84 renamer finds out that it is interested in. The rest will just be junked.
  85 Laziness, you know it makes sense :-)
  86
  87 \begin{code}
  88 data Token
  89   = ITas                        -- Haskell keywords
  90   | ITcase
  91   | ITclass
  92   | ITdata
  93   | ITdefault
  94   | ITderiving
  95   | ITdo
  96   | ITelse
  97   | IThiding
  98   | ITif
  99   | ITimport
 100   | ITin
 101   | ITinfix
 102   | ITinfixl
 103   | ITinfixr
 104   | ITinstance
 105   | ITlet
 106   | ITmodule
 107   | ITnewtype
 108   | ITof
 109   | ITqualified
 110   | ITthen
 111   | ITtype
 112   | ITwhere
 113   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 114
 115   | ITforall                    -- GHC extension keywords
 116   | ITforeign
 117   | ITexport
 118   | ITlabel
 119   | ITdynamic
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124
 125   | ITinterface                 -- interface keywords
 126   | IT__export
 127   | ITdepends
 128   | IT__forall
 129   | ITletrec
 130   | ITcoerce
 131   | ITinlineMe
 132   | ITinlineCall
 133   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 134   | ITdefaultbranch
 135   | ITbottom
 136   | ITinteger_lit
 137   | ITfloat_lit
 138   | ITword_lit
 139   | ITword64_lit
 140   | ITint64_lit
 141   | ITrational_lit
 142   | ITaddr_lit
 143   | ITlabel_lit
 144   | ITlit_lit
 145   | ITstring_lit
 146   | ITtypeapp
 147   | ITusage
 148   | ITfuall
 149   | ITarity
 150   | ITspecialise
 151   | ITnocaf
 152   | ITunfold InlinePragInfo
 153   | ITstrict ([Demand], Bool)
 154   | ITrules
 155   | ITcprinfo
 156   | ITdeprecated
 157   | IT__scc
 158   | ITsccAllCafs
 159
 160   | ITspecialise_prag           -- Pragmas
 161   | ITsource_prag
 162   | ITinline_prag
 163   | ITnoinline_prag
 164   | ITrules_prag
 165   | ITdeprecated_prag
 166   | ITline_prag
 167   | ITscc_prag
 168   | ITclose_prag
 169
 170   | ITdotdot                    -- reserved symbols
 171   | ITdcolon
 172   | ITequal
 173   | ITlam
 174   | ITvbar
 175   | ITlarrow
 176   | ITrarrow
 177   | ITat
 178   | ITtilde
 179   | ITdarrow
 180   | ITminus
 181   | ITbang
 182   | ITdot
 183
 184   | ITbiglam                    -- GHC-extension symbols
 185
 186   | ITocurly                    -- special symbols
 187   | ITccurly
 188   | ITocurlybar                 -- {|, for type applications
 189   | ITccurlybar                 -- |}, for type applications
 190   | ITvccurly
 191   | ITobrack
 192   | ITcbrack
 193   | IToparen
 194   | ITcparen
 195   | IToubxparen
 196   | ITcubxparen
 197   | ITsemi
 198   | ITcomma
 199   | ITunderscore
 200   | ITbackquote
 201
 202   | ITvarid   FAST_STRING       -- identifiers
 203   | ITconid   FAST_STRING
 204   | ITvarsym  FAST_STRING
 205   | ITconsym  FAST_STRING
 206   | ITqvarid  (FAST_STRING,FAST_STRING)
 207   | ITqconid  (FAST_STRING,FAST_STRING)
 208   | ITqvarsym (FAST_STRING,FAST_STRING)
 209   | ITqconsym (FAST_STRING,FAST_STRING)
 210
 211   | ITipvarid FAST_STRING       -- GHC extension: implicit param: ?x
 212
 213   | ITpragma StringBuffer
 214
 215   | ITchar       Int
 216   | ITstring     FAST_STRING
 217   | ITinteger    Integer
 218   | ITrational   Rational
 219
 220   | ITprimchar   Int
 221   | ITprimstring FAST_STRING
 222   | ITprimint    Integer
 223   | ITprimfloat  Rational
 224   | ITprimdouble Rational
 225   | ITlitlit     FAST_STRING
 226
 227   | ITunknown String            -- Used when the lexer can't make sense of it
 228   | ITeof                       -- end of file token
 229   deriving Show -- debugging
 230 \end{code}
 231
 232 -----------------------------------------------------------------------------
 233 Keyword Lists
 234
 235 \begin{code}
 236 pragmaKeywordsFM = listToUFM $
 237       map (\ (x,y) -> (_PK_ x,y))
 238        [( "SPECIALISE", ITspecialise_prag ),
 239         ( "SPECIALIZE", ITspecialise_prag ),
 240         ( "SOURCE",     ITsource_prag ),
 241         ( "INLINE",     ITinline_prag ),
 242         ( "NOINLINE",   ITnoinline_prag ),
 243         ( "NOTINLINE",  ITnoinline_prag ),
 244         ( "LINE",       ITline_prag ),
 245         ( "RULES",      ITrules_prag ),
 246         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 247         ( "SCC",        ITscc_prag ),
 248         ( "DEPRECATED", ITdeprecated_prag )
 249         ]
 250
 251 haskellKeywordsFM = listToUFM $
 252       map (\ (x,y) -> (_PK_ x,y))
 253        [( "_",          ITunderscore ),
 254         ( "as",         ITas ),
 255         ( "case",       ITcase ),
 256         ( "class",      ITclass ),
 257         ( "data",       ITdata ),
 258         ( "default",    ITdefault ),
 259         ( "deriving",   ITderiving ),
 260         ( "do",         ITdo ),
 261         ( "else",       ITelse ),
 262         ( "hiding",     IThiding ),
 263         ( "if",         ITif ),
 264         ( "import",     ITimport ),
 265         ( "in",         ITin ),
 266         ( "infix",      ITinfix ),
 267         ( "infixl",     ITinfixl ),
 268         ( "infixr",     ITinfixr ),
 269         ( "instance",   ITinstance ),
 270         ( "let",        ITlet ),
 271         ( "module",     ITmodule ),
 272         ( "newtype",    ITnewtype ),
 273         ( "of",         ITof ),
 274         ( "qualified",  ITqualified ),
 275         ( "then",       ITthen ),
 276         ( "type",       ITtype ),
 277         ( "where",      ITwhere ),
 278         ( "_scc_",      ITscc )         -- ToDo: remove
 279      ]
 280
 281 isSpecial :: Token -> Bool
 282 -- If we see M.x, where x is a keyword, but
 283 -- is special, we treat is as just plain M.x,
 284 -- not as a keyword.
 285 isSpecial ITas          = True
 286 isSpecial IThiding      = True
 287 isSpecial ITqualified   = True
 288 isSpecial ITforall      = True
 289 isSpecial ITexport      = True
 290 isSpecial ITlabel       = True
 291 isSpecial ITdynamic     = True
 292 isSpecial ITunsafe      = True
 293 isSpecial ITwith        = True
 294 isSpecial ITccallconv   = True
 295 isSpecial ITstdcallconv = True
 296 isSpecial _             = False
 297
 298 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 299 ghcExtensionKeywordsFM = listToUFM $
 300         map (\ (x,y) -> (_PK_ x,y))
 301      [  ( "forall",     ITforall ),
 302         ( "foreign",    ITforeign ),
 303         ( "export",     ITexport ),
 304         ( "label",      ITlabel ),
 305         ( "dynamic",    ITdynamic ),
 306         ( "unsafe",     ITunsafe ),
 307         ( "with",       ITwith ),
 308         ( "stdcall",    ITstdcallconv),
 309         ( "ccall",      ITccallconv),
 310         ("_ccall_",     ITccall (False, False, False)),
 311         ("_ccall_GC_",  ITccall (False, False, True)),
 312         ("_casm_",      ITccall (False, True,  False)),
 313         ("_casm_GC_",   ITccall (False, True,  True)),
 314
 315         -- interface keywords
 316         ("__interface",         ITinterface),
 317         ("__export",            IT__export),
 318         ("__depends",           ITdepends),
 319         ("__forall",            IT__forall),
 320         ("__letrec",            ITletrec),
 321         ("__coerce",            ITcoerce),
 322         ("__inline_me",         ITinlineMe),
 323         ("__inline_call",       ITinlineCall),
 324         ("__depends",           ITdepends),
 325         ("__DEFAULT",           ITdefaultbranch),
 326         ("__bot",               ITbottom),
 327         ("__integer",           ITinteger_lit),
 328         ("__float",             ITfloat_lit),
 329         ("__int64",             ITint64_lit),
 330         ("__word",              ITword_lit),
 331         ("__word64",            ITword64_lit),
 332         ("__rational",          ITrational_lit),
 333         ("__addr",              ITaddr_lit),
 334         ("__label",             ITlabel_lit),
 335         ("__litlit",            ITlit_lit),
 336         ("__string",            ITstring_lit),
 337         ("__a",                 ITtypeapp),
 338         ("__u",                 ITusage),
 339         ("__fuall",             ITfuall),
 340         ("__A",                 ITarity),
 341         ("__P",                 ITspecialise),
 342         ("__C",                 ITnocaf),
 343         ("__R",                 ITrules),
 344         ("__D",                 ITdeprecated),
 345         ("__U",                 ITunfold NoInlinePragInfo),
 346
 347         ("__ccall",             ITccall (False, False, False)),
 348         ("__ccall_GC",          ITccall (False, False, True)),
 349         ("__dyn_ccall",         ITccall (True,  False, False)),
 350         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 351         ("__casm",              ITccall (False, True,  False)),
 352         ("__dyn_casm",          ITccall (True,  True,  False)),
 353         ("__casm_GC",           ITccall (False, True,  True)),
 354         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 355
 356         ("/\\",                 ITbiglam)
 357      ]
 358
 359
 360 haskellKeySymsFM = listToUFM $
 361         map (\ (x,y) -> (_PK_ x,y))
 362       [ ("..",          ITdotdot)
 363        ,("::",          ITdcolon)
 364        ,("=",           ITequal)
 365        ,("\\",          ITlam)
 366        ,("|",           ITvbar)
 367        ,("<-",          ITlarrow)
 368        ,("->",          ITrarrow)
 369        ,("@",           ITat)
 370        ,("~",           ITtilde)
 371        ,("=>",          ITdarrow)
 372        ,("-",           ITminus)
 373        ,("!",           ITbang)
 374        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 375        ]
 376 \end{code}
 377
 378 -----------------------------------------------------------------------------
 379 The lexical analyser
 380
 381 Lexer state:
 382
 383         - (glaexts) lexing an interface file or -fglasgow-exts
 384         - (bol)   pointer to beginning of line (for column calculations)
 385         - (buf)   pointer to beginning of token
 386         - (buf)   pointer to current char
 387         - (atbol) flag indicating whether we're at the beginning of a line
 388
 389 \begin{code}
 390 lexer :: (Token -> P a) -> P a
 391 lexer cont buf s@(PState{
 392                     loc = loc,
 393                     glasgow_exts = glaexts,
 394                     bol = bol,
 395                     atbol = atbol,
 396                     context = ctx
 397                 })
 398
 399         -- first, start a new lexeme and lose all the whitespace
 400   =  tab line bol atbol (stepOverLexeme buf)
 401   where
 402         line = srcLocLine loc
 403
 404         tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 405           case currentChar# buf of
 406
 407             '\NUL'# ->
 408                    if bufferExhausted (stepOn buf)
 409                         then cont ITeof buf s'
 410                         else trace "lexer: misplaced NUL?" $
 411                              tab y bol atbol (stepOn buf)
 412
 413             '\n'# -> let buf' = stepOn buf
 414                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 415
 416                 -- find comments.  This got harder in Haskell 98.
 417             '-'# ->  let trundle n =
 418                           let next = lookAhead# buf n in
 419                           if next `eqChar#` '-'# then trundle (n +# 1#)
 420                           else if is_symbol next || n <# 2#
 421                                 then is_a_token
 422                                 else tab y bol atbol
 423                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 424                     in trundle 1#
 425
 426                 -- comments and pragmas.  We deal with LINE pragmas here,
 427                 -- and throw out any unrecognised pragmas as comments.  Any
 428                 -- pragmas we know about are dealt with later (after any layout
 429                 -- processing if necessary).
 430             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 431                 if lookAhead# buf 2# `eqChar#` '#'# then
 432                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 433                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 434                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 435                   let lexeme = mkFastString -- ToDo: too slow
 436                                   (map toUpper (lexemeToString buf2)) in
 437                   case lookupUFM pragmaKeywordsFM lexeme of
 438                         Just ITline_prag ->
 439                            line_prag skip_to_end buf2 s'
 440                         Just other -> is_a_token
 441                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 442                   }}
 443
 444                 else skip_to_end (stepOnBy# buf 2#) s'
 445                 where
 446                     skip_to_end = nested_comment (lexer cont)
 447
 448                 -- special GHC extension: we grok cpp-style #line pragmas
 449             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 450                 case expandWhile# is_space (stepOn buf) of { buf1 ->
 451                 if is_digit (currentChar# buf1)
 452                         then line_prag next_line buf1 s'
 453                         else is_a_token
 454                 }
 455                 where
 456                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 457
 458                 -- tabs have been expanded beforehand
 459             c | is_space c -> tab y bol atbol (stepOn buf)
 460               | otherwise  -> is_a_token
 461
 462            where s' = s{loc = replaceSrcLine loc y,
 463                         bol = bol,
 464                        atbol = atbol}
 465
 466                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 467                             | otherwise    = lexToken cont glaexts buf s'
 468
 469 -- {-# LINE .. #-} pragmas.  yeuch.
 470 line_prag cont buf s@PState{loc=loc} =
 471   case expandWhile# is_space buf                of { buf1 ->
 472   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 473   -- subtract one: the line number refers to the *following* line.
 474   let real_line = line - 1 in
 475   case fromInteger real_line                    of { i@(I# l) ->
 476         -- ToDo, if no filename then we skip the newline.... d'oh
 477   case expandWhile# is_space buf2               of { buf3 ->
 478   case currentChar# buf3                        of
 479      '\"'#{-"-} ->
 480         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 481         let
 482             file = lexemeToFastString buf4
 483             new_buf = stepOn (stepOverLexeme buf4)
 484         in
 485         if nullFastString file
 486                 then cont new_buf s{loc = replaceSrcLine loc l}
 487                 else cont new_buf s{loc = mkSrcLoc file i}
 488         }
 489      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 490   }}}}
 491
 492 nested_comment :: P a -> P a
 493 nested_comment cont buf = loop buf
 494  where
 495    loop buf =
 496      case currentChar# buf of
 497         '\NUL'# | bufferExhausted (stepOn buf) ->
 498                 lexError "unterminated `{-'" buf -- -}
 499         '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
 500                 cont (stepOnBy# buf 2#)
 501
 502         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 503               nested_comment (nested_comment cont) (stepOnBy# buf 2#)
 504
 505         '\n'# -> \ s@PState{loc=loc} ->
 506                  let buf' = stepOn buf in
 507                  nested_comment cont buf'
 508                         s{loc = incSrcLine loc, bol = currentIndex# buf',
 509                           atbol = 1#}
 510
 511         _   -> nested_comment cont (stepOn buf)
 512
 513 -- When we are lexing the first token of a line, check whether we need to
 514 -- insert virtual semicolons or close braces due to layout.
 515
 516 lexBOL :: (Token -> P a) -> P a
 517 lexBOL cont buf s@(PState{
 518                     loc = loc,
 519                     glasgow_exts = glaexts,
 520                     bol = bol,
 521                     atbol = atbol,
 522                     context = ctx
 523                   }) =
 524         if need_close_curly then
 525                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 526                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 527         else if need_semi_colon then
 528                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 529                 cont ITsemi buf s{atbol = 0#}
 530         else
 531                 lexToken cont glaexts buf s{atbol = 0#}
 532   where
 533         col = currentIndex# buf -# bol
 534
 535         need_close_curly =
 536                 case ctx of
 537                         [] -> False
 538                         (i:_) -> case i of
 539                                     NoLayout -> False
 540                                     Layout n -> col <# n
 541         need_semi_colon =
 542                 case ctx of
 543                         [] -> False
 544                         (i:_) -> case i of
 545                                     NoLayout -> False
 546                                     Layout n -> col ==# n
 547
 548
 549 lexToken :: (Token -> P a) -> Int# -> P a
 550 lexToken cont glaexts buf =
 551  -- trace "lexToken" $
 552   case currentChar# buf of
 553
 554     -- special symbols ----------------------------------------------------
 555     '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
 556                 -> cont IToubxparen (setCurrentPos# buf 2#)
 557          | otherwise
 558                 -> cont IToparen (incLexeme buf)
 559
 560     ')'# -> cont ITcparen    (incLexeme buf)
 561     '['# -> cont ITobrack    (incLexeme buf)
 562     ']'# -> cont ITcbrack    (incLexeme buf)
 563     ','# -> cont ITcomma     (incLexeme buf)
 564     ';'# -> cont ITsemi      (incLexeme buf)
 565     '}'# -> \ s@PState{context = ctx} ->
 566             case ctx of
 567                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 568                 _        -> lexError "too many '}'s" buf s
 569     '|'# -> case lookAhead# buf 1# of
 570                  '}'#  | flag glaexts -> cont ITccurlybar
 571                                               (setCurrentPos# buf 2#)
 572                  _                    -> lex_sym cont (incLexeme buf)
 573
 574
 575     '#'# -> case lookAhead# buf 1# of
 576                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
 577                 '-'# -> case lookAhead# buf 2# of
 578                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 579                            _    -> lex_sym cont (incLexeme buf)
 580                 _    -> lex_sym cont (incLexeme buf)
 581
 582     '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
 583                 -> lex_cstring cont (setCurrentPos# buf 2#)
 584          | otherwise
 585                 -> cont ITbackquote (incLexeme buf)
 586
 587     '{'# ->     -- look for "{-##" special iface pragma
 588             case lookAhead# buf 1# of
 589            '|'# | flag glaexts
 590                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 591            '-'# -> case lookAhead# buf 2# of
 592                     '#'# -> case lookAhead# buf 3# of
 593                                 '#'# ->
 594                                    let (lexeme, buf')
 595                                           = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in
 596                                             cont (ITpragma lexeme) buf'
 597                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 598                     _    -> cont ITocurly (incLexeme buf)
 599            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 600
 601     -- strings/characters -------------------------------------------------
 602     '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
 603     '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
 604
 605     -- strictness and cpr pragmas and __scc treated specially.
 606     '_'# | flag glaexts ->
 607          case lookAhead# buf 1# of
 608            '_'# -> case lookAhead# buf 2# of
 609                     'S'# ->
 610                         lex_demand cont (stepOnUntil (not . isSpace)
 611                                         (stepOnBy# buf 3#)) -- past __S
 612                     'M'# ->
 613                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 614
 615                     's'# ->
 616                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 617                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 618                                Nothing   -> lex_id cont glaexts buf
 619                     _ -> lex_id cont glaexts buf
 620            _    -> lex_id cont glaexts buf
 621
 622         -- Hexadecimal and octal constants
 623     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 624                 -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
 625          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 626                 -> readNum (after_lexnum cont glaexts) buf' is_octdigit  8 oct_or_dec
 627         where ch   = lookAhead# buf 1#
 628               ch2  = lookAhead# buf 2#
 629               buf' = setCurrentPos# buf 2#
 630
 631     '\NUL'# ->
 632             if bufferExhausted (stepOn buf) then
 633                cont ITeof buf
 634             else
 635                trace "lexIface: misplaced NUL?" $
 636                cont (ITunknown "\NUL") (stepOn buf)
 637
 638     '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
 639             lex_ip cont (incLexeme buf)
 640     c | is_digit  c -> lex_num cont glaexts 0 buf
 641       | is_symbol c -> lex_sym cont buf
 642       | is_upper  c -> lex_con cont glaexts buf
 643       | is_ident  c -> lex_id  cont glaexts buf
 644       | otherwise   -> lexError "illegal character" buf
 645
 646 -- Int# is unlifted, and therefore faster than Bool for flags.
 647 {-# INLINE flag #-}
 648 flag :: Int# -> Bool
 649 flag 0# = False
 650 flag _  = True
 651
 652 -------------------------------------------------------------------------------
 653 -- Pragmas
 654
 655 lex_prag cont buf
 656   = case expandWhile# is_space buf of { buf1 ->
 657     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 658     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 659     case lookupUFM pragmaKeywordsFM lexeme of
 660         Just kw -> cont kw (mergeLexemes buf buf2)
 661         Nothing -> panic "lex_prag"
 662   }}
 663
 664 -------------------------------------------------------------------------------
 665 -- Strings & Chars
 666
 667 lex_string cont glaexts s buf
 668   = case currentChar# buf of
 669         '"'#{-"-} ->
 670            let buf' = incLexeme buf
 671                s' = mkFastStringNarrow (map chr (reverse s))
 672            in case currentChar# buf' of
 673                 '#'# | flag glaexts -> if all (<= 0xFF) s
 674                     then cont (ITprimstring s') (incLexeme buf')
 675                     else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 676                 _                   -> cont (ITstring s') buf'
 677
 678         -- ignore \& in a string, deal with string gaps
 679         '\\'# | next_ch `eqChar#` '&'#
 680                 -> lex_string cont glaexts s buf'
 681               | is_space next_ch
 682                 -> lex_stringgap cont glaexts s (incLexeme buf)
 683
 684             where next_ch = lookAhead# buf 1#
 685                   buf' = setCurrentPos# buf 2#
 686
 687         _ -> lex_char (lex_next_string cont s) glaexts buf
 688
 689 lex_stringgap cont glaexts s buf
 690   = let buf' = incLexeme buf in
 691     case currentChar# buf of
 692         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
 693                   st{loc = incSrcLine loc}
 694         '\\'# -> lex_string cont glaexts s buf'
 695         c | is_space c -> lex_stringgap cont glaexts s buf'
 696         other -> charError buf'
 697
 698 lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
 699
 700 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 701 lex_char cont glaexts buf
 702   = case currentChar# buf of
 703         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
 704         c | is_string c -> cont glaexts (I# (ord# c)) (incLexeme buf)
 705         other -> charError buf
 706
 707 char_end cont glaexts c buf
 708   = case currentChar# buf of
 709         '\''# -> let buf' = incLexeme buf in
 710                  case currentChar# buf' of
 711                         '#'# | flag glaexts
 712                                 -> cont (ITprimchar c) (incLexeme buf')
 713                         _       -> cont (ITchar c) buf'
 714         _     -> charError buf
 715
 716 lex_escape cont buf
 717   = let buf' = incLexeme buf in
 718     case currentChar# buf of
 719         'a'#       -> cont (ord '\a') buf'
 720         'b'#       -> cont (ord '\b') buf'
 721         'f'#       -> cont (ord '\f') buf'
 722         'n'#       -> cont (ord '\n') buf'
 723         'r'#       -> cont (ord '\r') buf'
 724         't'#       -> cont (ord '\t') buf'
 725         'v'#       -> cont (ord '\v') buf'
 726         '\\'#      -> cont (ord '\\') buf'
 727         '"'#       -> cont (ord '\"') buf'
 728         '\''#      -> cont (ord '\'') buf'
 729         '^'#       -> let c = currentChar# buf' in
 730                       if c `geChar#` '@'# && c `leChar#` '_'#
 731                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 732                         else charError buf'
 733
 734         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 735         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 736         x | is_digit x
 737                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 738
 739         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 740                                        Just buf2 <- [prefixMatch buf p] ] of
 741                             (c,buf2):_ -> cont (ord c) buf2
 742                             [] -> charError buf'
 743
 744 after_charnum cont i buf = cont (fromInteger i) buf
 745
 746 readNum cont buf is_digit base conv = read buf 0
 747   where read buf i
 748           = case currentChar# buf of { c ->
 749             if is_digit c
 750                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 751                 else cont i buf
 752             }
 753
 754 is_hexdigit c
 755         =  is_digit c
 756         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 757         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 758
 759 hex c | is_digit c = ord# c -# ord# '0'#
 760       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 761 oct_or_dec c = ord# c -# ord# '0'#
 762
 763 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 764
 765 to_lower c
 766   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 767         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 768   | otherwise = c
 769
 770 charError buf = lexError "error in character literal" buf
 771
 772 silly_escape_chars = [
 773         ("NUL", '\NUL'),
 774         ("SOH", '\SOH'),
 775         ("STX", '\STX'),
 776         ("ETX", '\ETX'),
 777         ("EOT", '\EOT'),
 778         ("ENQ", '\ENQ'),
 779         ("ACK", '\ACK'),
 780         ("BEL", '\BEL'),
 781         ("BS", '\BS'),
 782         ("HT", '\HT'),
 783         ("LF", '\LF'),
 784         ("VT", '\VT'),
 785         ("FF", '\FF'),
 786         ("CR", '\CR'),
 787         ("SO", '\SO'),
 788         ("SI", '\SI'),
 789         ("DLE", '\DLE'),
 790         ("DC1", '\DC1'),
 791         ("DC2", '\DC2'),
 792         ("DC3", '\DC3'),
 793         ("DC4", '\DC4'),
 794         ("NAK", '\NAK'),
 795         ("SYN", '\SYN'),
 796         ("ETB", '\ETB'),
 797         ("CAN", '\CAN'),
 798         ("EM", '\EM'),
 799         ("SUB", '\SUB'),
 800         ("ESC", '\ESC'),
 801         ("FS", '\FS'),
 802         ("GS", '\GS'),
 803         ("RS", '\RS'),
 804         ("US", '\US'),
 805         ("SP", '\SP'),
 806         ("DEL", '\DEL')
 807         ]
 808
 809 -------------------------------------------------------------------------------
 810
 811 lex_demand cont buf =
 812  case read_em [] buf of { (ls,buf') ->
 813  case currentChar# buf' of
 814    'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
 815    _    -> cont (ITstrict (ls, False)) buf'
 816  }
 817  where
 818    -- code snatched from Demand.lhs
 819   read_em acc buf =
 820    case currentChar# buf of
 821     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 822     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 823     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 824     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 825     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 826     ')'# -> (reverse acc, stepOn buf)
 827     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 828     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 829     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 830     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 831     _    -> (reverse acc, buf)
 832
 833   do_unpack new_or_data wrapper_unpacks acc buf
 834    = case read_em [] buf of
 835       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 836
 837
 838 ------------------
 839 lex_scc cont buf =
 840  case currentChar# buf of
 841   'C'# -> cont ITsccAllCafs (incLexeme buf)
 842   other -> cont ITscc buf
 843
 844 -----------------------------------------------------------------------------
 845 -- Numbers
 846
 847 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 848 lex_num cont glaexts acc buf =
 849  case scanNumLit acc buf of
 850      (acc',buf') ->
 851        case currentChar# buf' of
 852          '.'# | is_digit (lookAhead# buf' 1#) ->
 853              -- this case is not optimised at all, as the
 854              -- presence of floating point numbers in interface
 855              -- files is not that common. (ToDo)
 856             case expandWhile# is_digit (incLexeme buf') of
 857               buf2 -> -- points to first non digit char
 858
 859                 let l = case currentChar# buf2 of
 860                           'E'# -> do_exponent
 861                           'e'# -> do_exponent
 862                           _ -> buf2
 863
 864                     do_exponent
 865                         = let buf3 = incLexeme buf2 in
 866                           case currentChar# buf3 of
 867                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 868                                 '+'# -> expandWhile# is_digit (incLexeme buf3)
 869                                 x | is_digit x -> expandWhile# is_digit buf3
 870                                 _ -> buf2
 871
 872                     v = readRational__ (lexemeToString l)
 873
 874                 in case currentChar# l of -- glasgow exts only
 875                       '#'# | flag glaexts -> let l' = incLexeme l in
 876                               case currentChar# l' of
 877                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 878                                 _    -> cont (ITprimfloat  v) l'
 879                       _ -> cont (ITrational v) l
 880
 881          _ -> after_lexnum cont glaexts acc' buf'
 882
 883 after_lexnum cont glaexts i buf
 884   = case currentChar# buf of
 885         '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
 886         _    -> cont (ITinteger i) buf
 887
 888 -----------------------------------------------------------------------------
 889 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 890
 891 -- we lexemeToFastString on the bit between the ``''s, but include the
 892 -- quotes in the full lexeme.
 893
 894 lex_cstring cont buf =
 895  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 896    Just buf' -> cont (ITlitlit (lexemeToFastString
 897                                 (setCurrentPos# buf' (negateInt# 2#))))
 898                    (mergeLexemes buf buf')
 899    Nothing   -> lexError "unterminated ``" buf
 900
 901 -----------------------------------------------------------------------------
 902 -- identifiers, symbols etc.
 903
 904 lex_ip cont buf =
 905  case expandWhile# is_ident buf of
 906    buf' -> cont (ITipvarid lexeme) buf'
 907            where lexeme = lexemeToFastString buf'
 908
 909 lex_id cont glaexts buf =
 910  let buf1 = expandWhile# is_ident buf in
 911  seq buf1 $
 912
 913  case (if flag glaexts
 914         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 915         else buf1)                              of { buf' ->
 916
 917  let lexeme  = lexemeToFastString buf' in
 918
 919  case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 920         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 921                           cont kwd_token buf';
 922         Nothing        ->
 923
 924  let var_token = cont (ITvarid lexeme) buf' in
 925
 926  if not (flag glaexts)
 927    then var_token
 928    else
 929
 930  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 931         Just kwd_token -> cont kwd_token buf';
 932         Nothing        -> var_token
 933
 934  }}}
 935
 936 lex_sym cont buf =
 937  -- trace "lex_sym" $
 938  case expandWhile# is_symbol buf of
 939    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 940                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 941                                   cont kwd_token buf' ;
 942                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 943                                   cont (mk_var_token lexeme) buf'
 944            }
 945         where lexeme = lexemeToFastString buf'
 946
 947
 948 lex_con cont glaexts buf =
 949  -- trace ("con: "{-++unpackFS lexeme-}) $
 950  case expandWhile# is_ident buf          of { buf1 ->
 951  case slurp_trailing_hashes buf1 glaexts of { buf' ->
 952
 953  case currentChar# buf' of
 954      '.'# -> munch
 955      _    -> just_a_conid
 956
 957    where
 958     just_a_conid = cont (ITconid lexeme) buf'
 959     lexeme = lexemeToFastString buf'
 960     munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
 961  }}
 962
 963 lex_qid cont glaexts mod buf just_a_conid =
 964  -- trace ("quid: "{-++unpackFS lexeme-}) $
 965  case currentChar# buf of
 966   '['# ->       -- Special case for []
 967     case lookAhead# buf 1# of
 968      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 969      _    -> just_a_conid
 970
 971   '('# ->  -- Special case for (,,,)
 972            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 973     case lookAhead# buf 1# of
 974      '#'# | flag glaexts -> case lookAhead# buf 2# of
 975                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 976                                 just_a_conid
 977                 _    -> just_a_conid
 978      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 979      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 980      _    -> just_a_conid
 981
 982   '-'# -> case lookAhead# buf 1# of
 983             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 984             _    -> lex_id3 cont glaexts mod buf just_a_conid
 985   _    -> lex_id3 cont glaexts mod buf just_a_conid
 986
 987 lex_id3 cont glaexts mod buf just_a_conid
 988   | is_symbol (currentChar# buf) =
 989      let
 990         start_new_lexeme = stepOverLexeme buf
 991      in
 992      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 993      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 994      let
 995        lexeme  = lexemeToFastString buf'
 996         -- real lexeme is M.<sym>
 997        new_buf = mergeLexemes buf buf'
 998      in
 999      cont (mk_qvar_token mod lexeme) new_buf
1000         -- wrong, but arguably morally right: M... is now a qvarsym
1001      }
1002
1003   | otherwise   =
1004      let
1005         start_new_lexeme = stepOverLexeme buf
1006      in
1007      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1008      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1009      if emptyLexeme buf1
1010             then just_a_conid
1011             else
1012
1013      case slurp_trailing_hashes buf1 glaexts of { buf' ->
1014
1015      let
1016       lexeme      = lexemeToFastString buf'
1017       new_buf     = mergeLexemes buf buf'
1018       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1019      in
1020      case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1021             Nothing          -> is_a_qvarid ;
1022
1023             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1024                            -> is_a_qvarid          --  recognised as keywords here.
1025                            | otherwise
1026                            -> just_a_conid         -- avoid M.where etc.
1027      }}}
1028
1029 slurp_trailing_hashes buf glaexts
1030   | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
1031   | otherwise    = buf
1032
1033
1034 mk_var_token pk_str
1035   | is_upper f          = ITconid pk_str
1036   | is_ident f          = ITvarid pk_str
1037   | f `eqChar#` ':'#    = ITconsym pk_str
1038   | otherwise           = ITvarsym pk_str
1039   where
1040       (C# f) = _HEAD_ pk_str
1041       -- tl     = _TAIL_ pk_str
1042
1043 mk_qvar_token m token =
1044 -- trace ("mk_qvar ") $
1045  case mk_var_token token of
1046    ITconid n  -> ITqconid  (m,n)
1047    ITvarid n  -> ITqvarid  (m,n)
1048    ITconsym n -> ITqconsym (m,n)
1049    ITvarsym n -> ITqvarsym (m,n)
1050    _          -> ITunknown (show token)
1051 \end{code}
1052
1053 ----------------------------------------------------------------------------
1054 Horrible stuff for dealing with M.(,,,)
1055
1056 \begin{code}
1057 lex_tuple cont mod buf back_off =
1058   go 2 buf
1059   where
1060    go n buf =
1061     case currentChar# buf of
1062       ','# -> go (n+1) (stepOn buf)
1063       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1064       _    -> back_off
1065
1066 lex_ubx_tuple cont mod buf back_off =
1067   go 2 buf
1068   where
1069    go n buf =
1070     case currentChar# buf of
1071       ','# -> go (n+1) (stepOn buf)
1072       '#'# -> case lookAhead# buf 1# of
1073                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1074                                  (stepOnBy# buf 2#)
1075                 _    -> back_off
1076       _    -> back_off
1077 \end{code}
1078
1079 -----------------------------------------------------------------------------
1080 doDiscard rips along really fast, looking for a '##-}',
1081 indicating the end of the pragma we're skipping
1082
1083 \begin{code}
1084 doDiscard inStr buf =
1085  case currentChar# buf of
1086    '#'# | inStr ==# 0# ->
1087        case lookAhead# buf 1# of { '#'# ->
1088        case lookAhead# buf 2# of { '-'# ->
1089        case lookAhead# buf 3# of { '}'# ->
1090            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
1091         _    -> doDiscard inStr (incLexeme buf) };
1092         _    -> doDiscard inStr (incLexeme buf) };
1093         _    -> doDiscard inStr (incLexeme buf) }
1094
1095    '"'# ->
1096        let
1097         odd_slashes buf flg i# =
1098           case lookAhead# buf i# of
1099            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1100            _     -> flg
1101
1102         not_inStr = if inStr ==# 0# then 1# else 0#
1103        in
1104        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1105          '\\'# -> -- escaping something..
1106            if odd_slashes buf True (negateInt# 2#)
1107                 then  -- odd number of slashes, " is escaped.
1108                       doDiscard inStr (incLexeme buf)
1109                 else  -- even number of slashes, \ is escaped.
1110                       doDiscard not_inStr (incLexeme buf)
1111          _ -> doDiscard not_inStr (incLexeme buf)
1112
1113    '\''# | inStr ==# 0# ->
1114         case lookAhead# buf 1# of { '"'# ->
1115         case lookAhead# buf 2# of { '\''# ->
1116            doDiscard inStr (setCurrentPos# buf 3#);
1117         _ -> doDiscard inStr (incLexeme buf) };
1118         _ -> doDiscard inStr (incLexeme buf) }
1119
1120    _ -> doDiscard inStr (incLexeme buf)
1121
1122 \end{code}
1123
1124 -----------------------------------------------------------------------------
1125
1126 \begin{code}
1127 data LayoutContext
1128   = NoLayout
1129   | Layout Int#
1130
1131 data ParseResult a
1132   = POk PState a
1133   | PFailed Message
1134
1135 data PState = PState {
1136         loc           :: SrcLoc,
1137         glasgow_exts  :: Int#,
1138         bol           :: Int#,
1139         atbol         :: Int#,
1140         context       :: [LayoutContext]
1141      }
1142
1143 type P a = StringBuffer         -- Input string
1144         -> PState
1145         -> ParseResult a
1146
1147 returnP   :: a -> P a
1148 returnP a buf s = POk s a
1149
1150 thenP      :: P a -> (a -> P b) -> P b
1151 m `thenP` k = \ buf s ->
1152         case m buf s of
1153                 POk s1 a -> k a buf s1
1154                 PFailed err  -> PFailed err
1155
1156 thenP_     :: P a -> P b -> P b
1157 m `thenP_` k = m `thenP` \_ -> k
1158
1159 mapP :: (a -> P b) -> [a] -> P [b]
1160 mapP f [] = returnP []
1161 mapP f (a:as) =
1162      f a `thenP` \b ->
1163      mapP f as `thenP` \bs ->
1164      returnP (b:bs)
1165
1166 failP :: String -> P a
1167 failP msg buf s = PFailed (text msg)
1168
1169 failMsgP :: Message -> P a
1170 failMsgP msg buf s = PFailed msg
1171
1172 lexError :: String -> P a
1173 lexError str buf s@PState{ loc = loc }
1174   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1175
1176 getSrcLocP :: P SrcLoc
1177 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1178
1179 -- use a temporary SrcLoc for the duration of the argument
1180 setSrcLocP :: SrcLoc -> P a -> P a
1181 setSrcLocP new_loc p buf s =
1182   case p buf s{ loc=new_loc } of
1183         POk _ a   -> POk s a
1184         PFailed e -> PFailed e
1185
1186 getSrcFile :: P FAST_STRING
1187 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1188
1189 getContext :: P [LayoutContext]
1190 getContext buf s@(PState{ context = ctx }) = POk s ctx
1191
1192 pushContext :: LayoutContext -> P ()
1193 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1194
1195 {-
1196
1197 This special case in layoutOn is to handle layout contexts with are
1198 indented the same or less than the current context.  This is illegal
1199 according to the Haskell spec, so we have to arrange to close the
1200 current context.  eg.
1201
1202 class Foo a where
1203 class Bar a
1204
1205 after the first 'where', the sequence of events is:
1206
1207         - layout system inserts a ';' (column 0)
1208         - parser begins a new context at column 0
1209         - parser shifts ';' (legal empty declaration)
1210         - parser sees 'class': parse error (we're still in the inner context)
1211
1212 trouble is, by the time we know we need a new context, the lexer has
1213 already generated the ';'.  Hacky solution is as follows: since we
1214 know the column of the next token (it's the column number of the new
1215 context), we set the ACTUAL column number of the new context to this
1216 numer plus one.  Hence the next time the lexer is called, a '}' will
1217 be generated to close the new context straight away.  Furthermore, we
1218 have to set the atbol flag so that the ';' that the parser shifted as
1219 part of the new context is re-generated.
1220
1221 when the new context is *less* indented than the current one:
1222
1223 f = f where g = g where
1224 h = h
1225
1226         - current context: column 12.
1227         - on seeing 'h' (column 0), the layout system inserts '}'
1228         - parser starts a new context, column 0
1229         - parser sees '}', uses it to close new context
1230         - we still need to insert another '}' followed by a ';',
1231           hence the atbol trick.
1232
1233 There's also a special hack in here to deal with
1234
1235         do
1236            ....
1237            e $ do
1238            blah
1239
1240 i.e. the inner context is at the same indentation level as the outer
1241 context.  This is strictly illegal according to Haskell 98, but
1242 there's a lot of existing code using this style and it doesn't make
1243 any sense to disallow it, since empty 'do' lists don't make sense.
1244 -}
1245
1246 layoutOn :: Bool -> P ()
1247 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1248     let offset = lexemeIndex buf -# bol in
1249     case ctx of
1250         Layout prev_off : _
1251            | if strict then prev_off >=# offset else prev_off ># offset ->
1252                 --trace ("layout on, column: " ++  show (I# offset)) $
1253                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1254         other ->
1255                 --trace ("layout on, column: " ++  show (I# offset)) $
1256                 POk s{ context = Layout offset : ctx } ()
1257
1258 layoutOff :: P ()
1259 layoutOff buf s@(PState{ context = ctx }) =
1260     POk s{ context = NoLayout:ctx } ()
1261
1262 popContext :: P ()
1263 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1264   case ctx of
1265         (_:tl) -> POk s{ context = tl } ()
1266         []     -> PFailed (srcParseErr buf loc)
1267
1268 {-
1269  Note that if the name of the file we're processing ends
1270  with `hi-boot', we accept it on faith as having the right
1271  version. This is done so that .hi-boot files that comes
1272  with hsc don't have to be updated before every release,
1273  *and* it allows us to share .hi-boot files with versions
1274  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1275
1276  If the version number is 0, the checking is also turned off.
1277  (needed to deal with GHC.hi only!)
1278
1279  Once we can assume we're compiling with a version of ghc that
1280  supports interface file checking, we can drop the special
1281  pleading
1282 -}
1283 checkVersion :: Maybe Integer -> P ()
1284 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1285  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1286  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1287 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1288  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1289  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1290
1291 -----------------------------------------------------------------
1292
1293 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1294 ifaceParseErr s l
1295   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1296           text (lexemeToString s), char '\'']
1297
1298 ifaceVersionErr hi_vers l toks
1299   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1300           ptext SLIT("Expected"), int opt_HiVersion,
1301           ptext SLIT("found "), pp_version]
1302     where
1303      pp_version =
1304       case hi_vers of
1305         Nothing -> ptext SLIT("pre ghc-3.02 version")
1306         Just v  -> ptext SLIT("version") <+> integer v
1307
1308 -----------------------------------------------------------------------------
1309
1310 srcParseErr :: StringBuffer -> SrcLoc -> Message
1311 srcParseErr s l
1312   = hcat [ppr l,
1313           if null token
1314              then ptext SLIT(": parse error (possibly incorrect indentation)")
1315              else hcat [ptext SLIT(": parse error on input "),
1316                         char '`', text token, char '\'']
1317     ]
1318   where
1319         token = lexemeToString s
1320
1321 \end{code}