ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion, ExtFlags(..), mkPState,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, setSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import PrelNames        ( mkTupNameStr )
  40 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  41 import ForeignCall      ( Safety(..) )
  42 import NewDemand        ( StrictSig(..), Demand(..), Demands(..),
  43                           DmdResult(..), mkTopDmdType, evalDmd, lazyDmd )
  44 import UniqFM           ( listToUFM, lookupUFM )
  45 import BasicTypes       ( Boxity(..) )
  46 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  47                           replaceSrcLine, mkSrcLoc )
  48
  49 import ErrUtils         ( Message )
  50 import Outputable
  51
  52 import FastString
  53 import StringBuffer
  54 import GlaExts
  55 import Ctype
  56 import Char             ( chr, ord )
  57 import PrelRead         ( readRational__ ) -- Glasgow non-std
  58 import PrelBits         ( Bits(..) )       -- non-std
  59 \end{code}
  60
  61 %************************************************************************
  62 %*                                                                      *
  63 \subsection{Data types}
  64 %*                                                                      *
  65 %************************************************************************
  66
  67 The token data type, fairly un-interesting except from one
  68 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  69 strictness, unfolding etc).
  70
  71 The Idea/Observation here is that the renamer needs to scan through
  72 all of an interface file before it can continue. But only a fraction
  73 of the information contained in the file turns out to be useful, so
  74 delaying as much as possible of the scanning and parsing of an
  75 interface file Makes Sense (Heap profiles of the compiler
  76 show a reduction in heap usage by at least a factor of two,
  77 post-renamer).
  78
  79 Hence, the interface file lexer spots when value declarations are
  80 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  81 for the type and any other id info for that binding (unfolding, strictness
  82 etc). These constructors are applied to the result of lexing these sub-chunks.
  83
  84 The lexing of the type and id info is all done lazily, of course, so
  85 the scanning (and subsequent parsing) will be done *only* on the ids the
  86 renamer finds out that it is interested in. The rest will just be junked.
  87 Laziness, you know it makes sense :-)
  88
  89 \begin{code}
  90 data Token
  91   = ITas                        -- Haskell keywords
  92   | ITcase
  93   | ITclass
  94   | ITdata
  95   | ITdefault
  96   | ITderiving
  97   | ITdo
  98   | ITelse
  99   | IThiding
 100   | ITif
 101   | ITimport
 102   | ITin
 103   | ITinfix
 104   | ITinfixl
 105   | ITinfixr
 106   | ITinstance
 107   | ITlet
 108   | ITmodule
 109   | ITnewtype
 110   | ITof
 111   | ITqualified
 112   | ITthen
 113   | ITtype
 114   | ITwhere
 115   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 116
 117   | ITforall                    -- GHC extension keywords
 118   | ITforeign
 119   | ITexport
 120   | ITlabel
 121   | ITdynamic
 122   | ITsafe
 123   | ITunsafe
 124   | ITwith
 125   | ITstdcallconv
 126   | ITccallconv
 127   | ITdotnet
 128
 129   | ITinterface                 -- interface keywords
 130   | IT__export
 131   | ITdepends
 132   | IT__forall
 133   | ITletrec
 134   | ITcoerce
 135   | ITinlineMe
 136   | ITinlineCall
 137   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 138   | ITdefaultbranch
 139   | ITbottom
 140   | ITinteger_lit
 141   | ITfloat_lit
 142   | ITword_lit
 143   | ITword64_lit
 144   | ITint64_lit
 145   | ITrational_lit
 146   | ITaddr_lit
 147   | ITlabel_lit
 148   | ITlit_lit
 149   | ITstring_lit
 150   | ITtypeapp
 151   | ITusage
 152   | ITfuall
 153   | ITarity
 154   | ITspecialise
 155   | ITnocaf
 156   | ITunfold
 157   | ITstrict StrictSig
 158   | ITrules
 159   | ITcprinfo
 160   | ITdeprecated
 161   | IT__scc
 162   | ITsccAllCafs
 163
 164   | ITspecialise_prag           -- Pragmas
 165   | ITsource_prag
 166   | ITinline_prag
 167   | ITnoinline_prag
 168   | ITrules_prag
 169   | ITdeprecated_prag
 170   | ITline_prag
 171   | ITscc_prag
 172   | ITclose_prag
 173
 174   | ITdotdot                    -- reserved symbols
 175   | ITdcolon
 176   | ITequal
 177   | ITlam
 178   | ITvbar
 179   | ITlarrow
 180   | ITrarrow
 181   | ITat
 182   | ITtilde
 183   | ITdarrow
 184   | ITminus
 185   | ITbang
 186   | ITstar
 187   | ITdot
 188
 189   | ITbiglam                    -- GHC-extension symbols
 190
 191   | ITocurly                    -- special symbols
 192   | ITccurly
 193   | ITocurlybar                 -- {|, for type applications
 194   | ITccurlybar                 -- |}, for type applications
 195   | ITvccurly
 196   | ITobrack
 197   | ITopabrack                  -- [:, for parallel arrays with -fparr
 198   | ITcpabrack                  -- :], for parallel arrays with -fparr
 199   | ITcbrack
 200   | IToparen
 201   | ITcparen
 202   | IToubxparen
 203   | ITcubxparen
 204   | ITsemi
 205   | ITcomma
 206   | ITunderscore
 207   | ITbackquote
 208
 209   | ITvarid   FAST_STRING       -- identifiers
 210   | ITconid   FAST_STRING
 211   | ITvarsym  FAST_STRING
 212   | ITconsym  FAST_STRING
 213   | ITqvarid  (FAST_STRING,FAST_STRING)
 214   | ITqconid  (FAST_STRING,FAST_STRING)
 215   | ITqvarsym (FAST_STRING,FAST_STRING)
 216   | ITqconsym (FAST_STRING,FAST_STRING)
 217
 218   | ITdupipvarid   FAST_STRING  -- GHC extension: implicit param: ?x
 219   | ITsplitipvarid FAST_STRING  -- GHC extension: implicit param: %x
 220
 221   | ITpragma StringBuffer
 222
 223   | ITchar       Int
 224   | ITstring     FAST_STRING
 225   | ITinteger    Integer
 226   | ITrational   Rational
 227
 228   | ITprimchar   Int
 229   | ITprimstring FAST_STRING
 230   | ITprimint    Integer
 231   | ITprimfloat  Rational
 232   | ITprimdouble Rational
 233   | ITlitlit     FAST_STRING
 234
 235   | ITunknown String            -- Used when the lexer can't make sense of it
 236   | ITeof                       -- end of file token
 237   deriving Show -- debugging
 238 \end{code}
 239
 240 -----------------------------------------------------------------------------
 241 Keyword Lists
 242
 243 \begin{code}
 244 pragmaKeywordsFM = listToUFM $
 245       map (\ (x,y) -> (_PK_ x,y))
 246        [( "SPECIALISE", ITspecialise_prag ),
 247         ( "SPECIALIZE", ITspecialise_prag ),
 248         ( "SOURCE",     ITsource_prag ),
 249         ( "INLINE",     ITinline_prag ),
 250         ( "NOINLINE",   ITnoinline_prag ),
 251         ( "NOTINLINE",  ITnoinline_prag ),
 252         ( "LINE",       ITline_prag ),
 253         ( "RULES",      ITrules_prag ),
 254         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 255         ( "SCC",        ITscc_prag ),
 256         ( "DEPRECATED", ITdeprecated_prag )
 257         ]
 258
 259 haskellKeywordsFM = listToUFM $
 260       map (\ (x,y) -> (_PK_ x,y))
 261        [( "_",          ITunderscore ),
 262         ( "as",         ITas ),
 263         ( "case",       ITcase ),
 264         ( "class",      ITclass ),
 265         ( "data",       ITdata ),
 266         ( "default",    ITdefault ),
 267         ( "deriving",   ITderiving ),
 268         ( "do",         ITdo ),
 269         ( "else",       ITelse ),
 270         ( "hiding",     IThiding ),
 271         ( "if",         ITif ),
 272         ( "import",     ITimport ),
 273         ( "in",         ITin ),
 274         ( "infix",      ITinfix ),
 275         ( "infixl",     ITinfixl ),
 276         ( "infixr",     ITinfixr ),
 277         ( "instance",   ITinstance ),
 278         ( "let",        ITlet ),
 279         ( "module",     ITmodule ),
 280         ( "newtype",    ITnewtype ),
 281         ( "of",         ITof ),
 282         ( "qualified",  ITqualified ),
 283         ( "then",       ITthen ),
 284         ( "type",       ITtype ),
 285         ( "where",      ITwhere ),
 286         ( "_scc_",      ITscc )         -- ToDo: remove
 287      ]
 288
 289 isSpecial :: Token -> Bool
 290 -- If we see M.x, where x is a keyword, but
 291 -- is special, we treat is as just plain M.x,
 292 -- not as a keyword.
 293 isSpecial ITas          = True
 294 isSpecial IThiding      = True
 295 isSpecial ITqualified   = True
 296 isSpecial ITforall      = True
 297 isSpecial ITexport      = True
 298 isSpecial ITlabel       = True
 299 isSpecial ITdynamic     = True
 300 isSpecial ITsafe        = True
 301 isSpecial ITunsafe      = True
 302 isSpecial ITwith        = True
 303 isSpecial ITccallconv   = True
 304 isSpecial ITstdcallconv = True
 305 isSpecial _             = False
 306
 307 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 308 ghcExtensionKeywordsFM = listToUFM $
 309         map (\ (x,y) -> (_PK_ x,y))
 310      [  ( "forall",     ITforall ),
 311         ( "foreign",    ITforeign ),
 312         ( "export",     ITexport ),
 313         ( "label",      ITlabel ),
 314         ( "dynamic",    ITdynamic ),
 315         ( "safe",       ITunsafe ),
 316         ( "unsafe",     ITunsafe ),
 317         ( "with",       ITwith ),
 318         ( "stdcall",    ITstdcallconv),
 319         ( "ccall",      ITccallconv),
 320         ( "dotnet",     ITdotnet),
 321         ("_ccall_",     ITccall (False, False, PlayRisky)),
 322         ("_ccall_GC_",  ITccall (False, False, PlaySafe)),
 323         ("_casm_",      ITccall (False, True,  PlayRisky)),
 324         ("_casm_GC_",   ITccall (False, True,  PlaySafe)),
 325
 326         -- interface keywords
 327         ("__interface",         ITinterface),
 328         ("__export",            IT__export),
 329         ("__depends",           ITdepends),
 330         ("__forall",            IT__forall),
 331         ("__letrec",            ITletrec),
 332         ("__coerce",            ITcoerce),
 333         ("__inline_me",         ITinlineMe),
 334         ("__inline_call",       ITinlineCall),
 335         ("__depends",           ITdepends),
 336         ("__DEFAULT",           ITdefaultbranch),
 337         ("__bot",               ITbottom),
 338         ("__integer",           ITinteger_lit),
 339         ("__float",             ITfloat_lit),
 340         ("__int64",             ITint64_lit),
 341         ("__word",              ITword_lit),
 342         ("__word64",            ITword64_lit),
 343         ("__rational",          ITrational_lit),
 344         ("__addr",              ITaddr_lit),
 345         ("__label",             ITlabel_lit),
 346         ("__litlit",            ITlit_lit),
 347         ("__string",            ITstring_lit),
 348         ("__a",                 ITtypeapp),
 349         ("__u",                 ITusage),
 350         ("__fuall",             ITfuall),
 351         ("__A",                 ITarity),
 352         ("__P",                 ITspecialise),
 353         ("__C",                 ITnocaf),
 354         ("__R",                 ITrules),
 355         ("__D",                 ITdeprecated),
 356         ("__U",                 ITunfold),
 357
 358         ("__ccall",             ITccall (False, False, PlayRisky)),
 359         ("__ccall_GC",          ITccall (False, False, PlaySafe)),
 360         ("__dyn_ccall",         ITccall (True,  False, PlayRisky)),
 361         ("__dyn_ccall_GC",      ITccall (True,  False, PlaySafe)),
 362         ("__casm",              ITccall (False, True,  PlayRisky)),
 363         ("__dyn_casm",          ITccall (True,  True,  PlayRisky)),
 364         ("__casm_GC",           ITccall (False, True,  PlaySafe)),
 365         ("__dyn_casm_GC",       ITccall (True,  True,  PlaySafe)),
 366
 367         ("/\\",                 ITbiglam)
 368      ]
 369
 370
 371 haskellKeySymsFM = listToUFM $
 372         map (\ (x,y) -> (_PK_ x,y))
 373       [ ("..",          ITdotdot)
 374        ,("::",          ITdcolon)
 375        ,("=",           ITequal)
 376        ,("\\",          ITlam)
 377        ,("|",           ITvbar)
 378        ,("<-",          ITlarrow)
 379        ,("->",          ITrarrow)
 380        ,("@",           ITat)
 381        ,("~",           ITtilde)
 382        ,("=>",          ITdarrow)
 383        ,("-",           ITminus)
 384        ,("!",           ITbang)
 385        ,("*",           ITstar)
 386        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 387        ]
 388 \end{code}
 389
 390 -----------------------------------------------------------------------------
 391 The lexical analyser
 392
 393 Lexer state:
 394
 395         - (exts)  lexing a source with extensions, eg, an interface file or
 396                   with -fglasgow-exts
 397         - (bol)   pointer to beginning of line (for column calculations)
 398         - (buf)   pointer to beginning of token
 399         - (buf)   pointer to current char
 400         - (atbol) flag indicating whether we're at the beginning of a line
 401
 402 \begin{code}
 403 lexer :: (Token -> P a) -> P a
 404 lexer cont buf s@(PState{
 405                     loc = loc,
 406                     extsBitmap = exts,
 407                     bol = bol,
 408                     atbol = atbol,
 409                     context = ctx
 410                 })
 411
 412         -- first, start a new lexeme and lose all the whitespace
 413   =  _scc_ "Lexer"
 414   tab line bol atbol (stepOverLexeme buf)
 415   where
 416         line = srcLocLine loc
 417
 418         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 419           case currentChar# buf of
 420
 421             '\NUL'# ->
 422                    if bufferExhausted (stepOn buf)
 423                         then cont ITeof buf s'
 424                         else trace "lexer: misplaced NUL?" $
 425                              tab y bol atbol (stepOn buf)
 426
 427             '\n'# -> let buf' = stepOn buf
 428                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 429
 430                 -- find comments.  This got harder in Haskell 98.
 431             '-'# ->  let trundle n =
 432                           let next = lookAhead# buf n in
 433                           if next `eqChar#` '-'# then trundle (n +# 1#)
 434                           else if is_symbol next || n <# 2#
 435                                 then is_a_token
 436                                 else tab y bol atbol
 437                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 438                     in trundle 1#
 439
 440                 -- comments and pragmas.  We deal with LINE pragmas here,
 441                 -- and throw out any unrecognised pragmas as comments.  Any
 442                 -- pragmas we know about are dealt with later (after any layout
 443                 -- processing if necessary).
 444             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 445                 if lookAhead# buf 2# `eqChar#` '#'# then
 446                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 447                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 448                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 449                   let lexeme = mkFastString -- ToDo: too slow
 450                                   (map toUpper (lexemeToString buf2)) in
 451                   case lookupUFM pragmaKeywordsFM lexeme of
 452                         -- ignore RULES pragmas when -fglasgow-exts is off
 453                         Just ITrules_prag | not (glaExtsEnabled exts) ->
 454                            skip_to_end (stepOnBy# buf 2#) s'
 455                         Just ITline_prag ->
 456                            line_prag skip_to_end buf2 s'
 457                         Just other -> is_a_token
 458                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 459                   }}
 460
 461                 else skip_to_end (stepOnBy# buf 2#) s'
 462                 where
 463                     skip_to_end = skipNestedComment (lexer cont)
 464
 465                 -- special GHC extension: we grok cpp-style #line pragmas
 466             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 467                 let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
 468                            lookAhead# buf 2# `eqChar#` 'i'# &&
 469                            lookAhead# buf 3# `eqChar#` 'n'# &&
 470                            lookAhead# buf 4# `eqChar#` 'e'#  = stepOnBy# buf 5#
 471                          | otherwise = stepOn buf
 472                 in
 473                 case expandWhile# is_space buf1 of { buf2 ->
 474                 if is_digit (currentChar# buf2)
 475                         then line_prag next_line buf2 s'
 476                         else is_a_token
 477                 }
 478                 where
 479                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 480
 481                 -- tabs have been expanded beforehand
 482             c | is_space c -> tab y bol atbol (stepOn buf)
 483               | otherwise  -> is_a_token
 484
 485            where s' = s{loc = replaceSrcLine loc y,
 486                         bol = bol,
 487                        atbol = atbol}
 488
 489                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 490                             | otherwise    = lexToken cont exts buf s'
 491
 492 -- {-# LINE .. #-} pragmas.  yeuch.
 493 line_prag cont buf s@PState{loc=loc} =
 494   case expandWhile# is_space buf                of { buf1 ->
 495   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 496   -- subtract one: the line number refers to the *following* line.
 497   let real_line = line - 1 in
 498   case fromInteger real_line                    of { i@(I# l) ->
 499         -- ToDo, if no filename then we skip the newline.... d'oh
 500   case expandWhile# is_space buf2               of { buf3 ->
 501   case currentChar# buf3                        of
 502      '\"'#{-"-} ->
 503         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 504         let
 505             file = lexemeToFastString buf4
 506             new_buf = stepOn (stepOverLexeme buf4)
 507         in
 508         if nullFastString file
 509                 then cont new_buf s{loc = replaceSrcLine loc l}
 510                 else cont new_buf s{loc = mkSrcLoc file i}
 511         }
 512      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 513   }}}}
 514
 515 skipNestedComment :: P a -> P a
 516 skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
 517
 518 skipNestedComment' :: SrcLoc -> P a -> P a
 519 skipNestedComment' orig_loc cont buf = loop buf
 520  where
 521    loop buf =
 522      case currentChar# buf of
 523         '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
 524
 525         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 526               skipNestedComment
 527                 (skipNestedComment' orig_loc cont)
 528                 (stepOnBy# buf 2#)
 529
 530         '\n'# -> \ s@PState{loc=loc} ->
 531                  let buf' = stepOn buf in
 532                  loop buf' s{loc = incSrcLine loc,
 533                              bol = currentIndex# buf',
 534                              atbol = 1#}
 535
 536         -- pass the original SrcLoc to lexError so that the error is
 537         -- reported at the line it was originally on, not the line at
 538         -- the end of the file.
 539         '\NUL'# | bufferExhausted (stepOn buf) ->
 540                 \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
 541
 542         _   -> loop (stepOn buf)
 543
 544 -- When we are lexing the first token of a line, check whether we need to
 545 -- insert virtual semicolons or close braces due to layout.
 546
 547 lexBOL :: (Token -> P a) -> P a
 548 lexBOL cont buf s@(PState{
 549                     loc = loc,
 550                     extsBitmap = exts,
 551                     bol = bol,
 552                     atbol = atbol,
 553                     context = ctx
 554                   }) =
 555         if need_close_curly then
 556                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 557                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 558         else if need_semi_colon then
 559                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 560                 cont ITsemi buf s{atbol = 0#}
 561         else
 562                 lexToken cont exts buf s{atbol = 0#}
 563   where
 564         col = currentIndex# buf -# bol
 565
 566         need_close_curly =
 567                 case ctx of
 568                         [] -> False
 569                         (i:_) -> case i of
 570                                     NoLayout -> False
 571                                     Layout n -> col <# n
 572         need_semi_colon =
 573                 case ctx of
 574                         [] -> False
 575                         (i:_) -> case i of
 576                                     NoLayout -> False
 577                                     Layout n -> col ==# n
 578
 579
 580 lexToken :: (Token -> P a) -> Int# -> P a
 581 lexToken cont exts buf =
 582 -- trace "lexToken" $
 583   case currentChar# buf of
 584
 585     -- special symbols ----------------------------------------------------
 586     '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
 587                 -> cont IToubxparen (setCurrentPos# buf 2#)
 588          | otherwise
 589                 -> cont IToparen (incLexeme buf)
 590
 591     ')'# -> cont ITcparen    (incLexeme buf)
 592     '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
 593             cont ITopabrack  (setCurrentPos# buf 2#)
 594          | otherwise ->
 595             cont ITobrack    (incLexeme buf)
 596     ']'# -> cont ITcbrack    (incLexeme buf)
 597     ','# -> cont ITcomma     (incLexeme buf)
 598     ';'# -> cont ITsemi      (incLexeme buf)
 599     '}'# -> \ s@PState{context = ctx} ->
 600             case ctx of
 601                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 602                 _        -> lexError "too many '}'s" buf s
 603     '|'# -> case lookAhead# buf 1# of
 604                  '}'#  | glaExtsEnabled exts -> cont ITccurlybar
 605                                                      (setCurrentPos# buf 2#)
 606                  _                           -> lex_sym cont (incLexeme buf)
 607     ':'# -> case lookAhead# buf 1# of
 608                  ']'#  | parrEnabled exts    -> cont ITcpabrack
 609                                                      (setCurrentPos# buf 2#)
 610                  _                           -> lex_sym cont (incLexeme buf)
 611
 612
 613     '#'# -> case lookAhead# buf 1# of
 614                 ')'#  | glaExtsEnabled exts
 615                      -> cont ITcubxparen (setCurrentPos# buf 2#)
 616                 '-'# -> case lookAhead# buf 2# of
 617                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 618                            _    -> lex_sym cont (incLexeme buf)
 619                 _    -> lex_sym cont (incLexeme buf)
 620
 621     '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
 622                 -> lex_cstring cont (setCurrentPos# buf 2#)
 623          | otherwise
 624                 -> cont ITbackquote (incLexeme buf)
 625
 626     '{'# ->     -- look for "{-##" special iface pragma   -- for Emacs: -}
 627             case lookAhead# buf 1# of
 628            '|'# | glaExtsEnabled exts
 629                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 630            '-'# -> case lookAhead# buf 2# of
 631                     '#'# -> case lookAhead# buf 3# of
 632                                 '#'# ->
 633                                    lexPragma
 634                                       cont
 635                                       (\ cont lexeme buf' -> cont (ITpragma lexeme) buf')
 636                                       0#
 637                                       (stepOnBy# (stepOverLexeme buf) 4#)
 638                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 639                     _    -> cont ITocurly (incLexeme buf)
 640            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 641
 642     -- strings/characters -------------------------------------------------
 643     '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf)
 644     '\''#      -> lex_char (char_end cont) exts (incLexeme buf)
 645
 646     -- strictness and cpr pragmas and __scc treated specially.
 647     '_'# | glaExtsEnabled exts ->
 648          case lookAhead# buf 1# of
 649            '_'# -> case lookAhead# buf 2# of
 650                     'S'# ->
 651                         lex_demand cont (stepOnUntil (not . isSpace)
 652                                         (stepOnBy# buf 3#)) -- past __S
 653                     'M'# ->
 654                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 655
 656                     's'# ->
 657                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 658                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 659                                Nothing   -> lex_id cont exts buf
 660                     _ -> lex_id cont exts buf
 661            _    -> lex_id cont exts buf
 662
 663         -- Hexadecimal and octal constants
 664     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 665                 -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
 666          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 667                 -> readNum (after_lexnum cont exts) buf' is_octdigit  8 oct_or_dec
 668         where ch   = lookAhead# buf 1#
 669               ch2  = lookAhead# buf 2#
 670               buf' = setCurrentPos# buf 2#
 671
 672     '\NUL'# ->
 673             if bufferExhausted (stepOn buf) then
 674                cont ITeof buf
 675             else
 676                trace "lexIface: misplaced NUL?" $
 677                cont (ITunknown "\NUL") (stepOn buf)
 678
 679     '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 680             lex_ip ITdupipvarid cont (incLexeme buf)
 681     '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 682             lex_ip ITsplitipvarid cont (incLexeme buf)
 683     c | is_digit  c -> lex_num cont exts 0 buf
 684       | is_symbol c -> lex_sym cont buf
 685       | is_upper  c -> lex_con cont exts buf
 686       | is_ident  c -> lex_id  cont exts buf
 687       | otherwise   -> lexError "illegal character" buf
 688
 689 -- Int# is unlifted, and therefore faster than Bool for flags.
 690 {-# INLINE flag #-}
 691 flag :: Int# -> Bool
 692 flag 0# = False
 693 flag _  = True
 694
 695 -------------------------------------------------------------------------------
 696 -- Pragmas
 697
 698 lex_prag cont buf
 699   = case expandWhile# is_space buf of { buf1 ->
 700     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 701     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 702     case lookupUFM pragmaKeywordsFM lexeme of
 703         Just kw -> cont kw (mergeLexemes buf buf2)
 704         Nothing -> panic "lex_prag"
 705   }}
 706
 707 -------------------------------------------------------------------------------
 708 -- Strings & Chars
 709
 710 lex_string cont exts s buf
 711   = case currentChar# buf of
 712         '"'#{-"-} ->
 713            let buf' = incLexeme buf
 714                s' = mkFastStringNarrow (map chr (reverse s))
 715            in case currentChar# buf' of
 716                 '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s
 717                     then cont (ITprimstring s') (incLexeme buf')
 718                     else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 719                 _                   -> cont (ITstring s') buf'
 720
 721         -- ignore \& in a string, deal with string gaps
 722         '\\'# | next_ch `eqChar#` '&'#
 723                 -> lex_string cont exts s buf'
 724               | is_space next_ch
 725                 -> lex_stringgap cont exts s (incLexeme buf)
 726
 727             where next_ch = lookAhead# buf 1#
 728                   buf' = setCurrentPos# buf 2#
 729
 730         _ -> lex_char (lex_next_string cont s) exts buf
 731
 732 lex_stringgap cont exts s buf
 733   = let buf' = incLexeme buf in
 734     case currentChar# buf of
 735         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
 736                   st{loc = incSrcLine loc}
 737         '\\'# -> lex_string cont exts s buf'
 738         c | is_space c -> lex_stringgap cont exts s buf'
 739         other -> charError buf'
 740
 741 lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
 742
 743 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 744 lex_char cont exts buf
 745   = case currentChar# buf of
 746         '\\'# -> lex_escape (cont exts) (incLexeme buf)
 747         c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf)
 748         other -> charError buf
 749
 750 char_end cont exts c buf
 751   = case currentChar# buf of
 752         '\''# -> let buf' = incLexeme buf in
 753                  case currentChar# buf' of
 754                         '#'# | glaExtsEnabled exts
 755                                 -> cont (ITprimchar c) (incLexeme buf')
 756                         _       -> cont (ITchar c) buf'
 757         _     -> charError buf
 758
 759 lex_escape cont buf
 760   = let buf' = incLexeme buf in
 761     case currentChar# buf of
 762         'a'#       -> cont (ord '\a') buf'
 763         'b'#       -> cont (ord '\b') buf'
 764         'f'#       -> cont (ord '\f') buf'
 765         'n'#       -> cont (ord '\n') buf'
 766         'r'#       -> cont (ord '\r') buf'
 767         't'#       -> cont (ord '\t') buf'
 768         'v'#       -> cont (ord '\v') buf'
 769         '\\'#      -> cont (ord '\\') buf'
 770         '"'#       -> cont (ord '\"') buf'
 771         '\''#      -> cont (ord '\'') buf'
 772         '^'#       -> let c = currentChar# buf' in
 773                       if c `geChar#` '@'# && c `leChar#` '_'#
 774                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 775                         else charError buf'
 776
 777         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 778         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 779         x | is_digit x
 780                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 781
 782         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 783                                        Just buf2 <- [prefixMatch buf p] ] of
 784                             (c,buf2):_ -> cont (ord c) buf2
 785                             [] -> charError buf'
 786
 787 after_charnum cont i buf
 788   = if i >= 0 && i <= 0x10FFFF
 789         then cont (fromInteger i) buf
 790         else charError buf
 791
 792 readNum cont buf is_digit base conv = read buf 0
 793   where read buf i
 794           = case currentChar# buf of { c ->
 795             if is_digit c
 796                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 797                 else cont i buf
 798             }
 799
 800 is_hexdigit c
 801         =  is_digit c
 802         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 803         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 804
 805 hex c | is_digit c = ord# c -# ord# '0'#
 806       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 807 oct_or_dec c = ord# c -# ord# '0'#
 808
 809 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 810
 811 to_lower c
 812   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 813         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 814   | otherwise = c
 815
 816 charError buf = lexError "error in character literal" buf
 817
 818 silly_escape_chars = [
 819         ("NUL", '\NUL'),
 820         ("SOH", '\SOH'),
 821         ("STX", '\STX'),
 822         ("ETX", '\ETX'),
 823         ("EOT", '\EOT'),
 824         ("ENQ", '\ENQ'),
 825         ("ACK", '\ACK'),
 826         ("BEL", '\BEL'),
 827         ("BS", '\BS'),
 828         ("HT", '\HT'),
 829         ("LF", '\LF'),
 830         ("VT", '\VT'),
 831         ("FF", '\FF'),
 832         ("CR", '\CR'),
 833         ("SO", '\SO'),
 834         ("SI", '\SI'),
 835         ("DLE", '\DLE'),
 836         ("DC1", '\DC1'),
 837         ("DC2", '\DC2'),
 838         ("DC3", '\DC3'),
 839         ("DC4", '\DC4'),
 840         ("NAK", '\NAK'),
 841         ("SYN", '\SYN'),
 842         ("ETB", '\ETB'),
 843         ("CAN", '\CAN'),
 844         ("EM", '\EM'),
 845         ("SUB", '\SUB'),
 846         ("ESC", '\ESC'),
 847         ("FS", '\FS'),
 848         ("GS", '\GS'),
 849         ("RS", '\RS'),
 850         ("US", '\US'),
 851         ("SP", '\SP'),
 852         ("DEL", '\DEL')
 853         ]
 854
 855 -------------------------------------------------------------------------------
 856
 857 lex_demand cont buf =
 858  case read_em [] buf of { (ls,buf') ->
 859  case currentChar# buf' of
 860    'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf')
 861    'm'# -> cont (ITstrict (StrictSig (mkTopDmdType ls RetCPR))) (incLexeme buf')
 862    _    -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf'
 863  }
 864  where
 865   read_em acc buf =
 866    case currentChar# buf of
 867     'T'# -> read_em (Top     : acc) (stepOn buf)
 868     'L'# -> read_em (lazyDmd : acc) (stepOn buf)
 869     'A'# -> read_em (Abs     : acc) (stepOn buf)
 870     'V'# -> read_em (evalDmd : acc) (stepOn buf)        -- Temporary, until
 871                                                         -- we've recompiled prelude etc
 872     'C'# -> do_unary Call  acc (stepOnBy# buf 2#)       -- Skip 'C('
 873
 874     'U'# -> do_seq1 Eval         acc (stepOnBy# buf 1#)
 875     'D'# -> do_seq1 Defer        acc (stepOnBy# buf 1#)
 876     'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#)
 877
 878     _    -> (reverse acc, buf)
 879
 880   do_seq1 fn acc buf
 881     = case currentChar# buf of
 882         '('# -> do_seq2 fn acc (stepOnBy# buf 1#)
 883         _    -> read_em (fn (Poly Abs) : acc) buf
 884
 885   do_seq2 fn acc buf
 886     = case read_em [] buf of { (dmds, buf) ->
 887       case currentChar# buf of
 888         ')'# -> read_em (fn (Prod dmds) : acc)
 889                         (stepOn buf)
 890         '*'# -> ASSERT( length dmds == 1 )
 891                 read_em (fn (Poly (head dmds)) : acc)
 892                         (stepOnBy# buf 2#)      -- Skip '*)'
 893       }
 894
 895   do_unary fn acc buf
 896     = case read_em [] buf of
 897         ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest)   -- Skip ')'
 898
 899 ------------------
 900 lex_scc cont buf =
 901  case currentChar# buf of
 902   'C'# -> cont ITsccAllCafs (incLexeme buf)
 903   other -> cont ITscc buf
 904
 905 -----------------------------------------------------------------------------
 906 -- Numbers
 907
 908 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 909 lex_num cont exts acc buf =
 910  case scanNumLit acc buf of
 911      (acc',buf') ->
 912        case currentChar# buf' of
 913          '.'# | is_digit (lookAhead# buf' 1#) ->
 914              -- this case is not optimised at all, as the
 915              -- presence of floating point numbers in interface
 916              -- files is not that common. (ToDo)
 917             case expandWhile# is_digit (incLexeme buf') of
 918               buf2 -> -- points to first non digit char
 919
 920                 let l = case currentChar# buf2 of
 921                           'E'# -> do_exponent
 922                           'e'# -> do_exponent
 923                           _ -> buf2
 924
 925                     do_exponent
 926                         = let buf3 = incLexeme buf2 in
 927                           case currentChar# buf3 of
 928                                 '-'# | is_digit (lookAhead# buf 1#)
 929                                    -> expandWhile# is_digit (incLexeme buf3)
 930                                 '+'# | is_digit (lookAhead# buf 1#)
 931                                    -> expandWhile# is_digit (incLexeme buf3)
 932                                 x | is_digit x -> expandWhile# is_digit buf3
 933                                 _ -> buf2
 934
 935                     v = readRational__ (lexemeToString l)
 936
 937                 in case currentChar# l of -- glasgow exts only
 938                       '#'# | glaExtsEnabled exts -> let l' = incLexeme l in
 939                               case currentChar# l' of
 940                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 941                                 _    -> cont (ITprimfloat  v) l'
 942                       _ -> cont (ITrational v) l
 943
 944          _ -> after_lexnum cont exts acc' buf'
 945
 946 after_lexnum cont exts i buf
 947   = case currentChar# buf of
 948         '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf)
 949         _                          -> cont (ITinteger i) buf
 950
 951 -----------------------------------------------------------------------------
 952 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 953
 954 -- we lexemeToFastString on the bit between the ``''s, but include the
 955 -- quotes in the full lexeme.
 956
 957 lex_cstring cont buf =
 958  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 959    Just buf' -> cont (ITlitlit (lexemeToFastString
 960                                 (setCurrentPos# buf' (negateInt# 2#))))
 961                    (mergeLexemes buf buf')
 962    Nothing   -> lexError "unterminated ``" buf
 963
 964 -----------------------------------------------------------------------------
 965 -- identifiers, symbols etc.
 966
 967 lex_ip ip_constr cont buf =
 968  case expandWhile# is_ident buf of
 969    buf' -> cont (ip_constr (tailFS lexeme)) buf'
 970         where lexeme = lexemeToFastString buf'
 971
 972 lex_id cont exts buf =
 973  let buf1 = expandWhile# is_ident buf in
 974  seq buf1 $
 975
 976  case (if glaExtsEnabled exts
 977         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 978         else buf1)                              of { buf' ->
 979
 980  let lexeme  = lexemeToFastString buf' in
 981
 982  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 983         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 984                           cont kwd_token buf';
 985         Nothing        ->
 986
 987  let var_token = cont (ITvarid lexeme) buf' in
 988
 989  if not (glaExtsEnabled exts)
 990    then var_token
 991    else
 992
 993  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 994         Just kwd_token -> cont kwd_token buf';
 995         Nothing        -> var_token
 996
 997  }}}
 998
 999 lex_sym cont buf =
1000  -- trace "lex_sym" $
1001  case expandWhile# is_symbol buf of
1002    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
1003                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
1004                                   cont kwd_token buf' ;
1005                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
1006                                   cont (mk_var_token lexeme) buf'
1007            }
1008         where lexeme = lexemeToFastString buf'
1009
1010
1011 -- lex_con recursively collects components of a qualified identifer.
1012 -- The argument buf is the StringBuffer representing the lexeme
1013 -- identified so far, where the next character is upper-case.
1014
1015 lex_con cont exts buf =
1016  -- trace ("con: "{-++unpackFS lexeme-}) $
1017  let empty_buf = stepOverLexeme buf in
1018  case expandWhile# is_ident empty_buf of { buf1 ->
1019  case slurp_trailing_hashes buf1 exts of { con_buf ->
1020
1021  let all_buf = mergeLexemes buf con_buf
1022
1023      con_lexeme = lexemeToFastString con_buf
1024      mod_lexeme = lexemeToFastString (decLexeme buf)
1025      all_lexeme = lexemeToFastString all_buf
1026
1027      just_a_conid
1028         | emptyLexeme buf = cont (ITconid con_lexeme)               all_buf
1029         | otherwise       = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
1030  in
1031
1032  case currentChar# all_buf of
1033      '.'# -> maybe_qualified cont exts all_lexeme
1034                 (incLexeme all_buf) just_a_conid
1035      _    -> just_a_conid
1036   }}
1037
1038
1039 maybe_qualified cont exts mod buf just_a_conid =
1040  -- trace ("qid: "{-++unpackFS lexeme-}) $
1041  case currentChar# buf of
1042   '['# ->       -- Special case for []
1043     case lookAhead# buf 1# of
1044      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
1045      _    -> just_a_conid
1046
1047   '('# ->  -- Special case for (,,,)
1048            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
1049     case lookAhead# buf 1# of
1050      '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
1051                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
1052                                 just_a_conid
1053                 _    -> just_a_conid
1054      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
1055      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
1056      _    -> just_a_conid
1057
1058   '-'# -> case lookAhead# buf 1# of
1059             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
1060             _    -> lex_id3 cont exts mod buf just_a_conid
1061
1062   _    -> lex_id3 cont exts mod buf just_a_conid
1063
1064
1065 lex_id3 cont exts mod buf just_a_conid
1066   | is_upper (currentChar# buf) =
1067      lex_con cont exts buf
1068
1069   | is_symbol (currentChar# buf) =
1070      let
1071         start_new_lexeme = stepOverLexeme buf
1072      in
1073      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
1074      case expandWhile# is_symbol start_new_lexeme of { buf' ->
1075      let
1076        lexeme  = lexemeToFastString buf'
1077         -- real lexeme is M.<sym>
1078        new_buf = mergeLexemes buf buf'
1079      in
1080      cont (mk_qvar_token mod lexeme) new_buf
1081         -- wrong, but arguably morally right: M... is now a qvarsym
1082      }
1083
1084   | otherwise   =
1085      let
1086         start_new_lexeme = stepOverLexeme buf
1087      in
1088      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1089      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1090      if emptyLexeme buf1
1091             then just_a_conid
1092             else
1093
1094      case slurp_trailing_hashes buf1 exts of { buf' ->
1095
1096      let
1097       lexeme      = lexemeToFastString buf'
1098       new_buf     = mergeLexemes buf buf'
1099       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1100      in
1101      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1102             Nothing          -> is_a_qvarid ;
1103
1104             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1105                            -> is_a_qvarid          --  recognised as keywords here.
1106                            | otherwise
1107                            -> just_a_conid         -- avoid M.where etc.
1108      }}}
1109
1110 slurp_trailing_hashes buf exts
1111   | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
1112   | otherwise           = buf
1113
1114
1115 mk_var_token pk_str
1116   | is_upper f          = ITconid pk_str
1117   | is_ident f          = ITvarid pk_str
1118   | f `eqChar#` ':'#    = ITconsym pk_str
1119   | otherwise           = ITvarsym pk_str
1120   where
1121       (C# f) = _HEAD_ pk_str
1122       -- tl     = _TAIL_ pk_str
1123
1124 mk_qvar_token m token =
1125 -- trace ("mk_qvar ") $
1126  case mk_var_token token of
1127    ITconid n  -> ITqconid  (m,n)
1128    ITvarid n  -> ITqvarid  (m,n)
1129    ITconsym n -> ITqconsym (m,n)
1130    ITvarsym n -> ITqvarsym (m,n)
1131    _          -> ITunknown (show token)
1132 \end{code}
1133
1134 ----------------------------------------------------------------------------
1135 Horrible stuff for dealing with M.(,,,)
1136
1137 \begin{code}
1138 lex_tuple cont mod buf back_off =
1139   go 2 buf
1140   where
1141    go n buf =
1142     case currentChar# buf of
1143       ','# -> go (n+1) (stepOn buf)
1144       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1145       _    -> back_off
1146
1147 lex_ubx_tuple cont mod buf back_off =
1148   go 2 buf
1149   where
1150    go n buf =
1151     case currentChar# buf of
1152       ','# -> go (n+1) (stepOn buf)
1153       '#'# -> case lookAhead# buf 1# of
1154                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1155                                  (stepOnBy# buf 2#)
1156                 _    -> back_off
1157       _    -> back_off
1158 \end{code}
1159
1160 -----------------------------------------------------------------------------
1161 'lexPragma' rips along really fast, looking for a '##-}',
1162 indicating the end of the pragma we're skipping
1163
1164 \begin{code}
1165 lexPragma cont contf inStr buf =
1166  case currentChar# buf of
1167    '#'# | inStr ==# 0# ->
1168        case lookAhead# buf 1# of { '#'# ->
1169        case lookAhead# buf 2# of { '-'# ->
1170        case lookAhead# buf 3# of { '}'# ->
1171            contf cont (lexemeToBuffer buf)
1172                       (stepOverLexeme (setCurrentPos# buf 4#));
1173         _    -> lexPragma cont contf inStr (incLexeme buf) };
1174         _    -> lexPragma cont contf inStr (incLexeme buf) };
1175         _    -> lexPragma cont contf inStr (incLexeme buf) }
1176
1177    '"'# ->
1178        let
1179         odd_slashes buf flg i# =
1180           case lookAhead# buf i# of
1181            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1182            _     -> flg
1183
1184         not_inStr = if inStr ==# 0# then 1# else 0#
1185        in
1186        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1187          '\\'# -> -- escaping something..
1188            if odd_slashes buf True (negateInt# 2#)
1189                 then  -- odd number of slashes, " is escaped.
1190                       lexPragma cont contf inStr (incLexeme buf)
1191                 else  -- even number of slashes, \ is escaped.
1192                       lexPragma cont contf not_inStr (incLexeme buf)
1193          _ -> lexPragma cont contf not_inStr (incLexeme buf)
1194
1195    '\''# | inStr ==# 0# ->
1196         case lookAhead# buf 1# of { '"'# ->
1197         case lookAhead# buf 2# of { '\''# ->
1198            lexPragma cont contf inStr (setCurrentPos# buf 3#);
1199         _ -> lexPragma cont contf inStr (incLexeme buf) };
1200         _ -> lexPragma cont contf inStr (incLexeme buf) }
1201
1202     -- a sign that the input is ill-formed, since pragmas are
1203     -- assumed to always be properly closed (in .hi files).
1204    '\NUL'# -> trace "lexPragma: unexpected end-of-file" $
1205               cont (ITunknown "\NUL") buf
1206
1207    _ -> lexPragma cont contf inStr (incLexeme buf)
1208
1209 \end{code}
1210
1211 -----------------------------------------------------------------------------
1212
1213 \begin{code}
1214 data LayoutContext
1215   = NoLayout
1216   | Layout Int#
1217
1218 data ParseResult a
1219   = POk PState a
1220   | PFailed Message
1221
1222 data PState = PState {
1223         loc        :: SrcLoc,
1224         extsBitmap :: Int#,     -- bitmap that determines permitted extensions
1225         bol        :: Int#,
1226         atbol      :: Int#,
1227         context    :: [LayoutContext]
1228      }
1229
1230 type P a = StringBuffer         -- Input string
1231         -> PState
1232         -> ParseResult a
1233
1234 returnP   :: a -> P a
1235 returnP a buf s = POk s a
1236
1237 thenP      :: P a -> (a -> P b) -> P b
1238 m `thenP` k = \ buf s ->
1239         case m buf s of
1240                 POk s1 a -> k a buf s1
1241                 PFailed err  -> PFailed err
1242
1243 thenP_     :: P a -> P b -> P b
1244 m `thenP_` k = m `thenP` \_ -> k
1245
1246 mapP :: (a -> P b) -> [a] -> P [b]
1247 mapP f [] = returnP []
1248 mapP f (a:as) =
1249      f a `thenP` \b ->
1250      mapP f as `thenP` \bs ->
1251      returnP (b:bs)
1252
1253 failP :: String -> P a
1254 failP msg buf s = PFailed (text msg)
1255
1256 failMsgP :: Message -> P a
1257 failMsgP msg buf s = PFailed msg
1258
1259 lexError :: String -> P a
1260 lexError str buf s@PState{ loc = loc }
1261   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1262
1263 getSrcLocP :: P SrcLoc
1264 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1265
1266 -- use a temporary SrcLoc for the duration of the argument
1267 setSrcLocP :: SrcLoc -> P a -> P a
1268 setSrcLocP new_loc p buf s =
1269   case p buf s{ loc=new_loc } of
1270       POk _ a   -> POk s a
1271       PFailed e -> PFailed e
1272
1273 getSrcFile :: P FAST_STRING
1274 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1275
1276 pushContext :: LayoutContext -> P ()
1277 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1278
1279 {-
1280
1281 This special case in layoutOn is to handle layout contexts with are
1282 indented the same or less than the current context.  This is illegal
1283 according to the Haskell spec, so we have to arrange to close the
1284 current context.  eg.
1285
1286 class Foo a where
1287 class Bar a
1288
1289 after the first 'where', the sequence of events is:
1290
1291         - layout system inserts a ';' (column 0)
1292         - parser begins a new context at column 0
1293         - parser shifts ';' (legal empty declaration)
1294         - parser sees 'class': parse error (we're still in the inner context)
1295
1296 trouble is, by the time we know we need a new context, the lexer has
1297 already generated the ';'.  Hacky solution is as follows: since we
1298 know the column of the next token (it's the column number of the new
1299 context), we set the ACTUAL column number of the new context to this
1300 numer plus one.  Hence the next time the lexer is called, a '}' will
1301 be generated to close the new context straight away.  Furthermore, we
1302 have to set the atbol flag so that the ';' that the parser shifted as
1303 part of the new context is re-generated.
1304
1305 when the new context is *less* indented than the current one:
1306
1307 f = f where g = g where
1308 h = h
1309
1310         - current context: column 12.
1311         - on seeing 'h' (column 0), the layout system inserts '}'
1312         - parser starts a new context, column 0
1313         - parser sees '}', uses it to close new context
1314         - we still need to insert another '}' followed by a ';',
1315           hence the atbol trick.
1316
1317 There's also a special hack in here to deal with
1318
1319         do
1320            ....
1321            e $ do
1322            blah
1323
1324 i.e. the inner context is at the same indentation level as the outer
1325 context.  This is strictly illegal according to Haskell 98, but
1326 there's a lot of existing code using this style and it doesn't make
1327 any sense to disallow it, since empty 'do' lists don't make sense.
1328 -}
1329
1330 layoutOn :: Bool -> P ()
1331 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1332     let offset = lexemeIndex buf -# bol in
1333     case ctx of
1334         Layout prev_off : _
1335            | if strict then prev_off >=# offset else prev_off ># offset ->
1336                 --trace ("layout on, column: " ++  show (I# offset)) $
1337                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1338         other ->
1339                 --trace ("layout on, column: " ++  show (I# offset)) $
1340                 POk s{ context = Layout offset : ctx } ()
1341
1342 layoutOff :: P ()
1343 layoutOff buf s@(PState{ context = ctx }) =
1344     POk s{ context = NoLayout:ctx } ()
1345
1346 popContext :: P ()
1347 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1348   case ctx of
1349         (_:tl) -> POk s{ context = tl } ()
1350         []     -> PFailed (srcParseErr buf loc)
1351
1352 {-
1353  Note that if the name of the file we're processing ends
1354  with `hi-boot', we accept it on faith as having the right
1355  version. This is done so that .hi-boot files that comes
1356  with hsc don't have to be updated before every release,
1357  *and* it allows us to share .hi-boot files with versions
1358  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1359
1360  If the version number is 0, the checking is also turned off.
1361  (needed to deal with GHC.hi only!)
1362
1363  Once we can assume we're compiling with a version of ghc that
1364  supports interface file checking, we can drop the special
1365  pleading
1366 -}
1367 checkVersion :: Maybe Integer -> P ()
1368 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1369  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s ()
1370  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1371 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1372  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1373  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1374
1375
1376 -- for reasons of efficiency, flags indicating language extensions (eg,
1377 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1378 -- integer
1379
1380 glaExtsBit, ffiBit, parrBit :: Int
1381 glaExtsBit = 0
1382 ffiBit     = 1  -- FIXME: not used yet; still part of `glaExtsBit'
1383 parrBit    = 2
1384
1385 glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
1386 glaExtsEnabled flags = testBit (I# flags) glaExtsBit
1387 ffiEnabled     flags = testBit (I# flags) ffiBit
1388 parrEnabled    flags = testBit (I# flags) parrBit
1389
1390 -- convenient record-based bitmap for the interface to the rest of the world
1391 --
1392 data ExtFlags = ExtFlags {
1393                   glasgowExtsEF :: Bool,
1394 --                ffiEF         :: Bool,  -- commented out to avoid warnings
1395                   parrEF        :: Bool   -- while not used yet
1396                 }
1397
1398 -- create a parse state
1399 --
1400 mkPState          :: SrcLoc -> ExtFlags -> PState
1401 mkPState loc exts  = PState {
1402                        loc        = loc,
1403                        extsBitmap = case bitmap of {I# bits -> bits},
1404                        bol        = 0#,
1405                        atbol      = 1#,
1406                        context    = []
1407                      }
1408                      where
1409                        bitmap =     glaExtsBit `setBitIf` glasgowExtsEF exts
1410 --                              .|. ffiBit     `setBitIf` ffiEF         exts
1411                                 .|. parrBit    `setBitIf` parrEF        exts
1412                        --
1413                        b `setBitIf` cond | cond      = bit b
1414                                          | otherwise = 0
1415
1416
1417 -----------------------------------------------------------------
1418
1419 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1420 ifaceParseErr s l
1421   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1422           text (lexemeToString s), char '\'']
1423
1424 ifaceVersionErr hi_vers l toks
1425   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1426           ptext SLIT("Expected"), int opt_HiVersion,
1427           ptext SLIT("found "), pp_version]
1428     where
1429      pp_version =
1430       case hi_vers of
1431         Nothing -> ptext SLIT("pre ghc-3.02 version")
1432         Just v  -> ptext SLIT("version") <+> integer v
1433
1434 -----------------------------------------------------------------------------
1435
1436 srcParseErr :: StringBuffer -> SrcLoc -> Message
1437 srcParseErr s l
1438   = hcat [ppr l,
1439           if null token
1440              then ptext SLIT(": parse error (possibly incorrect indentation)")
1441              else hcat [ptext SLIT(": parse error on input "),
1442                         char '`', text token, char '\'']
1443     ]
1444   where
1445         token = lexemeToString s
1446
1447 \end{code}