ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19
  20 module Lex (
  21
  22         ifaceParseErr, srcParseErr,
  23
  24         -- Monad for parser
  25         Token(..), lexer, ParseResult(..), PState(..),
  26         checkVersion, ExtFlags(..), mkPState,
  27         StringBuffer,
  28
  29         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  30         getSrcLocP, setSrcLocP, getSrcFile,
  31         layoutOn, layoutOff, pushContext, popContext
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import Char             ( isSpace, toUpper )
  37 import List             ( isSuffixOf )
  38
  39 import PrelNames        ( mkTupNameStr )
  40 import CmdLineOpts      ( opt_HiVersion, opt_NoHiCheck )
  41 import ForeignCall      ( Safety(..) )
  42 import NewDemand        ( StrictSig(..), Demand(..), Demands(..),
  43                           DmdResult(..), mkTopDmdType, evalDmd, lazyDmd )
  44 import UniqFM           ( listToUFM, lookupUFM )
  45 import BasicTypes       ( Boxity(..) )
  46 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  47                           replaceSrcLine, mkSrcLoc )
  48
  49 import ErrUtils         ( Message )
  50 import Outputable
  51
  52 import FastString
  53 import StringBuffer
  54 import GlaExts
  55 import Ctype
  56 import Char             ( chr, ord )
  57
  58 import Bits             ( Bits(..) )       -- non-std
  59
  60 #if __GLASGOW_HASKELL__ >= 503
  61 import GHC.Read         ( readRational__ ) -- Glasgow non-std
  62 #else
  63 import PrelRead         ( readRational__ ) -- Glasgow non-std
  64 #endif
  65 import Int              ( Int32 )
  66 \end{code}
  67
  68 %************************************************************************
  69 %*                                                                      *
  70 \subsection{Data types}
  71 %*                                                                      *
  72 %************************************************************************
  73
  74 The token data type, fairly un-interesting except from one
  75 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  76 strictness, unfolding etc).
  77
  78 The Idea/Observation here is that the renamer needs to scan through
  79 all of an interface file before it can continue. But only a fraction
  80 of the information contained in the file turns out to be useful, so
  81 delaying as much as possible of the scanning and parsing of an
  82 interface file Makes Sense (Heap profiles of the compiler
  83 show a reduction in heap usage by at least a factor of two,
  84 post-renamer).
  85
  86 Hence, the interface file lexer spots when value declarations are
  87 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  88 for the type and any other id info for that binding (unfolding, strictness
  89 etc). These constructors are applied to the result of lexing these sub-chunks.
  90
  91 The lexing of the type and id info is all done lazily, of course, so
  92 the scanning (and subsequent parsing) will be done *only* on the ids the
  93 renamer finds out that it is interested in. The rest will just be junked.
  94 Laziness, you know it makes sense :-)
  95
  96 \begin{code}
  97 data Token
  98   = ITas                        -- Haskell keywords
  99   | ITcase
 100   | ITclass
 101   | ITdata
 102   | ITdefault
 103   | ITderiving
 104   | ITdo
 105   | ITelse
 106   | IThiding
 107   | ITif
 108   | ITimport
 109   | ITin
 110   | ITinfix
 111   | ITinfixl
 112   | ITinfixr
 113   | ITinstance
 114   | ITlet
 115   | ITmodule
 116   | ITnewtype
 117   | ITof
 118   | ITqualified
 119   | ITthen
 120   | ITtype
 121   | ITwhere
 122   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 123
 124   | ITforall                    -- GHC extension keywords
 125   | ITforeign
 126   | ITexport
 127   | ITlabel
 128   | ITdynamic
 129   | ITsafe
 130   | ITthreadsafe
 131   | ITunsafe
 132   | ITwith
 133   | ITstdcallconv
 134   | ITccallconv
 135   | ITdotnet
 136
 137   | ITinterface                 -- interface keywords
 138   | IT__export
 139   | ITdepends
 140   | IT__forall
 141   | ITletrec
 142   | ITcoerce
 143   | ITinlineMe
 144   | ITinlineCall
 145   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 146   | ITdefaultbranch
 147   | ITbottom
 148   | ITinteger_lit
 149   | ITfloat_lit
 150   | ITword_lit
 151   | ITword64_lit
 152   | ITint64_lit
 153   | ITrational_lit
 154   | ITaddr_lit
 155   | ITlabel_lit
 156   | ITlit_lit
 157   | ITstring_lit
 158   | ITtypeapp
 159   | ITusage
 160   | ITfuall
 161   | ITarity
 162   | ITspecialise
 163   | ITnocaf
 164   | ITunfold
 165   | ITstrict StrictSig
 166   | ITrules
 167   | ITcprinfo
 168   | ITdeprecated
 169   | IT__scc
 170   | ITsccAllCafs
 171
 172   | ITspecialise_prag           -- Pragmas
 173   | ITsource_prag
 174   | ITinline_prag
 175   | ITnoinline_prag
 176   | ITrules_prag
 177   | ITdeprecated_prag
 178   | ITline_prag
 179   | ITscc_prag
 180   | ITclose_prag
 181
 182   | ITdotdot                    -- reserved symbols
 183   | ITdcolon
 184   | ITequal
 185   | ITlam
 186   | ITvbar
 187   | ITlarrow
 188   | ITrarrow
 189   | ITat
 190   | ITtilde
 191   | ITdarrow
 192   | ITminus
 193   | ITbang
 194   | ITstar
 195   | ITdot
 196
 197   | ITbiglam                    -- GHC-extension symbols
 198
 199   | ITocurly                    -- special symbols
 200   | ITccurly
 201   | ITocurlybar                 -- {|, for type applications
 202   | ITccurlybar                 -- |}, for type applications
 203   | ITvccurly
 204   | ITobrack
 205   | ITopabrack                  -- [:, for parallel arrays with -fparr
 206   | ITcpabrack                  -- :], for parallel arrays with -fparr
 207   | ITcbrack
 208   | IToparen
 209   | ITcparen
 210   | IToubxparen
 211   | ITcubxparen
 212   | ITsemi
 213   | ITcomma
 214   | ITunderscore
 215   | ITbackquote
 216
 217   | ITvarid   FAST_STRING       -- identifiers
 218   | ITconid   FAST_STRING
 219   | ITvarsym  FAST_STRING
 220   | ITconsym  FAST_STRING
 221   | ITqvarid  (FAST_STRING,FAST_STRING)
 222   | ITqconid  (FAST_STRING,FAST_STRING)
 223   | ITqvarsym (FAST_STRING,FAST_STRING)
 224   | ITqconsym (FAST_STRING,FAST_STRING)
 225
 226   | ITdupipvarid   FAST_STRING  -- GHC extension: implicit param: ?x
 227   | ITsplitipvarid FAST_STRING  -- GHC extension: implicit param: %x
 228
 229   | ITpragma StringBuffer
 230
 231   | ITchar       Int
 232   | ITstring     FAST_STRING
 233   | ITinteger    Integer
 234   | ITrational   Rational
 235
 236   | ITprimchar   Int
 237   | ITprimstring FAST_STRING
 238   | ITprimint    Integer
 239   | ITprimfloat  Rational
 240   | ITprimdouble Rational
 241   | ITlitlit     FAST_STRING
 242
 243   | ITunknown String            -- Used when the lexer can't make sense of it
 244   | ITeof                       -- end of file token
 245   deriving Show -- debugging
 246 \end{code}
 247
 248 -----------------------------------------------------------------------------
 249 Keyword Lists
 250
 251 \begin{code}
 252 pragmaKeywordsFM = listToUFM $
 253       map (\ (x,y) -> (_PK_ x,y))
 254        [( "SPECIALISE", ITspecialise_prag ),
 255         ( "SPECIALIZE", ITspecialise_prag ),
 256         ( "SOURCE",     ITsource_prag ),
 257         ( "INLINE",     ITinline_prag ),
 258         ( "NOINLINE",   ITnoinline_prag ),
 259         ( "NOTINLINE",  ITnoinline_prag ),
 260         ( "LINE",       ITline_prag ),
 261         ( "RULES",      ITrules_prag ),
 262         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 263         ( "SCC",        ITscc_prag ),
 264         ( "DEPRECATED", ITdeprecated_prag )
 265         ]
 266
 267 haskellKeywordsFM = listToUFM $
 268       map (\ (x,y) -> (_PK_ x,y))
 269        [( "_",          ITunderscore ),
 270         ( "as",         ITas ),
 271         ( "case",       ITcase ),
 272         ( "class",      ITclass ),
 273         ( "data",       ITdata ),
 274         ( "default",    ITdefault ),
 275         ( "deriving",   ITderiving ),
 276         ( "do",         ITdo ),
 277         ( "else",       ITelse ),
 278         ( "hiding",     IThiding ),
 279         ( "if",         ITif ),
 280         ( "import",     ITimport ),
 281         ( "in",         ITin ),
 282         ( "infix",      ITinfix ),
 283         ( "infixl",     ITinfixl ),
 284         ( "infixr",     ITinfixr ),
 285         ( "instance",   ITinstance ),
 286         ( "let",        ITlet ),
 287         ( "module",     ITmodule ),
 288         ( "newtype",    ITnewtype ),
 289         ( "of",         ITof ),
 290         ( "qualified",  ITqualified ),
 291         ( "then",       ITthen ),
 292         ( "type",       ITtype ),
 293         ( "where",      ITwhere ),
 294         ( "_scc_",      ITscc )         -- ToDo: remove
 295      ]
 296
 297 isSpecial :: Token -> Bool
 298 -- If we see M.x, where x is a keyword, but
 299 -- is special, we treat is as just plain M.x,
 300 -- not as a keyword.
 301 isSpecial ITas          = True
 302 isSpecial IThiding      = True
 303 isSpecial ITqualified   = True
 304 isSpecial ITforall      = True
 305 isSpecial ITexport      = True
 306 isSpecial ITlabel       = True
 307 isSpecial ITdynamic     = True
 308 isSpecial ITsafe        = True
 309 isSpecial ITthreadsafe  = True
 310 isSpecial ITunsafe      = True
 311 isSpecial ITwith        = True
 312 isSpecial ITccallconv   = True
 313 isSpecial ITstdcallconv = True
 314 isSpecial _             = False
 315
 316 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 317 ghcExtensionKeywordsFM = listToUFM $
 318         map (\ (x,y) -> (_PK_ x,y))
 319      [  ( "forall",     ITforall ),
 320         ( "foreign",    ITforeign ),
 321         ( "export",     ITexport ),
 322         ( "label",      ITlabel ),
 323         ( "dynamic",    ITdynamic ),
 324         ( "safe",       ITsafe ),
 325         ( "threadsafe", ITthreadsafe ),
 326         ( "unsafe",     ITunsafe ),
 327         ( "with",       ITwith ),
 328         ( "stdcall",    ITstdcallconv),
 329         ( "ccall",      ITccallconv),
 330         ( "dotnet",     ITdotnet),
 331         ("_ccall_",     ITccall (False, False, PlayRisky)),
 332         ("_ccall_GC_",  ITccall (False, False, PlaySafe False)),
 333         ("_casm_",      ITccall (False, True,  PlayRisky)),
 334         ("_casm_GC_",   ITccall (False, True,  PlaySafe False)),
 335
 336         -- interface keywords
 337         ("__interface",         ITinterface),
 338         ("__export",            IT__export),
 339         ("__depends",           ITdepends),
 340         ("__forall",            IT__forall),
 341         ("__letrec",            ITletrec),
 342         ("__coerce",            ITcoerce),
 343         ("__inline_me",         ITinlineMe),
 344         ("__inline_call",       ITinlineCall),
 345         ("__depends",           ITdepends),
 346         ("__DEFAULT",           ITdefaultbranch),
 347         ("__bot",               ITbottom),
 348         ("__integer",           ITinteger_lit),
 349         ("__float",             ITfloat_lit),
 350         ("__int64",             ITint64_lit),
 351         ("__word",              ITword_lit),
 352         ("__word64",            ITword64_lit),
 353         ("__rational",          ITrational_lit),
 354         ("__addr",              ITaddr_lit),
 355         ("__label",             ITlabel_lit),
 356         ("__litlit",            ITlit_lit),
 357         ("__string",            ITstring_lit),
 358         ("__a",                 ITtypeapp),
 359         ("__u",                 ITusage),
 360         ("__fuall",             ITfuall),
 361         ("__A",                 ITarity),
 362         ("__P",                 ITspecialise),
 363         ("__C",                 ITnocaf),
 364         ("__R",                 ITrules),
 365         ("__D",                 ITdeprecated),
 366         ("__U",                 ITunfold),
 367
 368         ("__ccall",             ITccall (False, False, PlayRisky)),
 369         ("__ccall_GC",          ITccall (False, False, PlaySafe False)),
 370         ("__dyn_ccall",         ITccall (True,  False, PlayRisky)),
 371         ("__dyn_ccall_GC",      ITccall (True,  False, PlaySafe False)),
 372         ("__casm",              ITccall (False, True,  PlayRisky)),
 373         ("__dyn_casm",          ITccall (True,  True,  PlayRisky)),
 374         ("__casm_GC",           ITccall (False, True,  PlaySafe False)),
 375         ("__dyn_casm_GC",       ITccall (True,  True,  PlaySafe False)),
 376
 377         ("/\\",                 ITbiglam)
 378      ]
 379
 380
 381 haskellKeySymsFM = listToUFM $
 382         map (\ (x,y) -> (_PK_ x,y))
 383       [ ("..",          ITdotdot)
 384        ,("::",          ITdcolon)
 385        ,("=",           ITequal)
 386        ,("\\",          ITlam)
 387        ,("|",           ITvbar)
 388        ,("<-",          ITlarrow)
 389        ,("->",          ITrarrow)
 390        ,("@",           ITat)
 391        ,("~",           ITtilde)
 392        ,("=>",          ITdarrow)
 393        ,("-",           ITminus)
 394        ,("!",           ITbang)
 395        ,("*",           ITstar)
 396        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 397        ]
 398 \end{code}
 399
 400 -----------------------------------------------------------------------------
 401 The lexical analyser
 402
 403 Lexer state:
 404
 405         - (exts)  lexing a source with extensions, eg, an interface file or
 406                   with -fglasgow-exts
 407         - (bol)   pointer to beginning of line (for column calculations)
 408         - (buf)   pointer to beginning of token
 409         - (buf)   pointer to current char
 410         - (atbol) flag indicating whether we're at the beginning of a line
 411
 412 \begin{code}
 413 lexer :: (Token -> P a) -> P a
 414 lexer cont buf s@(PState{
 415                     loc = loc,
 416                     extsBitmap = exts,
 417                     bol = bol,
 418                     atbol = atbol,
 419                     context = ctx
 420                 })
 421
 422         -- first, start a new lexeme and lose all the whitespace
 423   =  _scc_ "Lexer"
 424   tab line bol atbol (stepOverLexeme buf)
 425   where
 426         line = srcLocLine loc
 427
 428         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 429           case currentChar# buf of
 430
 431             '\NUL'# ->
 432                    if bufferExhausted (stepOn buf)
 433                         then cont ITeof buf s'
 434                         else trace "lexer: misplaced NUL?" $
 435                              tab y bol atbol (stepOn buf)
 436
 437             '\n'# -> let buf' = stepOn buf
 438                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 439
 440                 -- find comments.  This got harder in Haskell 98.
 441             '-'# ->  let trundle n =
 442                           let next = lookAhead# buf n in
 443                           if next `eqChar#` '-'# then trundle (n +# 1#)
 444                           else if is_symbol next || n <# 2#
 445                                 then is_a_token
 446                                 else tab y bol atbol
 447                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 448                     in trundle 1#
 449
 450                 -- comments and pragmas.  We deal with LINE pragmas here,
 451                 -- and throw out any unrecognised pragmas as comments.  Any
 452                 -- pragmas we know about are dealt with later (after any layout
 453                 -- processing if necessary).
 454             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 455                 if lookAhead# buf 2# `eqChar#` '#'# then
 456                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
 457                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 458                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 459                   let lexeme = mkFastString -- ToDo: too slow
 460                                   (map toUpper (lexemeToString buf2)) in
 461                   case lookupUFM pragmaKeywordsFM lexeme of
 462                         -- ignore RULES pragmas when -fglasgow-exts is off
 463                         Just ITrules_prag | not (glaExtsEnabled exts) ->
 464                            skip_to_end (stepOnBy# buf 2#) s'
 465                         Just ITline_prag ->
 466                            line_prag skip_to_end buf2 s'
 467                         Just other -> is_a_token
 468                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 469                   }}
 470
 471                 else skip_to_end (stepOnBy# buf 2#) s'
 472                 where
 473                     skip_to_end = skipNestedComment (lexer cont)
 474
 475                 -- special GHC extension: we grok cpp-style #line pragmas
 476             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 477                 let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
 478                            lookAhead# buf 2# `eqChar#` 'i'# &&
 479                            lookAhead# buf 3# `eqChar#` 'n'# &&
 480                            lookAhead# buf 4# `eqChar#` 'e'#  = stepOnBy# buf 5#
 481                          | otherwise = stepOn buf
 482                 in
 483                 case expandWhile# is_space buf1 of { buf2 ->
 484                 if is_digit (currentChar# buf2)
 485                         then line_prag next_line buf2 s'
 486                         else is_a_token
 487                 }
 488                 where
 489                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 490
 491                 -- tabs have been expanded beforehand
 492             c | is_space c -> tab y bol atbol (stepOn buf)
 493               | otherwise  -> is_a_token
 494
 495            where s' = s{loc = replaceSrcLine loc y,
 496                         bol = bol,
 497                        atbol = atbol}
 498
 499                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 500                             | otherwise    = lexToken cont exts buf s'
 501
 502 -- {-# LINE .. #-} pragmas.  yeuch.
 503 line_prag cont buf s@PState{loc=loc} =
 504   case expandWhile# is_space buf                of { buf1 ->
 505   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 506   -- subtract one: the line number refers to the *following* line.
 507   let real_line = line - 1 in
 508   case fromInteger real_line                    of { i@(I# l) ->
 509         -- ToDo, if no filename then we skip the newline.... d'oh
 510   case expandWhile# is_space buf2               of { buf3 ->
 511   case currentChar# buf3                        of
 512      '\"'#{-"-} ->
 513         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 514         let
 515             file = lexemeToFastString buf4
 516             new_buf = stepOn (stepOverLexeme buf4)
 517         in
 518         if nullFastString file
 519                 then cont new_buf s{loc = replaceSrcLine loc l}
 520                 else cont new_buf s{loc = mkSrcLoc file i}
 521         }
 522      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 523   }}}}
 524
 525 skipNestedComment :: P a -> P a
 526 skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
 527
 528 skipNestedComment' :: SrcLoc -> P a -> P a
 529 skipNestedComment' orig_loc cont buf = loop buf
 530  where
 531    loop buf =
 532      case currentChar# buf of
 533         '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
 534
 535         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 536               skipNestedComment
 537                 (skipNestedComment' orig_loc cont)
 538                 (stepOnBy# buf 2#)
 539
 540         '\n'# -> \ s@PState{loc=loc} ->
 541                  let buf' = stepOn buf in
 542                  loop buf' s{loc = incSrcLine loc,
 543                              bol = currentIndex# buf',
 544                              atbol = 1#}
 545
 546         -- pass the original SrcLoc to lexError so that the error is
 547         -- reported at the line it was originally on, not the line at
 548         -- the end of the file.
 549         '\NUL'# | bufferExhausted (stepOn buf) ->
 550                 \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
 551
 552         _   -> loop (stepOn buf)
 553
 554 -- When we are lexing the first token of a line, check whether we need to
 555 -- insert virtual semicolons or close braces due to layout.
 556
 557 lexBOL :: (Token -> P a) -> P a
 558 lexBOL cont buf s@(PState{
 559                     loc = loc,
 560                     extsBitmap = exts,
 561                     bol = bol,
 562                     atbol = atbol,
 563                     context = ctx
 564                   }) =
 565         if need_close_curly then
 566                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 567                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 568         else if need_semi_colon then
 569                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 570                 cont ITsemi buf s{atbol = 0#}
 571         else
 572                 lexToken cont exts buf s{atbol = 0#}
 573   where
 574         col = currentIndex# buf -# bol
 575
 576         need_close_curly =
 577                 case ctx of
 578                         [] -> False
 579                         (i:_) -> case i of
 580                                     NoLayout -> False
 581                                     Layout n -> col <# n
 582         need_semi_colon =
 583                 case ctx of
 584                         [] -> False
 585                         (i:_) -> case i of
 586                                     NoLayout -> False
 587                                     Layout n -> col ==# n
 588
 589
 590 lexToken :: (Token -> P a) -> Int# -> P a
 591 lexToken cont exts buf =
 592 -- trace "lexToken" $
 593   case currentChar# buf of
 594
 595     -- special symbols ----------------------------------------------------
 596     '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
 597                 -> cont IToubxparen (setCurrentPos# buf 2#)
 598          | otherwise
 599                 -> cont IToparen (incLexeme buf)
 600
 601     ')'# -> cont ITcparen    (incLexeme buf)
 602     '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
 603             cont ITopabrack  (setCurrentPos# buf 2#)
 604          | otherwise ->
 605             cont ITobrack    (incLexeme buf)
 606     ']'# -> cont ITcbrack    (incLexeme buf)
 607     ','# -> cont ITcomma     (incLexeme buf)
 608     ';'# -> cont ITsemi      (incLexeme buf)
 609     '}'# -> \ s@PState{context = ctx} ->
 610             case ctx of
 611                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 612                 _        -> lexError "too many '}'s" buf s
 613     '|'# -> case lookAhead# buf 1# of
 614                  '}'#  | glaExtsEnabled exts -> cont ITccurlybar
 615                                                      (setCurrentPos# buf 2#)
 616                  _                           -> lex_sym cont (incLexeme buf)
 617     ':'# -> case lookAhead# buf 1# of
 618                  ']'#  | parrEnabled exts    -> cont ITcpabrack
 619                                                      (setCurrentPos# buf 2#)
 620                  _                           -> lex_sym cont (incLexeme buf)
 621
 622
 623     '#'# -> case lookAhead# buf 1# of
 624                 ')'#  | glaExtsEnabled exts
 625                      -> cont ITcubxparen (setCurrentPos# buf 2#)
 626                 '-'# -> case lookAhead# buf 2# of
 627                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 628                            _    -> lex_sym cont (incLexeme buf)
 629                 _    -> lex_sym cont (incLexeme buf)
 630
 631     '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
 632                 -> lex_cstring cont (setCurrentPos# buf 2#)
 633          | otherwise
 634                 -> cont ITbackquote (incLexeme buf)
 635
 636     '{'# ->     -- look for "{-##" special iface pragma   -- for Emacs: -}
 637             case lookAhead# buf 1# of
 638            '|'# | glaExtsEnabled exts
 639                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 640            '-'# -> case lookAhead# buf 2# of
 641                     '#'# -> case lookAhead# buf 3# of
 642                                 '#'# ->
 643                                    lexPragma
 644                                       cont
 645                                       (\ cont lexeme buf' -> cont (ITpragma lexeme) buf')
 646                                       0#
 647                                       (stepOnBy# (stepOverLexeme buf) 4#)
 648                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
 649                     _    -> cont ITocurly (incLexeme buf)
 650            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 651
 652     -- strings/characters -------------------------------------------------
 653     '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf)
 654     '\''#      -> lex_char (char_end cont) exts (incLexeme buf)
 655
 656     -- strictness and cpr pragmas and __scc treated specially.
 657     '_'# | glaExtsEnabled exts ->
 658          case lookAhead# buf 1# of
 659            '_'# -> case lookAhead# buf 2# of
 660                     'S'# ->
 661                         lex_demand cont (stepOnUntil (not . isSpace)
 662                                         (stepOnBy# buf 3#)) -- past __S
 663                     'M'# ->
 664                         cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
 665
 666                     's'# ->
 667                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 668                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 669                                Nothing   -> lex_id cont exts buf
 670                     _ -> lex_id cont exts buf
 671            _    -> lex_id cont exts buf
 672
 673         -- Hexadecimal and octal constants
 674     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 675                 -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
 676          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 677                 -> readNum (after_lexnum cont exts) buf' is_octdigit  8 oct_or_dec
 678         where ch   = lookAhead# buf 1#
 679               ch2  = lookAhead# buf 2#
 680               buf' = setCurrentPos# buf 2#
 681
 682     '\NUL'# ->
 683             if bufferExhausted (stepOn buf) then
 684                cont ITeof buf
 685             else
 686                trace "lexIface: misplaced NUL?" $
 687                cont (ITunknown "\NUL") (stepOn buf)
 688
 689     '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 690             lex_ip ITdupipvarid cont (incLexeme buf)
 691     '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 692             lex_ip ITsplitipvarid cont (incLexeme buf)
 693     c | is_digit  c -> lex_num cont exts 0 buf
 694       | is_symbol c -> lex_sym cont buf
 695       | is_upper  c -> lex_con cont exts buf
 696       | is_ident  c -> lex_id  cont exts buf
 697       | otherwise   -> lexError "illegal character" buf
 698
 699 -- Int# is unlifted, and therefore faster than Bool for flags.
 700 {-# INLINE flag #-}
 701 flag :: Int# -> Bool
 702 flag 0# = False
 703 flag _  = True
 704
 705 -------------------------------------------------------------------------------
 706 -- Pragmas
 707
 708 lex_prag cont buf
 709   = case expandWhile# is_space buf of { buf1 ->
 710     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 711     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 712     case lookupUFM pragmaKeywordsFM lexeme of
 713         Just kw -> cont kw (mergeLexemes buf buf2)
 714         Nothing -> panic "lex_prag"
 715   }}
 716
 717 -------------------------------------------------------------------------------
 718 -- Strings & Chars
 719
 720 lex_string cont exts s buf
 721   = case currentChar# buf of
 722         '"'#{-"-} ->
 723            let buf' = incLexeme buf
 724                s' = mkFastStringNarrow (map chr (reverse s))
 725            in case currentChar# buf' of
 726                 '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s
 727                     then cont (ITprimstring s') (incLexeme buf')
 728                     else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 729                 _                   -> cont (ITstring s') buf'
 730
 731         -- ignore \& in a string, deal with string gaps
 732         '\\'# | next_ch `eqChar#` '&'#
 733                 -> lex_string cont exts s buf'
 734               | is_space next_ch
 735                 -> lex_stringgap cont exts s (incLexeme buf)
 736
 737             where next_ch = lookAhead# buf 1#
 738                   buf' = setCurrentPos# buf 2#
 739
 740         _ -> lex_char (lex_next_string cont s) exts buf
 741
 742 lex_stringgap cont exts s buf
 743   = let buf' = incLexeme buf in
 744     case currentChar# buf of
 745         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
 746                   st{loc = incSrcLine loc}
 747         '\\'# -> lex_string cont exts s buf'
 748         c | is_space c -> lex_stringgap cont exts s buf'
 749         other -> charError buf'
 750
 751 lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
 752
 753 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 754 lex_char cont exts buf
 755   = case currentChar# buf of
 756         '\\'# -> lex_escape (cont exts) (incLexeme buf)
 757         c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf)
 758         other -> charError buf
 759
 760 char_end cont exts c buf
 761   = case currentChar# buf of
 762         '\''# -> let buf' = incLexeme buf in
 763                  case currentChar# buf' of
 764                         '#'# | glaExtsEnabled exts
 765                                 -> cont (ITprimchar c) (incLexeme buf')
 766                         _       -> cont (ITchar c) buf'
 767         _     -> charError buf
 768
 769 lex_escape cont buf
 770   = let buf' = incLexeme buf in
 771     case currentChar# buf of
 772         'a'#       -> cont (ord '\a') buf'
 773         'b'#       -> cont (ord '\b') buf'
 774         'f'#       -> cont (ord '\f') buf'
 775         'n'#       -> cont (ord '\n') buf'
 776         'r'#       -> cont (ord '\r') buf'
 777         't'#       -> cont (ord '\t') buf'
 778         'v'#       -> cont (ord '\v') buf'
 779         '\\'#      -> cont (ord '\\') buf'
 780         '"'#       -> cont (ord '\"') buf'
 781         '\''#      -> cont (ord '\'') buf'
 782         '^'#       -> let c = currentChar# buf' in
 783                       if c `geChar#` '@'# && c `leChar#` '_'#
 784                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 785                         else charError buf'
 786
 787         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 788         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 789         x | is_digit x
 790                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 791
 792         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 793                                        Just buf2 <- [prefixMatch buf p] ] of
 794                             (c,buf2):_ -> cont (ord c) buf2
 795                             [] -> charError buf'
 796
 797 after_charnum cont i buf
 798   = if i >= 0 && i <= 0x10FFFF
 799         then cont (fromInteger i) buf
 800         else charError buf
 801
 802 readNum cont buf is_digit base conv = read buf 0
 803   where read buf i
 804           = case currentChar# buf of { c ->
 805             if is_digit c
 806                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 807                 else cont i buf
 808             }
 809
 810 is_hexdigit c
 811         =  is_digit c
 812         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 813         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 814
 815 hex c | is_digit c = ord# c -# ord# '0'#
 816       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 817 oct_or_dec c = ord# c -# ord# '0'#
 818
 819 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 820
 821 to_lower c
 822   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 823         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 824   | otherwise = c
 825
 826 charError buf = lexError "error in character literal" buf
 827
 828 silly_escape_chars = [
 829         ("NUL", '\NUL'),
 830         ("SOH", '\SOH'),
 831         ("STX", '\STX'),
 832         ("ETX", '\ETX'),
 833         ("EOT", '\EOT'),
 834         ("ENQ", '\ENQ'),
 835         ("ACK", '\ACK'),
 836         ("BEL", '\BEL'),
 837         ("BS", '\BS'),
 838         ("HT", '\HT'),
 839         ("LF", '\LF'),
 840         ("VT", '\VT'),
 841         ("FF", '\FF'),
 842         ("CR", '\CR'),
 843         ("SO", '\SO'),
 844         ("SI", '\SI'),
 845         ("DLE", '\DLE'),
 846         ("DC1", '\DC1'),
 847         ("DC2", '\DC2'),
 848         ("DC3", '\DC3'),
 849         ("DC4", '\DC4'),
 850         ("NAK", '\NAK'),
 851         ("SYN", '\SYN'),
 852         ("ETB", '\ETB'),
 853         ("CAN", '\CAN'),
 854         ("EM", '\EM'),
 855         ("SUB", '\SUB'),
 856         ("ESC", '\ESC'),
 857         ("FS", '\FS'),
 858         ("GS", '\GS'),
 859         ("RS", '\RS'),
 860         ("US", '\US'),
 861         ("SP", '\SP'),
 862         ("DEL", '\DEL')
 863         ]
 864
 865 -------------------------------------------------------------------------------
 866
 867 lex_demand cont buf =
 868  case read_em [] buf of { (ls,buf') ->
 869  case currentChar# buf' of
 870    'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf')
 871    'm'# -> cont (ITstrict (StrictSig (mkTopDmdType ls RetCPR))) (incLexeme buf')
 872    _    -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf'
 873  }
 874  where
 875   read_em acc buf =
 876    case currentChar# buf of
 877     'T'# -> read_em (Top     : acc) (stepOn buf)
 878     'L'# -> read_em (lazyDmd : acc) (stepOn buf)
 879     'A'# -> read_em (Abs     : acc) (stepOn buf)
 880     'V'# -> read_em (evalDmd : acc) (stepOn buf)        -- Temporary, until
 881                                                         -- we've recompiled prelude etc
 882     'C'# -> do_unary Call  acc (stepOnBy# buf 2#)       -- Skip 'C('
 883
 884     'U'# -> do_seq1 Eval         acc (stepOnBy# buf 1#)
 885     'D'# -> do_seq1 Defer        acc (stepOnBy# buf 1#)
 886     'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#)
 887
 888     _    -> (reverse acc, buf)
 889
 890   do_seq1 fn acc buf
 891     = case currentChar# buf of
 892         '('# -> do_seq2 fn acc (stepOnBy# buf 1#)
 893         _    -> read_em (fn (Poly Abs) : acc) buf
 894
 895   do_seq2 fn acc buf
 896     = case read_em [] buf of { (dmds, buf) ->
 897       case currentChar# buf of
 898         ')'# -> read_em (fn (Prod dmds) : acc)
 899                         (stepOn buf)
 900         '*'# -> ASSERT( length dmds == 1 )
 901                 read_em (fn (Poly (head dmds)) : acc)
 902                         (stepOnBy# buf 2#)      -- Skip '*)'
 903       }
 904
 905   do_unary fn acc buf
 906     = case read_em [] buf of
 907         ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest)   -- Skip ')'
 908
 909 ------------------
 910 lex_scc cont buf =
 911  case currentChar# buf of
 912   'C'# -> cont ITsccAllCafs (incLexeme buf)
 913   other -> cont ITscc buf
 914
 915 -----------------------------------------------------------------------------
 916 -- Numbers
 917
 918 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 919 lex_num cont exts acc buf =
 920  case scanNumLit acc buf of
 921      (acc',buf') ->
 922        case currentChar# buf' of
 923          '.'# | is_digit (lookAhead# buf' 1#) ->
 924              -- this case is not optimised at all, as the
 925              -- presence of floating point numbers in interface
 926              -- files is not that common. (ToDo)
 927             case expandWhile# is_digit (incLexeme buf') of
 928               buf2 -> -- points to first non digit char
 929
 930                 let l = case currentChar# buf2 of
 931                           'E'# -> do_exponent
 932                           'e'# -> do_exponent
 933                           _ -> buf2
 934
 935                     do_exponent
 936                         = let buf3 = incLexeme buf2 in
 937                           case currentChar# buf3 of
 938                                 '-'# | is_digit (lookAhead# buf3 1#)
 939                                    -> expandWhile# is_digit (incLexeme buf3)
 940                                 '+'# | is_digit (lookAhead# buf3 1#)
 941                                    -> expandWhile# is_digit (incLexeme buf3)
 942                                 x | is_digit x -> expandWhile# is_digit buf3
 943                                 _ -> buf2
 944
 945                     v = readRational__ (lexemeToString l)
 946
 947                 in case currentChar# l of -- glasgow exts only
 948                       '#'# | glaExtsEnabled exts -> let l' = incLexeme l in
 949                               case currentChar# l' of
 950                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 951                                 _    -> cont (ITprimfloat  v) l'
 952                       _ -> cont (ITrational v) l
 953
 954          _ -> after_lexnum cont exts acc' buf'
 955
 956 after_lexnum cont exts i buf
 957   = case currentChar# buf of
 958         '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf)
 959         _                          -> cont (ITinteger i) buf
 960
 961 -----------------------------------------------------------------------------
 962 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 963
 964 -- we lexemeToFastString on the bit between the ``''s, but include the
 965 -- quotes in the full lexeme.
 966
 967 lex_cstring cont buf =
 968  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 969    Just buf' -> cont (ITlitlit (lexemeToFastString
 970                                 (setCurrentPos# buf' (negateInt# 2#))))
 971                    (mergeLexemes buf buf')
 972    Nothing   -> lexError "unterminated ``" buf
 973
 974 -----------------------------------------------------------------------------
 975 -- identifiers, symbols etc.
 976
 977 lex_ip ip_constr cont buf =
 978  case expandWhile# is_ident buf of
 979    buf' -> cont (ip_constr (tailFS lexeme)) buf'
 980         where lexeme = lexemeToFastString buf'
 981
 982 lex_id cont exts buf =
 983  let buf1 = expandWhile# is_ident buf in
 984  seq buf1 $
 985
 986  case (if glaExtsEnabled exts
 987         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 988         else buf1)                              of { buf' ->
 989
 990  let lexeme  = lexemeToFastString buf' in
 991
 992  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 993         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 994                           cont kwd_token buf';
 995         Nothing        ->
 996
 997  let var_token = cont (ITvarid lexeme) buf' in
 998
 999  if not (glaExtsEnabled exts)
1000    then var_token
1001    else
1002
1003  case lookupUFM ghcExtensionKeywordsFM lexeme of {
1004         Just kwd_token -> cont kwd_token buf';
1005         Nothing        -> var_token
1006
1007  }}}
1008
1009 lex_sym cont buf =
1010  -- trace "lex_sym" $
1011  case expandWhile# is_symbol buf of
1012    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
1013                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
1014                                   cont kwd_token buf' ;
1015                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
1016                                   cont (mk_var_token lexeme) buf'
1017            }
1018         where lexeme = lexemeToFastString buf'
1019
1020
1021 -- lex_con recursively collects components of a qualified identifer.
1022 -- The argument buf is the StringBuffer representing the lexeme
1023 -- identified so far, where the next character is upper-case.
1024
1025 lex_con cont exts buf =
1026  -- trace ("con: "{-++unpackFS lexeme-}) $
1027  let empty_buf = stepOverLexeme buf in
1028  case expandWhile# is_ident empty_buf of { buf1 ->
1029  case slurp_trailing_hashes buf1 exts of { con_buf ->
1030
1031  let all_buf = mergeLexemes buf con_buf
1032
1033      con_lexeme = lexemeToFastString con_buf
1034      mod_lexeme = lexemeToFastString (decLexeme buf)
1035      all_lexeme = lexemeToFastString all_buf
1036
1037      just_a_conid
1038         | emptyLexeme buf = cont (ITconid con_lexeme)               all_buf
1039         | otherwise       = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
1040  in
1041
1042  case currentChar# all_buf of
1043      '.'# -> maybe_qualified cont exts all_lexeme
1044                 (incLexeme all_buf) just_a_conid
1045      _    -> just_a_conid
1046   }}
1047
1048
1049 maybe_qualified cont exts mod buf just_a_conid =
1050  -- trace ("qid: "{-++unpackFS lexeme-}) $
1051  case currentChar# buf of
1052   '['# ->       -- Special case for []
1053     case lookAhead# buf 1# of
1054      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
1055      _    -> just_a_conid
1056
1057   '('# ->  -- Special case for (,,,)
1058            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
1059     case lookAhead# buf 1# of
1060      '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
1061                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
1062                                 just_a_conid
1063                 _    -> just_a_conid
1064      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
1065      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
1066      _    -> just_a_conid
1067
1068   '-'# -> case lookAhead# buf 1# of
1069             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
1070             _    -> lex_id3 cont exts mod buf just_a_conid
1071
1072   _    -> lex_id3 cont exts mod buf just_a_conid
1073
1074
1075 lex_id3 cont exts mod buf just_a_conid
1076   | is_upper (currentChar# buf) =
1077      lex_con cont exts buf
1078
1079   | is_symbol (currentChar# buf) =
1080      let
1081         start_new_lexeme = stepOverLexeme buf
1082      in
1083      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
1084      case expandWhile# is_symbol start_new_lexeme of { buf' ->
1085      let
1086        lexeme  = lexemeToFastString buf'
1087         -- real lexeme is M.<sym>
1088        new_buf = mergeLexemes buf buf'
1089      in
1090      cont (mk_qvar_token mod lexeme) new_buf
1091         -- wrong, but arguably morally right: M... is now a qvarsym
1092      }
1093
1094   | otherwise   =
1095      let
1096         start_new_lexeme = stepOverLexeme buf
1097      in
1098      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1099      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1100      if emptyLexeme buf1
1101             then just_a_conid
1102             else
1103
1104      case slurp_trailing_hashes buf1 exts of { buf' ->
1105
1106      let
1107       lexeme      = lexemeToFastString buf'
1108       new_buf     = mergeLexemes buf buf'
1109       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1110      in
1111      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1112             Nothing          -> is_a_qvarid ;
1113
1114             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1115                            -> is_a_qvarid          --  recognised as keywords here.
1116                            | otherwise
1117                            -> just_a_conid         -- avoid M.where etc.
1118      }}}
1119
1120 slurp_trailing_hashes buf exts
1121   | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
1122   | otherwise           = buf
1123
1124
1125 mk_var_token pk_str
1126   | is_upper f          = ITconid pk_str
1127   | is_ident f          = ITvarid pk_str
1128   | f `eqChar#` ':'#    = ITconsym pk_str
1129   | otherwise           = ITvarsym pk_str
1130   where
1131       (C# f) = _HEAD_ pk_str
1132       -- tl     = _TAIL_ pk_str
1133
1134 mk_qvar_token m token =
1135 -- trace ("mk_qvar ") $
1136  case mk_var_token token of
1137    ITconid n  -> ITqconid  (m,n)
1138    ITvarid n  -> ITqvarid  (m,n)
1139    ITconsym n -> ITqconsym (m,n)
1140    ITvarsym n -> ITqvarsym (m,n)
1141    _          -> ITunknown (show token)
1142 \end{code}
1143
1144 ----------------------------------------------------------------------------
1145 Horrible stuff for dealing with M.(,,,)
1146
1147 \begin{code}
1148 lex_tuple cont mod buf back_off =
1149   go 2 buf
1150   where
1151    go n buf =
1152     case currentChar# buf of
1153       ','# -> go (n+1) (stepOn buf)
1154       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1155       _    -> back_off
1156
1157 lex_ubx_tuple cont mod buf back_off =
1158   go 2 buf
1159   where
1160    go n buf =
1161     case currentChar# buf of
1162       ','# -> go (n+1) (stepOn buf)
1163       '#'# -> case lookAhead# buf 1# of
1164                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1165                                  (stepOnBy# buf 2#)
1166                 _    -> back_off
1167       _    -> back_off
1168 \end{code}
1169
1170 -----------------------------------------------------------------------------
1171 'lexPragma' rips along really fast, looking for a '##-}',
1172 indicating the end of the pragma we're skipping
1173
1174 \begin{code}
1175 lexPragma cont contf inStr buf =
1176  case currentChar# buf of
1177    '#'# | inStr ==# 0# ->
1178        case lookAhead# buf 1# of { '#'# ->
1179        case lookAhead# buf 2# of { '-'# ->
1180        case lookAhead# buf 3# of { '}'# ->
1181            contf cont (lexemeToBuffer buf)
1182                       (stepOverLexeme (setCurrentPos# buf 4#));
1183         _    -> lexPragma cont contf inStr (incLexeme buf) };
1184         _    -> lexPragma cont contf inStr (incLexeme buf) };
1185         _    -> lexPragma cont contf inStr (incLexeme buf) }
1186
1187    '"'# ->
1188        let
1189         odd_slashes buf flg i# =
1190           case lookAhead# buf i# of
1191            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
1192            _     -> flg
1193
1194         not_inStr = if inStr ==# 0# then 1# else 0#
1195        in
1196        case lookAhead# buf (negateInt# 1#) of --backwards, actually
1197          '\\'# -> -- escaping something..
1198            if odd_slashes buf True (negateInt# 2#)
1199                 then  -- odd number of slashes, " is escaped.
1200                       lexPragma cont contf inStr (incLexeme buf)
1201                 else  -- even number of slashes, \ is escaped.
1202                       lexPragma cont contf not_inStr (incLexeme buf)
1203          _ -> lexPragma cont contf not_inStr (incLexeme buf)
1204
1205    '\''# | inStr ==# 0# ->
1206         case lookAhead# buf 1# of { '"'# ->
1207         case lookAhead# buf 2# of { '\''# ->
1208            lexPragma cont contf inStr (setCurrentPos# buf 3#);
1209         _ -> lexPragma cont contf inStr (incLexeme buf) };
1210         _ -> lexPragma cont contf inStr (incLexeme buf) }
1211
1212     -- a sign that the input is ill-formed, since pragmas are
1213     -- assumed to always be properly closed (in .hi files).
1214    '\NUL'# -> trace "lexPragma: unexpected end-of-file" $
1215               cont (ITunknown "\NUL") buf
1216
1217    _ -> lexPragma cont contf inStr (incLexeme buf)
1218
1219 \end{code}
1220
1221 -----------------------------------------------------------------------------
1222
1223 \begin{code}
1224 data LayoutContext
1225   = NoLayout
1226   | Layout Int#
1227
1228 data ParseResult a
1229   = POk PState a
1230   | PFailed Message
1231
1232 data PState = PState {
1233         loc        :: SrcLoc,
1234         extsBitmap :: Int#,     -- bitmap that determines permitted extensions
1235         bol        :: Int#,
1236         atbol      :: Int#,
1237         context    :: [LayoutContext]
1238      }
1239
1240 type P a = StringBuffer         -- Input string
1241         -> PState
1242         -> ParseResult a
1243
1244 returnP   :: a -> P a
1245 returnP a buf s = POk s a
1246
1247 thenP      :: P a -> (a -> P b) -> P b
1248 m `thenP` k = \ buf s ->
1249         case m buf s of
1250                 POk s1 a -> k a buf s1
1251                 PFailed err  -> PFailed err
1252
1253 thenP_     :: P a -> P b -> P b
1254 m `thenP_` k = m `thenP` \_ -> k
1255
1256 mapP :: (a -> P b) -> [a] -> P [b]
1257 mapP f [] = returnP []
1258 mapP f (a:as) =
1259      f a `thenP` \b ->
1260      mapP f as `thenP` \bs ->
1261      returnP (b:bs)
1262
1263 failP :: String -> P a
1264 failP msg buf s = PFailed (text msg)
1265
1266 failMsgP :: Message -> P a
1267 failMsgP msg buf s = PFailed msg
1268
1269 lexError :: String -> P a
1270 lexError str buf s@PState{ loc = loc }
1271   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1272
1273 getSrcLocP :: P SrcLoc
1274 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1275
1276 -- use a temporary SrcLoc for the duration of the argument
1277 setSrcLocP :: SrcLoc -> P a -> P a
1278 setSrcLocP new_loc p buf s =
1279   case p buf s{ loc=new_loc } of
1280       POk _ a   -> POk s a
1281       PFailed e -> PFailed e
1282
1283 getSrcFile :: P FAST_STRING
1284 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1285
1286 pushContext :: LayoutContext -> P ()
1287 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1288
1289 {-
1290
1291 This special case in layoutOn is to handle layout contexts with are
1292 indented the same or less than the current context.  This is illegal
1293 according to the Haskell spec, so we have to arrange to close the
1294 current context.  eg.
1295
1296 class Foo a where
1297 class Bar a
1298
1299 after the first 'where', the sequence of events is:
1300
1301         - layout system inserts a ';' (column 0)
1302         - parser begins a new context at column 0
1303         - parser shifts ';' (legal empty declaration)
1304         - parser sees 'class': parse error (we're still in the inner context)
1305
1306 trouble is, by the time we know we need a new context, the lexer has
1307 already generated the ';'.  Hacky solution is as follows: since we
1308 know the column of the next token (it's the column number of the new
1309 context), we set the ACTUAL column number of the new context to this
1310 numer plus one.  Hence the next time the lexer is called, a '}' will
1311 be generated to close the new context straight away.  Furthermore, we
1312 have to set the atbol flag so that the ';' that the parser shifted as
1313 part of the new context is re-generated.
1314
1315 when the new context is *less* indented than the current one:
1316
1317 f = f where g = g where
1318 h = h
1319
1320         - current context: column 12.
1321         - on seeing 'h' (column 0), the layout system inserts '}'
1322         - parser starts a new context, column 0
1323         - parser sees '}', uses it to close new context
1324         - we still need to insert another '}' followed by a ';',
1325           hence the atbol trick.
1326
1327 There's also a special hack in here to deal with
1328
1329         do
1330            ....
1331            e $ do
1332            blah
1333
1334 i.e. the inner context is at the same indentation level as the outer
1335 context.  This is strictly illegal according to Haskell 98, but
1336 there's a lot of existing code using this style and it doesn't make
1337 any sense to disallow it, since empty 'do' lists don't make sense.
1338 -}
1339
1340 layoutOn :: Bool -> P ()
1341 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1342     let offset = lexemeIndex buf -# bol in
1343     case ctx of
1344         Layout prev_off : _
1345            | if strict then prev_off >=# offset else prev_off ># offset ->
1346                 --trace ("layout on, column: " ++  show (I# offset)) $
1347                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1348         other ->
1349                 --trace ("layout on, column: " ++  show (I# offset)) $
1350                 POk s{ context = Layout offset : ctx } ()
1351
1352 layoutOff :: P ()
1353 layoutOff buf s@(PState{ context = ctx }) =
1354     POk s{ context = NoLayout:ctx } ()
1355
1356 popContext :: P ()
1357 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1358   case ctx of
1359         (_:tl) -> POk s{ context = tl } ()
1360         []     -> PFailed (srcParseErr buf loc)
1361
1362 {-
1363  Note that if the name of the file we're processing ends
1364  with `hi-boot', we accept it on faith as having the right
1365  version. This is done so that .hi-boot files that comes
1366  with hsc don't have to be updated before every release,
1367  *and* it allows us to share .hi-boot files with versions
1368  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
1369
1370  If the version number is 0, the checking is also turned off.
1371  (needed to deal with GHC.hi only!)
1372
1373  Once we can assume we're compiling with a version of ghc that
1374  supports interface file checking, we can drop the special
1375  pleading
1376 -}
1377 checkVersion :: Maybe Integer -> P ()
1378 checkVersion mb@(Just v) buf s@(PState{loc = loc})
1379  | (v==0) || (v == fromIntegral opt_HiVersion) || opt_NoHiCheck = POk s ()
1380  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1381 checkVersion mb@Nothing  buf s@(PState{loc = loc})
1382  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
1383  | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
1384
1385
1386 -- for reasons of efficiency, flags indicating language extensions (eg,
1387 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1388 -- integer
1389
1390 glaExtsBit, ffiBit, parrBit :: Int
1391 glaExtsBit = 0
1392 ffiBit     = 1  -- FIXME: not used yet; still part of `glaExtsBit'
1393 parrBit    = 2
1394
1395 glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
1396 glaExtsEnabled flags = testBit (toInt32 flags) glaExtsBit
1397 ffiEnabled     flags = testBit (toInt32 flags) ffiBit
1398 parrEnabled    flags = testBit (toInt32 flags) parrBit
1399
1400 toInt32 :: Int# -> Int32
1401 toInt32 x# = fromIntegral (I# x#)
1402
1403 -- convenient record-based bitmap for the interface to the rest of the world
1404 --
1405 data ExtFlags = ExtFlags {
1406                   glasgowExtsEF :: Bool,
1407 --                ffiEF         :: Bool,  -- commented out to avoid warnings
1408                   parrEF        :: Bool   -- while not used yet
1409                 }
1410
1411 -- create a parse state
1412 --
1413 mkPState          :: SrcLoc -> ExtFlags -> PState
1414 mkPState loc exts  = PState {
1415                        loc        = loc,
1416                        extsBitmap = case (fromIntegral bitmap) of {I# bits -> bits},
1417                        bol        = 0#,
1418                        atbol      = 1#,
1419                        context    = []
1420                      }
1421                      where
1422                        bitmap =     glaExtsBit `setBitIf` glasgowExtsEF exts
1423 --                              .|. ffiBit     `setBitIf` ffiEF         exts
1424                                 .|. parrBit    `setBitIf` parrEF        exts
1425                        --
1426                        setBitIf :: Int -> Bool -> Int32
1427                        b `setBitIf` cond | cond      = bit b
1428                                          | otherwise = 0
1429
1430
1431 -----------------------------------------------------------------
1432
1433 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
1434 ifaceParseErr s l
1435   = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"),
1436           text (lexemeToString s), char '\'']
1437
1438 ifaceVersionErr hi_vers l toks
1439   = hsep [ppr l, ptext SLIT("Interface file version error;"),
1440           ptext SLIT("Expected"), int opt_HiVersion,
1441           ptext SLIT("found "), pp_version]
1442     where
1443      pp_version =
1444       case hi_vers of
1445         Nothing -> ptext SLIT("pre ghc-3.02 version")
1446         Just v  -> ptext SLIT("version") <+> integer v
1447
1448 -----------------------------------------------------------------------------
1449
1450 srcParseErr :: StringBuffer -> SrcLoc -> Message
1451 srcParseErr s l
1452   = hcat [ppr l,
1453           if null token
1454              then ptext SLIT(": parse error (possibly incorrect indentation)")
1455              else hcat [ptext SLIT(": parse error on input "),
1456                         char '`', text token, char '\'']
1457     ]
1458   where
1459         token = lexemeToString s
1460
1461 \end{code}