ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 module Lex (
  20
  21         srcParseErr,
  22
  23         -- Monad for parser
  24         Token(..), lexer, ParseResult(..), PState(..),
  25         ExtFlags(..), mkPState,
  26         StringBuffer,
  27
  28         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  29         getSrcLocP, setSrcLocP, getSrcFile,
  30         layoutOn, layoutOff, pushContext, popContext
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( toUpper, isDigit, chr, ord )
  36 import Ratio            ( (%) )
  37
  38 import PrelNames        ( mkTupNameStr )
  39 import ForeignCall      ( Safety(..) )
  40 import UniqFM           ( listToUFM, lookupUFM )
  41 import BasicTypes       ( Boxity(..) )
  42 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  43                           replaceSrcLine, mkSrcLoc )
  44
  45 import ErrUtils         ( Message )
  46 import Outputable
  47
  48 import FastString
  49 import StringBuffer
  50 import GlaExts
  51 import Ctype
  52
  53 import Bits             ( Bits(..) )       -- non-std
  54 import Int              ( Int32 )
  55 \end{code}
  56
  57 %************************************************************************
  58 %*                                                                      *
  59 \subsection{Data types}
  60 %*                                                                      *
  61 %************************************************************************
  62
  63 The token data type, fairly un-interesting except from one
  64 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  65 strictness, unfolding etc).
  66
  67 The Idea/Observation here is that the renamer needs to scan through
  68 all of an interface file before it can continue. But only a fraction
  69 of the information contained in the file turns out to be useful, so
  70 delaying as much as possible of the scanning and parsing of an
  71 interface file Makes Sense (Heap profiles of the compiler
  72 show a reduction in heap usage by at least a factor of two,
  73 post-renamer).
  74
  75 Hence, the interface file lexer spots when value declarations are
  76 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  77 for the type and any other id info for that binding (unfolding, strictness
  78 etc). These constructors are applied to the result of lexing these sub-chunks.
  79
  80 The lexing of the type and id info is all done lazily, of course, so
  81 the scanning (and subsequent parsing) will be done *only* on the ids the
  82 renamer finds out that it is interested in. The rest will just be junked.
  83 Laziness, you know it makes sense :-)
  84
  85 \begin{code}
  86 data Token
  87   = ITas                        -- Haskell keywords
  88   | ITcase
  89   | ITclass
  90   | ITdata
  91   | ITdefault
  92   | ITderiving
  93   | ITdo
  94   | ITelse
  95   | IThiding
  96   | ITif
  97   | ITimport
  98   | ITin
  99   | ITinfix
 100   | ITinfixl
 101   | ITinfixr
 102   | ITinstance
 103   | ITlet
 104   | ITmodule
 105   | ITnewtype
 106   | ITof
 107   | ITqualified
 108   | ITthen
 109   | ITtype
 110   | ITwhere
 111   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 112
 113   | ITforall                    -- GHC extension keywords
 114   | ITforeign
 115   | ITexport
 116   | ITlabel
 117   | ITdynamic
 118   | ITsafe
 119   | ITthreadsafe
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124   | ITdotnet
 125   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 126
 127   | ITspecialise_prag           -- Pragmas
 128   | ITsource_prag
 129   | ITinline_prag
 130   | ITnoinline_prag
 131   | ITrules_prag
 132   | ITdeprecated_prag
 133   | ITline_prag
 134   | ITscc_prag
 135   | ITclose_prag
 136
 137   | ITdotdot                    -- reserved symbols
 138   | ITdcolon
 139   | ITequal
 140   | ITlam
 141   | ITvbar
 142   | ITlarrow
 143   | ITrarrow
 144   | ITat
 145   | ITtilde
 146   | ITdarrow
 147   | ITminus
 148   | ITbang
 149   | ITstar
 150   | ITdot
 151
 152   | ITbiglam                    -- GHC-extension symbols
 153
 154   | ITocurly                    -- special symbols
 155   | ITccurly
 156   | ITocurlybar                 -- {|, for type applications
 157   | ITccurlybar                 -- |}, for type applications
 158   | ITvccurly
 159   | ITobrack
 160   | ITopabrack                  -- [:, for parallel arrays with -fparr
 161   | ITcpabrack                  -- :], for parallel arrays with -fparr
 162   | ITcbrack
 163   | IToparen
 164   | ITcparen
 165   | IToubxparen
 166   | ITcubxparen
 167   | ITsemi
 168   | ITcomma
 169   | ITunderscore
 170   | ITbackquote
 171
 172   | ITvarid   FastString        -- identifiers
 173   | ITconid   FastString
 174   | ITvarsym  FastString
 175   | ITconsym  FastString
 176   | ITqvarid  (FastString,FastString)
 177   | ITqconid  (FastString,FastString)
 178   | ITqvarsym (FastString,FastString)
 179   | ITqconsym (FastString,FastString)
 180
 181   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 182   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 183
 184   | ITpragma StringBuffer
 185
 186   | ITchar       Int
 187   | ITstring     FastString
 188   | ITinteger    Integer
 189   | ITrational   Rational
 190
 191   | ITprimchar   Int
 192   | ITprimstring FastString
 193   | ITprimint    Integer
 194   | ITprimfloat  Rational
 195   | ITprimdouble Rational
 196   | ITlitlit     FastString
 197
 198   | ITunknown String            -- Used when the lexer can't make sense of it
 199   | ITeof                       -- end of file token
 200   deriving Show -- debugging
 201 \end{code}
 202
 203 -----------------------------------------------------------------------------
 204 Keyword Lists
 205
 206 \begin{code}
 207 pragmaKeywordsFM = listToUFM $
 208       map (\ (x,y) -> (mkFastString x,y))
 209        [( "SPECIALISE", ITspecialise_prag ),
 210         ( "SPECIALIZE", ITspecialise_prag ),
 211         ( "SOURCE",     ITsource_prag ),
 212         ( "INLINE",     ITinline_prag ),
 213         ( "NOINLINE",   ITnoinline_prag ),
 214         ( "NOTINLINE",  ITnoinline_prag ),
 215         ( "LINE",       ITline_prag ),
 216         ( "RULES",      ITrules_prag ),
 217         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 218         ( "SCC",        ITscc_prag ),
 219         ( "DEPRECATED", ITdeprecated_prag )
 220         ]
 221
 222 haskellKeywordsFM = listToUFM $
 223       map (\ (x,y) -> (mkFastString x,y))
 224        [( "_",          ITunderscore ),
 225         ( "as",         ITas ),
 226         ( "case",       ITcase ),
 227         ( "class",      ITclass ),
 228         ( "data",       ITdata ),
 229         ( "default",    ITdefault ),
 230         ( "deriving",   ITderiving ),
 231         ( "do",         ITdo ),
 232         ( "else",       ITelse ),
 233         ( "hiding",     IThiding ),
 234         ( "if",         ITif ),
 235         ( "import",     ITimport ),
 236         ( "in",         ITin ),
 237         ( "infix",      ITinfix ),
 238         ( "infixl",     ITinfixl ),
 239         ( "infixr",     ITinfixr ),
 240         ( "instance",   ITinstance ),
 241         ( "let",        ITlet ),
 242         ( "module",     ITmodule ),
 243         ( "newtype",    ITnewtype ),
 244         ( "of",         ITof ),
 245         ( "qualified",  ITqualified ),
 246         ( "then",       ITthen ),
 247         ( "type",       ITtype ),
 248         ( "where",      ITwhere ),
 249         ( "_scc_",      ITscc )         -- ToDo: remove
 250      ]
 251
 252 isSpecial :: Token -> Bool
 253 -- If we see M.x, where x is a keyword, but
 254 -- is special, we treat is as just plain M.x,
 255 -- not as a keyword.
 256 isSpecial ITas          = True
 257 isSpecial IThiding      = True
 258 isSpecial ITqualified   = True
 259 isSpecial ITforall      = True
 260 isSpecial ITexport      = True
 261 isSpecial ITlabel       = True
 262 isSpecial ITdynamic     = True
 263 isSpecial ITsafe        = True
 264 isSpecial ITthreadsafe  = True
 265 isSpecial ITunsafe      = True
 266 isSpecial ITwith        = True
 267 isSpecial ITccallconv   = True
 268 isSpecial ITstdcallconv = True
 269 isSpecial _             = False
 270
 271 -- the bitmap provided as the third component indicates whether the
 272 -- corresponding extension keyword is valid under the extension options
 273 -- provided to the compiler; if the extension corresponding to *any* of the
 274 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 275 -- facilitates using a keyword in two different extensions that can be
 276 -- activated independently)
 277 --
 278 ghcExtensionKeywordsFM = listToUFM $
 279         map (\(x, y, z) -> (mkFastString x, (y, z)))
 280      [  ( "forall",     ITforall,        bit glaExtsBit),
 281         ( "foreign",    ITforeign,       bit ffiBit),
 282         ( "export",     ITexport,        bit ffiBit),
 283         ( "label",      ITlabel,         bit ffiBit),
 284         ( "dynamic",    ITdynamic,       bit ffiBit),
 285         ( "safe",       ITsafe,          bit ffiBit),
 286         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 287         ( "unsafe",     ITunsafe,        bit ffiBit),
 288         ( "with",       ITwith,          bit withBit),
 289         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 290         ( "ccall",      ITccallconv,     bit ffiBit),
 291         ( "dotnet",     ITdotnet,        bit ffiBit),
 292         ("_ccall_",     ITccall (False, False, PlayRisky),
 293                                          bit glaExtsBit),
 294         ("_ccall_GC_",  ITccall (False, False, PlaySafe False),
 295                                          bit glaExtsBit),
 296         ("_casm_",      ITccall (False, True,  PlayRisky),
 297                                          bit glaExtsBit),
 298         ("_casm_GC_",   ITccall (False, True,  PlaySafe False),
 299                                          bit glaExtsBit)
 300      ]
 301
 302 haskellKeySymsFM = listToUFM $
 303         map (\ (x,y) -> (mkFastString x,y))
 304       [ ("..",          ITdotdot)
 305        ,("::",          ITdcolon)
 306        ,("=",           ITequal)
 307        ,("\\",          ITlam)
 308        ,("|",           ITvbar)
 309        ,("<-",          ITlarrow)
 310        ,("->",          ITrarrow)
 311        ,("@",           ITat)
 312        ,("~",           ITtilde)
 313        ,("=>",          ITdarrow)
 314        ,("-",           ITminus)
 315        ,("!",           ITbang)
 316        ,("*",           ITstar)
 317        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 318        ]
 319 \end{code}
 320
 321 -----------------------------------------------------------------------------
 322 The lexical analyser
 323
 324 Lexer state:
 325
 326         - (exts)  lexing a source with extensions, eg, an interface file or
 327                   with -fglasgow-exts
 328         - (bol)   pointer to beginning of line (for column calculations)
 329         - (buf)   pointer to beginning of token
 330         - (buf)   pointer to current char
 331         - (atbol) flag indicating whether we're at the beginning of a line
 332
 333 \begin{code}
 334 lexer :: (Token -> P a) -> P a
 335 lexer cont buf s@(PState{
 336                     loc = loc,
 337                     extsBitmap = exts,
 338                     bol = bol,
 339                     atbol = atbol,
 340                     context = ctx
 341                 })
 342
 343         -- first, start a new lexeme and lose all the whitespace
 344   =  _scc_ "Lexer"
 345   tab line bol atbol (stepOverLexeme buf)
 346   where
 347         line = srcLocLine loc
 348
 349         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 350           case currentChar# buf of
 351
 352             '\NUL'# ->
 353                    if bufferExhausted (stepOn buf)
 354                         then cont ITeof buf s'
 355                         else trace "lexer: misplaced NUL?" $
 356                              tab y bol atbol (stepOn buf)
 357
 358             '\n'# -> let buf' = stepOn buf
 359                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 360
 361                 -- find comments.  This got harder in Haskell 98.
 362             '-'# ->  let trundle n =
 363                           let next = lookAhead# buf n in
 364                           if next `eqChar#` '-'# then trundle (n +# 1#)
 365                           else if is_symbol next || n <# 2#
 366                                 then is_a_token
 367                                 else tab y bol atbol
 368                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 369                     in trundle 1#
 370
 371                 -- comments and pragmas.  We deal with LINE pragmas here,
 372                 -- and throw out any unrecognised pragmas as comments.  Any
 373                 -- pragmas we know about are dealt with later (after any layout
 374                 -- processing if necessary).
 375             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 376                 if lookAhead# buf 2# `eqChar#` '#'# then
 377                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 378                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 379                   let lexeme = mkFastString -- ToDo: too slow
 380                                   (map toUpper (lexemeToString buf2)) in
 381                   case lookupUFM pragmaKeywordsFM lexeme of
 382                         -- ignore RULES pragmas when -fglasgow-exts is off
 383                         Just ITrules_prag | not (glaExtsEnabled exts) ->
 384                            skip_to_end (stepOnBy# buf 2#) s'
 385                         Just ITline_prag ->
 386                            line_prag skip_to_end buf2 s'
 387                         Just other -> is_a_token
 388                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 389                   }}
 390
 391                 else skip_to_end (stepOnBy# buf 2#) s'
 392                 where
 393                     skip_to_end = skipNestedComment (lexer cont)
 394
 395                 -- special GHC extension: we grok cpp-style #line pragmas
 396             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 397                 let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
 398                            lookAhead# buf 2# `eqChar#` 'i'# &&
 399                            lookAhead# buf 3# `eqChar#` 'n'# &&
 400                            lookAhead# buf 4# `eqChar#` 'e'#  = stepOnBy# buf 5#
 401                          | otherwise = stepOn buf
 402                 in
 403                 case expandWhile# is_space buf1 of { buf2 ->
 404                 if is_digit (currentChar# buf2)
 405                         then line_prag next_line buf2 s'
 406                         else is_a_token
 407                 }
 408                 where
 409                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 410
 411                 -- tabs have been expanded beforehand
 412             c | is_space c -> tab y bol atbol (stepOn buf)
 413               | otherwise  -> is_a_token
 414
 415            where s' = s{loc = replaceSrcLine loc y,
 416                         bol = bol,
 417                        atbol = atbol}
 418
 419                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 420                             | otherwise    = lexToken cont exts buf s'
 421
 422 -- {-# LINE .. #-} pragmas.  yeuch.
 423 line_prag cont buf s@PState{loc=loc} =
 424   case expandWhile# is_space buf                of { buf1 ->
 425   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 426   -- subtract one: the line number refers to the *following* line.
 427   let real_line = line - 1 in
 428   case fromInteger real_line                    of { i@(I# l) ->
 429         -- ToDo, if no filename then we skip the newline.... d'oh
 430   case expandWhile# is_space buf2               of { buf3 ->
 431   case currentChar# buf3                        of
 432      '\"'#{-"-} ->
 433         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 434         let
 435             file = lexemeToFastString buf4
 436             new_buf = stepOn (stepOverLexeme buf4)
 437         in
 438         if nullFastString file
 439                 then cont new_buf s{loc = replaceSrcLine loc l}
 440                 else cont new_buf s{loc = mkSrcLoc file i}
 441         }
 442      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 443   }}}}
 444
 445 skipNestedComment :: P a -> P a
 446 skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
 447
 448 skipNestedComment' :: SrcLoc -> P a -> P a
 449 skipNestedComment' orig_loc cont buf = loop buf
 450  where
 451    loop buf =
 452      case currentChar# buf of
 453         '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
 454
 455         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 456               skipNestedComment
 457                 (skipNestedComment' orig_loc cont)
 458                 (stepOnBy# buf 2#)
 459
 460         '\n'# -> \ s@PState{loc=loc} ->
 461                  let buf' = stepOn buf in
 462                  loop buf' s{loc = incSrcLine loc,
 463                              bol = currentIndex# buf',
 464                              atbol = 1#}
 465
 466         -- pass the original SrcLoc to lexError so that the error is
 467         -- reported at the line it was originally on, not the line at
 468         -- the end of the file.
 469         '\NUL'# | bufferExhausted (stepOn buf) ->
 470                 \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
 471
 472         _   -> loop (stepOn buf)
 473
 474 -- When we are lexing the first token of a line, check whether we need to
 475 -- insert virtual semicolons or close braces due to layout.
 476
 477 lexBOL :: (Token -> P a) -> P a
 478 lexBOL cont buf s@(PState{
 479                     loc = loc,
 480                     extsBitmap = exts,
 481                     bol = bol,
 482                     atbol = atbol,
 483                     context = ctx
 484                   }) =
 485         if need_close_curly then
 486                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 487                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 488         else if need_semi_colon then
 489                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 490                 cont ITsemi buf s{atbol = 0#}
 491         else
 492                 lexToken cont exts buf s{atbol = 0#}
 493   where
 494         col = currentIndex# buf -# bol
 495
 496         need_close_curly =
 497                 case ctx of
 498                         [] -> False
 499                         (i:_) -> case i of
 500                                     NoLayout -> False
 501                                     Layout n -> col <# n
 502         need_semi_colon =
 503                 case ctx of
 504                         [] -> False
 505                         (i:_) -> case i of
 506                                     NoLayout -> False
 507                                     Layout n -> col ==# n
 508
 509
 510 lexToken :: (Token -> P a) -> Int# -> P a
 511 lexToken cont exts buf =
 512 -- trace "lexToken" $
 513   case currentChar# buf of
 514
 515     -- special symbols ----------------------------------------------------
 516     '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
 517                 -> cont IToubxparen (setCurrentPos# buf 2#)
 518          | otherwise
 519                 -> cont IToparen (incLexeme buf)
 520
 521     ')'# -> cont ITcparen    (incLexeme buf)
 522     '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
 523             cont ITopabrack  (setCurrentPos# buf 2#)
 524          | otherwise ->
 525             cont ITobrack    (incLexeme buf)
 526     ']'# -> cont ITcbrack    (incLexeme buf)
 527     ','# -> cont ITcomma     (incLexeme buf)
 528     ';'# -> cont ITsemi      (incLexeme buf)
 529     '}'# -> \ s@PState{context = ctx} ->
 530             case ctx of
 531                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 532                 _        -> lexError "too many '}'s" buf s
 533     '|'# -> case lookAhead# buf 1# of
 534                  '}'#  | glaExtsEnabled exts -> cont ITccurlybar
 535                                                      (setCurrentPos# buf 2#)
 536                  _                           -> lex_sym cont (incLexeme buf)
 537     ':'# -> case lookAhead# buf 1# of
 538                  ']'#  | parrEnabled exts    -> cont ITcpabrack
 539                                                      (setCurrentPos# buf 2#)
 540                  _                           -> lex_sym cont (incLexeme buf)
 541
 542
 543     '#'# -> case lookAhead# buf 1# of
 544                 ')'#  | glaExtsEnabled exts
 545                      -> cont ITcubxparen (setCurrentPos# buf 2#)
 546                 '-'# -> case lookAhead# buf 2# of
 547                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 548                            _    -> lex_sym cont (incLexeme buf)
 549                 _    -> lex_sym cont (incLexeme buf)
 550
 551     '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
 552                 -> lex_cstring cont (setCurrentPos# buf 2#)
 553          | otherwise
 554                 -> cont ITbackquote (incLexeme buf)
 555
 556     '{'# ->   -- for Emacs: -}
 557             case lookAhead# buf 1# of
 558            '|'# | glaExtsEnabled exts
 559                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 560            '-'# -> case lookAhead# buf 2# of
 561                     '#'# -> lex_prag cont (setCurrentPos# buf 3#)
 562                     _    -> cont ITocurly (incLexeme buf)
 563            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 564
 565     -- strings/characters -------------------------------------------------
 566     '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf)
 567     '\''#      -> lex_char (char_end cont) exts (incLexeme buf)
 568
 569         -- Hexadecimal and octal constants
 570     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 571                 -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
 572          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 573                 -> readNum (after_lexnum cont exts) buf' is_octdigit  8 oct_or_dec
 574         where ch   = lookAhead# buf 1#
 575               ch2  = lookAhead# buf 2#
 576               buf' = setCurrentPos# buf 2#
 577
 578     '\NUL'# ->
 579             if bufferExhausted (stepOn buf) then
 580                cont ITeof buf
 581             else
 582                trace "lexIface: misplaced NUL?" $
 583                cont (ITunknown "\NUL") (stepOn buf)
 584
 585     '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 586             lex_ip ITdupipvarid cont (incLexeme buf)
 587     '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 588             lex_ip ITsplitipvarid cont (incLexeme buf)
 589     c | is_digit  c -> lex_num cont exts 0 buf
 590       | is_symbol c -> lex_sym cont buf
 591       | is_upper  c -> lex_con cont exts buf
 592       | is_ident  c -> lex_id  cont exts buf
 593       | otherwise   -> lexError "illegal character" buf
 594
 595 -- Int# is unlifted, and therefore faster than Bool for flags.
 596 {-# INLINE flag #-}
 597 flag :: Int# -> Bool
 598 flag 0# = False
 599 flag _  = True
 600
 601 -------------------------------------------------------------------------------
 602 -- Pragmas
 603
 604 lex_prag cont buf
 605   = case expandWhile# is_space buf of { buf1 ->
 606     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 607     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 608     case lookupUFM pragmaKeywordsFM lexeme of
 609         Just kw -> cont kw (mergeLexemes buf buf2)
 610         Nothing -> panic "lex_prag"
 611   }}
 612
 613 -------------------------------------------------------------------------------
 614 -- Strings & Chars
 615
 616 lex_string cont exts s buf
 617   = case currentChar# buf of
 618         '"'#{-"-} ->
 619            let buf' = incLexeme buf
 620                s' = mkFastStringNarrow (map chr (reverse s))
 621            in case currentChar# buf' of
 622                 '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s
 623                     then cont (ITprimstring s') (incLexeme buf')
 624                     else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 625                 _                   -> cont (ITstring s') buf'
 626
 627         -- ignore \& in a string, deal with string gaps
 628         '\\'# | next_ch `eqChar#` '&'#
 629                 -> lex_string cont exts s buf'
 630               | is_space next_ch
 631                 -> lex_stringgap cont exts s (incLexeme buf)
 632
 633             where next_ch = lookAhead# buf 1#
 634                   buf' = setCurrentPos# buf 2#
 635
 636         _ -> lex_char (lex_next_string cont s) exts buf
 637
 638 lex_stringgap cont exts s buf
 639   = let buf' = incLexeme buf in
 640     case currentChar# buf of
 641         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
 642                   st{loc = incSrcLine loc}
 643         '\\'# -> lex_string cont exts s buf'
 644         c | is_space c -> lex_stringgap cont exts s buf'
 645         other -> charError buf'
 646
 647 lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
 648
 649 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 650 lex_char cont exts buf
 651   = case currentChar# buf of
 652         '\\'# -> lex_escape (cont exts) (incLexeme buf)
 653         c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf)
 654         other -> charError buf
 655
 656 char_end cont exts c buf
 657   = case currentChar# buf of
 658         '\''# -> let buf' = incLexeme buf in
 659                  case currentChar# buf' of
 660                         '#'# | glaExtsEnabled exts
 661                                 -> cont (ITprimchar c) (incLexeme buf')
 662                         _       -> cont (ITchar c) buf'
 663         _     -> charError buf
 664
 665 lex_escape cont buf
 666   = let buf' = incLexeme buf in
 667     case currentChar# buf of
 668         'a'#       -> cont (ord '\a') buf'
 669         'b'#       -> cont (ord '\b') buf'
 670         'f'#       -> cont (ord '\f') buf'
 671         'n'#       -> cont (ord '\n') buf'
 672         'r'#       -> cont (ord '\r') buf'
 673         't'#       -> cont (ord '\t') buf'
 674         'v'#       -> cont (ord '\v') buf'
 675         '\\'#      -> cont (ord '\\') buf'
 676         '"'#       -> cont (ord '\"') buf'
 677         '\''#      -> cont (ord '\'') buf'
 678         '^'#       -> let c = currentChar# buf' in
 679                       if c `geChar#` '@'# && c `leChar#` '_'#
 680                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 681                         else charError buf'
 682
 683         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 684         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 685         x | is_digit x
 686                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 687
 688         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 689                                        Just buf2 <- [prefixMatch buf p] ] of
 690                             (c,buf2):_ -> cont (ord c) buf2
 691                             [] -> charError buf'
 692
 693 after_charnum cont i buf
 694   = if i >= 0 && i <= 0x10FFFF
 695         then cont (fromInteger i) buf
 696         else charError buf
 697
 698 readNum cont buf is_digit base conv = read buf 0
 699   where read buf i
 700           = case currentChar# buf of { c ->
 701             if is_digit c
 702                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 703                 else cont i buf
 704             }
 705
 706 is_hexdigit c
 707         =  is_digit c
 708         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 709         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 710
 711 hex c | is_digit c = ord# c -# ord# '0'#
 712       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 713 oct_or_dec c = ord# c -# ord# '0'#
 714
 715 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 716
 717 to_lower c
 718   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 719         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 720   | otherwise = c
 721
 722 charError buf = lexError "error in character literal" buf
 723
 724 silly_escape_chars = [
 725         ("NUL", '\NUL'),
 726         ("SOH", '\SOH'),
 727         ("STX", '\STX'),
 728         ("ETX", '\ETX'),
 729         ("EOT", '\EOT'),
 730         ("ENQ", '\ENQ'),
 731         ("ACK", '\ACK'),
 732         ("BEL", '\BEL'),
 733         ("BS", '\BS'),
 734         ("HT", '\HT'),
 735         ("LF", '\LF'),
 736         ("VT", '\VT'),
 737         ("FF", '\FF'),
 738         ("CR", '\CR'),
 739         ("SO", '\SO'),
 740         ("SI", '\SI'),
 741         ("DLE", '\DLE'),
 742         ("DC1", '\DC1'),
 743         ("DC2", '\DC2'),
 744         ("DC3", '\DC3'),
 745         ("DC4", '\DC4'),
 746         ("NAK", '\NAK'),
 747         ("SYN", '\SYN'),
 748         ("ETB", '\ETB'),
 749         ("CAN", '\CAN'),
 750         ("EM", '\EM'),
 751         ("SUB", '\SUB'),
 752         ("ESC", '\ESC'),
 753         ("FS", '\FS'),
 754         ("GS", '\GS'),
 755         ("RS", '\RS'),
 756         ("US", '\US'),
 757         ("SP", '\SP'),
 758         ("DEL", '\DEL')
 759         ]
 760
 761 -----------------------------------------------------------------------------
 762 -- Numbers
 763
 764 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 765 lex_num cont exts acc buf =
 766  case scanNumLit acc buf of
 767      (acc',buf') ->
 768        case currentChar# buf' of
 769          '.'# | is_digit (lookAhead# buf' 1#) ->
 770              -- this case is not optimised at all, as the
 771              -- presence of floating point numbers in interface
 772              -- files is not that common. (ToDo)
 773             case expandWhile# is_digit (incLexeme buf') of
 774               buf2 -> -- points to first non digit char
 775
 776                 let l = case currentChar# buf2 of
 777                           'E'# -> do_exponent
 778                           'e'# -> do_exponent
 779                           _ -> buf2
 780
 781                     do_exponent
 782                         = let buf3 = incLexeme buf2 in
 783                           case currentChar# buf3 of
 784                                 '-'# | is_digit (lookAhead# buf3 1#)
 785                                    -> expandWhile# is_digit (incLexeme buf3)
 786                                 '+'# | is_digit (lookAhead# buf3 1#)
 787                                    -> expandWhile# is_digit (incLexeme buf3)
 788                                 x | is_digit x -> expandWhile# is_digit buf3
 789                                 _ -> buf2
 790
 791                     v = readRational__ (lexemeToString l)
 792
 793                 in case currentChar# l of -- glasgow exts only
 794                       '#'# | glaExtsEnabled exts -> let l' = incLexeme l in
 795                               case currentChar# l' of
 796                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 797                                 _    -> cont (ITprimfloat  v) l'
 798                       _ -> cont (ITrational v) l
 799
 800          _ -> after_lexnum cont exts acc' buf'
 801
 802 after_lexnum cont exts i buf
 803   = case currentChar# buf of
 804         '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf)
 805         _                          -> cont (ITinteger i) buf
 806
 807 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
 808 readRational r = do
 809      (n,d,s) <- readFix r
 810      (k,t)   <- readExp s
 811      return ((n%1)*10^^(k-d), t)
 812  where
 813      readFix r = do
 814         (ds,s)  <- lexDecDigits r
 815         (ds',t) <- lexDotDigits s
 816         return (read (ds++ds'), length ds', t)
 817
 818      readExp (e:s) | e `elem` "eE" = readExp' s
 819      readExp s                     = return (0,s)
 820
 821      readExp' ('+':s) = readDec s
 822      readExp' ('-':s) = do
 823                         (k,t) <- readDec s
 824                         return (-k,t)
 825      readExp' s       = readDec s
 826
 827      readDec s = do
 828         (ds,r) <- nonnull isDigit s
 829         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 830                 r)
 831
 832      lexDecDigits = nonnull isDigit
 833
 834      lexDotDigits ('.':s) = return (span isDigit s)
 835      lexDotDigits s       = return ("",s)
 836
 837      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 838                       return (cs,t)
 839
 840 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
 841 readRational__ top_s
 842   = case top_s of
 843       '-' : xs -> - (read_me xs)
 844       xs       -> read_me xs
 845   where
 846     read_me s
 847       = case (do { (x,"") <- readRational s ; return x }) of
 848           [x] -> x
 849           []  -> error ("readRational__: no parse:"        ++ top_s)
 850           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
 851
 852 -----------------------------------------------------------------------------
 853 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 854
 855 -- we lexemeToFastString on the bit between the ``''s, but include the
 856 -- quotes in the full lexeme.
 857
 858 lex_cstring cont buf =
 859  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 860    Just buf' -> cont (ITlitlit (lexemeToFastString
 861                                 (setCurrentPos# buf' (negateInt# 2#))))
 862                    (mergeLexemes buf buf')
 863    Nothing   -> lexError "unterminated ``" buf
 864
 865 -----------------------------------------------------------------------------
 866 -- identifiers, symbols etc.
 867
 868 lex_ip ip_constr cont buf =
 869  case expandWhile# is_ident buf of
 870    buf' -> cont (ip_constr (tailFS lexeme)) buf'
 871         where lexeme = lexemeToFastString buf'
 872
 873 lex_id cont exts buf =
 874  let buf1 = expandWhile# is_ident buf in
 875  seq buf1 $
 876
 877  case (if glaExtsEnabled exts
 878         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 879         else buf1)                              of { buf' ->
 880  seq buf' $
 881
 882  let lexeme  = lexemeToFastString buf' in
 883
 884  case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 885         Just kwd_token -> --trace ("hkeywd: "++unpackFS(lexeme)) $
 886                           cont kwd_token buf';
 887         Nothing        ->
 888
 889  let var_token = cont (ITvarid lexeme) buf' in
 890
 891  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 892     Just (kwd_token, validExts)
 893       | validExts .&. (I# exts) /= 0 -> cont kwd_token buf';
 894     _                                -> var_token
 895
 896  }}}
 897
 898 lex_sym cont buf =
 899  -- trace "lex_sym" $
 900  case expandWhile# is_symbol buf of
 901    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 902                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 903                                   cont kwd_token buf' ;
 904                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 905                                   cont (mk_var_token lexeme) buf'
 906            }
 907         where lexeme = lexemeToFastString buf'
 908
 909
 910 -- lex_con recursively collects components of a qualified identifer.
 911 -- The argument buf is the StringBuffer representing the lexeme
 912 -- identified so far, where the next character is upper-case.
 913
 914 lex_con cont exts buf =
 915  -- trace ("con: "{-++unpackFS lexeme-}) $
 916  let empty_buf = stepOverLexeme buf in
 917  case expandWhile# is_ident empty_buf of { buf1 ->
 918  case slurp_trailing_hashes buf1 exts of { con_buf ->
 919
 920  let all_buf = mergeLexemes buf con_buf
 921
 922      con_lexeme = lexemeToFastString con_buf
 923      mod_lexeme = lexemeToFastString (decLexeme buf)
 924      all_lexeme = lexemeToFastString all_buf
 925
 926      just_a_conid
 927         | emptyLexeme buf = cont (ITconid con_lexeme)               all_buf
 928         | otherwise       = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
 929  in
 930
 931  case currentChar# all_buf of
 932      '.'# -> maybe_qualified cont exts all_lexeme
 933                 (incLexeme all_buf) just_a_conid
 934      _    -> just_a_conid
 935   }}
 936
 937
 938 maybe_qualified cont exts mod buf just_a_conid =
 939  -- trace ("qid: "{-++unpackFS lexeme-}) $
 940  case currentChar# buf of
 941   '['# ->       -- Special case for []
 942     case lookAhead# buf 1# of
 943      ']'# -> cont (ITqconid  (mod,FSLIT("[]"))) (setCurrentPos# buf 2#)
 944      _    -> just_a_conid
 945
 946   '('# ->  -- Special case for (,,,)
 947            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 948     case lookAhead# buf 1# of
 949      '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
 950                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 951                                 just_a_conid
 952                 _    -> just_a_conid
 953      ')'# -> cont (ITqconid (mod,FSLIT("()"))) (setCurrentPos# buf 2#)
 954      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 955      _    -> just_a_conid
 956
 957   '-'# -> case lookAhead# buf 1# of
 958             '>'# -> cont (ITqconid (mod,FSLIT("(->)"))) (setCurrentPos# buf 2#)
 959             _    -> lex_id3 cont exts mod buf just_a_conid
 960
 961   _    -> lex_id3 cont exts mod buf just_a_conid
 962
 963
 964 lex_id3 cont exts mod buf just_a_conid
 965   | is_upper (currentChar# buf) =
 966      lex_con cont exts buf
 967
 968   | is_symbol (currentChar# buf) =
 969      let
 970         start_new_lexeme = stepOverLexeme buf
 971      in
 972      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 973      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 974      let
 975        lexeme  = lexemeToFastString buf'
 976         -- real lexeme is M.<sym>
 977        new_buf = mergeLexemes buf buf'
 978      in
 979      cont (mk_qvar_token mod lexeme) new_buf
 980         -- wrong, but arguably morally right: M... is now a qvarsym
 981      }
 982
 983   | otherwise   =
 984      let
 985         start_new_lexeme = stepOverLexeme buf
 986      in
 987      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
 988      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 989      if emptyLexeme buf1
 990             then just_a_conid
 991             else
 992
 993      case slurp_trailing_hashes buf1 exts of { buf' ->
 994
 995      let
 996       lexeme      = lexemeToFastString buf'
 997       new_buf     = mergeLexemes buf buf'
 998       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
 999      in
1000      case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1001             Nothing          -> is_a_qvarid ;
1002
1003             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1004                            -> is_a_qvarid          --  recognised as keywords here.
1005                            | otherwise
1006                            -> just_a_conid         -- avoid M.where etc.
1007      }}}
1008
1009 slurp_trailing_hashes buf exts
1010   | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
1011   | otherwise           = buf
1012
1013
1014 mk_var_token pk_str
1015   | is_upper f          = ITconid pk_str
1016   | is_ident f          = ITvarid pk_str
1017   | f `eqChar#` ':'#    = ITconsym pk_str
1018   | otherwise           = ITvarsym pk_str
1019   where
1020       (C# f) = headFS pk_str
1021       -- tl     = _TAIL_ pk_str
1022
1023 mk_qvar_token m token =
1024 -- trace ("mk_qvar ") $
1025  case mk_var_token token of
1026    ITconid n  -> ITqconid  (m,n)
1027    ITvarid n  -> ITqvarid  (m,n)
1028    ITconsym n -> ITqconsym (m,n)
1029    ITvarsym n -> ITqvarsym (m,n)
1030    _          -> ITunknown (show token)
1031 \end{code}
1032
1033 ----------------------------------------------------------------------------
1034 Horrible stuff for dealing with M.(,,,)
1035
1036 \begin{code}
1037 lex_tuple cont mod buf back_off =
1038   go 2 buf
1039   where
1040    go n buf =
1041     case currentChar# buf of
1042       ','# -> go (n+1) (stepOn buf)
1043       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1044       _    -> back_off
1045
1046 lex_ubx_tuple cont mod buf back_off =
1047   go 2 buf
1048   where
1049    go n buf =
1050     case currentChar# buf of
1051       ','# -> go (n+1) (stepOn buf)
1052       '#'# -> case lookAhead# buf 1# of
1053                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1054                                  (stepOnBy# buf 2#)
1055                 _    -> back_off
1056       _    -> back_off
1057 \end{code}
1058
1059 -----------------------------------------------------------------------------
1060
1061 \begin{code}
1062 data LayoutContext
1063   = NoLayout
1064   | Layout Int#
1065
1066 data ParseResult a
1067   = POk PState a
1068   | PFailed Message
1069
1070 data PState = PState {
1071         loc        :: SrcLoc,
1072         extsBitmap :: Int#,     -- bitmap that determines permitted extensions
1073         bol        :: Int#,
1074         atbol      :: Int#,
1075         context    :: [LayoutContext]
1076      }
1077
1078 type P a = StringBuffer         -- Input string
1079         -> PState
1080         -> ParseResult a
1081
1082 returnP   :: a -> P a
1083 returnP a buf s = POk s a
1084
1085 thenP      :: P a -> (a -> P b) -> P b
1086 m `thenP` k = \ buf s ->
1087         case m buf s of
1088                 POk s1 a -> k a buf s1
1089                 PFailed err  -> PFailed err
1090
1091 thenP_     :: P a -> P b -> P b
1092 m `thenP_` k = m `thenP` \_ -> k
1093
1094 mapP :: (a -> P b) -> [a] -> P [b]
1095 mapP f [] = returnP []
1096 mapP f (a:as) =
1097      f a `thenP` \b ->
1098      mapP f as `thenP` \bs ->
1099      returnP (b:bs)
1100
1101 failP :: String -> P a
1102 failP msg buf s = PFailed (text msg)
1103
1104 failMsgP :: Message -> P a
1105 failMsgP msg buf s = PFailed msg
1106
1107 lexError :: String -> P a
1108 lexError str buf s@PState{ loc = loc }
1109   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1110
1111 getSrcLocP :: P SrcLoc
1112 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1113
1114 -- use a temporary SrcLoc for the duration of the argument
1115 setSrcLocP :: SrcLoc -> P a -> P a
1116 setSrcLocP new_loc p buf s =
1117   case p buf s{ loc=new_loc } of
1118       POk _ a   -> POk s a
1119       PFailed e -> PFailed e
1120
1121 getSrcFile :: P FastString
1122 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1123
1124 pushContext :: LayoutContext -> P ()
1125 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1126
1127 {-
1128
1129 This special case in layoutOn is to handle layout contexts with are
1130 indented the same or less than the current context.  This is illegal
1131 according to the Haskell spec, so we have to arrange to close the
1132 current context.  eg.
1133
1134 class Foo a where
1135 class Bar a
1136
1137 after the first 'where', the sequence of events is:
1138
1139         - layout system inserts a ';' (column 0)
1140         - parser begins a new context at column 0
1141         - parser shifts ';' (legal empty declaration)
1142         - parser sees 'class': parse error (we're still in the inner context)
1143
1144 trouble is, by the time we know we need a new context, the lexer has
1145 already generated the ';'.  Hacky solution is as follows: since we
1146 know the column of the next token (it's the column number of the new
1147 context), we set the ACTUAL column number of the new context to this
1148 numer plus one.  Hence the next time the lexer is called, a '}' will
1149 be generated to close the new context straight away.  Furthermore, we
1150 have to set the atbol flag so that the ';' that the parser shifted as
1151 part of the new context is re-generated.
1152
1153 when the new context is *less* indented than the current one:
1154
1155 f = f where g = g where
1156 h = h
1157
1158         - current context: column 12.
1159         - on seeing 'h' (column 0), the layout system inserts '}'
1160         - parser starts a new context, column 0
1161         - parser sees '}', uses it to close new context
1162         - we still need to insert another '}' followed by a ';',
1163           hence the atbol trick.
1164
1165 There's also a special hack in here to deal with
1166
1167         do
1168            ....
1169            e $ do
1170            blah
1171
1172 i.e. the inner context is at the same indentation level as the outer
1173 context.  This is strictly illegal according to Haskell 98, but
1174 there's a lot of existing code using this style and it doesn't make
1175 any sense to disallow it, since empty 'do' lists don't make sense.
1176 -}
1177
1178 layoutOn :: Bool -> P ()
1179 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1180     let offset = lexemeIndex buf -# bol in
1181     case ctx of
1182         Layout prev_off : _
1183            | if strict then prev_off >=# offset else prev_off ># offset ->
1184                 --trace ("layout on, column: " ++  show (I# offset)) $
1185                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1186         other ->
1187                 --trace ("layout on, column: " ++  show (I# offset)) $
1188                 POk s{ context = Layout offset : ctx } ()
1189
1190 layoutOff :: P ()
1191 layoutOff buf s@(PState{ context = ctx }) =
1192     POk s{ context = NoLayout:ctx } ()
1193
1194 popContext :: P ()
1195 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1196   case ctx of
1197         (_:tl) -> POk s{ context = tl } ()
1198         []     -> PFailed (srcParseErr buf loc)
1199
1200 -- for reasons of efficiency, flags indicating language extensions (eg,
1201 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1202 -- integer
1203
1204 glaExtsBit, ffiBit, parrBit :: Int
1205 glaExtsBit = 0
1206 ffiBit     = 1
1207 parrBit    = 2
1208 withBit    = 3
1209
1210 glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
1211 glaExtsEnabled flags = testBit (toInt32 flags) glaExtsBit
1212 ffiEnabled     flags = testBit (toInt32 flags) ffiBit
1213 withEnabled    flags = testBit (toInt32 flags) withBit
1214 parrEnabled    flags = testBit (toInt32 flags) parrBit
1215
1216 toInt32 :: Int# -> Int32
1217 toInt32 x# = fromIntegral (I# x#)
1218
1219 -- convenient record-based bitmap for the interface to the rest of the world
1220 --
1221 -- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below)
1222 --
1223 data ExtFlags = ExtFlags {
1224                   glasgowExtsEF :: Bool,
1225                   ffiEF         :: Bool,
1226                   withEF        :: Bool,
1227                   parrEF        :: Bool
1228                 }
1229
1230 -- create a parse state
1231 --
1232 mkPState          :: SrcLoc -> ExtFlags -> PState
1233 mkPState loc exts  =
1234   PState {
1235     loc        = loc,
1236       extsBitmap = case (fromIntegral bitmap) of {I# bits -> bits},
1237       bol         = 0#,
1238       atbol      = 1#,
1239       context    = []
1240     }
1241     where
1242       bitmap =     glaExtsBit `setBitIf` glasgowExtsEF     exts
1243                .|. ffiBit     `setBitIf` (ffiEF            exts
1244                                           || glasgowExtsEF exts)
1245                .|. withBit    `setBitIf` withEF            exts
1246                .|. parrBit    `setBitIf` parrEF            exts
1247       --
1248       setBitIf :: Int -> Bool -> Int32
1249       b `setBitIf` cond | cond      = bit b
1250                         | otherwise = 0
1251
1252 -----------------------------------------------------------------------------
1253
1254 srcParseErr :: StringBuffer -> SrcLoc -> Message
1255 srcParseErr s l
1256   = hcat [ppr l,
1257           if null token
1258              then ptext SLIT(": parse error (possibly incorrect indentation)")
1259              else hcat [ptext SLIT(": parse error on input "),
1260                         char '`', text token, char '\'']
1261     ]
1262   where
1263         token = lexemeToString s
1264
1265 \end{code}