ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 module Lex (
  20
  21         srcParseErr,
  22
  23         -- Monad for parser
  24         Token(..), lexer, ParseResult(..), PState(..),
  25         ExtFlags(..), mkPState,
  26         StringBuffer,
  27
  28         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  29         getSrcLocP, setSrcLocP, getSrcFile,
  30         layoutOn, layoutOff, pushContext, popContext
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( toUpper, isDigit, chr, ord )
  36 import Ratio            ( (%) )
  37
  38 import PrelNames        ( mkTupNameStr )
  39 import ForeignCall      ( Safety(..) )
  40 import UniqFM           ( listToUFM, lookupUFM )
  41 import BasicTypes       ( Boxity(..) )
  42 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  43                           replaceSrcLine, mkSrcLoc )
  44
  45 import ErrUtils         ( Message )
  46 import Outputable
  47
  48 import FastString
  49 import StringBuffer
  50 import Ctype
  51
  52 import GLAEXTS
  53 import DATA_BITS        ( Bits(..) )
  54 import DATA_INT         ( Int32 )
  55 \end{code}
  56
  57 %************************************************************************
  58 %*                                                                      *
  59 \subsection{Data types}
  60 %*                                                                      *
  61 %************************************************************************
  62
  63 The token data type, fairly un-interesting except from one
  64 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  65 strictness, unfolding etc).
  66
  67 The Idea/Observation here is that the renamer needs to scan through
  68 all of an interface file before it can continue. But only a fraction
  69 of the information contained in the file turns out to be useful, so
  70 delaying as much as possible of the scanning and parsing of an
  71 interface file Makes Sense (Heap profiles of the compiler
  72 show a reduction in heap usage by at least a factor of two,
  73 post-renamer).
  74
  75 Hence, the interface file lexer spots when value declarations are
  76 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  77 for the type and any other id info for that binding (unfolding, strictness
  78 etc). These constructors are applied to the result of lexing these sub-chunks.
  79
  80 The lexing of the type and id info is all done lazily, of course, so
  81 the scanning (and subsequent parsing) will be done *only* on the ids the
  82 renamer finds out that it is interested in. The rest will just be junked.
  83 Laziness, you know it makes sense :-)
  84
  85 \begin{code}
  86 data Token
  87   = ITas                        -- Haskell keywords
  88   | ITcase
  89   | ITclass
  90   | ITdata
  91   | ITdefault
  92   | ITderiving
  93   | ITdo
  94   | ITelse
  95   | IThiding
  96   | ITif
  97   | ITimport
  98   | ITin
  99   | ITinfix
 100   | ITinfixl
 101   | ITinfixr
 102   | ITinstance
 103   | ITlet
 104   | ITmodule
 105   | ITnewtype
 106   | ITof
 107   | ITqualified
 108   | ITthen
 109   | ITtype
 110   | ITwhere
 111   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 112
 113   | ITforall                    -- GHC extension keywords
 114   | ITforeign
 115   | ITexport
 116   | ITlabel
 117   | ITdynamic
 118   | ITsafe
 119   | ITthreadsafe
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124   | ITdotnet
 125   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 126   | ITmdo
 127
 128   | ITspecialise_prag           -- Pragmas
 129   | ITsource_prag
 130   | ITinline_prag
 131   | ITnoinline_prag
 132   | ITrules_prag
 133   | ITdeprecated_prag
 134   | ITline_prag
 135   | ITscc_prag
 136   | ITclose_prag
 137
 138   | ITdotdot                    -- reserved symbols
 139   | ITcolon
 140   | ITdcolon
 141   | ITequal
 142   | ITlam
 143   | ITvbar
 144   | ITlarrow
 145   | ITrarrow
 146   | ITat
 147   | ITtilde
 148   | ITdarrow
 149   | ITminus
 150   | ITbang
 151   | ITstar
 152   | ITdot
 153
 154   | ITbiglam                    -- GHC-extension symbols
 155
 156   | ITocurly                    -- special symbols
 157   | ITccurly
 158   | ITocurlybar                 -- {|, for type applications
 159   | ITccurlybar                 -- |}, for type applications
 160   | ITvccurly
 161   | ITobrack
 162   | ITopabrack                  -- [:, for parallel arrays with -fparr
 163   | ITcpabrack                  -- :], for parallel arrays with -fparr
 164   | ITcbrack
 165   | IToparen
 166   | ITcparen
 167   | IToubxparen
 168   | ITcubxparen
 169   | ITsemi
 170   | ITcomma
 171   | ITunderscore
 172   | ITbackquote
 173
 174   | ITvarid   FastString        -- identifiers
 175   | ITconid   FastString
 176   | ITvarsym  FastString
 177   | ITconsym  FastString
 178   | ITqvarid  (FastString,FastString)
 179   | ITqconid  (FastString,FastString)
 180   | ITqvarsym (FastString,FastString)
 181   | ITqconsym (FastString,FastString)
 182
 183   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 184   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 185
 186   | ITpragma StringBuffer
 187
 188   | ITchar       Int
 189   | ITstring     FastString
 190   | ITinteger    Integer
 191   | ITrational   Rational
 192
 193   | ITprimchar   Int
 194   | ITprimstring FastString
 195   | ITprimint    Integer
 196   | ITprimfloat  Rational
 197   | ITprimdouble Rational
 198   | ITlitlit     FastString
 199
 200   -- MetaHaskell extension tokens
 201   | ITopenExpQuote              -- [| or [e|
 202   | ITopenPatQuote              -- [p|
 203   | ITopenDecQuote              -- [d|
 204   | ITopenTypQuote              -- [t|
 205   | ITcloseQuote                -- |]
 206   | ITidEscape   FastString     -- $x
 207   | ITparenEscape               -- $(
 208   | ITreifyType
 209   | ITreifyDecl
 210   | ITreifyFixity
 211
 212   | ITunknown String            -- Used when the lexer can't make sense of it
 213   | ITeof                       -- end of file token
 214   deriving Show -- debugging
 215 \end{code}
 216
 217 -----------------------------------------------------------------------------
 218 Keyword Lists
 219
 220 \begin{code}
 221 pragmaKeywordsFM = listToUFM $
 222       map (\ (x,y) -> (mkFastString x,y))
 223        [( "SPECIALISE", ITspecialise_prag ),
 224         ( "SPECIALIZE", ITspecialise_prag ),
 225         ( "SOURCE",     ITsource_prag ),
 226         ( "INLINE",     ITinline_prag ),
 227         ( "NOINLINE",   ITnoinline_prag ),
 228         ( "NOTINLINE",  ITnoinline_prag ),
 229         ( "LINE",       ITline_prag ),
 230         ( "RULES",      ITrules_prag ),
 231         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 232         ( "SCC",        ITscc_prag ),
 233         ( "DEPRECATED", ITdeprecated_prag )
 234         ]
 235
 236 haskellKeywordsFM = listToUFM $
 237       map (\ (x,y) -> (mkFastString x,y))
 238        [( "_",          ITunderscore ),
 239         ( "as",         ITas ),
 240         ( "case",       ITcase ),
 241         ( "class",      ITclass ),
 242         ( "data",       ITdata ),
 243         ( "default",    ITdefault ),
 244         ( "deriving",   ITderiving ),
 245         ( "do",         ITdo ),
 246         ( "else",       ITelse ),
 247         ( "hiding",     IThiding ),
 248         ( "if",         ITif ),
 249         ( "import",     ITimport ),
 250         ( "in",         ITin ),
 251         ( "infix",      ITinfix ),
 252         ( "infixl",     ITinfixl ),
 253         ( "infixr",     ITinfixr ),
 254         ( "instance",   ITinstance ),
 255         ( "let",        ITlet ),
 256         ( "module",     ITmodule ),
 257         ( "newtype",    ITnewtype ),
 258         ( "of",         ITof ),
 259         ( "qualified",  ITqualified ),
 260         ( "then",       ITthen ),
 261         ( "type",       ITtype ),
 262         ( "where",      ITwhere ),
 263         ( "_scc_",      ITscc )         -- ToDo: remove
 264      ]
 265
 266 isSpecial :: Token -> Bool
 267 -- If we see M.x, where x is a keyword, but
 268 -- is special, we treat is as just plain M.x,
 269 -- not as a keyword.
 270 isSpecial ITas          = True
 271 isSpecial IThiding      = True
 272 isSpecial ITqualified   = True
 273 isSpecial ITforall      = True
 274 isSpecial ITexport      = True
 275 isSpecial ITlabel       = True
 276 isSpecial ITdynamic     = True
 277 isSpecial ITsafe        = True
 278 isSpecial ITthreadsafe  = True
 279 isSpecial ITunsafe      = True
 280 isSpecial ITwith        = True
 281 isSpecial ITccallconv   = True
 282 isSpecial ITstdcallconv = True
 283 isSpecial ITmdo         = True
 284 isSpecial _             = False
 285
 286 -- the bitmap provided as the third component indicates whether the
 287 -- corresponding extension keyword is valid under the extension options
 288 -- provided to the compiler; if the extension corresponding to *any* of the
 289 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 290 -- facilitates using a keyword in two different extensions that can be
 291 -- activated independently)
 292 --
 293 ghcExtensionKeywordsFM = listToUFM $
 294         map (\(x, y, z) -> (mkFastString x, (y, z)))
 295      [  ( "forall",     ITforall,        bit glaExtsBit),
 296         ( "foreign",    ITforeign,       bit ffiBit),
 297         ( "export",     ITexport,        bit ffiBit),
 298         ( "label",      ITlabel,         bit ffiBit),
 299         ( "dynamic",    ITdynamic,       bit ffiBit),
 300         ( "safe",       ITsafe,          bit ffiBit),
 301         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 302         ( "unsafe",     ITunsafe,        bit ffiBit),
 303         ( "with",       ITwith,          bit withBit),
 304         ( "mdo",        ITmdo,           bit glaExtsBit),
 305         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 306         ( "ccall",      ITccallconv,     bit ffiBit),
 307         ( "dotnet",     ITdotnet,        bit ffiBit),
 308         ( "reifyDecl",  ITreifyDecl,     bit glaExtsBit),
 309         ( "reifyType",  ITreifyType,     bit glaExtsBit),
 310         ( "reifyFixity",ITreifyFixity,   bit glaExtsBit),
 311         ("_ccall_",     ITccall (False, False, PlayRisky),
 312                                          bit glaExtsBit),
 313         ("_ccall_GC_",  ITccall (False, False, PlaySafe False),
 314                                          bit glaExtsBit),
 315         ("_casm_",      ITccall (False, True,  PlayRisky),
 316                                          bit glaExtsBit),
 317         ("_casm_GC_",   ITccall (False, True,  PlaySafe False),
 318                                          bit glaExtsBit)
 319      ]
 320
 321 haskellKeySymsFM = listToUFM $
 322         map (\ (x,y) -> (mkFastString x,y))
 323       [ ("..",          ITdotdot)
 324        ,(":",           ITcolon)        -- (:) is a reserved op,
 325                                         -- meaning only list cons
 326        ,("::",          ITdcolon)
 327        ,("=",           ITequal)
 328        ,("\\",          ITlam)
 329        ,("|",           ITvbar)
 330        ,("<-",          ITlarrow)
 331        ,("->",          ITrarrow)
 332        ,("@",           ITat)
 333        ,("~",           ITtilde)
 334        ,("=>",          ITdarrow)
 335        ,("-",           ITminus)
 336        ,("!",           ITbang)
 337        ,("*",           ITstar)
 338        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 339        ]
 340
 341 \end{code}
 342
 343 -----------------------------------------------------------------------------
 344 The lexical analyser
 345
 346 Lexer state:
 347
 348         - (exts)  lexing a source with extensions, eg, an interface file or
 349                   with -fglasgow-exts
 350         - (bol)   pointer to beginning of line (for column calculations)
 351         - (buf)   pointer to beginning of token
 352         - (buf)   pointer to current char
 353         - (atbol) flag indicating whether we're at the beginning of a line
 354
 355 \begin{code}
 356 lexer :: (Token -> P a) -> P a
 357 lexer cont buf s@(PState{
 358                     loc = loc,
 359                     extsBitmap = exts,
 360                     bol = bol,
 361                     atbol = atbol,
 362                     context = ctx
 363                 })
 364
 365         -- first, start a new lexeme and lose all the whitespace
 366   =  _scc_ "Lexer"
 367   tab line bol atbol (stepOverLexeme buf)
 368   where
 369         line = srcLocLine loc
 370
 371         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 372           case currentChar# buf of
 373
 374             '\NUL'# ->
 375                    if bufferExhausted (stepOn buf)
 376                         then cont ITeof buf s'
 377                         else trace "lexer: misplaced NUL?" $
 378                              tab y bol atbol (stepOn buf)
 379
 380             '\n'# -> let buf' = stepOn buf
 381                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 382
 383                 -- find comments.  This got harder in Haskell 98.
 384             '-'# ->  let trundle n =
 385                           let next = lookAhead# buf n in
 386                           if next `eqChar#` '-'# then trundle (n +# 1#)
 387                           else if is_symbol next || n <# 2#
 388                                 then is_a_token
 389                                 else tab y bol atbol
 390                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 391                     in trundle 1#
 392
 393                 -- comments and pragmas.  We deal with LINE pragmas here,
 394                 -- and throw out any unrecognised pragmas as comments.  Any
 395                 -- pragmas we know about are dealt with later (after any layout
 396                 -- processing if necessary).
 397             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 398                 if lookAhead# buf 2# `eqChar#` '#'# then
 399                   case expandWhile# is_space (addToCurrentPos buf 3#) of { buf1->
 400                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 401                   let lexeme = mkFastString -- ToDo: too slow
 402                                   (map toUpper (lexemeToString buf2)) in
 403                   case lookupUFM pragmaKeywordsFM lexeme of
 404                         -- ignore RULES pragmas when -fglasgow-exts is off
 405                         Just ITrules_prag | not (glaExtsEnabled exts) ->
 406                            skip_to_end (stepOnBy# buf 2#) s'
 407                         Just ITline_prag ->
 408                            line_prag skip_to_end buf2 s'
 409                         Just other -> is_a_token
 410                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 411                   }}
 412
 413                 else skip_to_end (stepOnBy# buf 2#) s'
 414                 where
 415                     skip_to_end = skipNestedComment (lexer cont)
 416
 417                 -- special GHC extension: we grok cpp-style #line pragmas
 418             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 419                 let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
 420                            lookAhead# buf 2# `eqChar#` 'i'# &&
 421                            lookAhead# buf 3# `eqChar#` 'n'# &&
 422                            lookAhead# buf 4# `eqChar#` 'e'#  = stepOnBy# buf 5#
 423                          | otherwise = stepOn buf
 424                 in
 425                 case expandWhile# is_space buf1 of { buf2 ->
 426                 if is_digit (currentChar# buf2)
 427                         then line_prag next_line buf2 s'
 428                         else is_a_token
 429                 }
 430                 where
 431                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 432
 433                 -- tabs have been expanded beforehand
 434             c | is_space c -> tab y bol atbol (stepOn buf)
 435               | otherwise  -> is_a_token
 436
 437            where s' = s{loc = replaceSrcLine loc y,
 438                         bol = bol,
 439                        atbol = atbol}
 440
 441                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 442                             | otherwise    = lexToken cont exts buf s'
 443
 444 -- {-# LINE .. #-} pragmas.  yeuch.
 445 line_prag cont buf s@PState{loc=loc} =
 446   case expandWhile# is_space buf                of { buf1 ->
 447   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 448   -- subtract one: the line number refers to the *following* line.
 449   let real_line = line - 1 in
 450   case fromInteger real_line                    of { i@(I# l) ->
 451         -- ToDo, if no filename then we skip the newline.... d'oh
 452   case expandWhile# is_space buf2               of { buf3 ->
 453   case currentChar# buf3                        of
 454      '\"'#{-"-} ->
 455         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 456         let
 457             file = lexemeToFastString buf4
 458             new_buf = stepOn (stepOverLexeme buf4)
 459         in
 460         if nullFastString file
 461                 then cont new_buf s{loc = replaceSrcLine loc l}
 462                 else cont new_buf s{loc = mkSrcLoc file i}
 463         }
 464      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 465   }}}}
 466
 467 skipNestedComment :: P a -> P a
 468 skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
 469
 470 skipNestedComment' :: SrcLoc -> P a -> P a
 471 skipNestedComment' orig_loc cont buf = loop buf
 472  where
 473    loop buf =
 474      case currentChar# buf of
 475         '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
 476
 477         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 478               skipNestedComment
 479                 (skipNestedComment' orig_loc cont)
 480                 (stepOnBy# buf 2#)
 481
 482         '\n'# -> \ s@PState{loc=loc} ->
 483                  let buf' = stepOn buf in
 484                  loop buf' s{loc = incSrcLine loc,
 485                              bol = currentIndex# buf',
 486                              atbol = 1#}
 487
 488         -- pass the original SrcLoc to lexError so that the error is
 489         -- reported at the line it was originally on, not the line at
 490         -- the end of the file.
 491         '\NUL'# | bufferExhausted (stepOn buf) ->
 492                 \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
 493
 494         _   -> loop (stepOn buf)
 495
 496 -- When we are lexing the first token of a line, check whether we need to
 497 -- insert virtual semicolons or close braces due to layout.
 498
 499 lexBOL :: (Token -> P a) -> P a
 500 lexBOL cont buf s@(PState{
 501                     loc = loc,
 502                     extsBitmap = exts,
 503                     bol = bol,
 504                     atbol = atbol,
 505                     context = ctx
 506                   }) =
 507         if need_close_curly then
 508                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 509                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 510         else if need_semi_colon then
 511                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 512                 cont ITsemi buf s{atbol = 0#}
 513         else
 514                 lexToken cont exts buf s{atbol = 0#}
 515   where
 516         col = currentIndex# buf -# bol
 517
 518         need_close_curly =
 519                 case ctx of
 520                         [] -> False
 521                         (i:_) -> case i of
 522                                     NoLayout -> False
 523                                     Layout n -> col <# n
 524         need_semi_colon =
 525                 case ctx of
 526                         [] -> False
 527                         (i:_) -> case i of
 528                                     NoLayout -> False
 529                                     Layout n -> col ==# n
 530
 531
 532 lexToken :: (Token -> P a) -> Int# -> P a
 533 lexToken cont exts buf =
 534 -- trace "lexToken" $
 535   case currentChar# buf of
 536
 537     -- special symbols ----------------------------------------------------
 538     '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
 539                 -> cont IToubxparen (addToCurrentPos buf 2#)
 540          | otherwise
 541                 -> cont IToparen (incCurrentPos buf)
 542
 543     ')'# -> cont ITcparen    (incCurrentPos buf)
 544     '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
 545             cont ITopabrack  (addToCurrentPos buf 2#)
 546          ------- MetaHaskell Extensions, looking for [| [e|  [t|  [p| and [d|
 547          | glaExtsEnabled exts &&
 548            ((lookAhead# buf 1# ) `eqChar#` '|'# ) ->
 549                 cont ITopenExpQuote (addToCurrentPos buf 2# )
 550          | glaExtsEnabled exts &&
 551            (let c = (lookAhead# buf 1# )
 552             in eqChar# c 'e'# || eqChar# c 't'# || eqChar# c 'd'#  || eqChar# c 'p'#) &&
 553            ((lookAhead# buf 2#) `eqChar#` '|'#) ->
 554                 let quote 'e'# = ITopenExpQuote
 555                     quote 'p'# = ITopenPatQuote
 556                     quote 'd'# = ITopenDecQuote
 557                     quote 't'# = ITopenTypQuote
 558                 in cont (quote (lookAhead# buf 1#)) (addToCurrentPos buf 3# )
 559          | otherwise ->
 560             cont ITobrack    (incCurrentPos buf)
 561
 562     ']'# -> cont ITcbrack    (incCurrentPos buf)
 563     ','# -> cont ITcomma     (incCurrentPos buf)
 564     ';'# -> cont ITsemi      (incCurrentPos buf)
 565     '}'# -> \ s@PState{context = ctx} ->
 566             case ctx of
 567                 (_:ctx') -> cont ITccurly (incCurrentPos buf) s{context=ctx'}
 568                 _        -> lexError "too many '}'s" buf s
 569     '|'# -> case lookAhead# buf 1# of
 570                  '}'#  | glaExtsEnabled exts -> cont ITccurlybar
 571                                                      (addToCurrentPos buf 2#)
 572                  -- MetaHaskell extension
 573                  ']'#  |  glaExtsEnabled exts -> cont ITcloseQuote (addToCurrentPos buf 2#)
 574                  other -> lex_sym cont (incCurrentPos buf)
 575     ':'# -> case lookAhead# buf 1# of
 576                  ']'#  | parrEnabled exts    -> cont ITcpabrack
 577                                                      (addToCurrentPos buf 2#)
 578                  _                           -> lex_sym cont (incCurrentPos buf)
 579
 580
 581     '#'# -> case lookAhead# buf 1# of
 582                 ')'#  | glaExtsEnabled exts
 583                      -> cont ITcubxparen (addToCurrentPos buf 2#)
 584                 '-'# -> case lookAhead# buf 2# of
 585                            '}'# -> cont ITclose_prag (addToCurrentPos buf 3#)
 586                            _    -> lex_sym cont (incCurrentPos buf)
 587                 _    -> lex_sym cont (incCurrentPos buf)
 588
 589     '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
 590                 -> lex_cstring cont (addToCurrentPos buf 2#)
 591          | otherwise
 592                 -> cont ITbackquote (incCurrentPos buf)
 593
 594     '{'# ->   -- for Emacs: -}
 595             case lookAhead# buf 1# of
 596            '|'# | glaExtsEnabled exts
 597                 -> cont ITocurlybar (addToCurrentPos buf 2#)
 598            '-'# -> case lookAhead# buf 2# of
 599                     '#'# -> lex_prag cont (addToCurrentPos buf 3#)
 600                     _    -> cont ITocurly (incCurrentPos buf)
 601            _ -> (layoutOff `thenP_` cont ITocurly)  (incCurrentPos buf)
 602
 603
 604
 605
 606     -- strings/characters -------------------------------------------------
 607     '\"'#{-"-} -> lex_string cont exts [] (incCurrentPos buf)
 608     '\''#      -> lex_char (char_end cont) exts (incCurrentPos buf)
 609
 610         -- Hexadecimal and octal constants
 611     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 612                 -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
 613          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 614                 -> readNum (after_lexnum cont exts) buf' is_octdigit  8 oct_or_dec
 615         where ch   = lookAhead# buf 1#
 616               ch2  = lookAhead# buf 2#
 617               buf' = addToCurrentPos buf 2#
 618
 619     '\NUL'# ->
 620             if bufferExhausted (stepOn buf) then
 621                cont ITeof buf
 622             else
 623                trace "lexIface: misplaced NUL?" $
 624                cont (ITunknown "\NUL") (stepOn buf)
 625
 626     '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->  -- ?x implicit parameter
 627             specialPrefixId ITdupipvarid cont exts (incCurrentPos buf)
 628     '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 629             specialPrefixId ITsplitipvarid cont exts (incCurrentPos buf)
 630
 631     ---------------- MetaHaskell Extensions for quotation escape
 632     '$'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->  -- $x  variable escape
 633             specialPrefixId ITidEscape cont exts (addToCurrentPos buf 1#)
 634     '$'# | glaExtsEnabled exts &&  -- $( f x )  expression escape
 635            ((lookAhead# buf 1#) `eqChar#` '('#) -> cont ITparenEscape (addToCurrentPos buf 2#)
 636
 637     c | is_digit  c -> lex_num cont exts 0 buf
 638       | is_symbol c -> lex_sym cont buf
 639       | is_upper  c -> lex_con cont exts buf
 640       | is_lower  c -> lex_id  cont exts buf
 641       | otherwise   -> lexError "illegal character" buf
 642
 643 -- Int# is unlifted, and therefore faster than Bool for flags.
 644 {-# INLINE flag #-}
 645 flag :: Int# -> Bool
 646 flag 0# = False
 647 flag _  = True
 648
 649 -------------------------------------------------------------------------------
 650 -- Pragmas
 651
 652 lex_prag cont buf
 653   = case expandWhile# is_space buf of { buf1 ->
 654     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 655     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 656     case lookupUFM pragmaKeywordsFM lexeme of
 657         Just kw -> cont kw (mergeLexemes buf buf2)
 658         Nothing -> panic "lex_prag"
 659   }}
 660
 661 -------------------------------------------------------------------------------
 662 -- Strings & Chars
 663
 664 lex_string cont exts s buf
 665   = case currentChar# buf of
 666         '"'#{-"-} ->
 667            let buf' = incCurrentPos buf
 668            in case currentChar# buf' of
 669                 '#'# | glaExtsEnabled exts ->
 670                    if any (> 0xFF) s
 671                     then lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 672                     else let s' = mkFastStringNarrow (map chr (reverse s)) in
 673                          -- always a narrow string/byte array
 674                          cont (ITprimstring s') (incCurrentPos buf')
 675
 676                 _other -> let s' = mkFastString (map chr (reverse s))
 677                           in cont (ITstring s') buf'
 678
 679         -- ignore \& in a string, deal with string gaps
 680         '\\'# | next_ch `eqChar#` '&'#
 681                 -> lex_string cont exts s buf'
 682               | is_space next_ch
 683                 -> lex_stringgap cont exts s (incCurrentPos buf)
 684
 685             where next_ch = lookAhead# buf 1#
 686                   buf' = addToCurrentPos buf 2#
 687
 688         _ -> lex_char (lex_next_string cont s) exts buf
 689
 690 lex_stringgap cont exts s buf
 691   = let buf' = incCurrentPos buf in
 692     case currentChar# buf of
 693         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
 694                   st{loc = incSrcLine loc}
 695         '\\'# -> lex_string cont exts s buf'
 696         c | is_space c -> lex_stringgap cont exts s buf'
 697         other -> charError buf'
 698
 699 lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
 700
 701 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 702 lex_char cont exts buf
 703   = case currentChar# buf of
 704         '\\'# -> lex_escape (cont exts) (incCurrentPos buf)
 705         c | is_any c -> cont exts (I# (ord# c)) (incCurrentPos buf)
 706         other -> charError buf
 707
 708 char_end cont exts c buf
 709   = case currentChar# buf of
 710         '\''# -> let buf' = incCurrentPos buf in
 711                  case currentChar# buf' of
 712                         '#'# | glaExtsEnabled exts
 713                                 -> cont (ITprimchar c) (incCurrentPos buf')
 714                         _       -> cont (ITchar c) buf'
 715         _     -> charError buf
 716
 717 lex_escape cont buf
 718   = let buf' = incCurrentPos buf in
 719     case currentChar# buf of
 720         'a'#       -> cont (ord '\a') buf'
 721         'b'#       -> cont (ord '\b') buf'
 722         'f'#       -> cont (ord '\f') buf'
 723         'n'#       -> cont (ord '\n') buf'
 724         'r'#       -> cont (ord '\r') buf'
 725         't'#       -> cont (ord '\t') buf'
 726         'v'#       -> cont (ord '\v') buf'
 727         '\\'#      -> cont (ord '\\') buf'
 728         '"'#       -> cont (ord '\"') buf'
 729         '\''#      -> cont (ord '\'') buf'
 730         '^'#       -> let c = currentChar# buf' in
 731                       if c `geChar#` '@'# && c `leChar#` '_'#
 732                         then cont (I# (ord# c -# ord# '@'#)) (incCurrentPos buf')
 733                         else charError buf'
 734
 735         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 736         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 737         x | is_digit x
 738                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 739
 740         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 741                                        Just buf2 <- [prefixMatch buf p] ] of
 742                             (c,buf2):_ -> cont (ord c) buf2
 743                             [] -> charError buf'
 744
 745 after_charnum cont i buf
 746   = if i >= 0 && i <= 0x10FFFF
 747         then cont (fromInteger i) buf
 748         else charError buf
 749
 750 readNum cont buf is_digit base conv = read buf 0
 751   where read buf i
 752           = case currentChar# buf of { c ->
 753             if is_digit c
 754                 then read (incCurrentPos buf) (i*base + (toInteger (I# (conv c))))
 755                 else cont i buf
 756             }
 757
 758 is_hexdigit c
 759         =  is_digit c
 760         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 761         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 762
 763 hex c | is_digit c = ord# c -# ord# '0'#
 764       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 765 oct_or_dec c = ord# c -# ord# '0'#
 766
 767 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 768
 769 to_lower c
 770   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 771         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 772   | otherwise = c
 773
 774 charError buf = lexError "error in character literal" buf
 775
 776 silly_escape_chars = [
 777         ("NUL", '\NUL'),
 778         ("SOH", '\SOH'),
 779         ("STX", '\STX'),
 780         ("ETX", '\ETX'),
 781         ("EOT", '\EOT'),
 782         ("ENQ", '\ENQ'),
 783         ("ACK", '\ACK'),
 784         ("BEL", '\BEL'),
 785         ("BS", '\BS'),
 786         ("HT", '\HT'),
 787         ("LF", '\LF'),
 788         ("VT", '\VT'),
 789         ("FF", '\FF'),
 790         ("CR", '\CR'),
 791         ("SO", '\SO'),
 792         ("SI", '\SI'),
 793         ("DLE", '\DLE'),
 794         ("DC1", '\DC1'),
 795         ("DC2", '\DC2'),
 796         ("DC3", '\DC3'),
 797         ("DC4", '\DC4'),
 798         ("NAK", '\NAK'),
 799         ("SYN", '\SYN'),
 800         ("ETB", '\ETB'),
 801         ("CAN", '\CAN'),
 802         ("EM", '\EM'),
 803         ("SUB", '\SUB'),
 804         ("ESC", '\ESC'),
 805         ("FS", '\FS'),
 806         ("GS", '\GS'),
 807         ("RS", '\RS'),
 808         ("US", '\US'),
 809         ("SP", '\SP'),
 810         ("DEL", '\DEL')
 811         ]
 812
 813 -----------------------------------------------------------------------------
 814 -- Numbers
 815
 816 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 817 lex_num cont exts acc buf =
 818  case scanNumLit acc buf of
 819      (acc',buf') ->
 820        case currentChar# buf' of
 821          '.'# | is_digit (lookAhead# buf' 1#) ->
 822              -- this case is not optimised at all, as the
 823              -- presence of floating point numbers in interface
 824              -- files is not that common. (ToDo)
 825             case expandWhile# is_digit (incCurrentPos buf') of
 826               buf2 -> -- points to first non digit char
 827
 828                 let l = case currentChar# buf2 of
 829                           'E'# -> do_exponent
 830                           'e'# -> do_exponent
 831                           _ -> buf2
 832
 833                     do_exponent
 834                         = let buf3 = incCurrentPos buf2 in
 835                           case currentChar# buf3 of
 836                                 '-'# | is_digit (lookAhead# buf3 1#)
 837                                    -> expandWhile# is_digit (incCurrentPos buf3)
 838                                 '+'# | is_digit (lookAhead# buf3 1#)
 839                                    -> expandWhile# is_digit (incCurrentPos buf3)
 840                                 x | is_digit x -> expandWhile# is_digit buf3
 841                                 _ -> buf2
 842
 843                     v = readRational__ (lexemeToString l)
 844
 845                 in case currentChar# l of -- glasgow exts only
 846                       '#'# | glaExtsEnabled exts -> let l' = incCurrentPos l in
 847                               case currentChar# l' of
 848                                 '#'# -> cont (ITprimdouble v) (incCurrentPos l')
 849                                 _    -> cont (ITprimfloat  v) l'
 850                       _ -> cont (ITrational v) l
 851
 852          _ -> after_lexnum cont exts acc' buf'
 853
 854 after_lexnum cont exts i buf
 855   = case currentChar# buf of
 856         '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incCurrentPos buf)
 857         _                          -> cont (ITinteger i) buf
 858
 859 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
 860 readRational r = do
 861      (n,d,s) <- readFix r
 862      (k,t)   <- readExp s
 863      return ((n%1)*10^^(k-d), t)
 864  where
 865      readFix r = do
 866         (ds,s)  <- lexDecDigits r
 867         (ds',t) <- lexDotDigits s
 868         return (read (ds++ds'), length ds', t)
 869
 870      readExp (e:s) | e `elem` "eE" = readExp' s
 871      readExp s                     = return (0,s)
 872
 873      readExp' ('+':s) = readDec s
 874      readExp' ('-':s) = do
 875                         (k,t) <- readDec s
 876                         return (-k,t)
 877      readExp' s       = readDec s
 878
 879      readDec s = do
 880         (ds,r) <- nonnull isDigit s
 881         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 882                 r)
 883
 884      lexDecDigits = nonnull isDigit
 885
 886      lexDotDigits ('.':s) = return (span isDigit s)
 887      lexDotDigits s       = return ("",s)
 888
 889      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 890                       return (cs,t)
 891
 892 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
 893 readRational__ top_s
 894   = case top_s of
 895       '-' : xs -> - (read_me xs)
 896       xs       -> read_me xs
 897   where
 898     read_me s
 899       = case (do { (x,"") <- readRational s ; return x }) of
 900           [x] -> x
 901           []  -> error ("readRational__: no parse:"        ++ top_s)
 902           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
 903
 904 -----------------------------------------------------------------------------
 905 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 906
 907 -- we lexemeToFastString on the bit between the ``''s, but include the
 908 -- quotes in the full lexeme.
 909
 910 lex_cstring cont buf =
 911  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 912    Just buf' -> cont (ITlitlit (lexemeToFastString
 913                                 (addToCurrentPos buf' (negateInt# 2#))))
 914                    (mergeLexemes buf buf')
 915    Nothing   -> lexError "unterminated ``" buf
 916
 917 -----------------------------------------------------------------------------
 918 -- identifiers, symbols etc.
 919
 920 -- used for identifiers with special prefixes like
 921 -- ?x (implicit parameters), $x (MetaHaskell escapes) and #x
 922 -- we've already seen the prefix char, so look for an id, and wrap
 923 -- the new "ip_constr" around the lexeme returned
 924
 925 specialPrefixId ip_constr cont exts buf = lex_id newcont exts buf
 926  where newcont (ITvarid lexeme) buf2 = cont (ip_constr (tailFS lexeme)) buf2
 927        newcont token buf2 = cont token buf2
 928 {-
 929  case expandWhile# is_ident buf of
 930    buf' -> cont (ip_constr (tailFS lexeme)) buf'
 931         where lexeme = lexemeToFastString buf'
 932 -}
 933
 934 lex_id cont exts buf =
 935  let buf1 = expandWhile# is_ident buf in
 936  seq buf1 $
 937
 938  case (if glaExtsEnabled exts
 939         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 940         else buf1)                              of { buf' ->
 941  seq buf' $
 942
 943  let lexeme  = lexemeToFastString buf' in
 944
 945  case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 946         Just kwd_token -> --trace ("hkeywd: "++unpackFS(lexeme)) $
 947                           cont kwd_token buf';
 948         Nothing        ->
 949
 950  let var_token = cont (ITvarid lexeme) buf' in
 951
 952  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 953     Just (kwd_token, validExts)
 954       | validExts .&. (toInt32 exts) /= 0 -> cont kwd_token buf';
 955     _                                     -> var_token
 956
 957  }}}
 958
 959 lex_sym cont buf =
 960  -- trace "lex_sym" $
 961  case expandWhile# is_symbol buf of
 962    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 963                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 964                                   cont kwd_token buf' ;
 965                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 966                           cont (mk_var_token lexeme) buf'
 967            }
 968         where lexeme = lexemeToFastString buf'
 969
 970
 971 -- lex_con recursively collects components of a qualified identifer.
 972 -- The argument buf is the StringBuffer representing the lexeme
 973 -- identified so far, where the next character is upper-case.
 974
 975 lex_con cont exts buf =
 976  -- trace ("con: "{-++unpackFS lexeme-}) $
 977  let empty_buf = stepOverLexeme buf in
 978  case expandWhile# is_ident empty_buf of { buf1 ->
 979  case slurp_trailing_hashes buf1 exts of { con_buf ->
 980
 981  let all_buf = mergeLexemes buf con_buf
 982
 983      con_lexeme = lexemeToFastString con_buf
 984      mod_lexeme = lexemeToFastString (decCurrentPos buf)
 985      all_lexeme = lexemeToFastString all_buf
 986
 987      just_a_conid
 988         | emptyLexeme buf = cont (ITconid con_lexeme)               all_buf
 989         | otherwise       = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
 990  in
 991
 992  case currentChar# all_buf of
 993      '.'# -> maybe_qualified cont exts all_lexeme
 994                 (incCurrentPos all_buf) just_a_conid
 995      _    -> just_a_conid
 996   }}
 997
 998
 999 maybe_qualified cont exts mod buf just_a_conid =
1000  -- trace ("qid: "{-++unpackFS lexeme-}) $
1001  case currentChar# buf of
1002   '['# ->       -- Special case for []
1003     case lookAhead# buf 1# of
1004      ']'# -> cont (ITqconid  (mod,FSLIT("[]"))) (addToCurrentPos buf 2#)
1005      _    -> just_a_conid
1006
1007   '('# ->  -- Special case for (,,,)
1008            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
1009     case lookAhead# buf 1# of
1010      '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
1011                 ','# -> lex_ubx_tuple cont mod (addToCurrentPos buf 3#)
1012                                 just_a_conid
1013                 _    -> just_a_conid
1014      ')'# -> cont (ITqconid (mod,FSLIT("()"))) (addToCurrentPos buf 2#)
1015      ','# -> lex_tuple cont mod (addToCurrentPos buf 2#) just_a_conid
1016      _    -> just_a_conid
1017
1018   '-'# -> case lookAhead# buf 1# of
1019             '>'# -> cont (ITqconid (mod,FSLIT("(->)"))) (addToCurrentPos buf 2#)
1020             _    -> lex_id3 cont exts mod buf just_a_conid
1021
1022   _    -> lex_id3 cont exts mod buf just_a_conid
1023
1024
1025 lex_id3 cont exts mod buf just_a_conid
1026   | is_upper (currentChar# buf) =
1027      lex_con cont exts buf
1028
1029   | is_symbol (currentChar# buf) =
1030      let
1031         start_new_lexeme = stepOverLexeme buf
1032      in
1033      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
1034      case expandWhile# is_symbol start_new_lexeme of { buf' ->
1035      let
1036        lexeme  = lexemeToFastString buf'
1037         -- real lexeme is M.<sym>
1038        new_buf = mergeLexemes buf buf'
1039      in
1040      cont (mk_qvar_token mod lexeme) new_buf
1041         -- wrong, but arguably morally right: M... is now a qvarsym
1042      }
1043
1044   | otherwise   =
1045      let
1046         start_new_lexeme = stepOverLexeme buf
1047      in
1048      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
1049      case expandWhile# is_ident start_new_lexeme of { buf1 ->
1050      if emptyLexeme buf1
1051             then just_a_conid
1052             else
1053
1054      case slurp_trailing_hashes buf1 exts of { buf' ->
1055
1056      let
1057       lexeme      = lexemeToFastString buf'
1058       new_buf     = mergeLexemes buf buf'
1059       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
1060      in
1061      case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
1062             Nothing          -> is_a_qvarid ;
1063
1064             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
1065                            -> is_a_qvarid          --  recognised as keywords here.
1066                            | otherwise
1067                            -> just_a_conid         -- avoid M.where etc.
1068      }}}
1069
1070 slurp_trailing_hashes buf exts
1071   | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
1072   | otherwise           = buf
1073
1074
1075 mk_var_token pk_str
1076   | is_upper f          = ITconid pk_str
1077   | is_ident f          = ITvarid pk_str
1078   | f `eqChar#` ':'#    = ITconsym pk_str
1079   | otherwise           = ITvarsym pk_str
1080   where
1081       (C# f) = headFS pk_str
1082       -- tl     = _TAIL_ pk_str
1083
1084 mk_qvar_token m token =
1085 -- trace ("mk_qvar ") $
1086  case mk_var_token token of
1087    ITconid n  -> ITqconid  (m,n)
1088    ITvarid n  -> ITqvarid  (m,n)
1089    ITconsym n -> ITqconsym (m,n)
1090    ITvarsym n -> ITqvarsym (m,n)
1091    _          -> ITunknown (show token)
1092 \end{code}
1093
1094 ----------------------------------------------------------------------------
1095 Horrible stuff for dealing with M.(,,,)
1096
1097 \begin{code}
1098 lex_tuple cont mod buf back_off =
1099   go 2 buf
1100   where
1101    go n buf =
1102     case currentChar# buf of
1103       ','# -> go (n+1) (stepOn buf)
1104       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1105       _    -> back_off
1106
1107 lex_ubx_tuple cont mod buf back_off =
1108   go 2 buf
1109   where
1110    go n buf =
1111     case currentChar# buf of
1112       ','# -> go (n+1) (stepOn buf)
1113       '#'# -> case lookAhead# buf 1# of
1114                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1115                                  (stepOnBy# buf 2#)
1116                 _    -> back_off
1117       _    -> back_off
1118 \end{code}
1119
1120 -----------------------------------------------------------------------------
1121
1122 \begin{code}
1123 data LayoutContext
1124   = NoLayout
1125   | Layout Int#
1126
1127 data ParseResult a
1128   = POk PState a
1129   | PFailed Message
1130
1131 data PState = PState {
1132         loc        :: SrcLoc,
1133         extsBitmap :: Int#,     -- bitmap that determines permitted extensions
1134         bol        :: Int#,
1135         atbol      :: Int#,
1136         context    :: [LayoutContext]
1137      }
1138
1139 type P a = StringBuffer         -- Input string
1140         -> PState
1141         -> ParseResult a
1142
1143 returnP   :: a -> P a
1144 returnP a buf s = POk s a
1145
1146 thenP      :: P a -> (a -> P b) -> P b
1147 m `thenP` k = \ buf s ->
1148         case m buf s of
1149                 POk s1 a -> k a buf s1
1150                 PFailed err  -> PFailed err
1151
1152 thenP_     :: P a -> P b -> P b
1153 m `thenP_` k = m `thenP` \_ -> k
1154
1155 mapP :: (a -> P b) -> [a] -> P [b]
1156 mapP f [] = returnP []
1157 mapP f (a:as) =
1158      f a `thenP` \b ->
1159      mapP f as `thenP` \bs ->
1160      returnP (b:bs)
1161
1162 failP :: String -> P a
1163 failP msg buf s = PFailed (text msg)
1164
1165 failMsgP :: Message -> P a
1166 failMsgP msg buf s = PFailed msg
1167
1168 lexError :: String -> P a
1169 lexError str buf s@PState{ loc = loc }
1170   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1171
1172 getSrcLocP :: P SrcLoc
1173 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1174
1175 -- use a temporary SrcLoc for the duration of the argument
1176 setSrcLocP :: SrcLoc -> P a -> P a
1177 setSrcLocP new_loc p buf s =
1178   case p buf s{ loc=new_loc } of
1179       POk _ a   -> POk s a
1180       PFailed e -> PFailed e
1181
1182 getSrcFile :: P FastString
1183 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1184
1185 pushContext :: LayoutContext -> P ()
1186 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1187
1188 {-
1189
1190 This special case in layoutOn is to handle layout contexts with are
1191 indented the same or less than the current context.  This is illegal
1192 according to the Haskell spec, so we have to arrange to close the
1193 current context.  eg.
1194
1195 class Foo a where
1196 class Bar a
1197
1198 after the first 'where', the sequence of events is:
1199
1200         - layout system inserts a ';' (column 0)
1201         - parser begins a new context at column 0
1202         - parser shifts ';' (legal empty declaration)
1203         - parser sees 'class': parse error (we're still in the inner context)
1204
1205 trouble is, by the time we know we need a new context, the lexer has
1206 already generated the ';'.  Hacky solution is as follows: since we
1207 know the column of the next token (it's the column number of the new
1208 context), we set the ACTUAL column number of the new context to this
1209 numer plus one.  Hence the next time the lexer is called, a '}' will
1210 be generated to close the new context straight away.  Furthermore, we
1211 have to set the atbol flag so that the ';' that the parser shifted as
1212 part of the new context is re-generated.
1213
1214 when the new context is *less* indented than the current one:
1215
1216 f = f where g = g where
1217 h = h
1218
1219         - current context: column 12.
1220         - on seeing 'h' (column 0), the layout system inserts '}'
1221         - parser starts a new context, column 0
1222         - parser sees '}', uses it to close new context
1223         - we still need to insert another '}' followed by a ';',
1224           hence the atbol trick.
1225
1226 There's also a special hack in here to deal with
1227
1228         do
1229            ....
1230            e $ do
1231            blah
1232
1233 i.e. the inner context is at the same indentation level as the outer
1234 context.  This is strictly illegal according to Haskell 98, but
1235 there's a lot of existing code using this style and it doesn't make
1236 any sense to disallow it, since empty 'do' lists don't make sense.
1237 -}
1238
1239 layoutOn :: Bool -> P ()
1240 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1241     let offset = lexemeIndex buf -# bol in
1242     case ctx of
1243         Layout prev_off : _
1244            | if strict then prev_off >=# offset else prev_off ># offset ->
1245                 --trace ("layout on, column: " ++  show (I# offset)) $
1246                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1247         other ->
1248                 --trace ("layout on, column: " ++  show (I# offset)) $
1249                 POk s{ context = Layout offset : ctx } ()
1250
1251 layoutOff :: P ()
1252 layoutOff buf s@(PState{ context = ctx }) =
1253     POk s{ context = NoLayout:ctx } ()
1254
1255 popContext :: P ()
1256 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1257   case ctx of
1258         (_:tl) -> POk s{ context = tl } ()
1259         []     -> PFailed (srcParseErr buf loc)
1260
1261 -- for reasons of efficiency, flags indicating language extensions (eg,
1262 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1263 -- integer
1264
1265 glaExtsBit, ffiBit, parrBit :: Int
1266 glaExtsBit = 0
1267 ffiBit     = 1
1268 parrBit    = 2
1269 withBit    = 3
1270
1271 glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
1272 glaExtsEnabled flags = testBit (toInt32 flags) glaExtsBit
1273 ffiEnabled     flags = testBit (toInt32 flags) ffiBit
1274 withEnabled    flags = testBit (toInt32 flags) withBit
1275 parrEnabled    flags = testBit (toInt32 flags) parrBit
1276
1277 toInt32 :: Int# -> Int32
1278 toInt32 x# = fromIntegral (I# x#)
1279
1280 -- convenient record-based bitmap for the interface to the rest of the world
1281 --
1282 -- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below)
1283 --
1284 data ExtFlags = ExtFlags {
1285                   glasgowExtsEF :: Bool,
1286                   ffiEF         :: Bool,
1287                   withEF        :: Bool,
1288                   parrEF        :: Bool
1289                 }
1290
1291 -- create a parse state
1292 --
1293 mkPState          :: SrcLoc -> ExtFlags -> PState
1294 mkPState loc exts  =
1295   PState {
1296     loc        = loc,
1297       extsBitmap = case (fromIntegral bitmap) of {I# bits -> bits},
1298       bol         = 0#,
1299       atbol      = 1#,
1300       context    = []
1301     }
1302     where
1303       bitmap =     glaExtsBit `setBitIf` glasgowExtsEF     exts
1304                .|. ffiBit     `setBitIf` (ffiEF            exts
1305                                           || glasgowExtsEF exts)
1306                .|. withBit    `setBitIf` withEF            exts
1307                .|. parrBit    `setBitIf` parrEF            exts
1308       --
1309       setBitIf :: Int -> Bool -> Int32
1310       b `setBitIf` cond | cond      = bit b
1311                         | otherwise = 0
1312
1313 -----------------------------------------------------------------------------
1314
1315 srcParseErr :: StringBuffer -> SrcLoc -> Message
1316 srcParseErr s l
1317   = hcat [ppr l,
1318           if null token
1319              then ptext SLIT(": parse error (possibly incorrect indentation)")
1320              else hcat [ptext SLIT(": parse error on input "),
1321                         char '`', text token, char '\'']
1322     ]
1323   where
1324         token = lexemeToString s
1325
1326 \end{code}