ghc/compiler/parser/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 module Lex (
  20
  21         srcParseErr,
  22
  23         -- Monad for parser
  24         Token(..), lexer, ParseResult(..), PState(..),
  25         ExtFlags(..), mkPState,
  26         StringBuffer,
  27
  28         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
  29         getSrcLocP, setSrcLocP, getSrcFile,
  30         layoutOn, layoutOff, pushContext, popContext
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( toUpper, isDigit, chr, ord )
  36 import Ratio            ( (%) )
  37
  38 import PrelNames        ( mkTupNameStr )
  39 import ForeignCall      ( Safety(..) )
  40 import UniqFM           ( listToUFM, lookupUFM )
  41 import BasicTypes       ( Boxity(..) )
  42 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
  43                           replaceSrcLine, mkSrcLoc )
  44
  45 import ErrUtils         ( Message )
  46 import Outputable
  47
  48 import FastString
  49 import StringBuffer
  50 import GlaExts
  51 import Ctype
  52
  53 import Bits             ( Bits(..) )       -- non-std
  54 import Int              ( Int32 )
  55 \end{code}
  56
  57 %************************************************************************
  58 %*                                                                      *
  59 \subsection{Data types}
  60 %*                                                                      *
  61 %************************************************************************
  62
  63 The token data type, fairly un-interesting except from one
  64 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  65 strictness, unfolding etc).
  66
  67 The Idea/Observation here is that the renamer needs to scan through
  68 all of an interface file before it can continue. But only a fraction
  69 of the information contained in the file turns out to be useful, so
  70 delaying as much as possible of the scanning and parsing of an
  71 interface file Makes Sense (Heap profiles of the compiler
  72 show a reduction in heap usage by at least a factor of two,
  73 post-renamer).
  74
  75 Hence, the interface file lexer spots when value declarations are
  76 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  77 for the type and any other id info for that binding (unfolding, strictness
  78 etc). These constructors are applied to the result of lexing these sub-chunks.
  79
  80 The lexing of the type and id info is all done lazily, of course, so
  81 the scanning (and subsequent parsing) will be done *only* on the ids the
  82 renamer finds out that it is interested in. The rest will just be junked.
  83 Laziness, you know it makes sense :-)
  84
  85 \begin{code}
  86 data Token
  87   = ITas                        -- Haskell keywords
  88   | ITcase
  89   | ITclass
  90   | ITdata
  91   | ITdefault
  92   | ITderiving
  93   | ITdo
  94   | ITelse
  95   | IThiding
  96   | ITif
  97   | ITimport
  98   | ITin
  99   | ITinfix
 100   | ITinfixl
 101   | ITinfixr
 102   | ITinstance
 103   | ITlet
 104   | ITmodule
 105   | ITnewtype
 106   | ITof
 107   | ITqualified
 108   | ITthen
 109   | ITtype
 110   | ITwhere
 111   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 112
 113   | ITforall                    -- GHC extension keywords
 114   | ITforeign
 115   | ITexport
 116   | ITlabel
 117   | ITdynamic
 118   | ITsafe
 119   | ITthreadsafe
 120   | ITunsafe
 121   | ITwith
 122   | ITstdcallconv
 123   | ITccallconv
 124   | ITdotnet
 125   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 126
 127   | ITspecialise_prag           -- Pragmas
 128   | ITsource_prag
 129   | ITinline_prag
 130   | ITnoinline_prag
 131   | ITrules_prag
 132   | ITdeprecated_prag
 133   | ITline_prag
 134   | ITscc_prag
 135   | ITclose_prag
 136
 137   | ITdotdot                    -- reserved symbols
 138   | ITdcolon
 139   | ITequal
 140   | ITlam
 141   | ITvbar
 142   | ITlarrow
 143   | ITrarrow
 144   | ITat
 145   | ITtilde
 146   | ITdarrow
 147   | ITminus
 148   | ITbang
 149   | ITstar
 150   | ITdot
 151
 152   | ITbiglam                    -- GHC-extension symbols
 153
 154   | ITocurly                    -- special symbols
 155   | ITccurly
 156   | ITocurlybar                 -- {|, for type applications
 157   | ITccurlybar                 -- |}, for type applications
 158   | ITvccurly
 159   | ITobrack
 160   | ITopabrack                  -- [:, for parallel arrays with -fparr
 161   | ITcpabrack                  -- :], for parallel arrays with -fparr
 162   | ITcbrack
 163   | IToparen
 164   | ITcparen
 165   | IToubxparen
 166   | ITcubxparen
 167   | ITsemi
 168   | ITcomma
 169   | ITunderscore
 170   | ITbackquote
 171
 172   | ITvarid   FAST_STRING       -- identifiers
 173   | ITconid   FAST_STRING
 174   | ITvarsym  FAST_STRING
 175   | ITconsym  FAST_STRING
 176   | ITqvarid  (FAST_STRING,FAST_STRING)
 177   | ITqconid  (FAST_STRING,FAST_STRING)
 178   | ITqvarsym (FAST_STRING,FAST_STRING)
 179   | ITqconsym (FAST_STRING,FAST_STRING)
 180
 181   | ITdupipvarid   FAST_STRING  -- GHC extension: implicit param: ?x
 182   | ITsplitipvarid FAST_STRING  -- GHC extension: implicit param: %x
 183
 184   | ITpragma StringBuffer
 185
 186   | ITchar       Int
 187   | ITstring     FAST_STRING
 188   | ITinteger    Integer
 189   | ITrational   Rational
 190
 191   | ITprimchar   Int
 192   | ITprimstring FAST_STRING
 193   | ITprimint    Integer
 194   | ITprimfloat  Rational
 195   | ITprimdouble Rational
 196   | ITlitlit     FAST_STRING
 197
 198   | ITunknown String            -- Used when the lexer can't make sense of it
 199   | ITeof                       -- end of file token
 200   deriving Show -- debugging
 201 \end{code}
 202
 203 -----------------------------------------------------------------------------
 204 Keyword Lists
 205
 206 \begin{code}
 207 pragmaKeywordsFM = listToUFM $
 208       map (\ (x,y) -> (_PK_ x,y))
 209        [( "SPECIALISE", ITspecialise_prag ),
 210         ( "SPECIALIZE", ITspecialise_prag ),
 211         ( "SOURCE",     ITsource_prag ),
 212         ( "INLINE",     ITinline_prag ),
 213         ( "NOINLINE",   ITnoinline_prag ),
 214         ( "NOTINLINE",  ITnoinline_prag ),
 215         ( "LINE",       ITline_prag ),
 216         ( "RULES",      ITrules_prag ),
 217         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
 218         ( "SCC",        ITscc_prag ),
 219         ( "DEPRECATED", ITdeprecated_prag )
 220         ]
 221
 222 haskellKeywordsFM = listToUFM $
 223       map (\ (x,y) -> (_PK_ x,y))
 224        [( "_",          ITunderscore ),
 225         ( "as",         ITas ),
 226         ( "case",       ITcase ),
 227         ( "class",      ITclass ),
 228         ( "data",       ITdata ),
 229         ( "default",    ITdefault ),
 230         ( "deriving",   ITderiving ),
 231         ( "do",         ITdo ),
 232         ( "else",       ITelse ),
 233         ( "hiding",     IThiding ),
 234         ( "if",         ITif ),
 235         ( "import",     ITimport ),
 236         ( "in",         ITin ),
 237         ( "infix",      ITinfix ),
 238         ( "infixl",     ITinfixl ),
 239         ( "infixr",     ITinfixr ),
 240         ( "instance",   ITinstance ),
 241         ( "let",        ITlet ),
 242         ( "module",     ITmodule ),
 243         ( "newtype",    ITnewtype ),
 244         ( "of",         ITof ),
 245         ( "qualified",  ITqualified ),
 246         ( "then",       ITthen ),
 247         ( "type",       ITtype ),
 248         ( "where",      ITwhere ),
 249         ( "_scc_",      ITscc )         -- ToDo: remove
 250      ]
 251
 252 isSpecial :: Token -> Bool
 253 -- If we see M.x, where x is a keyword, but
 254 -- is special, we treat is as just plain M.x,
 255 -- not as a keyword.
 256 isSpecial ITas          = True
 257 isSpecial IThiding      = True
 258 isSpecial ITqualified   = True
 259 isSpecial ITforall      = True
 260 isSpecial ITexport      = True
 261 isSpecial ITlabel       = True
 262 isSpecial ITdynamic     = True
 263 isSpecial ITsafe        = True
 264 isSpecial ITthreadsafe  = True
 265 isSpecial ITunsafe      = True
 266 isSpecial ITwith        = True
 267 isSpecial ITccallconv   = True
 268 isSpecial ITstdcallconv = True
 269 isSpecial _             = False
 270
 271 -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 272 ghcExtensionKeywordsFM = listToUFM $
 273         map (\ (x,y) -> (_PK_ x,y))
 274      [  ( "forall",     ITforall ),
 275         ( "foreign",    ITforeign ),
 276         ( "export",     ITexport ),
 277         ( "label",      ITlabel ),
 278         ( "dynamic",    ITdynamic ),
 279         ( "safe",       ITsafe ),
 280         ( "threadsafe", ITthreadsafe ),
 281         ( "unsafe",     ITunsafe ),
 282         ( "with",       ITwith ),
 283         ( "stdcall",    ITstdcallconv),
 284         ( "ccall",      ITccallconv),
 285         ( "dotnet",     ITdotnet),
 286         ("_ccall_",     ITccall (False, False, PlayRisky)),
 287         ("_ccall_GC_",  ITccall (False, False, PlaySafe False)),
 288         ("_casm_",      ITccall (False, True,  PlayRisky)),
 289         ("_casm_GC_",   ITccall (False, True,  PlaySafe False))
 290      ]
 291
 292
 293 haskellKeySymsFM = listToUFM $
 294         map (\ (x,y) -> (_PK_ x,y))
 295       [ ("..",          ITdotdot)
 296        ,("::",          ITdcolon)
 297        ,("=",           ITequal)
 298        ,("\\",          ITlam)
 299        ,("|",           ITvbar)
 300        ,("<-",          ITlarrow)
 301        ,("->",          ITrarrow)
 302        ,("@",           ITat)
 303        ,("~",           ITtilde)
 304        ,("=>",          ITdarrow)
 305        ,("-",           ITminus)
 306        ,("!",           ITbang)
 307        ,("*",           ITstar)
 308        ,(".",           ITdot)          -- sadly, for 'forall a . t'
 309        ]
 310 \end{code}
 311
 312 -----------------------------------------------------------------------------
 313 The lexical analyser
 314
 315 Lexer state:
 316
 317         - (exts)  lexing a source with extensions, eg, an interface file or
 318                   with -fglasgow-exts
 319         - (bol)   pointer to beginning of line (for column calculations)
 320         - (buf)   pointer to beginning of token
 321         - (buf)   pointer to current char
 322         - (atbol) flag indicating whether we're at the beginning of a line
 323
 324 \begin{code}
 325 lexer :: (Token -> P a) -> P a
 326 lexer cont buf s@(PState{
 327                     loc = loc,
 328                     extsBitmap = exts,
 329                     bol = bol,
 330                     atbol = atbol,
 331                     context = ctx
 332                 })
 333
 334         -- first, start a new lexeme and lose all the whitespace
 335   =  _scc_ "Lexer"
 336   tab line bol atbol (stepOverLexeme buf)
 337   where
 338         line = srcLocLine loc
 339
 340         tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
 341           case currentChar# buf of
 342
 343             '\NUL'# ->
 344                    if bufferExhausted (stepOn buf)
 345                         then cont ITeof buf s'
 346                         else trace "lexer: misplaced NUL?" $
 347                              tab y bol atbol (stepOn buf)
 348
 349             '\n'# -> let buf' = stepOn buf
 350                      in tab (y +# 1#) (currentIndex# buf') 1# buf'
 351
 352                 -- find comments.  This got harder in Haskell 98.
 353             '-'# ->  let trundle n =
 354                           let next = lookAhead# buf n in
 355                           if next `eqChar#` '-'# then trundle (n +# 1#)
 356                           else if is_symbol next || n <# 2#
 357                                 then is_a_token
 358                                 else tab y bol atbol
 359                                          (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
 360                     in trundle 1#
 361
 362                 -- comments and pragmas.  We deal with LINE pragmas here,
 363                 -- and throw out any unrecognised pragmas as comments.  Any
 364                 -- pragmas we know about are dealt with later (after any layout
 365                 -- processing if necessary).
 366             '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 367                 if lookAhead# buf 2# `eqChar#` '#'# then
 368                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
 369                   case expandWhile# is_ident (stepOverLexeme buf1)   of { buf2->
 370                   let lexeme = mkFastString -- ToDo: too slow
 371                                   (map toUpper (lexemeToString buf2)) in
 372                   case lookupUFM pragmaKeywordsFM lexeme of
 373                         -- ignore RULES pragmas when -fglasgow-exts is off
 374                         Just ITrules_prag | not (glaExtsEnabled exts) ->
 375                            skip_to_end (stepOnBy# buf 2#) s'
 376                         Just ITline_prag ->
 377                            line_prag skip_to_end buf2 s'
 378                         Just other -> is_a_token
 379                         Nothing -> skip_to_end (stepOnBy# buf 2#) s'
 380                   }}
 381
 382                 else skip_to_end (stepOnBy# buf 2#) s'
 383                 where
 384                     skip_to_end = skipNestedComment (lexer cont)
 385
 386                 -- special GHC extension: we grok cpp-style #line pragmas
 387             '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
 388                 let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
 389                            lookAhead# buf 2# `eqChar#` 'i'# &&
 390                            lookAhead# buf 3# `eqChar#` 'n'# &&
 391                            lookAhead# buf 4# `eqChar#` 'e'#  = stepOnBy# buf 5#
 392                          | otherwise = stepOn buf
 393                 in
 394                 case expandWhile# is_space buf1 of { buf2 ->
 395                 if is_digit (currentChar# buf2)
 396                         then line_prag next_line buf2 s'
 397                         else is_a_token
 398                 }
 399                 where
 400                 next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
 401
 402                 -- tabs have been expanded beforehand
 403             c | is_space c -> tab y bol atbol (stepOn buf)
 404               | otherwise  -> is_a_token
 405
 406            where s' = s{loc = replaceSrcLine loc y,
 407                         bol = bol,
 408                        atbol = atbol}
 409
 410                  is_a_token | atbol /=# 0# = lexBOL cont buf s'
 411                             | otherwise    = lexToken cont exts buf s'
 412
 413 -- {-# LINE .. #-} pragmas.  yeuch.
 414 line_prag cont buf s@PState{loc=loc} =
 415   case expandWhile# is_space buf                of { buf1 ->
 416   case scanNumLit 0 (stepOverLexeme buf1)       of { (line,buf2) ->
 417   -- subtract one: the line number refers to the *following* line.
 418   let real_line = line - 1 in
 419   case fromInteger real_line                    of { i@(I# l) ->
 420         -- ToDo, if no filename then we skip the newline.... d'oh
 421   case expandWhile# is_space buf2               of { buf3 ->
 422   case currentChar# buf3                        of
 423      '\"'#{-"-} ->
 424         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
 425         let
 426             file = lexemeToFastString buf4
 427             new_buf = stepOn (stepOverLexeme buf4)
 428         in
 429         if nullFastString file
 430                 then cont new_buf s{loc = replaceSrcLine loc l}
 431                 else cont new_buf s{loc = mkSrcLoc file i}
 432         }
 433      _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
 434   }}}}
 435
 436 skipNestedComment :: P a -> P a
 437 skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
 438
 439 skipNestedComment' :: SrcLoc -> P a -> P a
 440 skipNestedComment' orig_loc cont buf = loop buf
 441  where
 442    loop buf =
 443      case currentChar# buf of
 444         '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
 445
 446         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
 447               skipNestedComment
 448                 (skipNestedComment' orig_loc cont)
 449                 (stepOnBy# buf 2#)
 450
 451         '\n'# -> \ s@PState{loc=loc} ->
 452                  let buf' = stepOn buf in
 453                  loop buf' s{loc = incSrcLine loc,
 454                              bol = currentIndex# buf',
 455                              atbol = 1#}
 456
 457         -- pass the original SrcLoc to lexError so that the error is
 458         -- reported at the line it was originally on, not the line at
 459         -- the end of the file.
 460         '\NUL'# | bufferExhausted (stepOn buf) ->
 461                 \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
 462
 463         _   -> loop (stepOn buf)
 464
 465 -- When we are lexing the first token of a line, check whether we need to
 466 -- insert virtual semicolons or close braces due to layout.
 467
 468 lexBOL :: (Token -> P a) -> P a
 469 lexBOL cont buf s@(PState{
 470                     loc = loc,
 471                     extsBitmap = exts,
 472                     bol = bol,
 473                     atbol = atbol,
 474                     context = ctx
 475                   }) =
 476         if need_close_curly then
 477                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting '}'") $
 478                 cont ITvccurly buf s{atbol = 1#, context = tail ctx}
 479         else if need_semi_colon then
 480                 --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
 481                 cont ITsemi buf s{atbol = 0#}
 482         else
 483                 lexToken cont exts buf s{atbol = 0#}
 484   where
 485         col = currentIndex# buf -# bol
 486
 487         need_close_curly =
 488                 case ctx of
 489                         [] -> False
 490                         (i:_) -> case i of
 491                                     NoLayout -> False
 492                                     Layout n -> col <# n
 493         need_semi_colon =
 494                 case ctx of
 495                         [] -> False
 496                         (i:_) -> case i of
 497                                     NoLayout -> False
 498                                     Layout n -> col ==# n
 499
 500
 501 lexToken :: (Token -> P a) -> Int# -> P a
 502 lexToken cont exts buf =
 503 -- trace "lexToken" $
 504   case currentChar# buf of
 505
 506     -- special symbols ----------------------------------------------------
 507     '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
 508                 -> cont IToubxparen (setCurrentPos# buf 2#)
 509          | otherwise
 510                 -> cont IToparen (incLexeme buf)
 511
 512     ')'# -> cont ITcparen    (incLexeme buf)
 513     '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
 514             cont ITopabrack  (setCurrentPos# buf 2#)
 515          | otherwise ->
 516             cont ITobrack    (incLexeme buf)
 517     ']'# -> cont ITcbrack    (incLexeme buf)
 518     ','# -> cont ITcomma     (incLexeme buf)
 519     ';'# -> cont ITsemi      (incLexeme buf)
 520     '}'# -> \ s@PState{context = ctx} ->
 521             case ctx of
 522                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
 523                 _        -> lexError "too many '}'s" buf s
 524     '|'# -> case lookAhead# buf 1# of
 525                  '}'#  | glaExtsEnabled exts -> cont ITccurlybar
 526                                                      (setCurrentPos# buf 2#)
 527                  _                           -> lex_sym cont (incLexeme buf)
 528     ':'# -> case lookAhead# buf 1# of
 529                  ']'#  | parrEnabled exts    -> cont ITcpabrack
 530                                                      (setCurrentPos# buf 2#)
 531                  _                           -> lex_sym cont (incLexeme buf)
 532
 533
 534     '#'# -> case lookAhead# buf 1# of
 535                 ')'#  | glaExtsEnabled exts
 536                      -> cont ITcubxparen (setCurrentPos# buf 2#)
 537                 '-'# -> case lookAhead# buf 2# of
 538                            '}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
 539                            _    -> lex_sym cont (incLexeme buf)
 540                 _    -> lex_sym cont (incLexeme buf)
 541
 542     '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
 543                 -> lex_cstring cont (setCurrentPos# buf 2#)
 544          | otherwise
 545                 -> cont ITbackquote (incLexeme buf)
 546
 547     '{'# ->   -- for Emacs: -}
 548             case lookAhead# buf 1# of
 549            '|'# | glaExtsEnabled exts
 550                 -> cont ITocurlybar (setCurrentPos# buf 2#)
 551            '-'# -> case lookAhead# buf 2# of
 552                     '#'# -> lex_prag cont (setCurrentPos# buf 3#)
 553                     _    -> cont ITocurly (incLexeme buf)
 554            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
 555
 556     -- strings/characters -------------------------------------------------
 557     '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf)
 558     '\''#      -> lex_char (char_end cont) exts (incLexeme buf)
 559
 560         -- Hexadecimal and octal constants
 561     '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
 562                 -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
 563          | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
 564                 -> readNum (after_lexnum cont exts) buf' is_octdigit  8 oct_or_dec
 565         where ch   = lookAhead# buf 1#
 566               ch2  = lookAhead# buf 2#
 567               buf' = setCurrentPos# buf 2#
 568
 569     '\NUL'# ->
 570             if bufferExhausted (stepOn buf) then
 571                cont ITeof buf
 572             else
 573                trace "lexIface: misplaced NUL?" $
 574                cont (ITunknown "\NUL") (stepOn buf)
 575
 576     '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 577             lex_ip ITdupipvarid cont (incLexeme buf)
 578     '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
 579             lex_ip ITsplitipvarid cont (incLexeme buf)
 580     c | is_digit  c -> lex_num cont exts 0 buf
 581       | is_symbol c -> lex_sym cont buf
 582       | is_upper  c -> lex_con cont exts buf
 583       | is_ident  c -> lex_id  cont exts buf
 584       | otherwise   -> lexError "illegal character" buf
 585
 586 -- Int# is unlifted, and therefore faster than Bool for flags.
 587 {-# INLINE flag #-}
 588 flag :: Int# -> Bool
 589 flag 0# = False
 590 flag _  = True
 591
 592 -------------------------------------------------------------------------------
 593 -- Pragmas
 594
 595 lex_prag cont buf
 596   = case expandWhile# is_space buf of { buf1 ->
 597     case expandWhile# is_ident (stepOverLexeme buf1) of { buf2 ->
 598     let lexeme = mkFastString (map toUpper (lexemeToString buf2)) in
 599     case lookupUFM pragmaKeywordsFM lexeme of
 600         Just kw -> cont kw (mergeLexemes buf buf2)
 601         Nothing -> panic "lex_prag"
 602   }}
 603
 604 -------------------------------------------------------------------------------
 605 -- Strings & Chars
 606
 607 lex_string cont exts s buf
 608   = case currentChar# buf of
 609         '"'#{-"-} ->
 610            let buf' = incLexeme buf
 611                s' = mkFastStringNarrow (map chr (reverse s))
 612            in case currentChar# buf' of
 613                 '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s
 614                     then cont (ITprimstring s') (incLexeme buf')
 615                     else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
 616                 _                   -> cont (ITstring s') buf'
 617
 618         -- ignore \& in a string, deal with string gaps
 619         '\\'# | next_ch `eqChar#` '&'#
 620                 -> lex_string cont exts s buf'
 621               | is_space next_ch
 622                 -> lex_stringgap cont exts s (incLexeme buf)
 623
 624             where next_ch = lookAhead# buf 1#
 625                   buf' = setCurrentPos# buf 2#
 626
 627         _ -> lex_char (lex_next_string cont s) exts buf
 628
 629 lex_stringgap cont exts s buf
 630   = let buf' = incLexeme buf in
 631     case currentChar# buf of
 632         '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
 633                   st{loc = incSrcLine loc}
 634         '\\'# -> lex_string cont exts s buf'
 635         c | is_space c -> lex_stringgap cont exts s buf'
 636         other -> charError buf'
 637
 638 lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
 639
 640 lex_char :: (Int# -> Int -> P a) -> Int# -> P a
 641 lex_char cont exts buf
 642   = case currentChar# buf of
 643         '\\'# -> lex_escape (cont exts) (incLexeme buf)
 644         c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf)
 645         other -> charError buf
 646
 647 char_end cont exts c buf
 648   = case currentChar# buf of
 649         '\''# -> let buf' = incLexeme buf in
 650                  case currentChar# buf' of
 651                         '#'# | glaExtsEnabled exts
 652                                 -> cont (ITprimchar c) (incLexeme buf')
 653                         _       -> cont (ITchar c) buf'
 654         _     -> charError buf
 655
 656 lex_escape cont buf
 657   = let buf' = incLexeme buf in
 658     case currentChar# buf of
 659         'a'#       -> cont (ord '\a') buf'
 660         'b'#       -> cont (ord '\b') buf'
 661         'f'#       -> cont (ord '\f') buf'
 662         'n'#       -> cont (ord '\n') buf'
 663         'r'#       -> cont (ord '\r') buf'
 664         't'#       -> cont (ord '\t') buf'
 665         'v'#       -> cont (ord '\v') buf'
 666         '\\'#      -> cont (ord '\\') buf'
 667         '"'#       -> cont (ord '\"') buf'
 668         '\''#      -> cont (ord '\'') buf'
 669         '^'#       -> let c = currentChar# buf' in
 670                       if c `geChar#` '@'# && c `leChar#` '_'#
 671                         then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
 672                         else charError buf'
 673
 674         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
 675         'o'#      -> readNum (after_charnum cont) buf' is_octdigit  8 oct_or_dec
 676         x | is_digit x
 677                   -> readNum (after_charnum cont) buf is_digit    10 oct_or_dec
 678
 679         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
 680                                        Just buf2 <- [prefixMatch buf p] ] of
 681                             (c,buf2):_ -> cont (ord c) buf2
 682                             [] -> charError buf'
 683
 684 after_charnum cont i buf
 685   = if i >= 0 && i <= 0x10FFFF
 686         then cont (fromInteger i) buf
 687         else charError buf
 688
 689 readNum cont buf is_digit base conv = read buf 0
 690   where read buf i
 691           = case currentChar# buf of { c ->
 692             if is_digit c
 693                 then read (incLexeme buf) (i*base + (toInteger (I# (conv c))))
 694                 else cont i buf
 695             }
 696
 697 is_hexdigit c
 698         =  is_digit c
 699         || (c `geChar#` 'a'# && c `leChar#` 'f'#)
 700         || (c `geChar#` 'A'# && c `leChar#` 'F'#)
 701
 702 hex c | is_digit c = ord# c -# ord# '0'#
 703       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
 704 oct_or_dec c = ord# c -# ord# '0'#
 705
 706 is_octdigit c = c `geChar#` '0'# && c `leChar#` '7'#
 707
 708 to_lower c
 709   | c `geChar#` 'A'# && c `leChar#` 'Z'#
 710         = chr# (ord# c -# (ord# 'A'# -# ord# 'a'#))
 711   | otherwise = c
 712
 713 charError buf = lexError "error in character literal" buf
 714
 715 silly_escape_chars = [
 716         ("NUL", '\NUL'),
 717         ("SOH", '\SOH'),
 718         ("STX", '\STX'),
 719         ("ETX", '\ETX'),
 720         ("EOT", '\EOT'),
 721         ("ENQ", '\ENQ'),
 722         ("ACK", '\ACK'),
 723         ("BEL", '\BEL'),
 724         ("BS", '\BS'),
 725         ("HT", '\HT'),
 726         ("LF", '\LF'),
 727         ("VT", '\VT'),
 728         ("FF", '\FF'),
 729         ("CR", '\CR'),
 730         ("SO", '\SO'),
 731         ("SI", '\SI'),
 732         ("DLE", '\DLE'),
 733         ("DC1", '\DC1'),
 734         ("DC2", '\DC2'),
 735         ("DC3", '\DC3'),
 736         ("DC4", '\DC4'),
 737         ("NAK", '\NAK'),
 738         ("SYN", '\SYN'),
 739         ("ETB", '\ETB'),
 740         ("CAN", '\CAN'),
 741         ("EM", '\EM'),
 742         ("SUB", '\SUB'),
 743         ("ESC", '\ESC'),
 744         ("FS", '\FS'),
 745         ("GS", '\GS'),
 746         ("RS", '\RS'),
 747         ("US", '\US'),
 748         ("SP", '\SP'),
 749         ("DEL", '\DEL')
 750         ]
 751
 752 -----------------------------------------------------------------------------
 753 -- Numbers
 754
 755 lex_num :: (Token -> P a) -> Int# -> Integer -> P a
 756 lex_num cont exts acc buf =
 757  case scanNumLit acc buf of
 758      (acc',buf') ->
 759        case currentChar# buf' of
 760          '.'# | is_digit (lookAhead# buf' 1#) ->
 761              -- this case is not optimised at all, as the
 762              -- presence of floating point numbers in interface
 763              -- files is not that common. (ToDo)
 764             case expandWhile# is_digit (incLexeme buf') of
 765               buf2 -> -- points to first non digit char
 766
 767                 let l = case currentChar# buf2 of
 768                           'E'# -> do_exponent
 769                           'e'# -> do_exponent
 770                           _ -> buf2
 771
 772                     do_exponent
 773                         = let buf3 = incLexeme buf2 in
 774                           case currentChar# buf3 of
 775                                 '-'# | is_digit (lookAhead# buf3 1#)
 776                                    -> expandWhile# is_digit (incLexeme buf3)
 777                                 '+'# | is_digit (lookAhead# buf3 1#)
 778                                    -> expandWhile# is_digit (incLexeme buf3)
 779                                 x | is_digit x -> expandWhile# is_digit buf3
 780                                 _ -> buf2
 781
 782                     v = readRational__ (lexemeToString l)
 783
 784                 in case currentChar# l of -- glasgow exts only
 785                       '#'# | glaExtsEnabled exts -> let l' = incLexeme l in
 786                               case currentChar# l' of
 787                                 '#'# -> cont (ITprimdouble v) (incLexeme l')
 788                                 _    -> cont (ITprimfloat  v) l'
 789                       _ -> cont (ITrational v) l
 790
 791          _ -> after_lexnum cont exts acc' buf'
 792
 793 after_lexnum cont exts i buf
 794   = case currentChar# buf of
 795         '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf)
 796         _                          -> cont (ITinteger i) buf
 797
 798 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
 799 readRational r = do
 800      (n,d,s) <- readFix r
 801      (k,t)   <- readExp s
 802      return ((n%1)*10^^(k-d), t)
 803  where
 804      readFix r = do
 805         (ds,s)  <- lexDecDigits r
 806         (ds',t) <- lexDotDigits s
 807         return (read (ds++ds'), length ds', t)
 808
 809      readExp (e:s) | e `elem` "eE" = readExp' s
 810      readExp s                     = return (0,s)
 811
 812      readExp' ('+':s) = readDec s
 813      readExp' ('-':s) = do
 814                         (k,t) <- readDec s
 815                         return (-k,t)
 816      readExp' s       = readDec s
 817
 818      readDec s = do
 819         (ds,r) <- nonnull isDigit s
 820         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 821                 r)
 822
 823      lexDecDigits = nonnull isDigit
 824
 825      lexDotDigits ('.':s) = return (span isDigit s)
 826      lexDotDigits s       = return ("",s)
 827
 828      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 829                       return (cs,t)
 830
 831 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
 832 readRational__ top_s
 833   = case top_s of
 834       '-' : xs -> - (read_me xs)
 835       xs       -> read_me xs
 836   where
 837     read_me s
 838       = case (do { (x,"") <- readRational s ; return x }) of
 839           [x] -> x
 840           []  -> error ("readRational__: no parse:"        ++ top_s)
 841           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
 842
 843 -----------------------------------------------------------------------------
 844 -- C "literal literal"s  (i.e. things like ``NULL'', ``stdout'' etc.)
 845
 846 -- we lexemeToFastString on the bit between the ``''s, but include the
 847 -- quotes in the full lexeme.
 848
 849 lex_cstring cont buf =
 850  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
 851    Just buf' -> cont (ITlitlit (lexemeToFastString
 852                                 (setCurrentPos# buf' (negateInt# 2#))))
 853                    (mergeLexemes buf buf')
 854    Nothing   -> lexError "unterminated ``" buf
 855
 856 -----------------------------------------------------------------------------
 857 -- identifiers, symbols etc.
 858
 859 lex_ip ip_constr cont buf =
 860  case expandWhile# is_ident buf of
 861    buf' -> cont (ip_constr (tailFS lexeme)) buf'
 862         where lexeme = lexemeToFastString buf'
 863
 864 lex_id cont exts buf =
 865  let buf1 = expandWhile# is_ident buf in
 866  seq buf1 $
 867
 868  case (if glaExtsEnabled exts
 869         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
 870         else buf1)                              of { buf' ->
 871  seq buf' $
 872
 873  let lexeme  = lexemeToFastString buf' in
 874
 875  case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 876         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 877                           cont kwd_token buf';
 878         Nothing        ->
 879
 880  let var_token = cont (ITvarid lexeme) buf' in
 881
 882  if not (glaExtsEnabled exts)
 883    then var_token
 884    else
 885
 886  case lookupUFM ghcExtensionKeywordsFM lexeme of {
 887         Just kwd_token -> cont kwd_token buf';
 888         Nothing        -> var_token
 889
 890  }}}
 891
 892 lex_sym cont buf =
 893  -- trace "lex_sym" $
 894  case expandWhile# is_symbol buf of
 895    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 896                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 897                                   cont kwd_token buf' ;
 898                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 899                                   cont (mk_var_token lexeme) buf'
 900            }
 901         where lexeme = lexemeToFastString buf'
 902
 903
 904 -- lex_con recursively collects components of a qualified identifer.
 905 -- The argument buf is the StringBuffer representing the lexeme
 906 -- identified so far, where the next character is upper-case.
 907
 908 lex_con cont exts buf =
 909  -- trace ("con: "{-++unpackFS lexeme-}) $
 910  let empty_buf = stepOverLexeme buf in
 911  case expandWhile# is_ident empty_buf of { buf1 ->
 912  case slurp_trailing_hashes buf1 exts of { con_buf ->
 913
 914  let all_buf = mergeLexemes buf con_buf
 915
 916      con_lexeme = lexemeToFastString con_buf
 917      mod_lexeme = lexemeToFastString (decLexeme buf)
 918      all_lexeme = lexemeToFastString all_buf
 919
 920      just_a_conid
 921         | emptyLexeme buf = cont (ITconid con_lexeme)               all_buf
 922         | otherwise       = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
 923  in
 924
 925  case currentChar# all_buf of
 926      '.'# -> maybe_qualified cont exts all_lexeme
 927                 (incLexeme all_buf) just_a_conid
 928      _    -> just_a_conid
 929   }}
 930
 931
 932 maybe_qualified cont exts mod buf just_a_conid =
 933  -- trace ("qid: "{-++unpackFS lexeme-}) $
 934  case currentChar# buf of
 935   '['# ->       -- Special case for []
 936     case lookAhead# buf 1# of
 937      ']'# -> cont (ITqconid  (mod,SLIT("[]"))) (setCurrentPos# buf 2#)
 938      _    -> just_a_conid
 939
 940   '('# ->  -- Special case for (,,,)
 941            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 942     case lookAhead# buf 1# of
 943      '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
 944                 ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
 945                                 just_a_conid
 946                 _    -> just_a_conid
 947      ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#)
 948      ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid
 949      _    -> just_a_conid
 950
 951   '-'# -> case lookAhead# buf 1# of
 952             '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
 953             _    -> lex_id3 cont exts mod buf just_a_conid
 954
 955   _    -> lex_id3 cont exts mod buf just_a_conid
 956
 957
 958 lex_id3 cont exts mod buf just_a_conid
 959   | is_upper (currentChar# buf) =
 960      lex_con cont exts buf
 961
 962   | is_symbol (currentChar# buf) =
 963      let
 964         start_new_lexeme = stepOverLexeme buf
 965      in
 966      -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
 967      case expandWhile# is_symbol start_new_lexeme of { buf' ->
 968      let
 969        lexeme  = lexemeToFastString buf'
 970         -- real lexeme is M.<sym>
 971        new_buf = mergeLexemes buf buf'
 972      in
 973      cont (mk_qvar_token mod lexeme) new_buf
 974         -- wrong, but arguably morally right: M... is now a qvarsym
 975      }
 976
 977   | otherwise   =
 978      let
 979         start_new_lexeme = stepOverLexeme buf
 980      in
 981      -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
 982      case expandWhile# is_ident start_new_lexeme of { buf1 ->
 983      if emptyLexeme buf1
 984             then just_a_conid
 985             else
 986
 987      case slurp_trailing_hashes buf1 exts of { buf' ->
 988
 989      let
 990       lexeme      = lexemeToFastString buf'
 991       new_buf     = mergeLexemes buf buf'
 992       is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
 993      in
 994      case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 995             Nothing          -> is_a_qvarid ;
 996
 997             Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
 998                            -> is_a_qvarid          --  recognised as keywords here.
 999                            | otherwise
1000                            -> just_a_conid         -- avoid M.where etc.
1001      }}}
1002
1003 slurp_trailing_hashes buf exts
1004   | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
1005   | otherwise           = buf
1006
1007
1008 mk_var_token pk_str
1009   | is_upper f          = ITconid pk_str
1010   | is_ident f          = ITvarid pk_str
1011   | f `eqChar#` ':'#    = ITconsym pk_str
1012   | otherwise           = ITvarsym pk_str
1013   where
1014       (C# f) = _HEAD_ pk_str
1015       -- tl     = _TAIL_ pk_str
1016
1017 mk_qvar_token m token =
1018 -- trace ("mk_qvar ") $
1019  case mk_var_token token of
1020    ITconid n  -> ITqconid  (m,n)
1021    ITvarid n  -> ITqvarid  (m,n)
1022    ITconsym n -> ITqconsym (m,n)
1023    ITvarsym n -> ITqvarsym (m,n)
1024    _          -> ITunknown (show token)
1025 \end{code}
1026
1027 ----------------------------------------------------------------------------
1028 Horrible stuff for dealing with M.(,,,)
1029
1030 \begin{code}
1031 lex_tuple cont mod buf back_off =
1032   go 2 buf
1033   where
1034    go n buf =
1035     case currentChar# buf of
1036       ','# -> go (n+1) (stepOn buf)
1037       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
1038       _    -> back_off
1039
1040 lex_ubx_tuple cont mod buf back_off =
1041   go 2 buf
1042   where
1043    go n buf =
1044     case currentChar# buf of
1045       ','# -> go (n+1) (stepOn buf)
1046       '#'# -> case lookAhead# buf 1# of
1047                 ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
1048                                  (stepOnBy# buf 2#)
1049                 _    -> back_off
1050       _    -> back_off
1051 \end{code}
1052
1053 -----------------------------------------------------------------------------
1054
1055 \begin{code}
1056 data LayoutContext
1057   = NoLayout
1058   | Layout Int#
1059
1060 data ParseResult a
1061   = POk PState a
1062   | PFailed Message
1063
1064 data PState = PState {
1065         loc        :: SrcLoc,
1066         extsBitmap :: Int#,     -- bitmap that determines permitted extensions
1067         bol        :: Int#,
1068         atbol      :: Int#,
1069         context    :: [LayoutContext]
1070      }
1071
1072 type P a = StringBuffer         -- Input string
1073         -> PState
1074         -> ParseResult a
1075
1076 returnP   :: a -> P a
1077 returnP a buf s = POk s a
1078
1079 thenP      :: P a -> (a -> P b) -> P b
1080 m `thenP` k = \ buf s ->
1081         case m buf s of
1082                 POk s1 a -> k a buf s1
1083                 PFailed err  -> PFailed err
1084
1085 thenP_     :: P a -> P b -> P b
1086 m `thenP_` k = m `thenP` \_ -> k
1087
1088 mapP :: (a -> P b) -> [a] -> P [b]
1089 mapP f [] = returnP []
1090 mapP f (a:as) =
1091      f a `thenP` \b ->
1092      mapP f as `thenP` \bs ->
1093      returnP (b:bs)
1094
1095 failP :: String -> P a
1096 failP msg buf s = PFailed (text msg)
1097
1098 failMsgP :: Message -> P a
1099 failMsgP msg buf s = PFailed msg
1100
1101 lexError :: String -> P a
1102 lexError str buf s@PState{ loc = loc }
1103   = failMsgP (hcat [ppr loc, text ": ", text str]) buf s
1104
1105 getSrcLocP :: P SrcLoc
1106 getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
1107
1108 -- use a temporary SrcLoc for the duration of the argument
1109 setSrcLocP :: SrcLoc -> P a -> P a
1110 setSrcLocP new_loc p buf s =
1111   case p buf s{ loc=new_loc } of
1112       POk _ a   -> POk s a
1113       PFailed e -> PFailed e
1114
1115 getSrcFile :: P FAST_STRING
1116 getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
1117
1118 pushContext :: LayoutContext -> P ()
1119 pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
1120
1121 {-
1122
1123 This special case in layoutOn is to handle layout contexts with are
1124 indented the same or less than the current context.  This is illegal
1125 according to the Haskell spec, so we have to arrange to close the
1126 current context.  eg.
1127
1128 class Foo a where
1129 class Bar a
1130
1131 after the first 'where', the sequence of events is:
1132
1133         - layout system inserts a ';' (column 0)
1134         - parser begins a new context at column 0
1135         - parser shifts ';' (legal empty declaration)
1136         - parser sees 'class': parse error (we're still in the inner context)
1137
1138 trouble is, by the time we know we need a new context, the lexer has
1139 already generated the ';'.  Hacky solution is as follows: since we
1140 know the column of the next token (it's the column number of the new
1141 context), we set the ACTUAL column number of the new context to this
1142 numer plus one.  Hence the next time the lexer is called, a '}' will
1143 be generated to close the new context straight away.  Furthermore, we
1144 have to set the atbol flag so that the ';' that the parser shifted as
1145 part of the new context is re-generated.
1146
1147 when the new context is *less* indented than the current one:
1148
1149 f = f where g = g where
1150 h = h
1151
1152         - current context: column 12.
1153         - on seeing 'h' (column 0), the layout system inserts '}'
1154         - parser starts a new context, column 0
1155         - parser sees '}', uses it to close new context
1156         - we still need to insert another '}' followed by a ';',
1157           hence the atbol trick.
1158
1159 There's also a special hack in here to deal with
1160
1161         do
1162            ....
1163            e $ do
1164            blah
1165
1166 i.e. the inner context is at the same indentation level as the outer
1167 context.  This is strictly illegal according to Haskell 98, but
1168 there's a lot of existing code using this style and it doesn't make
1169 any sense to disallow it, since empty 'do' lists don't make sense.
1170 -}
1171
1172 layoutOn :: Bool -> P ()
1173 layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
1174     let offset = lexemeIndex buf -# bol in
1175     case ctx of
1176         Layout prev_off : _
1177            | if strict then prev_off >=# offset else prev_off ># offset ->
1178                 --trace ("layout on, column: " ++  show (I# offset)) $
1179                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
1180         other ->
1181                 --trace ("layout on, column: " ++  show (I# offset)) $
1182                 POk s{ context = Layout offset : ctx } ()
1183
1184 layoutOff :: P ()
1185 layoutOff buf s@(PState{ context = ctx }) =
1186     POk s{ context = NoLayout:ctx } ()
1187
1188 popContext :: P ()
1189 popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
1190   case ctx of
1191         (_:tl) -> POk s{ context = tl } ()
1192         []     -> PFailed (srcParseErr buf loc)
1193
1194 -- for reasons of efficiency, flags indicating language extensions (eg,
1195 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1196 -- integer
1197
1198 glaExtsBit, ffiBit, parrBit :: Int
1199 glaExtsBit = 0
1200 ffiBit     = 1  -- FIXME: not used yet; still part of `glaExtsBit'
1201 parrBit    = 2
1202
1203 glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
1204 glaExtsEnabled flags = testBit (toInt32 flags) glaExtsBit
1205 ffiEnabled     flags = testBit (toInt32 flags) ffiBit
1206 parrEnabled    flags = testBit (toInt32 flags) parrBit
1207
1208 toInt32 :: Int# -> Int32
1209 toInt32 x# = fromIntegral (I# x#)
1210
1211 -- convenient record-based bitmap for the interface to the rest of the world
1212 --
1213 data ExtFlags = ExtFlags {
1214                   glasgowExtsEF :: Bool,
1215 --                ffiEF         :: Bool,  -- commented out to avoid warnings
1216                   parrEF        :: Bool   -- while not used yet
1217                 }
1218
1219 -- create a parse state
1220 --
1221 mkPState          :: SrcLoc -> ExtFlags -> PState
1222 mkPState loc exts  = PState {
1223                        loc        = loc,
1224                        extsBitmap = case (fromIntegral bitmap) of {I# bits -> bits},
1225                        bol        = 0#,
1226                        atbol      = 1#,
1227                        context    = []
1228                      }
1229                      where
1230                        bitmap =     glaExtsBit `setBitIf` glasgowExtsEF exts
1231 --                              .|. ffiBit     `setBitIf` ffiEF         exts
1232                                 .|. parrBit    `setBitIf` parrEF        exts
1233                        --
1234                        setBitIf :: Int -> Bool -> Int32
1235                        b `setBitIf` cond | cond      = bit b
1236                                          | otherwise = 0
1237
1238 -----------------------------------------------------------------------------
1239
1240 srcParseErr :: StringBuffer -> SrcLoc -> Message
1241 srcParseErr s l
1242   = hcat [ppr l,
1243           if null token
1244              then ptext SLIT(": parse error (possibly incorrect indentation)")
1245              else hcat [ptext SLIT(": parse error on input "),
1246                         char '`', text token, char '\'']
1247     ]
1248   where
1249         token = lexemeToString s
1250
1251 \end{code}