ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ErrUtils         ( Message )
  34 import Outputable
  35 import StringBuffer
  36 import FastString
  37 import FastTypes
  38 import SrcLoc
  39 import UniqFM
  40 import CmdLineOpts
  41 import Ctype
  42 import Util             ( maybePrefixMatch )
  43
  44 import DATA_BITS
  45 import Char
  46 import Ratio
  47 --import TRACE
  48 }
  49
  50 $whitechar   = [\ \t\n\r\f\v\xa0]
  51 $white_no_nl = $whitechar # \n
  52
  53 $ascdigit  = 0-9
  54 $unidigit  = \x01
  55 $digit     = [$ascdigit $unidigit]
  56
  57 $special   = [\(\)\,\;\[\]\`\{\}]
  58 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  59 $unisymbol = \x02
  60 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  61
  62 $unilarge  = \x03
  63 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  64 $large     = [$asclarge $unilarge]
  65
  66 $unismall  = \x04
  67 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  68 $small     = [$ascsmall $unismall \_]
  69
  70 $graphic   = [$small $large $symbol $digit $special \:\"\']
  71
  72 $octit     = 0-7
  73 $hexit     = [$digit A-F a-f]
  74 $symchar   = [$symbol \:]
  75 $nl        = [\n\r]
  76 $idchar    = [$small $large $digit \']
  77
  78 @varid     = $small $idchar*
  79 @conid     = $large $idchar*
  80
  81 @varsym    = $symbol $symchar*
  82 @consym    = \: $symchar*
  83
  84 @decimal     = $digit+
  85 @octal       = $octit+
  86 @hexadecimal = $hexit+
  87 @exponent    = [eE] [\-\+]? @decimal
  88
  89 -- we support the hierarchical module name extension:
  90 @qual = (@conid \.)+
  91
  92 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  93
  94 haskell :-
  95
  96 -- everywhere: skip whitespace and comments
  97 $white_no_nl+                           ;
  98
  99 -- Everywhere: deal with nested comments.  We explicitly rule out
 100 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 101 -- (this can happen even though pragmas will normally take precedence due to
 102 -- longest-match, because pragmas aren't valid in every state, but comments
 103 -- are).
 104 "{-" / { notFollowedBy '#' }            { nested_comment }
 105
 106 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 107 -- more dashes followed by a symbol should be parsed as a varsym, so we
 108 -- have to exclude those.
 109 -- The regex says: "munch all the characters after the dashes, as long as
 110 -- the first one is not a symbol".
 111 "--"\-* ([^$symbol] .*)?                ;
 112
 113 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 114 -- blank lines) until we find a non-whitespace character, then do layout
 115 -- processing.
 116 --
 117 -- One slight wibble here: what if the line begins with {-#? In
 118 -- theory, we have to lex the pragma to see if it's one we recognise,
 119 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 120 -- as a nested comment.  We don't bother with this: if the line begins
 121 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 122 <bol> {
 123   \n                                    ;
 124   ^\# (line)?                           { begin line_prag1 }
 125   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 126   ()                                    { do_bol }
 127 }
 128
 129 -- after a layout keyword (let, where, do, of), we begin a new layout
 130 -- context if the curly brace is missing.
 131 -- Careful! This stuff is quite delicate.
 132 <layout, layout_do> {
 133   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 134         -- we might encounter {-# here, but {- has been handled already
 135   \n                                    ;
 136   ^\# (line)?                           { begin line_prag1 }
 137 }
 138
 139 -- do is treated in a subtly different way, see new_layout_context
 140 <layout>    ()                          { new_layout_context True }
 141 <layout_do> ()                          { new_layout_context False }
 142
 143 -- after a new layout context which was found to be to the left of the
 144 -- previous context, we have generated a '{' token, and we now need to
 145 -- generate a matching '}' token.
 146 <layout_left>  ()                       { do_layout_left }
 147
 148 <0,glaexts> \n                          { begin bol }
 149
 150 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 151
 152 -- single-line line pragmas, of the form
 153 --    # <line> "<file>" <extra-stuff> \n
 154 <line_prag1> $digit+                    { set_line line_prag1a }
 155 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 156 <line_prag1b> .*                        { pop }
 157
 158 -- Haskell-style line pragmas, of the form
 159 --    {-# LINE <line> "<file>" #-}
 160 <line_prag2> $digit+                    { set_line line_prag2a }
 161 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 162 <line_prag2b> "#-}"                     { pop }
 163
 164 <0,glaexts> {
 165   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 166                                         { token ITspecialise_prag }
 167   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 168   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 169   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 170                                         { token ITnoinline_prag }
 171   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 172   "{-#" $whitechar* (DEPRECATED|deprecated)
 173                                         { token ITdeprecated_prag }
 174   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 175   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 176
 177   "{-#"                                 { nested_comment }
 178
 179   -- ToDo: should only be valid inside a pragma:
 180   "#-}"                                 { token ITclose_prag}
 181 }
 182
 183
 184 -- '0' state: ordinary lexemes
 185 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 186
 187 -- "special" symbols
 188
 189 <0,glaexts> {
 190   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 191   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 192 }
 193
 194 <0,glaexts> {
 195   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 196   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 197   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 198   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 199   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 200   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 201   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 202   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 203 }
 204
 205 <0,glaexts> {
 206   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 207                                         { special IToparenbar }
 208   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 209 }
 210
 211 <0,glaexts> {
 212   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 213   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 214 }
 215
 216 <glaexts> {
 217   "(#" / { notFollowedBySymbol }        { token IToubxparen }
 218   "#)"                                  { token ITcubxparen }
 219   "{|"                                  { token ITocurlybar }
 220   "|}"                                  { token ITccurlybar }
 221 }
 222
 223 <0,glaexts> {
 224   \(                                    { special IToparen }
 225   \)                                    { special ITcparen }
 226   \[                                    { special ITobrack }
 227   \]                                    { special ITcbrack }
 228   \,                                    { special ITcomma }
 229   \;                                    { special ITsemi }
 230   \`                                    { special ITbackquote }
 231
 232   \{                                    { open_brace }
 233   \}                                    { close_brace }
 234 }
 235
 236 <0,glaexts> {
 237   @qual @varid                  { check_qvarid }
 238   @qual @conid                  { idtoken qconid }
 239   @varid                        { varid }
 240   @conid                        { idtoken conid }
 241 }
 242
 243 -- after an illegal qvarid, such as 'M.let',
 244 -- we back up and try again in the bad_qvarid state:
 245 <bad_qvarid> {
 246   @conid                        { pop_and (idtoken conid) }
 247   @qual @conid                  { pop_and (idtoken qconid) }
 248 }
 249
 250 <glaexts> {
 251   @qual @varid "#"+             { idtoken qvarid }
 252   @qual @conid "#"+             { idtoken qconid }
 253   @varid "#"+                   { varid }
 254   @conid "#"+                   { idtoken conid }
 255 }
 256
 257 -- ToDo: M.(,,,)
 258
 259 <0,glaexts> {
 260   @qual @varsym                 { idtoken qvarsym }
 261   @qual @consym                 { idtoken qconsym }
 262   @varsym                       { varsym }
 263   @consym                       { consym }
 264 }
 265
 266 <0,glaexts> {
 267   @decimal                      { tok_decimal }
 268   0[oO] @octal                  { tok_octal }
 269   0[xX] @hexadecimal            { tok_hexadecimal }
 270 }
 271
 272 <glaexts> {
 273   @decimal \#                   { prim_decimal }
 274   0[oO] @octal \#               { prim_octal }
 275   0[xX] @hexadecimal \#         { prim_hexadecimal }
 276 }
 277
 278 <0,glaexts> @floating_point             { strtoken tok_float }
 279 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 280 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 281
 282 -- Strings and chars are lexed by hand-written code.  The reason is
 283 -- that even if we recognise the string or char here in the regex
 284 -- lexer, we would still have to parse the string afterward in order
 285 -- to convert it to a String.
 286 <0,glaexts> {
 287   \'                            { lex_char_tok }
 288   \"                            { lex_string_tok }
 289 }
 290
 291 {
 292 -- work around bug in Alex 2.0
 293 #if __GLASGOW_HASKELL__ < 503
 294 unsafeAt arr i = arr ! i
 295 #endif
 296
 297 -- -----------------------------------------------------------------------------
 298 -- The token type
 299
 300 data Token = T SrcSpan Token__
 301
 302 data Token__
 303   = ITas                        -- Haskell keywords
 304   | ITcase
 305   | ITclass
 306   | ITdata
 307   | ITdefault
 308   | ITderiving
 309   | ITdo
 310   | ITelse
 311   | IThiding
 312   | ITif
 313   | ITimport
 314   | ITin
 315   | ITinfix
 316   | ITinfixl
 317   | ITinfixr
 318   | ITinstance
 319   | ITlet
 320   | ITmodule
 321   | ITnewtype
 322   | ITof
 323   | ITqualified
 324   | ITthen
 325   | ITtype
 326   | ITwhere
 327   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 328
 329   | ITforall                    -- GHC extension keywords
 330   | ITforeign
 331   | ITexport
 332   | ITlabel
 333   | ITdynamic
 334   | ITsafe
 335   | ITthreadsafe
 336   | ITunsafe
 337   | ITstdcallconv
 338   | ITccallconv
 339   | ITdotnet
 340   | ITmdo
 341
 342   | ITspecialise_prag           -- Pragmas
 343   | ITsource_prag
 344   | ITinline_prag
 345   | ITnoinline_prag
 346   | ITrules_prag
 347   | ITdeprecated_prag
 348   | ITline_prag
 349   | ITscc_prag
 350   | ITcore_prag                 -- hdaume: core annotations
 351   | ITclose_prag
 352
 353   | ITdotdot                    -- reserved symbols
 354   | ITcolon
 355   | ITdcolon
 356   | ITequal
 357   | ITlam
 358   | ITvbar
 359   | ITlarrow
 360   | ITrarrow
 361   | ITat
 362   | ITtilde
 363   | ITdarrow
 364   | ITminus
 365   | ITbang
 366   | ITstar
 367   | ITdot
 368
 369   | ITbiglam                    -- GHC-extension symbols
 370
 371   | ITocurly                    -- special symbols
 372   | ITccurly
 373   | ITocurlybar                 -- {|, for type applications
 374   | ITccurlybar                 -- |}, for type applications
 375   | ITvocurly
 376   | ITvccurly
 377   | ITobrack
 378   | ITopabrack                  -- [:, for parallel arrays with -fparr
 379   | ITcpabrack                  -- :], for parallel arrays with -fparr
 380   | ITcbrack
 381   | IToparen
 382   | ITcparen
 383   | IToubxparen
 384   | ITcubxparen
 385   | ITsemi
 386   | ITcomma
 387   | ITunderscore
 388   | ITbackquote
 389
 390   | ITvarid   FastString        -- identifiers
 391   | ITconid   FastString
 392   | ITvarsym  FastString
 393   | ITconsym  FastString
 394   | ITqvarid  (FastString,FastString)
 395   | ITqconid  (FastString,FastString)
 396   | ITqvarsym (FastString,FastString)
 397   | ITqconsym (FastString,FastString)
 398
 399   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 400   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 401
 402   | ITpragma StringBuffer
 403
 404   | ITchar       Char
 405   | ITstring     FastString
 406   | ITinteger    Integer
 407   | ITrational   Rational
 408
 409   | ITprimchar   Char
 410   | ITprimstring FastString
 411   | ITprimint    Integer
 412   | ITprimfloat  Rational
 413   | ITprimdouble Rational
 414
 415   -- MetaHaskell extension tokens
 416   | ITopenExpQuote              -- [| or [e|
 417   | ITopenPatQuote              -- [p|
 418   | ITopenDecQuote              -- [d|
 419   | ITopenTypQuote              -- [t|
 420   | ITcloseQuote                -- |]
 421   | ITidEscape   FastString     -- $x
 422   | ITparenEscape               -- $(
 423   | ITvarQuote                  -- '
 424   | ITtyQuote                   -- ''
 425
 426   -- Arrow notation extension
 427   | ITproc
 428   | ITrec
 429   | IToparenbar                 -- (|
 430   | ITcparenbar                 -- |)
 431   | ITlarrowtail                -- -<
 432   | ITrarrowtail                -- >-
 433   | ITLarrowtail                -- -<<
 434   | ITRarrowtail                -- >>-
 435
 436   | ITunknown String            -- Used when the lexer can't make sense of it
 437   | ITeof                       -- end of file token
 438 #ifdef DEBUG
 439   deriving Show -- debugging
 440 #endif
 441
 442 isSpecial :: Token__ -> Bool
 443 -- If we see M.x, where x is a keyword, but
 444 -- is special, we treat is as just plain M.x,
 445 -- not as a keyword.
 446 isSpecial ITas          = True
 447 isSpecial IThiding      = True
 448 isSpecial ITqualified   = True
 449 isSpecial ITforall      = True
 450 isSpecial ITexport      = True
 451 isSpecial ITlabel       = True
 452 isSpecial ITdynamic     = True
 453 isSpecial ITsafe        = True
 454 isSpecial ITthreadsafe  = True
 455 isSpecial ITunsafe      = True
 456 isSpecial ITccallconv   = True
 457 isSpecial ITstdcallconv = True
 458 isSpecial ITmdo         = True
 459 isSpecial _             = False
 460
 461 -- the bitmap provided as the third component indicates whether the
 462 -- corresponding extension keyword is valid under the extension options
 463 -- provided to the compiler; if the extension corresponding to *any* of the
 464 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 465 -- facilitates using a keyword in two different extensions that can be
 466 -- activated independently)
 467 --
 468 reservedWordsFM = listToUFM $
 469         map (\(x, y, z) -> (mkFastString x, (y, z)))
 470        [( "_",          ITunderscore,   0 ),
 471         ( "as",         ITas,           0 ),
 472         ( "case",       ITcase,         0 ),
 473         ( "class",      ITclass,        0 ),
 474         ( "data",       ITdata,         0 ),
 475         ( "default",    ITdefault,      0 ),
 476         ( "deriving",   ITderiving,     0 ),
 477         ( "do",         ITdo,           0 ),
 478         ( "else",       ITelse,         0 ),
 479         ( "hiding",     IThiding,       0 ),
 480         ( "if",         ITif,           0 ),
 481         ( "import",     ITimport,       0 ),
 482         ( "in",         ITin,           0 ),
 483         ( "infix",      ITinfix,        0 ),
 484         ( "infixl",     ITinfixl,       0 ),
 485         ( "infixr",     ITinfixr,       0 ),
 486         ( "instance",   ITinstance,     0 ),
 487         ( "let",        ITlet,          0 ),
 488         ( "module",     ITmodule,       0 ),
 489         ( "newtype",    ITnewtype,      0 ),
 490         ( "of",         ITof,           0 ),
 491         ( "qualified",  ITqualified,    0 ),
 492         ( "then",       ITthen,         0 ),
 493         ( "type",       ITtype,         0 ),
 494         ( "where",      ITwhere,        0 ),
 495         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 496
 497         ( "forall",     ITforall,        bit glaExtsBit),
 498         ( "mdo",        ITmdo,           bit glaExtsBit),
 499
 500         ( "foreign",    ITforeign,       bit ffiBit),
 501         ( "export",     ITexport,        bit ffiBit),
 502         ( "label",      ITlabel,         bit ffiBit),
 503         ( "dynamic",    ITdynamic,       bit ffiBit),
 504         ( "safe",       ITsafe,          bit ffiBit),
 505         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 506         ( "unsafe",     ITunsafe,        bit ffiBit),
 507         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 508         ( "ccall",      ITccallconv,     bit ffiBit),
 509         ( "dotnet",     ITdotnet,        bit ffiBit),
 510
 511         ( "rec",        ITrec,           bit arrowsBit),
 512         ( "proc",       ITproc,          bit arrowsBit)
 513      ]
 514
 515 reservedSymsFM = listToUFM $
 516         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 517       [ ("..",  ITdotdot,       0)
 518        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 519                                                 -- meaning only list cons
 520        ,("::",  ITdcolon,       0)
 521        ,("=",   ITequal,        0)
 522        ,("\\",  ITlam,          0)
 523        ,("|",   ITvbar,         0)
 524        ,("<-",  ITlarrow,       0)
 525        ,("->",  ITrarrow,       0)
 526        ,("@",   ITat,           0)
 527        ,("~",   ITtilde,        0)
 528        ,("=>",  ITdarrow,       0)
 529        ,("-",   ITminus,        0)
 530        ,("!",   ITbang,         0)
 531
 532        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 533        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 534
 535        ,("-<",  ITlarrowtail,   bit arrowsBit)
 536        ,(">-",  ITrarrowtail,   bit arrowsBit)
 537        ,("-<<", ITLarrowtail,   bit arrowsBit)
 538        ,(">>-", ITRarrowtail,   bit arrowsBit)
 539        ]
 540
 541 -- -----------------------------------------------------------------------------
 542 -- Lexer actions
 543
 544 type Action = SrcSpan -> StringBuffer -> Int -> P Token
 545
 546 special :: Token__ -> Action
 547 special tok span _buf len = return (T span tok)
 548
 549 token, layout_token :: Token__ -> Action
 550 token t span buf len = return (T span t)
 551 layout_token t span buf len = pushLexState layout >> return (T span t)
 552
 553 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 554 idtoken f span buf len = return (T span $! (f buf len))
 555
 556 skip_one_varid :: (FastString -> Token__) -> Action
 557 skip_one_varid f span buf len
 558   = return (T span $! f (lexemeToFastString (stepOn buf) (len-1)))
 559
 560 strtoken :: (String -> Token__) -> Action
 561 strtoken f span buf len =
 562   return (T span $! (f $! lexemeToString buf len))
 563
 564 init_strtoken :: Int -> (String -> Token__) -> Action
 565 -- like strtoken, but drops the last N character(s)
 566 init_strtoken drop f span buf len =
 567   return (T span $! (f $! lexemeToString buf (len-drop)))
 568
 569 begin :: Int -> Action
 570 begin code _span _str _len = do pushLexState code; lexToken
 571
 572 pop :: Action
 573 pop _span _buf _len = do popLexState; lexToken
 574
 575 pop_and :: Action -> Action
 576 pop_and act span buf len = do popLexState; act span buf len
 577
 578 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 579
 580 notFollowedBySymbol _ _ _ (_,buf)
 581   = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
 582
 583 ifExtension pred bits _ _ _ = pred bits
 584
 585 {-
 586   nested comments require traversing by hand, they can't be parsed
 587   using regular expressions.
 588 -}
 589 nested_comment :: Action
 590 nested_comment span _str _len = do
 591   input <- getInput
 592   go 1 input
 593   where go 0 input = do setInput input; lexToken
 594         go n input = do
 595           case alexGetChar input of
 596             Nothing  -> err input
 597             Just (c,input) -> do
 598               case c of
 599                 '-' -> do
 600                   case alexGetChar input of
 601                     Nothing  -> err input
 602                     Just ('\125',input) -> go (n-1) input
 603                     Just (c,_)          -> go n input
 604                 '\123' -> do
 605                   case alexGetChar input of
 606                     Nothing  -> err input
 607                     Just ('-',input') -> go (n+1) input'
 608                     Just (c,input)    -> go n input
 609                 c -> go n input
 610
 611         err input = do failLocMsgP (srcSpanStart span) (fst input) "unterminated `{-'"
 612
 613 open_brace, close_brace :: Action
 614 open_brace  span _str _len = do
 615   ctx <- getContext
 616   setContext (NoLayout:ctx)
 617   return (T span ITocurly)
 618 close_brace span _str _len = do
 619   popContext
 620   return (T span ITccurly)
 621
 622 -- We have to be careful not to count M.<varid> as a qualified name
 623 -- when <varid> is a keyword.  We hack around this by catching
 624 -- the offending tokens afterward, and re-lexing in a different state.
 625 check_qvarid span buf len = do
 626   case lookupUFM reservedWordsFM var of
 627         Just (keyword,exts)
 628           | not (isSpecial keyword) ->
 629           if exts == 0
 630              then try_again
 631              else do
 632                 b <- extension (\i -> exts .&. i /= 0)
 633                 if b then try_again
 634                      else return token
 635         _other -> return token
 636   where
 637         (mod,var) = splitQualName buf len
 638         token     = T span (ITqvarid (mod,var))
 639
 640         try_again = do
 641                 setInput (srcSpanStart span,buf)
 642                 pushLexState bad_qvarid
 643                 lexToken
 644
 645 qvarid buf len = ITqvarid $! splitQualName buf len
 646 qconid buf len = ITqconid $! splitQualName buf len
 647
 648 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 649 -- takes a StringBuffer and a length, and returns the module name
 650 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 651 -- because of hierarchical module names.
 652 splitQualName orig_buf len = split orig_buf 0 0
 653   where
 654     split buf dot_off n
 655         | n == len                = done dot_off
 656         | lookAhead buf n == '.'  = split2 buf n (n+1)
 657         | otherwise               = split buf dot_off (n+1)
 658
 659     -- careful, we might get names like M....
 660     -- so, if the character after the dot is not upper-case, this is
 661     -- the end of the qualifier part.
 662     split2 buf dot_off n
 663         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 664         | otherwise                 = done dot_off
 665
 666     done dot_off =
 667         (lexemeToFastString orig_buf dot_off,
 668          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 669
 670 varid span buf len =
 671   case lookupUFM reservedWordsFM fs of
 672         Just (keyword,0)    -> do
 673                 maybe_layout keyword
 674                 return (T span keyword)
 675         Just (keyword,exts) -> do
 676                 b <- extension (\i -> exts .&. i /= 0)
 677                 if b then do maybe_layout keyword
 678                              return (T span keyword)
 679                      else return (T span (ITvarid fs))
 680         _other -> return (T span (ITvarid fs))
 681   where
 682         fs = lexemeToFastString buf len
 683
 684 conid buf len = ITconid fs
 685   where fs = lexemeToFastString buf len
 686
 687 qvarsym buf len = ITqvarsym $! splitQualName buf len
 688 qconsym buf len = ITqconsym $! splitQualName buf len
 689
 690 varsym = sym ITvarsym
 691 consym = sym ITconsym
 692
 693 sym con span buf len =
 694   case lookupUFM reservedSymsFM fs of
 695         Just (keyword,0)    -> return (T span keyword)
 696         Just (keyword,exts) -> do
 697                 b <- extension (\i -> exts .&. i /= 0)
 698                 if b then return (T span keyword)
 699                      else return (T span $! con fs)
 700         _other -> return (T span $! con fs)
 701   where
 702         fs = lexemeToFastString buf len
 703
 704 tok_decimal span buf len
 705   = return (T span (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 706
 707 tok_octal span buf len
 708   = return (T span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 709
 710 tok_hexadecimal span buf len
 711   = return (T span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 712
 713 prim_decimal span buf len
 714   = return (T span (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 715
 716 prim_octal span buf len
 717   = return (T span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 718
 719 prim_hexadecimal span buf len
 720   = return (T span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 721
 722 tok_float        str = ITrational $! readRational__ str
 723 prim_float       str = ITprimfloat  $! readRational__ str
 724 prim_double      str = ITprimdouble $! readRational__ str
 725
 726 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 727 parseInteger buf len radix to_int
 728   = go 0 0
 729   where go i x | i == len  = x
 730                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 731
 732 -- -----------------------------------------------------------------------------
 733 -- Layout processing
 734
 735 -- we're at the first token on a line, insert layout tokens if necessary
 736 do_bol :: Action
 737 do_bol span _str _len = do
 738         pos <- getOffside (srcSpanEndCol span)
 739         case pos of
 740             LT -> do
 741                 --trace "layout: inserting '}'" $ do
 742                 popContext
 743                 -- do NOT pop the lex state, we might have a ';' to insert
 744                 return (T span ITvccurly)
 745             EQ -> do
 746                 --trace "layout: inserting ';'" $ do
 747                 popLexState
 748                 return (T span ITsemi)
 749             GT -> do
 750                 popLexState
 751                 lexToken
 752
 753 -- certain keywords put us in the "layout" state, where we might
 754 -- add an opening curly brace.
 755 maybe_layout ITdo       = pushLexState layout_do
 756 maybe_layout ITmdo      = pushLexState layout_do
 757 maybe_layout ITof       = pushLexState layout
 758 maybe_layout ITlet      = pushLexState layout
 759 maybe_layout ITwhere    = pushLexState layout
 760 maybe_layout ITrec      = pushLexState layout
 761 maybe_layout _          = return ()
 762
 763 -- Pushing a new implicit layout context.  If the indentation of the
 764 -- next token is not greater than the previous layout context, then
 765 -- Haskell 98 says that the new layout context should be empty; that is
 766 -- the lexer must generate {}.
 767 --
 768 -- We are slightly more lenient than this: when the new context is started
 769 -- by a 'do', then we allow the new context to be at the same indentation as
 770 -- the previous context.  This is what the 'strict' argument is for.
 771 --
 772 new_layout_context strict span _buf _len = do
 773     popLexState
 774     let offset = srcSpanStartCol span
 775     ctx <- getContext
 776     case ctx of
 777         Layout prev_off : _  |
 778            (strict     && prev_off >= offset  ||
 779             not strict && prev_off > offset) -> do
 780                 -- token is indented to the left of the previous context.
 781                 -- we must generate a {} sequence now.
 782                 pushLexState layout_left
 783                 return (T span ITvocurly)
 784         other -> do
 785                 setContext (Layout offset : ctx)
 786                 return (T span ITvocurly)
 787
 788 do_layout_left span _buf _len = do
 789     popLexState
 790     pushLexState bol  -- we must be at the start of a line
 791     return (T span ITvccurly)
 792
 793 -- -----------------------------------------------------------------------------
 794 -- LINE pragmas
 795
 796 set_line :: Int -> Action
 797 set_line code span buf len = do
 798   let line = parseInteger buf len 10 oct_or_dec
 799   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
 800         -- subtract one: the line number refers to the *following* line
 801   popLexState
 802   pushLexState code
 803   lexToken
 804
 805 set_file :: Int -> Action
 806 set_file code span buf len = do
 807   let file = lexemeToFastString (stepOn buf) (len-2)
 808   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
 809   popLexState
 810   pushLexState code
 811   lexToken
 812
 813 -- -----------------------------------------------------------------------------
 814 -- Strings & Chars
 815
 816 -- This stuff is horrible.  I hates it.
 817
 818 lex_string_tok :: Action
 819 lex_string_tok span buf len = do
 820   tok <- lex_string ""
 821   end <- getSrcLoc
 822   return (T (mkSrcSpan (srcSpanStart span) end) tok)
 823
 824 lex_string :: String -> P Token__
 825 lex_string s = do
 826   i <- getInput
 827   case alexGetChar i of
 828     Nothing -> lit_error
 829
 830     Just ('"',i)  -> do
 831         setInput i
 832         glaexts <- extension glaExtsEnabled
 833         if glaexts
 834           then do
 835             i <- getInput
 836             case alexGetChar i of
 837               Just ('#',i) -> do
 838                    setInput i
 839                    if any (> '\xFF') s
 840                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 841                     else let s' = mkFastStringNarrow (reverse s) in
 842                          -- always a narrow string/byte array
 843                          return (ITprimstring s')
 844               _other ->
 845                 return (ITstring (mkFastString (reverse s)))
 846           else
 847                 return (ITstring (mkFastString (reverse s)))
 848
 849     Just ('\\',i)
 850         | Just ('&',i) <- next -> do
 851                 setInput i; lex_string s
 852         | Just (c,i) <- next, is_space c -> do
 853                 setInput i; lex_stringgap s
 854         where next = alexGetChar i
 855
 856     Just _ -> do
 857         c <- lex_char
 858         lex_string (c:s)
 859
 860 lex_stringgap s = do
 861   c <- getCharOrFail
 862   case c of
 863     '\\' -> lex_string s
 864     c | is_space c -> lex_stringgap s
 865     _other -> lit_error
 866
 867
 868 lex_char_tok :: Action
 869 -- Here we are basically parsing character literals, such as 'x' or '\n'
 870 -- but, when Template Haskell is on, we additionally spot
 871 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
 872 -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
 873 -- So we have to do two characters of lookahead: when we see 'x we need to
 874 -- see if there's a trailing quote
 875 lex_char_tok span buf len = do  -- We've seen '
 876    i1 <- getInput       -- Look ahead to first character
 877    let loc = srcSpanStart span
 878    case alexGetChar i1 of
 879         Nothing -> lit_error
 880
 881         Just ('\'', i2@(end2,_)) -> do  -- We've seen ''
 882                   th_exts <- extension thEnabled
 883                   if th_exts then do
 884                         setInput i2
 885                         return (T (mkSrcSpan loc end2)  ITtyQuote)
 886                    else lit_error
 887
 888         Just ('\\', i2@(end2,_)) -> do  -- We've seen 'backslash
 889                   setInput i2
 890                   lit_ch <- lex_escape
 891                   mc <- getCharOrFail   -- Trailing quote
 892                   if mc == '\'' then finish_char_tok loc lit_ch
 893                                 else lit_error
 894
 895         Just (c, i2@(end2,_)) | not (is_any c) -> lit_error
 896                               | otherwise      ->
 897
 898                 -- We've seen 'x, where x is a valid character
 899                 --  (i.e. not newline etc) but not a quote or backslash
 900            case alexGetChar i2 of       -- Look ahead one more character
 901                 Nothing -> lit_error
 902                 Just ('\'', i3) -> do   -- We've seen 'x'
 903                         setInput i3
 904                         finish_char_tok loc c
 905                 _other -> do            -- We've seen 'x not followed by quote
 906                                         -- If TH is on, just parse the quote only
 907                         th_exts <- extension thEnabled
 908                         if th_exts then return (T (mkSrcSpan loc (fst i1)) ITvarQuote)
 909                                    else lit_error
 910
 911 finish_char_tok :: SrcLoc -> Char -> P Token
 912 finish_char_tok loc ch  -- We've already seen the closing quote
 913                         -- Just need to check for trailing #
 914   = do  glaexts <- extension glaExtsEnabled
 915         if glaexts then do
 916                 i@(end,_) <- getInput
 917                 case alexGetChar i of
 918                         Just ('#',i@(end,_)) -> do
 919                                 setInput i
 920                                 return (T (mkSrcSpan loc end) (ITprimchar ch))
 921                         _other ->
 922                                 return (T (mkSrcSpan loc end) (ITchar ch))
 923                 else do
 924                    end <- getSrcLoc
 925                    return (T (mkSrcSpan loc end) (ITchar ch))
 926
 927 lex_char :: P Char
 928 lex_char = do
 929   mc <- getCharOrFail
 930   case mc of
 931       '\\' -> lex_escape
 932       c | is_any c -> return c
 933       _other -> lit_error
 934
 935 lex_escape :: P Char
 936 lex_escape = do
 937   c <- getCharOrFail
 938   case c of
 939         'a'   -> return '\a'
 940         'b'   -> return '\b'
 941         'f'   -> return '\f'
 942         'n'   -> return '\n'
 943         'r'   -> return '\r'
 944         't'   -> return '\t'
 945         'v'   -> return '\v'
 946         '\\'  -> return '\\'
 947         '"'   -> return '\"'
 948         '\''  -> return '\''
 949         '^'   -> do c <- getCharOrFail
 950                     if c >= '@' && c <= '_'
 951                         then return (chr (ord c - ord '@'))
 952                         else lit_error
 953
 954         'x'   -> readNum is_hexdigit 16 hex
 955         'o'   -> readNum is_octdigit  8 oct_or_dec
 956         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 957
 958         c1 ->  do
 959            i <- getInput
 960            case alexGetChar i of
 961             Nothing -> lit_error
 962             Just (c2,i2) ->
 963               case alexGetChar i2 of
 964                 Nothing -> lit_error
 965                 Just (c3,i3) ->
 966                    let str = [c1,c2,c3] in
 967                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 968                                      Just rest <- [maybePrefixMatch p str] ] of
 969                           (escape_char,[]):_ -> do
 970                                 setInput i3
 971                                 return escape_char
 972                           (escape_char,_:_):_ -> do
 973                                 setInput i2
 974                                 return escape_char
 975                           [] -> lit_error
 976
 977 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 978 readNum is_digit base conv = do
 979   c <- getCharOrFail
 980   if is_digit c
 981         then readNum2 is_digit base conv (conv c)
 982         else lit_error
 983
 984 readNum2 is_digit base conv i = do
 985   input <- getInput
 986   read i input
 987   where read i input = do
 988           case alexGetChar input of
 989             Just (c,input') | is_digit c -> do
 990                 read (i*base + conv c) input'
 991             _other -> do
 992                 setInput input
 993                 if i >= 0 && i <= 0x10FFFF
 994                    then return (chr i)
 995                    else lit_error
 996
 997 is_hexdigit c
 998         =  is_digit c
 999         || (c >= 'a' && c <= 'f')
1000         || (c >= 'A' && c <= 'F')
1001
1002 hex c | is_digit c = ord c - ord '0'
1003       | otherwise  = ord (to_lower c) - ord 'a' + 10
1004
1005 oct_or_dec c = ord c - ord '0'
1006
1007 is_octdigit c = c >= '0' && c <= '7'
1008
1009 to_lower c
1010   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
1011   | otherwise = c
1012
1013 silly_escape_chars = [
1014         ("NUL", '\NUL'),
1015         ("SOH", '\SOH'),
1016         ("STX", '\STX'),
1017         ("ETX", '\ETX'),
1018         ("EOT", '\EOT'),
1019         ("ENQ", '\ENQ'),
1020         ("ACK", '\ACK'),
1021         ("BEL", '\BEL'),
1022         ("BS", '\BS'),
1023         ("HT", '\HT'),
1024         ("LF", '\LF'),
1025         ("VT", '\VT'),
1026         ("FF", '\FF'),
1027         ("CR", '\CR'),
1028         ("SO", '\SO'),
1029         ("SI", '\SI'),
1030         ("DLE", '\DLE'),
1031         ("DC1", '\DC1'),
1032         ("DC2", '\DC2'),
1033         ("DC3", '\DC3'),
1034         ("DC4", '\DC4'),
1035         ("NAK", '\NAK'),
1036         ("SYN", '\SYN'),
1037         ("ETB", '\ETB'),
1038         ("CAN", '\CAN'),
1039         ("EM", '\EM'),
1040         ("SUB", '\SUB'),
1041         ("ESC", '\ESC'),
1042         ("FS", '\FS'),
1043         ("GS", '\GS'),
1044         ("RS", '\RS'),
1045         ("US", '\US'),
1046         ("SP", '\SP'),
1047         ("DEL", '\DEL')
1048         ]
1049
1050 lit_error = lexError "lexical error in string/character literal"
1051
1052 getCharOrFail :: P Char
1053 getCharOrFail =  do
1054   i <- getInput
1055   case alexGetChar i of
1056         Nothing -> lexError "unexpected end-of-file in string/character literal"
1057         Just (c,i)  -> do setInput i; return c
1058
1059 -- -----------------------------------------------------------------------------
1060 -- Floats
1061
1062 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1063 readRational r = do
1064      (n,d,s) <- readFix r
1065      (k,t)   <- readExp s
1066      return ((n%1)*10^^(k-d), t)
1067  where
1068      readFix r = do
1069         (ds,s)  <- lexDecDigits r
1070         (ds',t) <- lexDotDigits s
1071         return (read (ds++ds'), length ds', t)
1072
1073      readExp (e:s) | e `elem` "eE" = readExp' s
1074      readExp s                     = return (0,s)
1075
1076      readExp' ('+':s) = readDec s
1077      readExp' ('-':s) = do
1078                         (k,t) <- readDec s
1079                         return (-k,t)
1080      readExp' s       = readDec s
1081
1082      readDec s = do
1083         (ds,r) <- nonnull isDigit s
1084         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1085                 r)
1086
1087      lexDecDigits = nonnull isDigit
1088
1089      lexDotDigits ('.':s) = return (span isDigit s)
1090      lexDotDigits s       = return ("",s)
1091
1092      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1093                       return (cs,t)
1094
1095 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1096 readRational__ top_s
1097   = case top_s of
1098       '-' : xs -> - (read_me xs)
1099       xs       -> read_me xs
1100   where
1101     read_me s
1102       = case (do { (x,"") <- readRational s ; return x }) of
1103           [x] -> x
1104           []  -> error ("readRational__: no parse:"        ++ top_s)
1105           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1106
1107 -- -----------------------------------------------------------------------------
1108 -- The Parse Monad
1109
1110 data LayoutContext
1111   = NoLayout
1112   | Layout !Int
1113
1114 data ParseResult a
1115   = POk PState a
1116   | PFailed
1117         SrcSpan         -- The spam the error.  Might be used in environments
1118                         -- which can show this span, e.g. by highlighting it.
1119         Message         -- The error message
1120
1121 showPFailed span err = hcat [ppr span, text ": ", err]
1122
1123 data PState = PState {
1124         buffer     :: StringBuffer,
1125         last_span  :: SrcSpan,          -- span of previous token
1126         last_len   :: !Int,
1127         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1128         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1129         context    :: [LayoutContext],
1130         lex_state  :: [Int]
1131      }
1132         -- last_span is used when generating error messages,
1133         -- and in pushCurrentContext only.
1134         -- last_len is used when generating error messages, and is
1135         -- needed because we need to back up the buffer pointer by that
1136         -- number of characters for outputing the token in the error message.
1137
1138 newtype P a = P { unP :: PState -> ParseResult a }
1139
1140 instance Monad P where
1141   return = returnP
1142   (>>=) = thenP
1143   fail = failP
1144
1145 returnP :: a -> P a
1146 returnP a = P $ \s -> POk s a
1147
1148 thenP :: P a -> (a -> P b) -> P b
1149 (P m) `thenP` k = P $ \ s ->
1150         case m s of
1151                 POk s1 a         -> (unP (k a)) s1
1152                 PFailed span err -> PFailed span err
1153
1154 failP :: String -> P a
1155 failP msg = P $ \s -> PFailed (last_span s) (text msg)
1156
1157 failMsgP :: String -> P a
1158 failMsgP msg = P $ \s -> PFailed (last_span s) (text msg)
1159
1160 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1161 failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
1162
1163 extension :: (Int -> Bool) -> P Bool
1164 extension p = P $ \s -> POk s (p $! extsBitmap s)
1165
1166 getExts :: P Int
1167 getExts = P $ \s -> POk s (extsBitmap s)
1168
1169 setSrcLoc :: SrcLoc -> P ()
1170 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1171
1172 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1173 -- any calls to the lexer, because it assumes things about the SrcLoc.
1174 setSrcLocFor :: SrcLoc -> P a -> P a
1175 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1176   case unP scope s{loc=new_loc} of
1177         PFailed span msg -> PFailed span msg
1178         POk _ r -> POk s r
1179
1180 getSrcLoc :: P SrcLoc
1181 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1182
1183 setLastToken :: SrcSpan -> Int -> P ()
1184 setLastToken span len = P $ \s -> POk s{ last_span=span, last_len=len } ()
1185
1186 type AlexInput = (SrcLoc,StringBuffer)
1187
1188 alexInputPrevChar :: AlexInput -> Char
1189 alexInputPrevChar (_,s) = prevChar s '\n'
1190
1191 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1192 alexGetChar (loc,s)
1193   | atEnd s   = Nothing
1194   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1195   where c = currentChar s
1196         loc' = advanceSrcLoc loc c
1197         s'   = stepOn s
1198
1199 getInput :: P AlexInput
1200 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1201
1202 setInput :: AlexInput -> P ()
1203 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1204
1205 pushLexState :: Int -> P ()
1206 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1207
1208 popLexState :: P Int
1209 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1210
1211 getLexState :: P Int
1212 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1213
1214 -- for reasons of efficiency, flags indicating language extensions (eg,
1215 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1216 -- integer
1217
1218 glaExtsBit, ffiBit, parrBit :: Int
1219 glaExtsBit = 0
1220 ffiBit     = 1
1221 parrBit    = 2
1222 arrowsBit  = 4
1223 thBit      = 5
1224 ipBit      = 6
1225
1226 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1227 glaExtsEnabled flags = testBit flags glaExtsBit
1228 ffiEnabled     flags = testBit flags ffiBit
1229 parrEnabled    flags = testBit flags parrBit
1230 arrowsEnabled  flags = testBit flags arrowsBit
1231 thEnabled      flags = testBit flags thBit
1232 ipEnabled      flags = testBit flags ipBit
1233
1234 -- create a parse state
1235 --
1236 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1237 mkPState buf loc flags  =
1238   PState {
1239       buffer     = buf,
1240       last_span  = mkSrcSpan loc loc,
1241       last_len   = 0,
1242       loc        = loc,
1243       extsBitmap = fromIntegral bitmap,
1244       context    = [],
1245       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1246         -- we begin in the layout state if toplev_layout is set
1247     }
1248     where
1249       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1250                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1251                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1252                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1253                .|. thBit      `setBitIf` dopt Opt_TH          flags
1254                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1255       --
1256       setBitIf :: Int -> Bool -> Int
1257       b `setBitIf` cond | cond      = bit b
1258                         | otherwise = 0
1259
1260 getContext :: P [LayoutContext]
1261 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1262
1263 setContext :: [LayoutContext] -> P ()
1264 setContext ctx = P $ \s -> POk s{context=ctx} ()
1265
1266 popContext :: P ()
1267 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1268                           loc = loc, last_len = len, last_span = last_span }) ->
1269   case ctx of
1270         (_:tl) -> POk s{ context = tl } ()
1271         []     -> PFailed last_span (srcParseErr buf len)
1272
1273 -- Push a new layout context at the indentation of the last token read.
1274 -- This is only used at the outer level of a module when the 'module'
1275 -- keyword is missing.
1276 pushCurrentContext :: P ()
1277 pushCurrentContext = P $ \ s@PState{ last_span=span, context=ctx } ->
1278   POk s{ context = Layout (srcSpanStartCol span) : ctx} ()
1279
1280 getOffside :: Int -> P Ordering
1281 getOffside col = P $ \s@PState{context=stk} ->
1282                 let ord = case stk of
1283                         (Layout n:_) -> compare col n
1284                         _            -> GT
1285                 in POk s $! ord
1286
1287 -- ---------------------------------------------------------------------------
1288 -- Construct a parse error
1289
1290 srcParseErr
1291   :: StringBuffer       -- current buffer (placed just after the last token)
1292   -> Int                -- length of the previous token
1293   -> Message
1294 srcParseErr buf len
1295   = hcat [ if null token
1296              then ptext SLIT("parse error (possibly incorrect indentation)")
1297              else hcat [ptext SLIT("parse error on input "),
1298                         char '`', text token, char '\'']
1299     ]
1300   where token = lexemeToString (stepOnBy (-len) buf) len
1301
1302 -- Report a parse failure, giving the span of the previous token as
1303 -- the location of the error.  This is the entry point for errors
1304 -- detected during parsing.
1305 srcParseFail :: P a
1306 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1307                                 last_span = last_span, loc = loc } ->
1308     PFailed last_span (srcParseErr buf len)
1309
1310 -- A lexical error is reported at a particular position in the source file,
1311 -- not over a token range.  TODO: this is slightly wrong, because we record
1312 -- the error at the character position following the one which caused the
1313 -- error.  We should somehow back up by one character.
1314 lexError :: String -> P a
1315 lexError str = do
1316   loc <- getSrcLoc
1317   failLocMsgP loc loc str
1318
1319 -- -----------------------------------------------------------------------------
1320 -- This is the top-level function: called from the parser each time a
1321 -- new token is to be read from the input.
1322
1323 lexer :: (Token -> P a) -> P a
1324 lexer cont = do
1325   tok@(T _ tok__) <- lexToken
1326   --trace ("token: " ++ show tok__) $ do
1327   cont tok
1328
1329 lexToken :: P Token
1330 lexToken = do
1331   inp@(loc1,buf) <- getInput
1332   sc <- getLexState
1333   exts <- getExts
1334   case alexScanUser exts inp sc of
1335     AlexEOF -> do let span = mkSrcSpan loc1 loc1
1336                   setLastToken span 0
1337                   return (T span ITeof)
1338     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1339     AlexSkip inp2 _ -> do
1340         setInput inp2
1341         lexToken
1342     AlexToken inp2@(end,buf2) len t -> do
1343         setInput inp2
1344         let span = mkSrcSpan loc1 end
1345         (setLastToken $! span) $! len
1346         t span buf len
1347 }