ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), lexer, mkPState,
  26    P(..), ParseResult(..), getSrcLoc,
  27    failMsgP, failLocMsgP, failSpanMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ErrUtils         ( Message )
  34 import Outputable
  35 import StringBuffer
  36 import FastString
  37 import FastTypes
  38 import SrcLoc
  39 import UniqFM
  40 import CmdLineOpts
  41 import Ctype
  42 import Util             ( maybePrefixMatch )
  43
  44 import DATA_BITS
  45 import Char
  46 import Ratio
  47 --import TRACE
  48 }
  49
  50 $whitechar   = [\ \t\n\r\f\v\xa0]
  51 $white_no_nl = $whitechar # \n
  52
  53 $ascdigit  = 0-9
  54 $unidigit  = \x01
  55 $digit     = [$ascdigit $unidigit]
  56
  57 $special   = [\(\)\,\;\[\]\`\{\}]
  58 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  59 $unisymbol = \x02
  60 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  61
  62 $unilarge  = \x03
  63 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  64 $large     = [$asclarge $unilarge]
  65
  66 $unismall  = \x04
  67 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  68 $small     = [$ascsmall $unismall \_]
  69
  70 $graphic   = [$small $large $symbol $digit $special \:\"\']
  71
  72 $octit     = 0-7
  73 $hexit     = [$digit A-F a-f]
  74 $symchar   = [$symbol \:]
  75 $nl        = [\n\r]
  76 $idchar    = [$small $large $digit \']
  77
  78 @varid     = $small $idchar*
  79 @conid     = $large $idchar*
  80
  81 @varsym    = $symbol $symchar*
  82 @consym    = \: $symchar*
  83
  84 @decimal     = $digit+
  85 @octal       = $octit+
  86 @hexadecimal = $hexit+
  87 @exponent    = [eE] [\-\+]? @decimal
  88
  89 -- we support the hierarchical module name extension:
  90 @qual = (@conid \.)+
  91
  92 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  93
  94 haskell :-
  95
  96 -- everywhere: skip whitespace and comments
  97 $white_no_nl+                           ;
  98
  99 -- Everywhere: deal with nested comments.  We explicitly rule out
 100 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 101 -- (this can happen even though pragmas will normally take precedence due to
 102 -- longest-match, because pragmas aren't valid in every state, but comments
 103 -- are).
 104 "{-" / { notFollowedBy '#' }            { nested_comment }
 105
 106 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 107 -- more dashes followed by a symbol should be parsed as a varsym, so we
 108 -- have to exclude those.
 109 -- The regex says: "munch all the characters after the dashes, as long as
 110 -- the first one is not a symbol".
 111 "--"\-* [^$symbol] .*                   ;
 112 "--"\-* / { atEOL }                     ;
 113
 114 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 115 -- blank lines) until we find a non-whitespace character, then do layout
 116 -- processing.
 117 --
 118 -- One slight wibble here: what if the line begins with {-#? In
 119 -- theory, we have to lex the pragma to see if it's one we recognise,
 120 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 121 -- as a nested comment.  We don't bother with this: if the line begins
 122 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 123 <bol> {
 124   \n                                    ;
 125   ^\# (line)?                           { begin line_prag1 }
 126   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 127   ()                                    { do_bol }
 128 }
 129
 130 -- after a layout keyword (let, where, do, of), we begin a new layout
 131 -- context if the curly brace is missing.
 132 -- Careful! This stuff is quite delicate.
 133 <layout, layout_do> {
 134   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 135         -- we might encounter {-# here, but {- has been handled already
 136   \n                                    ;
 137   ^\# (line)?                           { begin line_prag1 }
 138 }
 139
 140 -- do is treated in a subtly different way, see new_layout_context
 141 <layout>    ()                          { new_layout_context True }
 142 <layout_do> ()                          { new_layout_context False }
 143
 144 -- after a new layout context which was found to be to the left of the
 145 -- previous context, we have generated a '{' token, and we now need to
 146 -- generate a matching '}' token.
 147 <layout_left>  ()                       { do_layout_left }
 148
 149 <0,glaexts> \n                          { begin bol }
 150
 151 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 152
 153 -- single-line line pragmas, of the form
 154 --    # <line> "<file>" <extra-stuff> \n
 155 <line_prag1> $digit+                    { set_line line_prag1a }
 156 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 157 <line_prag1b> .*                        { pop }
 158
 159 -- Haskell-style line pragmas, of the form
 160 --    {-# LINE <line> "<file>" #-}
 161 <line_prag2> $digit+                    { set_line line_prag2a }
 162 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 163 <line_prag2b> "#-}"|"-}"                { pop }
 164    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 165    -- with older versions of GHC which generated these.
 166
 167 <0,glaexts> {
 168   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 169                                         { token ITspecialise_prag }
 170   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 171   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 172   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 173                                         { token ITnoinline_prag }
 174   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 175   "{-#" $whitechar* (DEPRECATED|deprecated)
 176                                         { token ITdeprecated_prag }
 177   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 178   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 179   "{-#" $whitechar* (UNPACK|unpack)     { token ITunpack_prag }
 180
 181   "{-#"                                 { nested_comment }
 182
 183   -- ToDo: should only be valid inside a pragma:
 184   "#-}"                                 { token ITclose_prag}
 185 }
 186
 187
 188 -- '0' state: ordinary lexemes
 189 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 190
 191 -- "special" symbols
 192
 193 <0,glaexts> {
 194   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 195   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 196 }
 197
 198 <0,glaexts> {
 199   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 200   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 201   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 202   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 203   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 204   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 205   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 206   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 207 }
 208
 209 <0,glaexts> {
 210   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 211                                         { special IToparenbar }
 212   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 213 }
 214
 215 <0,glaexts> {
 216   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 217   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 218 }
 219
 220 <glaexts> {
 221   "(#" / { notFollowedBySymbol }        { token IToubxparen }
 222   "#)"                                  { token ITcubxparen }
 223   "{|"                                  { token ITocurlybar }
 224   "|}"                                  { token ITccurlybar }
 225 }
 226
 227 <0,glaexts> {
 228   \(                                    { special IToparen }
 229   \)                                    { special ITcparen }
 230   \[                                    { special ITobrack }
 231   \]                                    { special ITcbrack }
 232   \,                                    { special ITcomma }
 233   \;                                    { special ITsemi }
 234   \`                                    { special ITbackquote }
 235
 236   \{                                    { open_brace }
 237   \}                                    { close_brace }
 238 }
 239
 240 <0,glaexts> {
 241   @qual @varid                  { check_qvarid }
 242   @qual @conid                  { idtoken qconid }
 243   @varid                        { varid }
 244   @conid                        { idtoken conid }
 245 }
 246
 247 -- after an illegal qvarid, such as 'M.let',
 248 -- we back up and try again in the bad_qvarid state:
 249 <bad_qvarid> {
 250   @conid                        { pop_and (idtoken conid) }
 251   @qual @conid                  { pop_and (idtoken qconid) }
 252 }
 253
 254 <glaexts> {
 255   @qual @varid "#"+             { idtoken qvarid }
 256   @qual @conid "#"+             { idtoken qconid }
 257   @varid "#"+                   { varid }
 258   @conid "#"+                   { idtoken conid }
 259 }
 260
 261 -- ToDo: M.(,,,)
 262
 263 <0,glaexts> {
 264   @qual @varsym                 { idtoken qvarsym }
 265   @qual @consym                 { idtoken qconsym }
 266   @varsym                       { varsym }
 267   @consym                       { consym }
 268 }
 269
 270 <0,glaexts> {
 271   @decimal                      { tok_decimal }
 272   0[oO] @octal                  { tok_octal }
 273   0[xX] @hexadecimal            { tok_hexadecimal }
 274 }
 275
 276 <glaexts> {
 277   @decimal \#                   { prim_decimal }
 278   0[oO] @octal \#               { prim_octal }
 279   0[xX] @hexadecimal \#         { prim_hexadecimal }
 280 }
 281
 282 <0,glaexts> @floating_point             { strtoken tok_float }
 283 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 284 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 285
 286 -- Strings and chars are lexed by hand-written code.  The reason is
 287 -- that even if we recognise the string or char here in the regex
 288 -- lexer, we would still have to parse the string afterward in order
 289 -- to convert it to a String.
 290 <0,glaexts> {
 291   \'                            { lex_char_tok }
 292   \"                            { lex_string_tok }
 293 }
 294
 295 {
 296 -- work around bug in Alex 2.0
 297 #if __GLASGOW_HASKELL__ < 503
 298 unsafeAt arr i = arr ! i
 299 #endif
 300
 301 -- -----------------------------------------------------------------------------
 302 -- The token type
 303
 304 data Token
 305   = ITas                        -- Haskell keywords
 306   | ITcase
 307   | ITclass
 308   | ITdata
 309   | ITdefault
 310   | ITderiving
 311   | ITdo
 312   | ITelse
 313   | IThiding
 314   | ITif
 315   | ITimport
 316   | ITin
 317   | ITinfix
 318   | ITinfixl
 319   | ITinfixr
 320   | ITinstance
 321   | ITlet
 322   | ITmodule
 323   | ITnewtype
 324   | ITof
 325   | ITqualified
 326   | ITthen
 327   | ITtype
 328   | ITwhere
 329   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 330
 331   | ITforall                    -- GHC extension keywords
 332   | ITforeign
 333   | ITexport
 334   | ITlabel
 335   | ITdynamic
 336   | ITsafe
 337   | ITthreadsafe
 338   | ITunsafe
 339   | ITstdcallconv
 340   | ITccallconv
 341   | ITdotnet
 342   | ITmdo
 343
 344   | ITspecialise_prag           -- Pragmas
 345   | ITsource_prag
 346   | ITinline_prag
 347   | ITnoinline_prag
 348   | ITrules_prag
 349   | ITdeprecated_prag
 350   | ITline_prag
 351   | ITscc_prag
 352   | ITcore_prag                 -- hdaume: core annotations
 353   | ITunpack_prag
 354   | ITclose_prag
 355
 356   | ITdotdot                    -- reserved symbols
 357   | ITcolon
 358   | ITdcolon
 359   | ITequal
 360   | ITlam
 361   | ITvbar
 362   | ITlarrow
 363   | ITrarrow
 364   | ITat
 365   | ITtilde
 366   | ITdarrow
 367   | ITminus
 368   | ITbang
 369   | ITstar
 370   | ITdot
 371
 372   | ITbiglam                    -- GHC-extension symbols
 373
 374   | ITocurly                    -- special symbols
 375   | ITccurly
 376   | ITocurlybar                 -- {|, for type applications
 377   | ITccurlybar                 -- |}, for type applications
 378   | ITvocurly
 379   | ITvccurly
 380   | ITobrack
 381   | ITopabrack                  -- [:, for parallel arrays with -fparr
 382   | ITcpabrack                  -- :], for parallel arrays with -fparr
 383   | ITcbrack
 384   | IToparen
 385   | ITcparen
 386   | IToubxparen
 387   | ITcubxparen
 388   | ITsemi
 389   | ITcomma
 390   | ITunderscore
 391   | ITbackquote
 392
 393   | ITvarid   FastString        -- identifiers
 394   | ITconid   FastString
 395   | ITvarsym  FastString
 396   | ITconsym  FastString
 397   | ITqvarid  (FastString,FastString)
 398   | ITqconid  (FastString,FastString)
 399   | ITqvarsym (FastString,FastString)
 400   | ITqconsym (FastString,FastString)
 401
 402   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 403   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 404
 405   | ITpragma StringBuffer
 406
 407   | ITchar       Char
 408   | ITstring     FastString
 409   | ITinteger    Integer
 410   | ITrational   Rational
 411
 412   | ITprimchar   Char
 413   | ITprimstring FastString
 414   | ITprimint    Integer
 415   | ITprimfloat  Rational
 416   | ITprimdouble Rational
 417
 418   -- MetaHaskell extension tokens
 419   | ITopenExpQuote              -- [| or [e|
 420   | ITopenPatQuote              -- [p|
 421   | ITopenDecQuote              -- [d|
 422   | ITopenTypQuote              -- [t|
 423   | ITcloseQuote                -- |]
 424   | ITidEscape   FastString     -- $x
 425   | ITparenEscape               -- $(
 426   | ITvarQuote                  -- '
 427   | ITtyQuote                   -- ''
 428
 429   -- Arrow notation extension
 430   | ITproc
 431   | ITrec
 432   | IToparenbar                 -- (|
 433   | ITcparenbar                 -- |)
 434   | ITlarrowtail                -- -<
 435   | ITrarrowtail                -- >-
 436   | ITLarrowtail                -- -<<
 437   | ITRarrowtail                -- >>-
 438
 439   | ITunknown String            -- Used when the lexer can't make sense of it
 440   | ITeof                       -- end of file token
 441 #ifdef DEBUG
 442   deriving Show -- debugging
 443 #endif
 444
 445 isSpecial :: Token -> Bool
 446 -- If we see M.x, where x is a keyword, but
 447 -- is special, we treat is as just plain M.x,
 448 -- not as a keyword.
 449 isSpecial ITas          = True
 450 isSpecial IThiding      = True
 451 isSpecial ITqualified   = True
 452 isSpecial ITforall      = True
 453 isSpecial ITexport      = True
 454 isSpecial ITlabel       = True
 455 isSpecial ITdynamic     = True
 456 isSpecial ITsafe        = True
 457 isSpecial ITthreadsafe  = True
 458 isSpecial ITunsafe      = True
 459 isSpecial ITccallconv   = True
 460 isSpecial ITstdcallconv = True
 461 isSpecial ITmdo         = True
 462 isSpecial _             = False
 463
 464 -- the bitmap provided as the third component indicates whether the
 465 -- corresponding extension keyword is valid under the extension options
 466 -- provided to the compiler; if the extension corresponding to *any* of the
 467 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 468 -- facilitates using a keyword in two different extensions that can be
 469 -- activated independently)
 470 --
 471 reservedWordsFM = listToUFM $
 472         map (\(x, y, z) -> (mkFastString x, (y, z)))
 473        [( "_",          ITunderscore,   0 ),
 474         ( "as",         ITas,           0 ),
 475         ( "case",       ITcase,         0 ),
 476         ( "class",      ITclass,        0 ),
 477         ( "data",       ITdata,         0 ),
 478         ( "default",    ITdefault,      0 ),
 479         ( "deriving",   ITderiving,     0 ),
 480         ( "do",         ITdo,           0 ),
 481         ( "else",       ITelse,         0 ),
 482         ( "hiding",     IThiding,       0 ),
 483         ( "if",         ITif,           0 ),
 484         ( "import",     ITimport,       0 ),
 485         ( "in",         ITin,           0 ),
 486         ( "infix",      ITinfix,        0 ),
 487         ( "infixl",     ITinfixl,       0 ),
 488         ( "infixr",     ITinfixr,       0 ),
 489         ( "instance",   ITinstance,     0 ),
 490         ( "let",        ITlet,          0 ),
 491         ( "module",     ITmodule,       0 ),
 492         ( "newtype",    ITnewtype,      0 ),
 493         ( "of",         ITof,           0 ),
 494         ( "qualified",  ITqualified,    0 ),
 495         ( "then",       ITthen,         0 ),
 496         ( "type",       ITtype,         0 ),
 497         ( "where",      ITwhere,        0 ),
 498         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 499
 500         ( "forall",     ITforall,        bit glaExtsBit),
 501         ( "mdo",        ITmdo,           bit glaExtsBit),
 502
 503         ( "foreign",    ITforeign,       bit ffiBit),
 504         ( "export",     ITexport,        bit ffiBit),
 505         ( "label",      ITlabel,         bit ffiBit),
 506         ( "dynamic",    ITdynamic,       bit ffiBit),
 507         ( "safe",       ITsafe,          bit ffiBit),
 508         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 509         ( "unsafe",     ITunsafe,        bit ffiBit),
 510         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 511         ( "ccall",      ITccallconv,     bit ffiBit),
 512         ( "dotnet",     ITdotnet,        bit ffiBit),
 513
 514         ( "rec",        ITrec,           bit arrowsBit),
 515         ( "proc",       ITproc,          bit arrowsBit)
 516      ]
 517
 518 reservedSymsFM = listToUFM $
 519         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 520       [ ("..",  ITdotdot,       0)
 521        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 522                                                 -- meaning only list cons
 523        ,("::",  ITdcolon,       0)
 524        ,("=",   ITequal,        0)
 525        ,("\\",  ITlam,          0)
 526        ,("|",   ITvbar,         0)
 527        ,("<-",  ITlarrow,       0)
 528        ,("->",  ITrarrow,       0)
 529        ,("@",   ITat,           0)
 530        ,("~",   ITtilde,        0)
 531        ,("=>",  ITdarrow,       0)
 532        ,("-",   ITminus,        0)
 533        ,("!",   ITbang,         0)
 534
 535        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 536        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 537
 538        ,("-<",  ITlarrowtail,   bit arrowsBit)
 539        ,(">-",  ITrarrowtail,   bit arrowsBit)
 540        ,("-<<", ITLarrowtail,   bit arrowsBit)
 541        ,(">>-", ITRarrowtail,   bit arrowsBit)
 542        ]
 543
 544 -- -----------------------------------------------------------------------------
 545 -- Lexer actions
 546
 547 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 548
 549 special :: Token -> Action
 550 special tok span _buf len = return (L span tok)
 551
 552 token, layout_token :: Token -> Action
 553 token t span buf len = return (L span t)
 554 layout_token t span buf len = pushLexState layout >> return (L span t)
 555
 556 idtoken :: (StringBuffer -> Int -> Token) -> Action
 557 idtoken f span buf len = return (L span $! (f buf len))
 558
 559 skip_one_varid :: (FastString -> Token) -> Action
 560 skip_one_varid f span buf len
 561   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 562
 563 strtoken :: (String -> Token) -> Action
 564 strtoken f span buf len =
 565   return (L span $! (f $! lexemeToString buf len))
 566
 567 init_strtoken :: Int -> (String -> Token) -> Action
 568 -- like strtoken, but drops the last N character(s)
 569 init_strtoken drop f span buf len =
 570   return (L span $! (f $! lexemeToString buf (len-drop)))
 571
 572 begin :: Int -> Action
 573 begin code _span _str _len = do pushLexState code; lexToken
 574
 575 pop :: Action
 576 pop _span _buf _len = do popLexState; lexToken
 577
 578 pop_and :: Action -> Action
 579 pop_and act span buf len = do popLexState; act span buf len
 580
 581 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 582
 583 notFollowedBySymbol _ _ _ (_,buf)
 584   = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
 585
 586 atEOL _ _ _ (_,buf) = atEnd buf || currentChar buf == '\n'
 587
 588 ifExtension pred bits _ _ _ = pred bits
 589
 590 {-
 591   nested comments require traversing by hand, they can't be parsed
 592   using regular expressions.
 593 -}
 594 nested_comment :: Action
 595 nested_comment span _str _len = do
 596   input <- getInput
 597   go 1 input
 598   where go 0 input = do setInput input; lexToken
 599         go n input = do
 600           case alexGetChar input of
 601             Nothing  -> err input
 602             Just (c,input) -> do
 603               case c of
 604                 '-' -> do
 605                   case alexGetChar input of
 606                     Nothing  -> err input
 607                     Just ('\125',input) -> go (n-1) input
 608                     Just (c,_)          -> go n input
 609                 '\123' -> do
 610                   case alexGetChar input of
 611                     Nothing  -> err input
 612                     Just ('-',input') -> go (n+1) input'
 613                     Just (c,input)    -> go n input
 614                 c -> go n input
 615
 616         err input = do failLocMsgP (srcSpanStart span) (fst input)
 617                         "unterminated `{-'"
 618
 619 open_brace, close_brace :: Action
 620 open_brace span _str _len = do
 621   ctx <- getContext
 622   setContext (NoLayout:ctx)
 623   return (L span ITocurly)
 624 close_brace span _str _len = do
 625   popContext
 626   return (L span ITccurly)
 627
 628 -- We have to be careful not to count M.<varid> as a qualified name
 629 -- when <varid> is a keyword.  We hack around this by catching
 630 -- the offending tokens afterward, and re-lexing in a different state.
 631 check_qvarid span buf len = do
 632   case lookupUFM reservedWordsFM var of
 633         Just (keyword,exts)
 634           | not (isSpecial keyword) ->
 635           if exts == 0
 636              then try_again
 637              else do
 638                 b <- extension (\i -> exts .&. i /= 0)
 639                 if b then try_again
 640                      else return token
 641         _other -> return token
 642   where
 643         (mod,var) = splitQualName buf len
 644         token     = L span (ITqvarid (mod,var))
 645
 646         try_again = do
 647                 setInput (srcSpanStart span,buf)
 648                 pushLexState bad_qvarid
 649                 lexToken
 650
 651 qvarid buf len = ITqvarid $! splitQualName buf len
 652 qconid buf len = ITqconid $! splitQualName buf len
 653
 654 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 655 -- takes a StringBuffer and a length, and returns the module name
 656 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 657 -- because of hierarchical module names.
 658 splitQualName orig_buf len = split orig_buf 0 0
 659   where
 660     split buf dot_off n
 661         | n == len                = done dot_off
 662         | lookAhead buf n == '.'  = split2 buf n (n+1)
 663         | otherwise               = split buf dot_off (n+1)
 664
 665     -- careful, we might get names like M....
 666     -- so, if the character after the dot is not upper-case, this is
 667     -- the end of the qualifier part.
 668     split2 buf dot_off n
 669         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 670         | otherwise                 = done dot_off
 671
 672     done dot_off =
 673         (lexemeToFastString orig_buf dot_off,
 674          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 675
 676 varid span buf len =
 677   case lookupUFM reservedWordsFM fs of
 678         Just (keyword,0)    -> do
 679                 maybe_layout keyword
 680                 return (L span keyword)
 681         Just (keyword,exts) -> do
 682                 b <- extension (\i -> exts .&. i /= 0)
 683                 if b then do maybe_layout keyword
 684                              return (L span keyword)
 685                      else return (L span (ITvarid fs))
 686         _other -> return (L span (ITvarid fs))
 687   where
 688         fs = lexemeToFastString buf len
 689
 690 conid buf len = ITconid fs
 691   where fs = lexemeToFastString buf len
 692
 693 qvarsym buf len = ITqvarsym $! splitQualName buf len
 694 qconsym buf len = ITqconsym $! splitQualName buf len
 695
 696 varsym = sym ITvarsym
 697 consym = sym ITconsym
 698
 699 sym con span buf len =
 700   case lookupUFM reservedSymsFM fs of
 701         Just (keyword,0)    -> return (L span keyword)
 702         Just (keyword,exts) -> do
 703                 b <- extension (\i -> exts .&. i /= 0)
 704                 if b then return (L span keyword)
 705                      else return (L span $! con fs)
 706         _other -> return (L span $! con fs)
 707   where
 708         fs = lexemeToFastString buf len
 709
 710 tok_decimal span buf len
 711   = return (L span (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 712
 713 tok_octal span buf len
 714   = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 715
 716 tok_hexadecimal span buf len
 717   = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 718
 719 prim_decimal span buf len
 720   = return (L span (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 721
 722 prim_octal span buf len
 723   = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 724
 725 prim_hexadecimal span buf len
 726   = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 727
 728 tok_float        str = ITrational $! readRational__ str
 729 prim_float       str = ITprimfloat  $! readRational__ str
 730 prim_double      str = ITprimdouble $! readRational__ str
 731
 732 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 733 parseInteger buf len radix to_int
 734   = go 0 0
 735   where go i x | i == len  = x
 736                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 737
 738 -- -----------------------------------------------------------------------------
 739 -- Layout processing
 740
 741 -- we're at the first token on a line, insert layout tokens if necessary
 742 do_bol :: Action
 743 do_bol span _str _len = do
 744         pos <- getOffside (srcSpanEnd span)
 745         case pos of
 746             LT -> do
 747                 --trace "layout: inserting '}'" $ do
 748                 popContext
 749                 -- do NOT pop the lex state, we might have a ';' to insert
 750                 return (L span ITvccurly)
 751             EQ -> do
 752                 --trace "layout: inserting ';'" $ do
 753                 popLexState
 754                 return (L span ITsemi)
 755             GT -> do
 756                 popLexState
 757                 lexToken
 758
 759 -- certain keywords put us in the "layout" state, where we might
 760 -- add an opening curly brace.
 761 maybe_layout ITdo       = pushLexState layout_do
 762 maybe_layout ITmdo      = pushLexState layout_do
 763 maybe_layout ITof       = pushLexState layout
 764 maybe_layout ITlet      = pushLexState layout
 765 maybe_layout ITwhere    = pushLexState layout
 766 maybe_layout ITrec      = pushLexState layout
 767 maybe_layout _          = return ()
 768
 769 -- Pushing a new implicit layout context.  If the indentation of the
 770 -- next token is not greater than the previous layout context, then
 771 -- Haskell 98 says that the new layout context should be empty; that is
 772 -- the lexer must generate {}.
 773 --
 774 -- We are slightly more lenient than this: when the new context is started
 775 -- by a 'do', then we allow the new context to be at the same indentation as
 776 -- the previous context.  This is what the 'strict' argument is for.
 777 --
 778 new_layout_context strict span _buf _len = do
 779     popLexState
 780     let offset = srcSpanStartCol span
 781     ctx <- getContext
 782     case ctx of
 783         Layout prev_off : _  |
 784            (strict     && prev_off >= offset  ||
 785             not strict && prev_off > offset) -> do
 786                 -- token is indented to the left of the previous context.
 787                 -- we must generate a {} sequence now.
 788                 pushLexState layout_left
 789                 return (L span ITvocurly)
 790         other -> do
 791                 setContext (Layout offset : ctx)
 792                 return (L span ITvocurly)
 793
 794 do_layout_left span _buf _len = do
 795     popLexState
 796     pushLexState bol  -- we must be at the start of a line
 797     return (L span ITvccurly)
 798
 799 -- -----------------------------------------------------------------------------
 800 -- LINE pragmas
 801
 802 set_line :: Int -> Action
 803 set_line code span buf len = do
 804   let line = parseInteger buf len 10 oct_or_dec
 805   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
 806         -- subtract one: the line number refers to the *following* line
 807   popLexState
 808   pushLexState code
 809   lexToken
 810
 811 set_file :: Int -> Action
 812 set_file code span buf len = do
 813   let file = lexemeToFastString (stepOn buf) (len-2)
 814   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
 815   popLexState
 816   pushLexState code
 817   lexToken
 818
 819 -- -----------------------------------------------------------------------------
 820 -- Strings & Chars
 821
 822 -- This stuff is horrible.  I hates it.
 823
 824 lex_string_tok :: Action
 825 lex_string_tok span buf len = do
 826   tok <- lex_string ""
 827   end <- getSrcLoc
 828   return (L (mkSrcSpan (srcSpanStart span) end) tok)
 829
 830 lex_string :: String -> P Token
 831 lex_string s = do
 832   i <- getInput
 833   case alexGetChar i of
 834     Nothing -> lit_error
 835
 836     Just ('"',i)  -> do
 837         setInput i
 838         glaexts <- extension glaExtsEnabled
 839         if glaexts
 840           then do
 841             i <- getInput
 842             case alexGetChar i of
 843               Just ('#',i) -> do
 844                    setInput i
 845                    if any (> '\xFF') s
 846                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 847                     else let s' = mkFastStringNarrow (reverse s) in
 848                          -- always a narrow string/byte array
 849                          return (ITprimstring s')
 850               _other ->
 851                 return (ITstring (mkFastString (reverse s)))
 852           else
 853                 return (ITstring (mkFastString (reverse s)))
 854
 855     Just ('\\',i)
 856         | Just ('&',i) <- next -> do
 857                 setInput i; lex_string s
 858         | Just (c,i) <- next, is_space c -> do
 859                 setInput i; lex_stringgap s
 860         where next = alexGetChar i
 861
 862     Just _ -> do
 863         c <- lex_char
 864         lex_string (c:s)
 865
 866 lex_stringgap s = do
 867   c <- getCharOrFail
 868   case c of
 869     '\\' -> lex_string s
 870     c | is_space c -> lex_stringgap s
 871     _other -> lit_error
 872
 873
 874 lex_char_tok :: Action
 875 -- Here we are basically parsing character literals, such as 'x' or '\n'
 876 -- but, when Template Haskell is on, we additionally spot
 877 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
 878 -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
 879 -- So we have to do two characters of lookahead: when we see 'x we need to
 880 -- see if there's a trailing quote
 881 lex_char_tok span buf len = do  -- We've seen '
 882    i1 <- getInput       -- Look ahead to first character
 883    let loc = srcSpanStart span
 884    case alexGetChar i1 of
 885         Nothing -> lit_error
 886
 887         Just ('\'', i2@(end2,_)) -> do  -- We've seen ''
 888                   th_exts <- extension thEnabled
 889                   if th_exts then do
 890                         setInput i2
 891                         return (L (mkSrcSpan loc end2)  ITtyQuote)
 892                    else lit_error
 893
 894         Just ('\\', i2@(end2,_)) -> do  -- We've seen 'backslash
 895                   setInput i2
 896                   lit_ch <- lex_escape
 897                   mc <- getCharOrFail   -- Trailing quote
 898                   if mc == '\'' then finish_char_tok loc lit_ch
 899                                 else lit_error
 900
 901         Just (c, i2@(end2,_)) | not (is_any c) -> lit_error
 902                               | otherwise      ->
 903
 904                 -- We've seen 'x, where x is a valid character
 905                 --  (i.e. not newline etc) but not a quote or backslash
 906            case alexGetChar i2 of       -- Look ahead one more character
 907                 Nothing -> lit_error
 908                 Just ('\'', i3) -> do   -- We've seen 'x'
 909                         setInput i3
 910                         finish_char_tok loc c
 911                 _other -> do            -- We've seen 'x not followed by quote
 912                                         -- If TH is on, just parse the quote only
 913                         th_exts <- extension thEnabled
 914                         if th_exts then return (L (mkSrcSpan loc (fst i1)) ITvarQuote)
 915                                    else lit_error
 916
 917 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
 918 finish_char_tok loc ch  -- We've already seen the closing quote
 919                         -- Just need to check for trailing #
 920   = do  glaexts <- extension glaExtsEnabled
 921         i@(end,_) <- getInput
 922         if glaexts then do
 923                 case alexGetChar i of
 924                         Just ('#',i@(end,_)) -> do
 925                                 setInput i
 926                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
 927                         _other ->
 928                                 return (L (mkSrcSpan loc end) (ITchar ch))
 929                 else do
 930                    return (L (mkSrcSpan loc end) (ITchar ch))
 931
 932 lex_char :: P Char
 933 lex_char = do
 934   mc <- getCharOrFail
 935   case mc of
 936       '\\' -> lex_escape
 937       c | is_any c -> return c
 938       _other -> lit_error
 939
 940 lex_escape :: P Char
 941 lex_escape = do
 942   c <- getCharOrFail
 943   case c of
 944         'a'   -> return '\a'
 945         'b'   -> return '\b'
 946         'f'   -> return '\f'
 947         'n'   -> return '\n'
 948         'r'   -> return '\r'
 949         't'   -> return '\t'
 950         'v'   -> return '\v'
 951         '\\'  -> return '\\'
 952         '"'   -> return '\"'
 953         '\''  -> return '\''
 954         '^'   -> do c <- getCharOrFail
 955                     if c >= '@' && c <= '_'
 956                         then return (chr (ord c - ord '@'))
 957                         else lit_error
 958
 959         'x'   -> readNum is_hexdigit 16 hex
 960         'o'   -> readNum is_octdigit  8 oct_or_dec
 961         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 962
 963         c1 ->  do
 964            i <- getInput
 965            case alexGetChar i of
 966             Nothing -> lit_error
 967             Just (c2,i2) ->
 968               case alexGetChar i2 of
 969                 Nothing -> lit_error
 970                 Just (c3,i3) ->
 971                    let str = [c1,c2,c3] in
 972                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 973                                      Just rest <- [maybePrefixMatch p str] ] of
 974                           (escape_char,[]):_ -> do
 975                                 setInput i3
 976                                 return escape_char
 977                           (escape_char,_:_):_ -> do
 978                                 setInput i2
 979                                 return escape_char
 980                           [] -> lit_error
 981
 982 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 983 readNum is_digit base conv = do
 984   c <- getCharOrFail
 985   if is_digit c
 986         then readNum2 is_digit base conv (conv c)
 987         else lit_error
 988
 989 readNum2 is_digit base conv i = do
 990   input <- getInput
 991   read i input
 992   where read i input = do
 993           case alexGetChar input of
 994             Just (c,input') | is_digit c -> do
 995                 read (i*base + conv c) input'
 996             _other -> do
 997                 setInput input
 998                 if i >= 0 && i <= 0x10FFFF
 999                    then return (chr i)
1000                    else lit_error
1001
1002 is_hexdigit c
1003         =  is_digit c
1004         || (c >= 'a' && c <= 'f')
1005         || (c >= 'A' && c <= 'F')
1006
1007 hex c | is_digit c = ord c - ord '0'
1008       | otherwise  = ord (to_lower c) - ord 'a' + 10
1009
1010 oct_or_dec c = ord c - ord '0'
1011
1012 is_octdigit c = c >= '0' && c <= '7'
1013
1014 to_lower c
1015   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
1016   | otherwise = c
1017
1018 silly_escape_chars = [
1019         ("NUL", '\NUL'),
1020         ("SOH", '\SOH'),
1021         ("STX", '\STX'),
1022         ("ETX", '\ETX'),
1023         ("EOT", '\EOT'),
1024         ("ENQ", '\ENQ'),
1025         ("ACK", '\ACK'),
1026         ("BEL", '\BEL'),
1027         ("BS", '\BS'),
1028         ("HT", '\HT'),
1029         ("LF", '\LF'),
1030         ("VT", '\VT'),
1031         ("FF", '\FF'),
1032         ("CR", '\CR'),
1033         ("SO", '\SO'),
1034         ("SI", '\SI'),
1035         ("DLE", '\DLE'),
1036         ("DC1", '\DC1'),
1037         ("DC2", '\DC2'),
1038         ("DC3", '\DC3'),
1039         ("DC4", '\DC4'),
1040         ("NAK", '\NAK'),
1041         ("SYN", '\SYN'),
1042         ("ETB", '\ETB'),
1043         ("CAN", '\CAN'),
1044         ("EM", '\EM'),
1045         ("SUB", '\SUB'),
1046         ("ESC", '\ESC'),
1047         ("FS", '\FS'),
1048         ("GS", '\GS'),
1049         ("RS", '\RS'),
1050         ("US", '\US'),
1051         ("SP", '\SP'),
1052         ("DEL", '\DEL')
1053         ]
1054
1055 lit_error = lexError "lexical error in string/character literal"
1056
1057 getCharOrFail :: P Char
1058 getCharOrFail =  do
1059   i <- getInput
1060   case alexGetChar i of
1061         Nothing -> lexError "unexpected end-of-file in string/character literal"
1062         Just (c,i)  -> do setInput i; return c
1063
1064 -- -----------------------------------------------------------------------------
1065 -- Floats
1066
1067 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1068 readRational r = do
1069      (n,d,s) <- readFix r
1070      (k,t)   <- readExp s
1071      return ((n%1)*10^^(k-d), t)
1072  where
1073      readFix r = do
1074         (ds,s)  <- lexDecDigits r
1075         (ds',t) <- lexDotDigits s
1076         return (read (ds++ds'), length ds', t)
1077
1078      readExp (e:s) | e `elem` "eE" = readExp' s
1079      readExp s                     = return (0,s)
1080
1081      readExp' ('+':s) = readDec s
1082      readExp' ('-':s) = do
1083                         (k,t) <- readDec s
1084                         return (-k,t)
1085      readExp' s       = readDec s
1086
1087      readDec s = do
1088         (ds,r) <- nonnull isDigit s
1089         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1090                 r)
1091
1092      lexDecDigits = nonnull isDigit
1093
1094      lexDotDigits ('.':s) = return (span isDigit s)
1095      lexDotDigits s       = return ("",s)
1096
1097      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1098                       return (cs,t)
1099
1100 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1101 readRational__ top_s
1102   = case top_s of
1103       '-' : xs -> - (read_me xs)
1104       xs       -> read_me xs
1105   where
1106     read_me s
1107       = case (do { (x,"") <- readRational s ; return x }) of
1108           [x] -> x
1109           []  -> error ("readRational__: no parse:"        ++ top_s)
1110           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1111
1112 -- -----------------------------------------------------------------------------
1113 -- The Parse Monad
1114
1115 data LayoutContext
1116   = NoLayout
1117   | Layout !Int
1118
1119 data ParseResult a
1120   = POk PState a
1121   | PFailed
1122         SrcSpan         -- The start and end of the text span related to
1123                         -- the error.  Might be used in environments which can
1124                         -- show this span, e.g. by highlighting it.
1125         Message         -- The error message
1126
1127 data PState = PState {
1128         buffer     :: StringBuffer,
1129         last_loc   :: SrcSpan,  -- pos of previous token
1130         last_len   :: !Int,     -- len of previous token
1131         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1132         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1133         context    :: [LayoutContext],
1134         lex_state  :: [Int]
1135      }
1136         -- last_loc and last_len are used when generating error messages,
1137         -- and in pushCurrentContext only.
1138
1139 newtype P a = P { unP :: PState -> ParseResult a }
1140
1141 instance Monad P where
1142   return = returnP
1143   (>>=) = thenP
1144   fail = failP
1145
1146 returnP :: a -> P a
1147 returnP a = P $ \s -> POk s a
1148
1149 thenP :: P a -> (a -> P b) -> P b
1150 (P m) `thenP` k = P $ \ s ->
1151         case m s of
1152                 POk s1 a         -> (unP (k a)) s1
1153                 PFailed span err -> PFailed span err
1154
1155 failP :: String -> P a
1156 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1157
1158 failMsgP :: String -> P a
1159 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1160
1161 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1162 failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
1163
1164 failSpanMsgP :: SrcSpan -> String -> P a
1165 failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
1166
1167 extension :: (Int -> Bool) -> P Bool
1168 extension p = P $ \s -> POk s (p $! extsBitmap s)
1169
1170 getExts :: P Int
1171 getExts = P $ \s -> POk s (extsBitmap s)
1172
1173 setSrcLoc :: SrcLoc -> P ()
1174 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1175
1176 getSrcLoc :: P SrcLoc
1177 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1178
1179 setLastToken :: SrcSpan -> Int -> P ()
1180 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1181
1182 type AlexInput = (SrcLoc,StringBuffer)
1183
1184 alexInputPrevChar :: AlexInput -> Char
1185 alexInputPrevChar (_,s) = prevChar s '\n'
1186
1187 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1188 alexGetChar (loc,s)
1189   | atEnd s   = Nothing
1190   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1191   where c = currentChar s
1192         loc' = advanceSrcLoc loc c
1193         s'   = stepOn s
1194
1195 getInput :: P AlexInput
1196 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1197
1198 setInput :: AlexInput -> P ()
1199 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1200
1201 pushLexState :: Int -> P ()
1202 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1203
1204 popLexState :: P Int
1205 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1206
1207 getLexState :: P Int
1208 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1209
1210 -- for reasons of efficiency, flags indicating language extensions (eg,
1211 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1212 -- integer
1213
1214 glaExtsBit, ffiBit, parrBit :: Int
1215 glaExtsBit = 0
1216 ffiBit     = 1
1217 parrBit    = 2
1218 arrowsBit  = 4
1219 thBit      = 5
1220 ipBit      = 6
1221
1222 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1223 glaExtsEnabled flags = testBit flags glaExtsBit
1224 ffiEnabled     flags = testBit flags ffiBit
1225 parrEnabled    flags = testBit flags parrBit
1226 arrowsEnabled  flags = testBit flags arrowsBit
1227 thEnabled      flags = testBit flags thBit
1228 ipEnabled      flags = testBit flags ipBit
1229
1230 -- create a parse state
1231 --
1232 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1233 mkPState buf loc flags  =
1234   PState {
1235       buffer     = buf,
1236       last_loc   = mkSrcSpan loc loc,
1237       last_len   = 0,
1238       loc        = loc,
1239       extsBitmap = fromIntegral bitmap,
1240       context    = [],
1241       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1242         -- we begin in the layout state if toplev_layout is set
1243     }
1244     where
1245       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1246                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1247                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1248                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1249                .|. thBit      `setBitIf` dopt Opt_TH          flags
1250                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1251       --
1252       setBitIf :: Int -> Bool -> Int
1253       b `setBitIf` cond | cond      = bit b
1254                         | otherwise = 0
1255
1256 getContext :: P [LayoutContext]
1257 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1258
1259 setContext :: [LayoutContext] -> P ()
1260 setContext ctx = P $ \s -> POk s{context=ctx} ()
1261
1262 popContext :: P ()
1263 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1264                            loc = loc, last_len = len, last_loc = last_loc }) ->
1265   case ctx of
1266         (_:tl) -> POk s{ context = tl } ()
1267         []     -> PFailed last_loc (srcParseErr buf len)
1268
1269 -- Push a new layout context at the indentation of the last token read.
1270 -- This is only used at the outer level of a module when the 'module'
1271 -- keyword is missing.
1272 pushCurrentContext :: P ()
1273 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1274   POk s{ context = Layout (srcSpanStartCol loc) : ctx} ()
1275
1276 getOffside :: SrcLoc -> P Ordering
1277 getOffside loc = P $ \s@PState{context=stk} ->
1278                 let ord = case stk of
1279                         (Layout n:_) -> compare (srcLocCol loc) n
1280                         _            -> GT
1281                 in POk s ord
1282
1283 -- ---------------------------------------------------------------------------
1284 -- Construct a parse error
1285
1286 srcParseErr
1287   :: StringBuffer       -- current buffer (placed just after the last token)
1288   -> Int                -- length of the previous token
1289   -> Message
1290 srcParseErr buf len
1291   = hcat [ if null token
1292              then ptext SLIT("parse error (possibly incorrect indentation)")
1293              else hcat [ptext SLIT("parse error on input "),
1294                         char '`', text token, char '\'']
1295     ]
1296   where token = lexemeToString (stepOnBy (-len) buf) len
1297
1298 -- Report a parse failure, giving the span of the previous token as
1299 -- the location of the error.  This is the entry point for errors
1300 -- detected during parsing.
1301 srcParseFail :: P a
1302 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1303                                 last_loc = last_loc, loc = loc } ->
1304     PFailed last_loc (srcParseErr buf len)
1305
1306 -- A lexical error is reported at a particular position in the source file,
1307 -- not over a token range.  TODO: this is slightly wrong, because we record
1308 -- the error at the character position following the one which caused the
1309 -- error.  We should somehow back up by one character.
1310 lexError :: String -> P a
1311 lexError str = do
1312   loc <- getSrcLoc
1313   i@(end,_) <- getInput
1314   failLocMsgP loc end str
1315
1316 -- -----------------------------------------------------------------------------
1317 -- This is the top-level function: called from the parser each time a
1318 -- new token is to be read from the input.
1319
1320 lexer :: (Located Token -> P a) -> P a
1321 lexer cont = do
1322   tok@(L _ tok__) <- lexToken
1323   --trace ("token: " ++ show tok__) $ do
1324   cont tok
1325
1326 lexToken :: P (Located Token)
1327 lexToken = do
1328   inp@(loc1,buf) <- getInput
1329   sc <- getLexState
1330   exts <- getExts
1331   case alexScanUser exts inp sc of
1332     AlexEOF -> do let span = mkSrcSpan loc1 loc1
1333                   setLastToken span 0
1334                   return (L span ITeof)
1335     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1336     AlexSkip inp2 _ -> do
1337         setInput inp2
1338         lexToken
1339     AlexToken inp2@(end,buf2) len t -> do
1340         setInput inp2
1341         let span = mkSrcSpan loc1 end
1342         span `seq` setLastToken span len
1343         t span buf len
1344 }