ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), lexer, mkPState,
  26    P(..), ParseResult(..), getSrcLoc,
  27    failMsgP, failLocMsgP, failSpanMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ErrUtils         ( Message )
  34 import Outputable
  35 import StringBuffer
  36 import FastString
  37 import FastTypes
  38 import SrcLoc
  39 import UniqFM
  40 import CmdLineOpts
  41 import Ctype
  42 import Util             ( maybePrefixMatch )
  43
  44 import DATA_BITS
  45 import Char
  46 import Ratio
  47 --import TRACE
  48 }
  49
  50 $whitechar   = [\ \t\n\r\f\v\xa0]
  51 $white_no_nl = $whitechar # \n
  52
  53 $ascdigit  = 0-9
  54 $unidigit  = \x01
  55 $digit     = [$ascdigit $unidigit]
  56
  57 $special   = [\(\)\,\;\[\]\`\{\}]
  58 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  59 $unisymbol = \x02
  60 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  61
  62 $unilarge  = \x03
  63 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  64 $large     = [$asclarge $unilarge]
  65
  66 $unismall  = \x04
  67 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  68 $small     = [$ascsmall $unismall \_]
  69
  70 $graphic   = [$small $large $symbol $digit $special \:\"\']
  71
  72 $octit     = 0-7
  73 $hexit     = [$digit A-F a-f]
  74 $symchar   = [$symbol \:]
  75 $nl        = [\n\r]
  76 $idchar    = [$small $large $digit \']
  77
  78 @varid     = $small $idchar*
  79 @conid     = $large $idchar*
  80
  81 @varsym    = $symbol $symchar*
  82 @consym    = \: $symchar*
  83
  84 @decimal     = $digit+
  85 @octal       = $octit+
  86 @hexadecimal = $hexit+
  87 @exponent    = [eE] [\-\+]? @decimal
  88
  89 -- we support the hierarchical module name extension:
  90 @qual = (@conid \.)+
  91
  92 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  93
  94 haskell :-
  95
  96 -- everywhere: skip whitespace and comments
  97 $white_no_nl+                           ;
  98
  99 -- Everywhere: deal with nested comments.  We explicitly rule out
 100 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 101 -- (this can happen even though pragmas will normally take precedence due to
 102 -- longest-match, because pragmas aren't valid in every state, but comments
 103 -- are).
 104 "{-" / { notFollowedBy '#' }            { nested_comment }
 105
 106 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 107 -- more dashes followed by a symbol should be parsed as a varsym, so we
 108 -- have to exclude those.
 109 -- The regex says: "munch all the characters after the dashes, as long as
 110 -- the first one is not a symbol".
 111 "--"\-* [^$symbol] .*                   ;
 112 "--"\-* / { atEOL }                     ;
 113
 114 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 115 -- blank lines) until we find a non-whitespace character, then do layout
 116 -- processing.
 117 --
 118 -- One slight wibble here: what if the line begins with {-#? In
 119 -- theory, we have to lex the pragma to see if it's one we recognise,
 120 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 121 -- as a nested comment.  We don't bother with this: if the line begins
 122 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 123 <bol> {
 124   \n                                    ;
 125   ^\# (line)?                           { begin line_prag1 }
 126   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 127   ^\# \! .* \n                          ; -- #!, for scripts
 128   ()                                    { do_bol }
 129 }
 130
 131 -- after a layout keyword (let, where, do, of), we begin a new layout
 132 -- context if the curly brace is missing.
 133 -- Careful! This stuff is quite delicate.
 134 <layout, layout_do> {
 135   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 136         -- we might encounter {-# here, but {- has been handled already
 137   \n                                    ;
 138   ^\# (line)?                           { begin line_prag1 }
 139 }
 140
 141 -- do is treated in a subtly different way, see new_layout_context
 142 <layout>    ()                          { new_layout_context True }
 143 <layout_do> ()                          { new_layout_context False }
 144
 145 -- after a new layout context which was found to be to the left of the
 146 -- previous context, we have generated a '{' token, and we now need to
 147 -- generate a matching '}' token.
 148 <layout_left>  ()                       { do_layout_left }
 149
 150 <0,glaexts> \n                          { begin bol }
 151
 152 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 153
 154 -- single-line line pragmas, of the form
 155 --    # <line> "<file>" <extra-stuff> \n
 156 <line_prag1> $digit+                    { set_line line_prag1a }
 157 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 158 <line_prag1b> .*                        { pop }
 159
 160 -- Haskell-style line pragmas, of the form
 161 --    {-# LINE <line> "<file>" #-}
 162 <line_prag2> $digit+                    { set_line line_prag2a }
 163 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 164 <line_prag2b> "#-}"|"-}"                { pop }
 165    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 166    -- with older versions of GHC which generated these.
 167
 168 <0,glaexts> {
 169   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 170                                         { token ITspecialise_prag }
 171   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 172   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 173   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 174                                         { token ITnoinline_prag }
 175   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 176   "{-#" $whitechar* (DEPRECATED|deprecated)
 177                                         { token ITdeprecated_prag }
 178   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 179   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 180   "{-#" $whitechar* (UNPACK|unpack)     { token ITunpack_prag }
 181
 182   "{-#"                                 { nested_comment }
 183
 184   -- ToDo: should only be valid inside a pragma:
 185   "#-}"                                 { token ITclose_prag}
 186 }
 187
 188
 189 -- '0' state: ordinary lexemes
 190 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 191
 192 -- "special" symbols
 193
 194 <0,glaexts> {
 195   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 196   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 197 }
 198
 199 <0,glaexts> {
 200   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 201   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 202   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 203   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 204   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 205   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 206   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 207   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 208 }
 209
 210 <0,glaexts> {
 211   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 212                                         { special IToparenbar }
 213   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 214 }
 215
 216 <0,glaexts> {
 217   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 218   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 219 }
 220
 221 <glaexts> {
 222   "(#" / { notFollowedBySymbol }        { token IToubxparen }
 223   "#)"                                  { token ITcubxparen }
 224   "{|"                                  { token ITocurlybar }
 225   "|}"                                  { token ITccurlybar }
 226 }
 227
 228 <0,glaexts> {
 229   \(                                    { special IToparen }
 230   \)                                    { special ITcparen }
 231   \[                                    { special ITobrack }
 232   \]                                    { special ITcbrack }
 233   \,                                    { special ITcomma }
 234   \;                                    { special ITsemi }
 235   \`                                    { special ITbackquote }
 236
 237   \{                                    { open_brace }
 238   \}                                    { close_brace }
 239 }
 240
 241 <0,glaexts> {
 242   @qual @varid                  { check_qvarid }
 243   @qual @conid                  { idtoken qconid }
 244   @varid                        { varid }
 245   @conid                        { idtoken conid }
 246 }
 247
 248 -- after an illegal qvarid, such as 'M.let',
 249 -- we back up and try again in the bad_qvarid state:
 250 <bad_qvarid> {
 251   @conid                        { pop_and (idtoken conid) }
 252   @qual @conid                  { pop_and (idtoken qconid) }
 253 }
 254
 255 <glaexts> {
 256   @qual @varid "#"+             { idtoken qvarid }
 257   @qual @conid "#"+             { idtoken qconid }
 258   @varid "#"+                   { varid }
 259   @conid "#"+                   { idtoken conid }
 260 }
 261
 262 -- ToDo: M.(,,,)
 263
 264 <0,glaexts> {
 265   @qual @varsym                 { idtoken qvarsym }
 266   @qual @consym                 { idtoken qconsym }
 267   @varsym                       { varsym }
 268   @consym                       { consym }
 269 }
 270
 271 <0,glaexts> {
 272   @decimal                      { tok_decimal }
 273   0[oO] @octal                  { tok_octal }
 274   0[xX] @hexadecimal            { tok_hexadecimal }
 275 }
 276
 277 <glaexts> {
 278   @decimal \#                   { prim_decimal }
 279   0[oO] @octal \#               { prim_octal }
 280   0[xX] @hexadecimal \#         { prim_hexadecimal }
 281 }
 282
 283 <0,glaexts> @floating_point             { strtoken tok_float }
 284 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 285 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 286
 287 -- Strings and chars are lexed by hand-written code.  The reason is
 288 -- that even if we recognise the string or char here in the regex
 289 -- lexer, we would still have to parse the string afterward in order
 290 -- to convert it to a String.
 291 <0,glaexts> {
 292   \'                            { lex_char_tok }
 293   \"                            { lex_string_tok }
 294 }
 295
 296 {
 297 -- work around bug in Alex 2.0
 298 #if __GLASGOW_HASKELL__ < 503
 299 unsafeAt arr i = arr ! i
 300 #endif
 301
 302 -- -----------------------------------------------------------------------------
 303 -- The token type
 304
 305 data Token
 306   = ITas                        -- Haskell keywords
 307   | ITcase
 308   | ITclass
 309   | ITdata
 310   | ITdefault
 311   | ITderiving
 312   | ITdo
 313   | ITelse
 314   | IThiding
 315   | ITif
 316   | ITimport
 317   | ITin
 318   | ITinfix
 319   | ITinfixl
 320   | ITinfixr
 321   | ITinstance
 322   | ITlet
 323   | ITmodule
 324   | ITnewtype
 325   | ITof
 326   | ITqualified
 327   | ITthen
 328   | ITtype
 329   | ITwhere
 330   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 331
 332   | ITforall                    -- GHC extension keywords
 333   | ITforeign
 334   | ITexport
 335   | ITlabel
 336   | ITdynamic
 337   | ITsafe
 338   | ITthreadsafe
 339   | ITunsafe
 340   | ITstdcallconv
 341   | ITccallconv
 342   | ITdotnet
 343   | ITmdo
 344
 345   | ITspecialise_prag           -- Pragmas
 346   | ITsource_prag
 347   | ITinline_prag
 348   | ITnoinline_prag
 349   | ITrules_prag
 350   | ITdeprecated_prag
 351   | ITline_prag
 352   | ITscc_prag
 353   | ITcore_prag                 -- hdaume: core annotations
 354   | ITunpack_prag
 355   | ITclose_prag
 356
 357   | ITdotdot                    -- reserved symbols
 358   | ITcolon
 359   | ITdcolon
 360   | ITequal
 361   | ITlam
 362   | ITvbar
 363   | ITlarrow
 364   | ITrarrow
 365   | ITat
 366   | ITtilde
 367   | ITdarrow
 368   | ITminus
 369   | ITbang
 370   | ITstar
 371   | ITdot
 372
 373   | ITbiglam                    -- GHC-extension symbols
 374
 375   | ITocurly                    -- special symbols
 376   | ITccurly
 377   | ITocurlybar                 -- {|, for type applications
 378   | ITccurlybar                 -- |}, for type applications
 379   | ITvocurly
 380   | ITvccurly
 381   | ITobrack
 382   | ITopabrack                  -- [:, for parallel arrays with -fparr
 383   | ITcpabrack                  -- :], for parallel arrays with -fparr
 384   | ITcbrack
 385   | IToparen
 386   | ITcparen
 387   | IToubxparen
 388   | ITcubxparen
 389   | ITsemi
 390   | ITcomma
 391   | ITunderscore
 392   | ITbackquote
 393
 394   | ITvarid   FastString        -- identifiers
 395   | ITconid   FastString
 396   | ITvarsym  FastString
 397   | ITconsym  FastString
 398   | ITqvarid  (FastString,FastString)
 399   | ITqconid  (FastString,FastString)
 400   | ITqvarsym (FastString,FastString)
 401   | ITqconsym (FastString,FastString)
 402
 403   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 404   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 405
 406   | ITpragma StringBuffer
 407
 408   | ITchar       Char
 409   | ITstring     FastString
 410   | ITinteger    Integer
 411   | ITrational   Rational
 412
 413   | ITprimchar   Char
 414   | ITprimstring FastString
 415   | ITprimint    Integer
 416   | ITprimfloat  Rational
 417   | ITprimdouble Rational
 418
 419   -- MetaHaskell extension tokens
 420   | ITopenExpQuote              -- [| or [e|
 421   | ITopenPatQuote              -- [p|
 422   | ITopenDecQuote              -- [d|
 423   | ITopenTypQuote              -- [t|
 424   | ITcloseQuote                -- |]
 425   | ITidEscape   FastString     -- $x
 426   | ITparenEscape               -- $(
 427   | ITvarQuote                  -- '
 428   | ITtyQuote                   -- ''
 429
 430   -- Arrow notation extension
 431   | ITproc
 432   | ITrec
 433   | IToparenbar                 -- (|
 434   | ITcparenbar                 -- |)
 435   | ITlarrowtail                -- -<
 436   | ITrarrowtail                -- >-
 437   | ITLarrowtail                -- -<<
 438   | ITRarrowtail                -- >>-
 439
 440   | ITunknown String            -- Used when the lexer can't make sense of it
 441   | ITeof                       -- end of file token
 442 #ifdef DEBUG
 443   deriving Show -- debugging
 444 #endif
 445
 446 isSpecial :: Token -> Bool
 447 -- If we see M.x, where x is a keyword, but
 448 -- is special, we treat is as just plain M.x,
 449 -- not as a keyword.
 450 isSpecial ITas          = True
 451 isSpecial IThiding      = True
 452 isSpecial ITqualified   = True
 453 isSpecial ITforall      = True
 454 isSpecial ITexport      = True
 455 isSpecial ITlabel       = True
 456 isSpecial ITdynamic     = True
 457 isSpecial ITsafe        = True
 458 isSpecial ITthreadsafe  = True
 459 isSpecial ITunsafe      = True
 460 isSpecial ITccallconv   = True
 461 isSpecial ITstdcallconv = True
 462 isSpecial ITmdo         = True
 463 isSpecial _             = False
 464
 465 -- the bitmap provided as the third component indicates whether the
 466 -- corresponding extension keyword is valid under the extension options
 467 -- provided to the compiler; if the extension corresponding to *any* of the
 468 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 469 -- facilitates using a keyword in two different extensions that can be
 470 -- activated independently)
 471 --
 472 reservedWordsFM = listToUFM $
 473         map (\(x, y, z) -> (mkFastString x, (y, z)))
 474        [( "_",          ITunderscore,   0 ),
 475         ( "as",         ITas,           0 ),
 476         ( "case",       ITcase,         0 ),
 477         ( "class",      ITclass,        0 ),
 478         ( "data",       ITdata,         0 ),
 479         ( "default",    ITdefault,      0 ),
 480         ( "deriving",   ITderiving,     0 ),
 481         ( "do",         ITdo,           0 ),
 482         ( "else",       ITelse,         0 ),
 483         ( "hiding",     IThiding,       0 ),
 484         ( "if",         ITif,           0 ),
 485         ( "import",     ITimport,       0 ),
 486         ( "in",         ITin,           0 ),
 487         ( "infix",      ITinfix,        0 ),
 488         ( "infixl",     ITinfixl,       0 ),
 489         ( "infixr",     ITinfixr,       0 ),
 490         ( "instance",   ITinstance,     0 ),
 491         ( "let",        ITlet,          0 ),
 492         ( "module",     ITmodule,       0 ),
 493         ( "newtype",    ITnewtype,      0 ),
 494         ( "of",         ITof,           0 ),
 495         ( "qualified",  ITqualified,    0 ),
 496         ( "then",       ITthen,         0 ),
 497         ( "type",       ITtype,         0 ),
 498         ( "where",      ITwhere,        0 ),
 499         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 500
 501         ( "forall",     ITforall,        bit glaExtsBit),
 502         ( "mdo",        ITmdo,           bit glaExtsBit),
 503
 504         ( "foreign",    ITforeign,       bit ffiBit),
 505         ( "export",     ITexport,        bit ffiBit),
 506         ( "label",      ITlabel,         bit ffiBit),
 507         ( "dynamic",    ITdynamic,       bit ffiBit),
 508         ( "safe",       ITsafe,          bit ffiBit),
 509         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 510         ( "unsafe",     ITunsafe,        bit ffiBit),
 511         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 512         ( "ccall",      ITccallconv,     bit ffiBit),
 513         ( "dotnet",     ITdotnet,        bit ffiBit),
 514
 515         ( "rec",        ITrec,           bit arrowsBit),
 516         ( "proc",       ITproc,          bit arrowsBit)
 517      ]
 518
 519 reservedSymsFM = listToUFM $
 520         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 521       [ ("..",  ITdotdot,       0)
 522        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 523                                                 -- meaning only list cons
 524        ,("::",  ITdcolon,       0)
 525        ,("=",   ITequal,        0)
 526        ,("\\",  ITlam,          0)
 527        ,("|",   ITvbar,         0)
 528        ,("<-",  ITlarrow,       0)
 529        ,("->",  ITrarrow,       0)
 530        ,("@",   ITat,           0)
 531        ,("~",   ITtilde,        0)
 532        ,("=>",  ITdarrow,       0)
 533        ,("-",   ITminus,        0)
 534        ,("!",   ITbang,         0)
 535
 536        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 537        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 538
 539        ,("-<",  ITlarrowtail,   bit arrowsBit)
 540        ,(">-",  ITrarrowtail,   bit arrowsBit)
 541        ,("-<<", ITLarrowtail,   bit arrowsBit)
 542        ,(">>-", ITRarrowtail,   bit arrowsBit)
 543        ]
 544
 545 -- -----------------------------------------------------------------------------
 546 -- Lexer actions
 547
 548 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 549
 550 special :: Token -> Action
 551 special tok span _buf len = return (L span tok)
 552
 553 token, layout_token :: Token -> Action
 554 token t span buf len = return (L span t)
 555 layout_token t span buf len = pushLexState layout >> return (L span t)
 556
 557 idtoken :: (StringBuffer -> Int -> Token) -> Action
 558 idtoken f span buf len = return (L span $! (f buf len))
 559
 560 skip_one_varid :: (FastString -> Token) -> Action
 561 skip_one_varid f span buf len
 562   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 563
 564 strtoken :: (String -> Token) -> Action
 565 strtoken f span buf len =
 566   return (L span $! (f $! lexemeToString buf len))
 567
 568 init_strtoken :: Int -> (String -> Token) -> Action
 569 -- like strtoken, but drops the last N character(s)
 570 init_strtoken drop f span buf len =
 571   return (L span $! (f $! lexemeToString buf (len-drop)))
 572
 573 begin :: Int -> Action
 574 begin code _span _str _len = do pushLexState code; lexToken
 575
 576 pop :: Action
 577 pop _span _buf _len = do popLexState; lexToken
 578
 579 pop_and :: Action -> Action
 580 pop_and act span buf len = do popLexState; act span buf len
 581
 582 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 583
 584 notFollowedBySymbol _ _ _ (_,buf)
 585   = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
 586
 587 atEOL _ _ _ (_,buf) = atEnd buf || currentChar buf == '\n'
 588
 589 ifExtension pred bits _ _ _ = pred bits
 590
 591 {-
 592   nested comments require traversing by hand, they can't be parsed
 593   using regular expressions.
 594 -}
 595 nested_comment :: Action
 596 nested_comment span _str _len = do
 597   input <- getInput
 598   go 1 input
 599   where go 0 input = do setInput input; lexToken
 600         go n input = do
 601           case alexGetChar input of
 602             Nothing  -> err input
 603             Just (c,input) -> do
 604               case c of
 605                 '-' -> do
 606                   case alexGetChar input of
 607                     Nothing  -> err input
 608                     Just ('\125',input) -> go (n-1) input
 609                     Just (c,_)          -> go n input
 610                 '\123' -> do
 611                   case alexGetChar input of
 612                     Nothing  -> err input
 613                     Just ('-',input') -> go (n+1) input'
 614                     Just (c,input)    -> go n input
 615                 c -> go n input
 616
 617         err input = do failLocMsgP (srcSpanStart span) (fst input)
 618                         "unterminated `{-'"
 619
 620 open_brace, close_brace :: Action
 621 open_brace span _str _len = do
 622   ctx <- getContext
 623   setContext (NoLayout:ctx)
 624   return (L span ITocurly)
 625 close_brace span _str _len = do
 626   popContext
 627   return (L span ITccurly)
 628
 629 -- We have to be careful not to count M.<varid> as a qualified name
 630 -- when <varid> is a keyword.  We hack around this by catching
 631 -- the offending tokens afterward, and re-lexing in a different state.
 632 check_qvarid span buf len = do
 633   case lookupUFM reservedWordsFM var of
 634         Just (keyword,exts)
 635           | not (isSpecial keyword) ->
 636           if exts == 0
 637              then try_again
 638              else do
 639                 b <- extension (\i -> exts .&. i /= 0)
 640                 if b then try_again
 641                      else return token
 642         _other -> return token
 643   where
 644         (mod,var) = splitQualName buf len
 645         token     = L span (ITqvarid (mod,var))
 646
 647         try_again = do
 648                 setInput (srcSpanStart span,buf)
 649                 pushLexState bad_qvarid
 650                 lexToken
 651
 652 qvarid buf len = ITqvarid $! splitQualName buf len
 653 qconid buf len = ITqconid $! splitQualName buf len
 654
 655 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 656 -- takes a StringBuffer and a length, and returns the module name
 657 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 658 -- because of hierarchical module names.
 659 splitQualName orig_buf len = split orig_buf 0 0
 660   where
 661     split buf dot_off n
 662         | n == len                = done dot_off
 663         | lookAhead buf n == '.'  = split2 buf n (n+1)
 664         | otherwise               = split buf dot_off (n+1)
 665
 666     -- careful, we might get names like M....
 667     -- so, if the character after the dot is not upper-case, this is
 668     -- the end of the qualifier part.
 669     split2 buf dot_off n
 670         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 671         | otherwise                 = done dot_off
 672
 673     done dot_off =
 674         (lexemeToFastString orig_buf dot_off,
 675          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 676
 677 varid span buf len =
 678   case lookupUFM reservedWordsFM fs of
 679         Just (keyword,0)    -> do
 680                 maybe_layout keyword
 681                 return (L span keyword)
 682         Just (keyword,exts) -> do
 683                 b <- extension (\i -> exts .&. i /= 0)
 684                 if b then do maybe_layout keyword
 685                              return (L span keyword)
 686                      else return (L span (ITvarid fs))
 687         _other -> return (L span (ITvarid fs))
 688   where
 689         fs = lexemeToFastString buf len
 690
 691 conid buf len = ITconid fs
 692   where fs = lexemeToFastString buf len
 693
 694 qvarsym buf len = ITqvarsym $! splitQualName buf len
 695 qconsym buf len = ITqconsym $! splitQualName buf len
 696
 697 varsym = sym ITvarsym
 698 consym = sym ITconsym
 699
 700 sym con span buf len =
 701   case lookupUFM reservedSymsFM fs of
 702         Just (keyword,0)    -> return (L span keyword)
 703         Just (keyword,exts) -> do
 704                 b <- extension (\i -> exts .&. i /= 0)
 705                 if b then return (L span keyword)
 706                      else return (L span $! con fs)
 707         _other -> return (L span $! con fs)
 708   where
 709         fs = lexemeToFastString buf len
 710
 711 tok_decimal span buf len
 712   = return (L span (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 713
 714 tok_octal span buf len
 715   = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 716
 717 tok_hexadecimal span buf len
 718   = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 719
 720 prim_decimal span buf len
 721   = return (L span (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 722
 723 prim_octal span buf len
 724   = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 725
 726 prim_hexadecimal span buf len
 727   = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 728
 729 tok_float        str = ITrational $! readRational__ str
 730 prim_float       str = ITprimfloat  $! readRational__ str
 731 prim_double      str = ITprimdouble $! readRational__ str
 732
 733 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 734 parseInteger buf len radix to_int
 735   = go 0 0
 736   where go i x | i == len  = x
 737                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 738
 739 -- -----------------------------------------------------------------------------
 740 -- Layout processing
 741
 742 -- we're at the first token on a line, insert layout tokens if necessary
 743 do_bol :: Action
 744 do_bol span _str _len = do
 745         pos <- getOffside (srcSpanEnd span)
 746         case pos of
 747             LT -> do
 748                 --trace "layout: inserting '}'" $ do
 749                 popContext
 750                 -- do NOT pop the lex state, we might have a ';' to insert
 751                 return (L span ITvccurly)
 752             EQ -> do
 753                 --trace "layout: inserting ';'" $ do
 754                 popLexState
 755                 return (L span ITsemi)
 756             GT -> do
 757                 popLexState
 758                 lexToken
 759
 760 -- certain keywords put us in the "layout" state, where we might
 761 -- add an opening curly brace.
 762 maybe_layout ITdo       = pushLexState layout_do
 763 maybe_layout ITmdo      = pushLexState layout_do
 764 maybe_layout ITof       = pushLexState layout
 765 maybe_layout ITlet      = pushLexState layout
 766 maybe_layout ITwhere    = pushLexState layout
 767 maybe_layout ITrec      = pushLexState layout
 768 maybe_layout _          = return ()
 769
 770 -- Pushing a new implicit layout context.  If the indentation of the
 771 -- next token is not greater than the previous layout context, then
 772 -- Haskell 98 says that the new layout context should be empty; that is
 773 -- the lexer must generate {}.
 774 --
 775 -- We are slightly more lenient than this: when the new context is started
 776 -- by a 'do', then we allow the new context to be at the same indentation as
 777 -- the previous context.  This is what the 'strict' argument is for.
 778 --
 779 new_layout_context strict span _buf _len = do
 780     popLexState
 781     let offset = srcSpanStartCol span
 782     ctx <- getContext
 783     case ctx of
 784         Layout prev_off : _  |
 785            (strict     && prev_off >= offset  ||
 786             not strict && prev_off > offset) -> do
 787                 -- token is indented to the left of the previous context.
 788                 -- we must generate a {} sequence now.
 789                 pushLexState layout_left
 790                 return (L span ITvocurly)
 791         other -> do
 792                 setContext (Layout offset : ctx)
 793                 return (L span ITvocurly)
 794
 795 do_layout_left span _buf _len = do
 796     popLexState
 797     pushLexState bol  -- we must be at the start of a line
 798     return (L span ITvccurly)
 799
 800 -- -----------------------------------------------------------------------------
 801 -- LINE pragmas
 802
 803 set_line :: Int -> Action
 804 set_line code span buf len = do
 805   let line = parseInteger buf len 10 oct_or_dec
 806   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
 807         -- subtract one: the line number refers to the *following* line
 808   popLexState
 809   pushLexState code
 810   lexToken
 811
 812 set_file :: Int -> Action
 813 set_file code span buf len = do
 814   let file = lexemeToFastString (stepOn buf) (len-2)
 815   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
 816   popLexState
 817   pushLexState code
 818   lexToken
 819
 820 -- -----------------------------------------------------------------------------
 821 -- Strings & Chars
 822
 823 -- This stuff is horrible.  I hates it.
 824
 825 lex_string_tok :: Action
 826 lex_string_tok span buf len = do
 827   tok <- lex_string ""
 828   end <- getSrcLoc
 829   return (L (mkSrcSpan (srcSpanStart span) end) tok)
 830
 831 lex_string :: String -> P Token
 832 lex_string s = do
 833   i <- getInput
 834   case alexGetChar i of
 835     Nothing -> lit_error
 836
 837     Just ('"',i)  -> do
 838         setInput i
 839         glaexts <- extension glaExtsEnabled
 840         if glaexts
 841           then do
 842             i <- getInput
 843             case alexGetChar i of
 844               Just ('#',i) -> do
 845                    setInput i
 846                    if any (> '\xFF') s
 847                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 848                     else let s' = mkFastStringNarrow (reverse s) in
 849                          -- always a narrow string/byte array
 850                          return (ITprimstring s')
 851               _other ->
 852                 return (ITstring (mkFastString (reverse s)))
 853           else
 854                 return (ITstring (mkFastString (reverse s)))
 855
 856     Just ('\\',i)
 857         | Just ('&',i) <- next -> do
 858                 setInput i; lex_string s
 859         | Just (c,i) <- next, is_space c -> do
 860                 setInput i; lex_stringgap s
 861         where next = alexGetChar i
 862
 863     Just _ -> do
 864         c <- lex_char
 865         lex_string (c:s)
 866
 867 lex_stringgap s = do
 868   c <- getCharOrFail
 869   case c of
 870     '\\' -> lex_string s
 871     c | is_space c -> lex_stringgap s
 872     _other -> lit_error
 873
 874
 875 lex_char_tok :: Action
 876 -- Here we are basically parsing character literals, such as 'x' or '\n'
 877 -- but, when Template Haskell is on, we additionally spot
 878 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
 879 -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
 880 -- So we have to do two characters of lookahead: when we see 'x we need to
 881 -- see if there's a trailing quote
 882 lex_char_tok span buf len = do  -- We've seen '
 883    i1 <- getInput       -- Look ahead to first character
 884    let loc = srcSpanStart span
 885    case alexGetChar i1 of
 886         Nothing -> lit_error
 887
 888         Just ('\'', i2@(end2,_)) -> do  -- We've seen ''
 889                   th_exts <- extension thEnabled
 890                   if th_exts then do
 891                         setInput i2
 892                         return (L (mkSrcSpan loc end2)  ITtyQuote)
 893                    else lit_error
 894
 895         Just ('\\', i2@(end2,_)) -> do  -- We've seen 'backslash
 896                   setInput i2
 897                   lit_ch <- lex_escape
 898                   mc <- getCharOrFail   -- Trailing quote
 899                   if mc == '\'' then finish_char_tok loc lit_ch
 900                                 else lit_error
 901
 902         Just (c, i2@(end2,_)) | not (is_any c) -> lit_error
 903                               | otherwise      ->
 904
 905                 -- We've seen 'x, where x is a valid character
 906                 --  (i.e. not newline etc) but not a quote or backslash
 907            case alexGetChar i2 of       -- Look ahead one more character
 908                 Nothing -> lit_error
 909                 Just ('\'', i3) -> do   -- We've seen 'x'
 910                         setInput i3
 911                         finish_char_tok loc c
 912                 _other -> do            -- We've seen 'x not followed by quote
 913                                         -- If TH is on, just parse the quote only
 914                         th_exts <- extension thEnabled
 915                         if th_exts then return (L (mkSrcSpan loc (fst i1)) ITvarQuote)
 916                                    else lit_error
 917
 918 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
 919 finish_char_tok loc ch  -- We've already seen the closing quote
 920                         -- Just need to check for trailing #
 921   = do  glaexts <- extension glaExtsEnabled
 922         i@(end,_) <- getInput
 923         if glaexts then do
 924                 case alexGetChar i of
 925                         Just ('#',i@(end,_)) -> do
 926                                 setInput i
 927                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
 928                         _other ->
 929                                 return (L (mkSrcSpan loc end) (ITchar ch))
 930                 else do
 931                    return (L (mkSrcSpan loc end) (ITchar ch))
 932
 933 lex_char :: P Char
 934 lex_char = do
 935   mc <- getCharOrFail
 936   case mc of
 937       '\\' -> lex_escape
 938       c | is_any c -> return c
 939       _other -> lit_error
 940
 941 lex_escape :: P Char
 942 lex_escape = do
 943   c <- getCharOrFail
 944   case c of
 945         'a'   -> return '\a'
 946         'b'   -> return '\b'
 947         'f'   -> return '\f'
 948         'n'   -> return '\n'
 949         'r'   -> return '\r'
 950         't'   -> return '\t'
 951         'v'   -> return '\v'
 952         '\\'  -> return '\\'
 953         '"'   -> return '\"'
 954         '\''  -> return '\''
 955         '^'   -> do c <- getCharOrFail
 956                     if c >= '@' && c <= '_'
 957                         then return (chr (ord c - ord '@'))
 958                         else lit_error
 959
 960         'x'   -> readNum is_hexdigit 16 hex
 961         'o'   -> readNum is_octdigit  8 oct_or_dec
 962         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 963
 964         c1 ->  do
 965            i <- getInput
 966            case alexGetChar i of
 967             Nothing -> lit_error
 968             Just (c2,i2) ->
 969               case alexGetChar i2 of
 970                 Nothing -> lit_error
 971                 Just (c3,i3) ->
 972                    let str = [c1,c2,c3] in
 973                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 974                                      Just rest <- [maybePrefixMatch p str] ] of
 975                           (escape_char,[]):_ -> do
 976                                 setInput i3
 977                                 return escape_char
 978                           (escape_char,_:_):_ -> do
 979                                 setInput i2
 980                                 return escape_char
 981                           [] -> lit_error
 982
 983 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 984 readNum is_digit base conv = do
 985   c <- getCharOrFail
 986   if is_digit c
 987         then readNum2 is_digit base conv (conv c)
 988         else lit_error
 989
 990 readNum2 is_digit base conv i = do
 991   input <- getInput
 992   read i input
 993   where read i input = do
 994           case alexGetChar input of
 995             Just (c,input') | is_digit c -> do
 996                 read (i*base + conv c) input'
 997             _other -> do
 998                 setInput input
 999                 if i >= 0 && i <= 0x10FFFF
1000                    then return (chr i)
1001                    else lit_error
1002
1003 is_hexdigit c
1004         =  is_digit c
1005         || (c >= 'a' && c <= 'f')
1006         || (c >= 'A' && c <= 'F')
1007
1008 hex c | is_digit c = ord c - ord '0'
1009       | otherwise  = ord (to_lower c) - ord 'a' + 10
1010
1011 oct_or_dec c = ord c - ord '0'
1012
1013 is_octdigit c = c >= '0' && c <= '7'
1014
1015 to_lower c
1016   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
1017   | otherwise = c
1018
1019 silly_escape_chars = [
1020         ("NUL", '\NUL'),
1021         ("SOH", '\SOH'),
1022         ("STX", '\STX'),
1023         ("ETX", '\ETX'),
1024         ("EOT", '\EOT'),
1025         ("ENQ", '\ENQ'),
1026         ("ACK", '\ACK'),
1027         ("BEL", '\BEL'),
1028         ("BS", '\BS'),
1029         ("HT", '\HT'),
1030         ("LF", '\LF'),
1031         ("VT", '\VT'),
1032         ("FF", '\FF'),
1033         ("CR", '\CR'),
1034         ("SO", '\SO'),
1035         ("SI", '\SI'),
1036         ("DLE", '\DLE'),
1037         ("DC1", '\DC1'),
1038         ("DC2", '\DC2'),
1039         ("DC3", '\DC3'),
1040         ("DC4", '\DC4'),
1041         ("NAK", '\NAK'),
1042         ("SYN", '\SYN'),
1043         ("ETB", '\ETB'),
1044         ("CAN", '\CAN'),
1045         ("EM", '\EM'),
1046         ("SUB", '\SUB'),
1047         ("ESC", '\ESC'),
1048         ("FS", '\FS'),
1049         ("GS", '\GS'),
1050         ("RS", '\RS'),
1051         ("US", '\US'),
1052         ("SP", '\SP'),
1053         ("DEL", '\DEL')
1054         ]
1055
1056 lit_error = lexError "lexical error in string/character literal"
1057
1058 getCharOrFail :: P Char
1059 getCharOrFail =  do
1060   i <- getInput
1061   case alexGetChar i of
1062         Nothing -> lexError "unexpected end-of-file in string/character literal"
1063         Just (c,i)  -> do setInput i; return c
1064
1065 -- -----------------------------------------------------------------------------
1066 -- Floats
1067
1068 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1069 readRational r = do
1070      (n,d,s) <- readFix r
1071      (k,t)   <- readExp s
1072      return ((n%1)*10^^(k-d), t)
1073  where
1074      readFix r = do
1075         (ds,s)  <- lexDecDigits r
1076         (ds',t) <- lexDotDigits s
1077         return (read (ds++ds'), length ds', t)
1078
1079      readExp (e:s) | e `elem` "eE" = readExp' s
1080      readExp s                     = return (0,s)
1081
1082      readExp' ('+':s) = readDec s
1083      readExp' ('-':s) = do
1084                         (k,t) <- readDec s
1085                         return (-k,t)
1086      readExp' s       = readDec s
1087
1088      readDec s = do
1089         (ds,r) <- nonnull isDigit s
1090         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1091                 r)
1092
1093      lexDecDigits = nonnull isDigit
1094
1095      lexDotDigits ('.':s) = return (span isDigit s)
1096      lexDotDigits s       = return ("",s)
1097
1098      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1099                       return (cs,t)
1100
1101 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1102 readRational__ top_s
1103   = case top_s of
1104       '-' : xs -> - (read_me xs)
1105       xs       -> read_me xs
1106   where
1107     read_me s
1108       = case (do { (x,"") <- readRational s ; return x }) of
1109           [x] -> x
1110           []  -> error ("readRational__: no parse:"        ++ top_s)
1111           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1112
1113 -- -----------------------------------------------------------------------------
1114 -- The Parse Monad
1115
1116 data LayoutContext
1117   = NoLayout
1118   | Layout !Int
1119
1120 data ParseResult a
1121   = POk PState a
1122   | PFailed
1123         SrcSpan         -- The start and end of the text span related to
1124                         -- the error.  Might be used in environments which can
1125                         -- show this span, e.g. by highlighting it.
1126         Message         -- The error message
1127
1128 data PState = PState {
1129         buffer     :: StringBuffer,
1130         last_loc   :: SrcSpan,  -- pos of previous token
1131         last_len   :: !Int,     -- len of previous token
1132         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1133         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1134         context    :: [LayoutContext],
1135         lex_state  :: [Int]
1136      }
1137         -- last_loc and last_len are used when generating error messages,
1138         -- and in pushCurrentContext only.
1139
1140 newtype P a = P { unP :: PState -> ParseResult a }
1141
1142 instance Monad P where
1143   return = returnP
1144   (>>=) = thenP
1145   fail = failP
1146
1147 returnP :: a -> P a
1148 returnP a = P $ \s -> POk s a
1149
1150 thenP :: P a -> (a -> P b) -> P b
1151 (P m) `thenP` k = P $ \ s ->
1152         case m s of
1153                 POk s1 a         -> (unP (k a)) s1
1154                 PFailed span err -> PFailed span err
1155
1156 failP :: String -> P a
1157 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1158
1159 failMsgP :: String -> P a
1160 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1161
1162 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1163 failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
1164
1165 failSpanMsgP :: SrcSpan -> String -> P a
1166 failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
1167
1168 extension :: (Int -> Bool) -> P Bool
1169 extension p = P $ \s -> POk s (p $! extsBitmap s)
1170
1171 getExts :: P Int
1172 getExts = P $ \s -> POk s (extsBitmap s)
1173
1174 setSrcLoc :: SrcLoc -> P ()
1175 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1176
1177 getSrcLoc :: P SrcLoc
1178 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1179
1180 setLastToken :: SrcSpan -> Int -> P ()
1181 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1182
1183 type AlexInput = (SrcLoc,StringBuffer)
1184
1185 alexInputPrevChar :: AlexInput -> Char
1186 alexInputPrevChar (_,s) = prevChar s '\n'
1187
1188 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1189 alexGetChar (loc,s)
1190   | atEnd s   = Nothing
1191   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1192   where c = currentChar s
1193         loc' = advanceSrcLoc loc c
1194         s'   = stepOn s
1195
1196 getInput :: P AlexInput
1197 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1198
1199 setInput :: AlexInput -> P ()
1200 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1201
1202 pushLexState :: Int -> P ()
1203 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1204
1205 popLexState :: P Int
1206 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1207
1208 getLexState :: P Int
1209 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1210
1211 -- for reasons of efficiency, flags indicating language extensions (eg,
1212 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1213 -- integer
1214
1215 glaExtsBit, ffiBit, parrBit :: Int
1216 glaExtsBit = 0
1217 ffiBit     = 1
1218 parrBit    = 2
1219 arrowsBit  = 4
1220 thBit      = 5
1221 ipBit      = 6
1222
1223 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1224 glaExtsEnabled flags = testBit flags glaExtsBit
1225 ffiEnabled     flags = testBit flags ffiBit
1226 parrEnabled    flags = testBit flags parrBit
1227 arrowsEnabled  flags = testBit flags arrowsBit
1228 thEnabled      flags = testBit flags thBit
1229 ipEnabled      flags = testBit flags ipBit
1230
1231 -- create a parse state
1232 --
1233 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1234 mkPState buf loc flags  =
1235   PState {
1236       buffer     = buf,
1237       last_loc   = mkSrcSpan loc loc,
1238       last_len   = 0,
1239       loc        = loc,
1240       extsBitmap = fromIntegral bitmap,
1241       context    = [],
1242       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1243         -- we begin in the layout state if toplev_layout is set
1244     }
1245     where
1246       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1247                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1248                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1249                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1250                .|. thBit      `setBitIf` dopt Opt_TH          flags
1251                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1252       --
1253       setBitIf :: Int -> Bool -> Int
1254       b `setBitIf` cond | cond      = bit b
1255                         | otherwise = 0
1256
1257 getContext :: P [LayoutContext]
1258 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1259
1260 setContext :: [LayoutContext] -> P ()
1261 setContext ctx = P $ \s -> POk s{context=ctx} ()
1262
1263 popContext :: P ()
1264 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1265                            loc = loc, last_len = len, last_loc = last_loc }) ->
1266   case ctx of
1267         (_:tl) -> POk s{ context = tl } ()
1268         []     -> PFailed last_loc (srcParseErr buf len)
1269
1270 -- Push a new layout context at the indentation of the last token read.
1271 -- This is only used at the outer level of a module when the 'module'
1272 -- keyword is missing.
1273 pushCurrentContext :: P ()
1274 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1275   POk s{ context = Layout (srcSpanStartCol loc) : ctx} ()
1276
1277 getOffside :: SrcLoc -> P Ordering
1278 getOffside loc = P $ \s@PState{context=stk} ->
1279                 let ord = case stk of
1280                         (Layout n:_) -> compare (srcLocCol loc) n
1281                         _            -> GT
1282                 in POk s ord
1283
1284 -- ---------------------------------------------------------------------------
1285 -- Construct a parse error
1286
1287 srcParseErr
1288   :: StringBuffer       -- current buffer (placed just after the last token)
1289   -> Int                -- length of the previous token
1290   -> Message
1291 srcParseErr buf len
1292   = hcat [ if null token
1293              then ptext SLIT("parse error (possibly incorrect indentation)")
1294              else hcat [ptext SLIT("parse error on input "),
1295                         char '`', text token, char '\'']
1296     ]
1297   where token = lexemeToString (stepOnBy (-len) buf) len
1298
1299 -- Report a parse failure, giving the span of the previous token as
1300 -- the location of the error.  This is the entry point for errors
1301 -- detected during parsing.
1302 srcParseFail :: P a
1303 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1304                                 last_loc = last_loc, loc = loc } ->
1305     PFailed last_loc (srcParseErr buf len)
1306
1307 -- A lexical error is reported at a particular position in the source file,
1308 -- not over a token range.  TODO: this is slightly wrong, because we record
1309 -- the error at the character position following the one which caused the
1310 -- error.  We should somehow back up by one character.
1311 lexError :: String -> P a
1312 lexError str = do
1313   loc <- getSrcLoc
1314   i@(end,_) <- getInput
1315   failLocMsgP loc end str
1316
1317 -- -----------------------------------------------------------------------------
1318 -- This is the top-level function: called from the parser each time a
1319 -- new token is to be read from the input.
1320
1321 lexer :: (Located Token -> P a) -> P a
1322 lexer cont = do
1323   tok@(L _ tok__) <- lexToken
1324   --trace ("token: " ++ show tok__) $ do
1325   cont tok
1326
1327 lexToken :: P (Located Token)
1328 lexToken = do
1329   inp@(loc1,buf) <- getInput
1330   sc <- getLexState
1331   exts <- getExts
1332   case alexScanUser exts inp sc of
1333     AlexEOF -> do let span = mkSrcSpan loc1 loc1
1334                   setLastToken span 0
1335                   return (L span ITeof)
1336     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1337     AlexSkip inp2 _ -> do
1338         setInput inp2
1339         lexToken
1340     AlexToken inp2@(end,buf2) len t -> do
1341         setInput inp2
1342         let span = mkSrcSpan loc1 end
1343         span `seq` setLastToken span len
1344         t span buf len
1345 }