ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), lexer, mkPState,
  26    P(..), ParseResult(..), getSrcLoc,
  27    failMsgP, failLocMsgP, failSpanMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ErrUtils         ( Message )
  34 import Outputable
  35 import StringBuffer
  36 import FastString
  37 import FastTypes
  38 import SrcLoc
  39 import UniqFM
  40 import CmdLineOpts
  41 import Ctype
  42 import Util             ( maybePrefixMatch )
  43
  44 import DATA_BITS
  45 import Char
  46 import Ratio
  47 --import TRACE
  48 }
  49
  50 $whitechar   = [\ \t\n\r\f\v\xa0]
  51 $white_no_nl = $whitechar # \n
  52
  53 $ascdigit  = 0-9
  54 $unidigit  = \x01
  55 $digit     = [$ascdigit $unidigit]
  56
  57 $special   = [\(\)\,\;\[\]\`\{\}]
  58 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  59 $unisymbol = \x02
  60 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  61
  62 $unilarge  = \x03
  63 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  64 $large     = [$asclarge $unilarge]
  65
  66 $unismall  = \x04
  67 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  68 $small     = [$ascsmall $unismall \_]
  69
  70 $graphic   = [$small $large $symbol $digit $special \:\"\']
  71
  72 $octit     = 0-7
  73 $hexit     = [$digit A-F a-f]
  74 $symchar   = [$symbol \:]
  75 $nl        = [\n\r]
  76 $idchar    = [$small $large $digit \']
  77
  78 @varid     = $small $idchar*
  79 @conid     = $large $idchar*
  80
  81 @varsym    = $symbol $symchar*
  82 @consym    = \: $symchar*
  83
  84 @decimal     = $digit+
  85 @octal       = $octit+
  86 @hexadecimal = $hexit+
  87 @exponent    = [eE] [\-\+]? @decimal
  88
  89 -- we support the hierarchical module name extension:
  90 @qual = (@conid \.)+
  91
  92 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  93
  94 haskell :-
  95
  96 -- everywhere: skip whitespace and comments
  97 $white_no_nl+                           ;
  98
  99 -- Everywhere: deal with nested comments.  We explicitly rule out
 100 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 101 -- (this can happen even though pragmas will normally take precedence due to
 102 -- longest-match, because pragmas aren't valid in every state, but comments
 103 -- are).
 104 "{-" / { notFollowedBy '#' }            { nested_comment }
 105
 106 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 107 -- more dashes followed by a symbol should be parsed as a varsym, so we
 108 -- have to exclude those.
 109 -- The regex says: "munch all the characters after the dashes, as long as
 110 -- the first one is not a symbol".
 111 "--"\-* ([^$symbol] .*)?                ;
 112
 113 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 114 -- blank lines) until we find a non-whitespace character, then do layout
 115 -- processing.
 116 --
 117 -- One slight wibble here: what if the line begins with {-#? In
 118 -- theory, we have to lex the pragma to see if it's one we recognise,
 119 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 120 -- as a nested comment.  We don't bother with this: if the line begins
 121 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 122 <bol> {
 123   \n                                    ;
 124   ^\# (line)?                           { begin line_prag1 }
 125   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 126   ()                                    { do_bol }
 127 }
 128
 129 -- after a layout keyword (let, where, do, of), we begin a new layout
 130 -- context if the curly brace is missing.
 131 -- Careful! This stuff is quite delicate.
 132 <layout, layout_do> {
 133   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 134         -- we might encounter {-# here, but {- has been handled already
 135   \n                                    ;
 136   ^\# (line)?                           { begin line_prag1 }
 137 }
 138
 139 -- do is treated in a subtly different way, see new_layout_context
 140 <layout>    ()                          { new_layout_context True }
 141 <layout_do> ()                          { new_layout_context False }
 142
 143 -- after a new layout context which was found to be to the left of the
 144 -- previous context, we have generated a '{' token, and we now need to
 145 -- generate a matching '}' token.
 146 <layout_left>  ()                       { do_layout_left }
 147
 148 <0,glaexts> \n                          { begin bol }
 149
 150 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 151
 152 -- single-line line pragmas, of the form
 153 --    # <line> "<file>" <extra-stuff> \n
 154 <line_prag1> $digit+                    { set_line line_prag1a }
 155 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 156 <line_prag1b> .*                        { pop }
 157
 158 -- Haskell-style line pragmas, of the form
 159 --    {-# LINE <line> "<file>" #-}
 160 <line_prag2> $digit+                    { set_line line_prag2a }
 161 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 162 <line_prag2b> "#-}"                     { pop }
 163
 164 <0,glaexts> {
 165   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 166                                         { token ITspecialise_prag }
 167   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 168   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 169   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 170                                         { token ITnoinline_prag }
 171   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 172   "{-#" $whitechar* (DEPRECATED|deprecated)
 173                                         { token ITdeprecated_prag }
 174   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 175   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 176   "{-#" $whitechar* (UNPACK|unpack)     { token ITunpack_prag }
 177
 178   "{-#"                                 { nested_comment }
 179
 180   -- ToDo: should only be valid inside a pragma:
 181   "#-}"                                 { token ITclose_prag}
 182 }
 183
 184
 185 -- '0' state: ordinary lexemes
 186 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 187
 188 -- "special" symbols
 189
 190 <0,glaexts> {
 191   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 192   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 193 }
 194
 195 <0,glaexts> {
 196   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 197   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 198   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 199   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 200   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 201   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 202   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 203   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 204 }
 205
 206 <0,glaexts> {
 207   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 208                                         { special IToparenbar }
 209   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 210 }
 211
 212 <0,glaexts> {
 213   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 214   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 215 }
 216
 217 <glaexts> {
 218   "(#" / { notFollowedBySymbol }        { token IToubxparen }
 219   "#)"                                  { token ITcubxparen }
 220   "{|"                                  { token ITocurlybar }
 221   "|}"                                  { token ITccurlybar }
 222 }
 223
 224 <0,glaexts> {
 225   \(                                    { special IToparen }
 226   \)                                    { special ITcparen }
 227   \[                                    { special ITobrack }
 228   \]                                    { special ITcbrack }
 229   \,                                    { special ITcomma }
 230   \;                                    { special ITsemi }
 231   \`                                    { special ITbackquote }
 232
 233   \{                                    { open_brace }
 234   \}                                    { close_brace }
 235 }
 236
 237 <0,glaexts> {
 238   @qual @varid                  { check_qvarid }
 239   @qual @conid                  { idtoken qconid }
 240   @varid                        { varid }
 241   @conid                        { idtoken conid }
 242 }
 243
 244 -- after an illegal qvarid, such as 'M.let',
 245 -- we back up and try again in the bad_qvarid state:
 246 <bad_qvarid> {
 247   @conid                        { pop_and (idtoken conid) }
 248   @qual @conid                  { pop_and (idtoken qconid) }
 249 }
 250
 251 <glaexts> {
 252   @qual @varid "#"+             { idtoken qvarid }
 253   @qual @conid "#"+             { idtoken qconid }
 254   @varid "#"+                   { varid }
 255   @conid "#"+                   { idtoken conid }
 256 }
 257
 258 -- ToDo: M.(,,,)
 259
 260 <0,glaexts> {
 261   @qual @varsym                 { idtoken qvarsym }
 262   @qual @consym                 { idtoken qconsym }
 263   @varsym                       { varsym }
 264   @consym                       { consym }
 265 }
 266
 267 <0,glaexts> {
 268   @decimal                      { tok_decimal }
 269   0[oO] @octal                  { tok_octal }
 270   0[xX] @hexadecimal            { tok_hexadecimal }
 271 }
 272
 273 <glaexts> {
 274   @decimal \#                   { prim_decimal }
 275   0[oO] @octal \#               { prim_octal }
 276   0[xX] @hexadecimal \#         { prim_hexadecimal }
 277 }
 278
 279 <0,glaexts> @floating_point             { strtoken tok_float }
 280 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 281 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 282
 283 -- Strings and chars are lexed by hand-written code.  The reason is
 284 -- that even if we recognise the string or char here in the regex
 285 -- lexer, we would still have to parse the string afterward in order
 286 -- to convert it to a String.
 287 <0,glaexts> {
 288   \'                            { lex_char_tok }
 289   \"                            { lex_string_tok }
 290 }
 291
 292 {
 293 -- work around bug in Alex 2.0
 294 #if __GLASGOW_HASKELL__ < 503
 295 unsafeAt arr i = arr ! i
 296 #endif
 297
 298 -- -----------------------------------------------------------------------------
 299 -- The token type
 300
 301 data Token
 302   = ITas                        -- Haskell keywords
 303   | ITcase
 304   | ITclass
 305   | ITdata
 306   | ITdefault
 307   | ITderiving
 308   | ITdo
 309   | ITelse
 310   | IThiding
 311   | ITif
 312   | ITimport
 313   | ITin
 314   | ITinfix
 315   | ITinfixl
 316   | ITinfixr
 317   | ITinstance
 318   | ITlet
 319   | ITmodule
 320   | ITnewtype
 321   | ITof
 322   | ITqualified
 323   | ITthen
 324   | ITtype
 325   | ITwhere
 326   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 327
 328   | ITforall                    -- GHC extension keywords
 329   | ITforeign
 330   | ITexport
 331   | ITlabel
 332   | ITdynamic
 333   | ITsafe
 334   | ITthreadsafe
 335   | ITunsafe
 336   | ITstdcallconv
 337   | ITccallconv
 338   | ITdotnet
 339   | ITmdo
 340
 341   | ITspecialise_prag           -- Pragmas
 342   | ITsource_prag
 343   | ITinline_prag
 344   | ITnoinline_prag
 345   | ITrules_prag
 346   | ITdeprecated_prag
 347   | ITline_prag
 348   | ITscc_prag
 349   | ITcore_prag                 -- hdaume: core annotations
 350   | ITunpack_prag
 351   | ITclose_prag
 352
 353   | ITdotdot                    -- reserved symbols
 354   | ITcolon
 355   | ITdcolon
 356   | ITequal
 357   | ITlam
 358   | ITvbar
 359   | ITlarrow
 360   | ITrarrow
 361   | ITat
 362   | ITtilde
 363   | ITdarrow
 364   | ITminus
 365   | ITbang
 366   | ITstar
 367   | ITdot
 368
 369   | ITbiglam                    -- GHC-extension symbols
 370
 371   | ITocurly                    -- special symbols
 372   | ITccurly
 373   | ITocurlybar                 -- {|, for type applications
 374   | ITccurlybar                 -- |}, for type applications
 375   | ITvocurly
 376   | ITvccurly
 377   | ITobrack
 378   | ITopabrack                  -- [:, for parallel arrays with -fparr
 379   | ITcpabrack                  -- :], for parallel arrays with -fparr
 380   | ITcbrack
 381   | IToparen
 382   | ITcparen
 383   | IToubxparen
 384   | ITcubxparen
 385   | ITsemi
 386   | ITcomma
 387   | ITunderscore
 388   | ITbackquote
 389
 390   | ITvarid   FastString        -- identifiers
 391   | ITconid   FastString
 392   | ITvarsym  FastString
 393   | ITconsym  FastString
 394   | ITqvarid  (FastString,FastString)
 395   | ITqconid  (FastString,FastString)
 396   | ITqvarsym (FastString,FastString)
 397   | ITqconsym (FastString,FastString)
 398
 399   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 400   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 401
 402   | ITpragma StringBuffer
 403
 404   | ITchar       Char
 405   | ITstring     FastString
 406   | ITinteger    Integer
 407   | ITrational   Rational
 408
 409   | ITprimchar   Char
 410   | ITprimstring FastString
 411   | ITprimint    Integer
 412   | ITprimfloat  Rational
 413   | ITprimdouble Rational
 414
 415   -- MetaHaskell extension tokens
 416   | ITopenExpQuote              -- [| or [e|
 417   | ITopenPatQuote              -- [p|
 418   | ITopenDecQuote              -- [d|
 419   | ITopenTypQuote              -- [t|
 420   | ITcloseQuote                -- |]
 421   | ITidEscape   FastString     -- $x
 422   | ITparenEscape               -- $(
 423   | ITvarQuote                  -- '
 424   | ITtyQuote                   -- ''
 425
 426   -- Arrow notation extension
 427   | ITproc
 428   | ITrec
 429   | IToparenbar                 -- (|
 430   | ITcparenbar                 -- |)
 431   | ITlarrowtail                -- -<
 432   | ITrarrowtail                -- >-
 433   | ITLarrowtail                -- -<<
 434   | ITRarrowtail                -- >>-
 435
 436   | ITunknown String            -- Used when the lexer can't make sense of it
 437   | ITeof                       -- end of file token
 438 #ifdef DEBUG
 439   deriving Show -- debugging
 440 #endif
 441
 442 isSpecial :: Token -> Bool
 443 -- If we see M.x, where x is a keyword, but
 444 -- is special, we treat is as just plain M.x,
 445 -- not as a keyword.
 446 isSpecial ITas          = True
 447 isSpecial IThiding      = True
 448 isSpecial ITqualified   = True
 449 isSpecial ITforall      = True
 450 isSpecial ITexport      = True
 451 isSpecial ITlabel       = True
 452 isSpecial ITdynamic     = True
 453 isSpecial ITsafe        = True
 454 isSpecial ITthreadsafe  = True
 455 isSpecial ITunsafe      = True
 456 isSpecial ITccallconv   = True
 457 isSpecial ITstdcallconv = True
 458 isSpecial ITmdo         = True
 459 isSpecial _             = False
 460
 461 -- the bitmap provided as the third component indicates whether the
 462 -- corresponding extension keyword is valid under the extension options
 463 -- provided to the compiler; if the extension corresponding to *any* of the
 464 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 465 -- facilitates using a keyword in two different extensions that can be
 466 -- activated independently)
 467 --
 468 reservedWordsFM = listToUFM $
 469         map (\(x, y, z) -> (mkFastString x, (y, z)))
 470        [( "_",          ITunderscore,   0 ),
 471         ( "as",         ITas,           0 ),
 472         ( "case",       ITcase,         0 ),
 473         ( "class",      ITclass,        0 ),
 474         ( "data",       ITdata,         0 ),
 475         ( "default",    ITdefault,      0 ),
 476         ( "deriving",   ITderiving,     0 ),
 477         ( "do",         ITdo,           0 ),
 478         ( "else",       ITelse,         0 ),
 479         ( "hiding",     IThiding,       0 ),
 480         ( "if",         ITif,           0 ),
 481         ( "import",     ITimport,       0 ),
 482         ( "in",         ITin,           0 ),
 483         ( "infix",      ITinfix,        0 ),
 484         ( "infixl",     ITinfixl,       0 ),
 485         ( "infixr",     ITinfixr,       0 ),
 486         ( "instance",   ITinstance,     0 ),
 487         ( "let",        ITlet,          0 ),
 488         ( "module",     ITmodule,       0 ),
 489         ( "newtype",    ITnewtype,      0 ),
 490         ( "of",         ITof,           0 ),
 491         ( "qualified",  ITqualified,    0 ),
 492         ( "then",       ITthen,         0 ),
 493         ( "type",       ITtype,         0 ),
 494         ( "where",      ITwhere,        0 ),
 495         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 496
 497         ( "forall",     ITforall,        bit glaExtsBit),
 498         ( "mdo",        ITmdo,           bit glaExtsBit),
 499
 500         ( "foreign",    ITforeign,       bit ffiBit),
 501         ( "export",     ITexport,        bit ffiBit),
 502         ( "label",      ITlabel,         bit ffiBit),
 503         ( "dynamic",    ITdynamic,       bit ffiBit),
 504         ( "safe",       ITsafe,          bit ffiBit),
 505         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 506         ( "unsafe",     ITunsafe,        bit ffiBit),
 507         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 508         ( "ccall",      ITccallconv,     bit ffiBit),
 509         ( "dotnet",     ITdotnet,        bit ffiBit),
 510
 511         ( "rec",        ITrec,           bit arrowsBit),
 512         ( "proc",       ITproc,          bit arrowsBit)
 513      ]
 514
 515 reservedSymsFM = listToUFM $
 516         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 517       [ ("..",  ITdotdot,       0)
 518        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 519                                                 -- meaning only list cons
 520        ,("::",  ITdcolon,       0)
 521        ,("=",   ITequal,        0)
 522        ,("\\",  ITlam,          0)
 523        ,("|",   ITvbar,         0)
 524        ,("<-",  ITlarrow,       0)
 525        ,("->",  ITrarrow,       0)
 526        ,("@",   ITat,           0)
 527        ,("~",   ITtilde,        0)
 528        ,("=>",  ITdarrow,       0)
 529        ,("-",   ITminus,        0)
 530        ,("!",   ITbang,         0)
 531
 532        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 533        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 534
 535        ,("-<",  ITlarrowtail,   bit arrowsBit)
 536        ,(">-",  ITrarrowtail,   bit arrowsBit)
 537        ,("-<<", ITLarrowtail,   bit arrowsBit)
 538        ,(">>-", ITRarrowtail,   bit arrowsBit)
 539        ]
 540
 541 -- -----------------------------------------------------------------------------
 542 -- Lexer actions
 543
 544 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 545
 546 special :: Token -> Action
 547 special tok span _buf len = return (L span tok)
 548
 549 token, layout_token :: Token -> Action
 550 token t span buf len = return (L span t)
 551 layout_token t span buf len = pushLexState layout >> return (L span t)
 552
 553 idtoken :: (StringBuffer -> Int -> Token) -> Action
 554 idtoken f span buf len = return (L span $! (f buf len))
 555
 556 skip_one_varid :: (FastString -> Token) -> Action
 557 skip_one_varid f span buf len
 558   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 559
 560 strtoken :: (String -> Token) -> Action
 561 strtoken f span buf len =
 562   return (L span $! (f $! lexemeToString buf len))
 563
 564 init_strtoken :: Int -> (String -> Token) -> Action
 565 -- like strtoken, but drops the last N character(s)
 566 init_strtoken drop f span buf len =
 567   return (L span $! (f $! lexemeToString buf (len-drop)))
 568
 569 begin :: Int -> Action
 570 begin code _span _str _len = do pushLexState code; lexToken
 571
 572 pop :: Action
 573 pop _span _buf _len = do popLexState; lexToken
 574
 575 pop_and :: Action -> Action
 576 pop_and act span buf len = do popLexState; act span buf len
 577
 578 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 579
 580 notFollowedBySymbol _ _ _ (_,buf)
 581   = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
 582
 583 ifExtension pred bits _ _ _ = pred bits
 584
 585 {-
 586   nested comments require traversing by hand, they can't be parsed
 587   using regular expressions.
 588 -}
 589 nested_comment :: Action
 590 nested_comment span _str _len = do
 591   input <- getInput
 592   go 1 input
 593   where go 0 input = do setInput input; lexToken
 594         go n input = do
 595           case alexGetChar input of
 596             Nothing  -> err input
 597             Just (c,input) -> do
 598               case c of
 599                 '-' -> do
 600                   case alexGetChar input of
 601                     Nothing  -> err input
 602                     Just ('\125',input) -> go (n-1) input
 603                     Just (c,_)          -> go n input
 604                 '\123' -> do
 605                   case alexGetChar input of
 606                     Nothing  -> err input
 607                     Just ('-',input') -> go (n+1) input'
 608                     Just (c,input)    -> go n input
 609                 c -> go n input
 610
 611         err input = do failLocMsgP (srcSpanStart span) (fst input)
 612                         "unterminated `{-'"
 613
 614 open_brace, close_brace :: Action
 615 open_brace span _str _len = do
 616   ctx <- getContext
 617   setContext (NoLayout:ctx)
 618   return (L span ITocurly)
 619 close_brace span _str _len = do
 620   popContext
 621   return (L span ITccurly)
 622
 623 -- We have to be careful not to count M.<varid> as a qualified name
 624 -- when <varid> is a keyword.  We hack around this by catching
 625 -- the offending tokens afterward, and re-lexing in a different state.
 626 check_qvarid span buf len = do
 627   case lookupUFM reservedWordsFM var of
 628         Just (keyword,exts)
 629           | not (isSpecial keyword) ->
 630           if exts == 0
 631              then try_again
 632              else do
 633                 b <- extension (\i -> exts .&. i /= 0)
 634                 if b then try_again
 635                      else return token
 636         _other -> return token
 637   where
 638         (mod,var) = splitQualName buf len
 639         token     = L span (ITqvarid (mod,var))
 640
 641         try_again = do
 642                 setInput (srcSpanStart span,buf)
 643                 pushLexState bad_qvarid
 644                 lexToken
 645
 646 qvarid buf len = ITqvarid $! splitQualName buf len
 647 qconid buf len = ITqconid $! splitQualName buf len
 648
 649 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 650 -- takes a StringBuffer and a length, and returns the module name
 651 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 652 -- because of hierarchical module names.
 653 splitQualName orig_buf len = split orig_buf 0 0
 654   where
 655     split buf dot_off n
 656         | n == len                = done dot_off
 657         | lookAhead buf n == '.'  = split2 buf n (n+1)
 658         | otherwise               = split buf dot_off (n+1)
 659
 660     -- careful, we might get names like M....
 661     -- so, if the character after the dot is not upper-case, this is
 662     -- the end of the qualifier part.
 663     split2 buf dot_off n
 664         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 665         | otherwise                 = done dot_off
 666
 667     done dot_off =
 668         (lexemeToFastString orig_buf dot_off,
 669          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 670
 671 varid span buf len =
 672   case lookupUFM reservedWordsFM fs of
 673         Just (keyword,0)    -> do
 674                 maybe_layout keyword
 675                 return (L span keyword)
 676         Just (keyword,exts) -> do
 677                 b <- extension (\i -> exts .&. i /= 0)
 678                 if b then do maybe_layout keyword
 679                              return (L span keyword)
 680                      else return (L span (ITvarid fs))
 681         _other -> return (L span (ITvarid fs))
 682   where
 683         fs = lexemeToFastString buf len
 684
 685 conid buf len = ITconid fs
 686   where fs = lexemeToFastString buf len
 687
 688 qvarsym buf len = ITqvarsym $! splitQualName buf len
 689 qconsym buf len = ITqconsym $! splitQualName buf len
 690
 691 varsym = sym ITvarsym
 692 consym = sym ITconsym
 693
 694 sym con span buf len =
 695   case lookupUFM reservedSymsFM fs of
 696         Just (keyword,0)    -> return (L span keyword)
 697         Just (keyword,exts) -> do
 698                 b <- extension (\i -> exts .&. i /= 0)
 699                 if b then return (L span keyword)
 700                      else return (L span $! con fs)
 701         _other -> return (L span $! con fs)
 702   where
 703         fs = lexemeToFastString buf len
 704
 705 tok_decimal span buf len
 706   = return (L span (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 707
 708 tok_octal span buf len
 709   = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 710
 711 tok_hexadecimal span buf len
 712   = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 713
 714 prim_decimal span buf len
 715   = return (L span (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 716
 717 prim_octal span buf len
 718   = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 719
 720 prim_hexadecimal span buf len
 721   = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 722
 723 tok_float        str = ITrational $! readRational__ str
 724 prim_float       str = ITprimfloat  $! readRational__ str
 725 prim_double      str = ITprimdouble $! readRational__ str
 726
 727 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 728 parseInteger buf len radix to_int
 729   = go 0 0
 730   where go i x | i == len  = x
 731                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 732
 733 -- -----------------------------------------------------------------------------
 734 -- Layout processing
 735
 736 -- we're at the first token on a line, insert layout tokens if necessary
 737 do_bol :: Action
 738 do_bol span _str _len = do
 739         pos <- getOffside (srcSpanEnd span)
 740         case pos of
 741             LT -> do
 742                 --trace "layout: inserting '}'" $ do
 743                 popContext
 744                 -- do NOT pop the lex state, we might have a ';' to insert
 745                 return (L span ITvccurly)
 746             EQ -> do
 747                 --trace "layout: inserting ';'" $ do
 748                 popLexState
 749                 return (L span ITsemi)
 750             GT -> do
 751                 popLexState
 752                 lexToken
 753
 754 -- certain keywords put us in the "layout" state, where we might
 755 -- add an opening curly brace.
 756 maybe_layout ITdo       = pushLexState layout_do
 757 maybe_layout ITmdo      = pushLexState layout_do
 758 maybe_layout ITof       = pushLexState layout
 759 maybe_layout ITlet      = pushLexState layout
 760 maybe_layout ITwhere    = pushLexState layout
 761 maybe_layout ITrec      = pushLexState layout
 762 maybe_layout _          = return ()
 763
 764 -- Pushing a new implicit layout context.  If the indentation of the
 765 -- next token is not greater than the previous layout context, then
 766 -- Haskell 98 says that the new layout context should be empty; that is
 767 -- the lexer must generate {}.
 768 --
 769 -- We are slightly more lenient than this: when the new context is started
 770 -- by a 'do', then we allow the new context to be at the same indentation as
 771 -- the previous context.  This is what the 'strict' argument is for.
 772 --
 773 new_layout_context strict span _buf _len = do
 774     popLexState
 775     let offset = srcSpanStartCol span
 776     ctx <- getContext
 777     case ctx of
 778         Layout prev_off : _  |
 779            (strict     && prev_off >= offset  ||
 780             not strict && prev_off > offset) -> do
 781                 -- token is indented to the left of the previous context.
 782                 -- we must generate a {} sequence now.
 783                 pushLexState layout_left
 784                 return (L span ITvocurly)
 785         other -> do
 786                 setContext (Layout offset : ctx)
 787                 return (L span ITvocurly)
 788
 789 do_layout_left span _buf _len = do
 790     popLexState
 791     pushLexState bol  -- we must be at the start of a line
 792     return (L span ITvccurly)
 793
 794 -- -----------------------------------------------------------------------------
 795 -- LINE pragmas
 796
 797 set_line :: Int -> Action
 798 set_line code span buf len = do
 799   let line = parseInteger buf len 10 oct_or_dec
 800   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
 801         -- subtract one: the line number refers to the *following* line
 802   popLexState
 803   pushLexState code
 804   lexToken
 805
 806 set_file :: Int -> Action
 807 set_file code span buf len = do
 808   let file = lexemeToFastString (stepOn buf) (len-2)
 809   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
 810   popLexState
 811   pushLexState code
 812   lexToken
 813
 814 -- -----------------------------------------------------------------------------
 815 -- Strings & Chars
 816
 817 -- This stuff is horrible.  I hates it.
 818
 819 lex_string_tok :: Action
 820 lex_string_tok span buf len = do
 821   tok <- lex_string ""
 822   end <- getSrcLoc
 823   return (L (mkSrcSpan (srcSpanStart span) end) tok)
 824
 825 lex_string :: String -> P Token
 826 lex_string s = do
 827   i <- getInput
 828   case alexGetChar i of
 829     Nothing -> lit_error
 830
 831     Just ('"',i)  -> do
 832         setInput i
 833         glaexts <- extension glaExtsEnabled
 834         if glaexts
 835           then do
 836             i <- getInput
 837             case alexGetChar i of
 838               Just ('#',i) -> do
 839                    setInput i
 840                    if any (> '\xFF') s
 841                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 842                     else let s' = mkFastStringNarrow (reverse s) in
 843                          -- always a narrow string/byte array
 844                          return (ITprimstring s')
 845               _other ->
 846                 return (ITstring (mkFastString (reverse s)))
 847           else
 848                 return (ITstring (mkFastString (reverse s)))
 849
 850     Just ('\\',i)
 851         | Just ('&',i) <- next -> do
 852                 setInput i; lex_string s
 853         | Just (c,i) <- next, is_space c -> do
 854                 setInput i; lex_stringgap s
 855         where next = alexGetChar i
 856
 857     Just _ -> do
 858         c <- lex_char
 859         lex_string (c:s)
 860
 861 lex_stringgap s = do
 862   c <- getCharOrFail
 863   case c of
 864     '\\' -> lex_string s
 865     c | is_space c -> lex_stringgap s
 866     _other -> lit_error
 867
 868
 869 lex_char_tok :: Action
 870 -- Here we are basically parsing character literals, such as 'x' or '\n'
 871 -- but, when Template Haskell is on, we additionally spot
 872 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
 873 -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
 874 -- So we have to do two characters of lookahead: when we see 'x we need to
 875 -- see if there's a trailing quote
 876 lex_char_tok span buf len = do  -- We've seen '
 877    i1 <- getInput       -- Look ahead to first character
 878    let loc = srcSpanStart span
 879    case alexGetChar i1 of
 880         Nothing -> lit_error
 881
 882         Just ('\'', i2@(end2,_)) -> do  -- We've seen ''
 883                   th_exts <- extension thEnabled
 884                   if th_exts then do
 885                         setInput i2
 886                         return (L (mkSrcSpan loc end2)  ITtyQuote)
 887                    else lit_error
 888
 889         Just ('\\', i2@(end2,_)) -> do  -- We've seen 'backslash
 890                   setInput i2
 891                   lit_ch <- lex_escape
 892                   mc <- getCharOrFail   -- Trailing quote
 893                   if mc == '\'' then finish_char_tok loc lit_ch
 894                                 else lit_error
 895
 896         Just (c, i2@(end2,_)) | not (is_any c) -> lit_error
 897                               | otherwise      ->
 898
 899                 -- We've seen 'x, where x is a valid character
 900                 --  (i.e. not newline etc) but not a quote or backslash
 901            case alexGetChar i2 of       -- Look ahead one more character
 902                 Nothing -> lit_error
 903                 Just ('\'', i3) -> do   -- We've seen 'x'
 904                         setInput i3
 905                         finish_char_tok loc c
 906                 _other -> do            -- We've seen 'x not followed by quote
 907                                         -- If TH is on, just parse the quote only
 908                         th_exts <- extension thEnabled
 909                         if th_exts then return (L (mkSrcSpan loc (fst i1)) ITvarQuote)
 910                                    else lit_error
 911
 912 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
 913 finish_char_tok loc ch  -- We've already seen the closing quote
 914                         -- Just need to check for trailing #
 915   = do  glaexts <- extension glaExtsEnabled
 916         i@(end,_) <- getInput
 917         if glaexts then do
 918                 case alexGetChar i of
 919                         Just ('#',i@(end,_)) -> do
 920                                 setInput i
 921                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
 922                         _other ->
 923                                 return (L (mkSrcSpan loc end) (ITchar ch))
 924                 else do
 925                    return (L (mkSrcSpan loc end) (ITchar ch))
 926
 927 lex_char :: P Char
 928 lex_char = do
 929   mc <- getCharOrFail
 930   case mc of
 931       '\\' -> lex_escape
 932       c | is_any c -> return c
 933       _other -> lit_error
 934
 935 lex_escape :: P Char
 936 lex_escape = do
 937   c <- getCharOrFail
 938   case c of
 939         'a'   -> return '\a'
 940         'b'   -> return '\b'
 941         'f'   -> return '\f'
 942         'n'   -> return '\n'
 943         'r'   -> return '\r'
 944         't'   -> return '\t'
 945         'v'   -> return '\v'
 946         '\\'  -> return '\\'
 947         '"'   -> return '\"'
 948         '\''  -> return '\''
 949         '^'   -> do c <- getCharOrFail
 950                     if c >= '@' && c <= '_'
 951                         then return (chr (ord c - ord '@'))
 952                         else lit_error
 953
 954         'x'   -> readNum is_hexdigit 16 hex
 955         'o'   -> readNum is_octdigit  8 oct_or_dec
 956         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 957
 958         c1 ->  do
 959            i <- getInput
 960            case alexGetChar i of
 961             Nothing -> lit_error
 962             Just (c2,i2) ->
 963               case alexGetChar i2 of
 964                 Nothing -> lit_error
 965                 Just (c3,i3) ->
 966                    let str = [c1,c2,c3] in
 967                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 968                                      Just rest <- [maybePrefixMatch p str] ] of
 969                           (escape_char,[]):_ -> do
 970                                 setInput i3
 971                                 return escape_char
 972                           (escape_char,_:_):_ -> do
 973                                 setInput i2
 974                                 return escape_char
 975                           [] -> lit_error
 976
 977 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 978 readNum is_digit base conv = do
 979   c <- getCharOrFail
 980   if is_digit c
 981         then readNum2 is_digit base conv (conv c)
 982         else lit_error
 983
 984 readNum2 is_digit base conv i = do
 985   input <- getInput
 986   read i input
 987   where read i input = do
 988           case alexGetChar input of
 989             Just (c,input') | is_digit c -> do
 990                 read (i*base + conv c) input'
 991             _other -> do
 992                 setInput input
 993                 if i >= 0 && i <= 0x10FFFF
 994                    then return (chr i)
 995                    else lit_error
 996
 997 is_hexdigit c
 998         =  is_digit c
 999         || (c >= 'a' && c <= 'f')
1000         || (c >= 'A' && c <= 'F')
1001
1002 hex c | is_digit c = ord c - ord '0'
1003       | otherwise  = ord (to_lower c) - ord 'a' + 10
1004
1005 oct_or_dec c = ord c - ord '0'
1006
1007 is_octdigit c = c >= '0' && c <= '7'
1008
1009 to_lower c
1010   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
1011   | otherwise = c
1012
1013 silly_escape_chars = [
1014         ("NUL", '\NUL'),
1015         ("SOH", '\SOH'),
1016         ("STX", '\STX'),
1017         ("ETX", '\ETX'),
1018         ("EOT", '\EOT'),
1019         ("ENQ", '\ENQ'),
1020         ("ACK", '\ACK'),
1021         ("BEL", '\BEL'),
1022         ("BS", '\BS'),
1023         ("HT", '\HT'),
1024         ("LF", '\LF'),
1025         ("VT", '\VT'),
1026         ("FF", '\FF'),
1027         ("CR", '\CR'),
1028         ("SO", '\SO'),
1029         ("SI", '\SI'),
1030         ("DLE", '\DLE'),
1031         ("DC1", '\DC1'),
1032         ("DC2", '\DC2'),
1033         ("DC3", '\DC3'),
1034         ("DC4", '\DC4'),
1035         ("NAK", '\NAK'),
1036         ("SYN", '\SYN'),
1037         ("ETB", '\ETB'),
1038         ("CAN", '\CAN'),
1039         ("EM", '\EM'),
1040         ("SUB", '\SUB'),
1041         ("ESC", '\ESC'),
1042         ("FS", '\FS'),
1043         ("GS", '\GS'),
1044         ("RS", '\RS'),
1045         ("US", '\US'),
1046         ("SP", '\SP'),
1047         ("DEL", '\DEL')
1048         ]
1049
1050 lit_error = lexError "lexical error in string/character literal"
1051
1052 getCharOrFail :: P Char
1053 getCharOrFail =  do
1054   i <- getInput
1055   case alexGetChar i of
1056         Nothing -> lexError "unexpected end-of-file in string/character literal"
1057         Just (c,i)  -> do setInput i; return c
1058
1059 -- -----------------------------------------------------------------------------
1060 -- Floats
1061
1062 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1063 readRational r = do
1064      (n,d,s) <- readFix r
1065      (k,t)   <- readExp s
1066      return ((n%1)*10^^(k-d), t)
1067  where
1068      readFix r = do
1069         (ds,s)  <- lexDecDigits r
1070         (ds',t) <- lexDotDigits s
1071         return (read (ds++ds'), length ds', t)
1072
1073      readExp (e:s) | e `elem` "eE" = readExp' s
1074      readExp s                     = return (0,s)
1075
1076      readExp' ('+':s) = readDec s
1077      readExp' ('-':s) = do
1078                         (k,t) <- readDec s
1079                         return (-k,t)
1080      readExp' s       = readDec s
1081
1082      readDec s = do
1083         (ds,r) <- nonnull isDigit s
1084         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1085                 r)
1086
1087      lexDecDigits = nonnull isDigit
1088
1089      lexDotDigits ('.':s) = return (span isDigit s)
1090      lexDotDigits s       = return ("",s)
1091
1092      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1093                       return (cs,t)
1094
1095 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1096 readRational__ top_s
1097   = case top_s of
1098       '-' : xs -> - (read_me xs)
1099       xs       -> read_me xs
1100   where
1101     read_me s
1102       = case (do { (x,"") <- readRational s ; return x }) of
1103           [x] -> x
1104           []  -> error ("readRational__: no parse:"        ++ top_s)
1105           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1106
1107 -- -----------------------------------------------------------------------------
1108 -- The Parse Monad
1109
1110 data LayoutContext
1111   = NoLayout
1112   | Layout !Int
1113
1114 data ParseResult a
1115   = POk PState a
1116   | PFailed
1117         SrcSpan         -- The start and end of the text span related to
1118                         -- the error.  Might be used in environments which can
1119                         -- show this span, e.g. by highlighting it.
1120         Message         -- The error message
1121
1122 data PState = PState {
1123         buffer     :: StringBuffer,
1124         last_loc   :: SrcSpan,  -- pos of previous token
1125         last_len   :: !Int,     -- len of previous token
1126         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1127         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1128         context    :: [LayoutContext],
1129         lex_state  :: [Int]
1130      }
1131         -- last_loc and last_len are used when generating error messages,
1132         -- and in pushCurrentContext only.
1133
1134 newtype P a = P { unP :: PState -> ParseResult a }
1135
1136 instance Monad P where
1137   return = returnP
1138   (>>=) = thenP
1139   fail = failP
1140
1141 returnP :: a -> P a
1142 returnP a = P $ \s -> POk s a
1143
1144 thenP :: P a -> (a -> P b) -> P b
1145 (P m) `thenP` k = P $ \ s ->
1146         case m s of
1147                 POk s1 a         -> (unP (k a)) s1
1148                 PFailed span err -> PFailed span err
1149
1150 failP :: String -> P a
1151 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1152
1153 failMsgP :: String -> P a
1154 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1155
1156 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1157 failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
1158
1159 failSpanMsgP :: SrcSpan -> String -> P a
1160 failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
1161
1162 extension :: (Int -> Bool) -> P Bool
1163 extension p = P $ \s -> POk s (p $! extsBitmap s)
1164
1165 getExts :: P Int
1166 getExts = P $ \s -> POk s (extsBitmap s)
1167
1168 setSrcLoc :: SrcLoc -> P ()
1169 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1170
1171 getSrcLoc :: P SrcLoc
1172 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1173
1174 setLastToken :: SrcSpan -> Int -> P ()
1175 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1176
1177 type AlexInput = (SrcLoc,StringBuffer)
1178
1179 alexInputPrevChar :: AlexInput -> Char
1180 alexInputPrevChar (_,s) = prevChar s '\n'
1181
1182 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1183 alexGetChar (loc,s)
1184   | atEnd s   = Nothing
1185   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1186   where c = currentChar s
1187         loc' = advanceSrcLoc loc c
1188         s'   = stepOn s
1189
1190 getInput :: P AlexInput
1191 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1192
1193 setInput :: AlexInput -> P ()
1194 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1195
1196 pushLexState :: Int -> P ()
1197 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1198
1199 popLexState :: P Int
1200 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1201
1202 getLexState :: P Int
1203 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1204
1205 -- for reasons of efficiency, flags indicating language extensions (eg,
1206 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1207 -- integer
1208
1209 glaExtsBit, ffiBit, parrBit :: Int
1210 glaExtsBit = 0
1211 ffiBit     = 1
1212 parrBit    = 2
1213 arrowsBit  = 4
1214 thBit      = 5
1215 ipBit      = 6
1216
1217 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1218 glaExtsEnabled flags = testBit flags glaExtsBit
1219 ffiEnabled     flags = testBit flags ffiBit
1220 parrEnabled    flags = testBit flags parrBit
1221 arrowsEnabled  flags = testBit flags arrowsBit
1222 thEnabled      flags = testBit flags thBit
1223 ipEnabled      flags = testBit flags ipBit
1224
1225 -- create a parse state
1226 --
1227 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1228 mkPState buf loc flags  =
1229   PState {
1230       buffer     = buf,
1231       last_loc   = mkSrcSpan loc loc,
1232       last_len   = 0,
1233       loc        = loc,
1234       extsBitmap = fromIntegral bitmap,
1235       context    = [],
1236       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1237         -- we begin in the layout state if toplev_layout is set
1238     }
1239     where
1240       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1241                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1242                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1243                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1244                .|. thBit      `setBitIf` dopt Opt_TH          flags
1245                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1246       --
1247       setBitIf :: Int -> Bool -> Int
1248       b `setBitIf` cond | cond      = bit b
1249                         | otherwise = 0
1250
1251 getContext :: P [LayoutContext]
1252 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1253
1254 setContext :: [LayoutContext] -> P ()
1255 setContext ctx = P $ \s -> POk s{context=ctx} ()
1256
1257 popContext :: P ()
1258 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1259                            loc = loc, last_len = len, last_loc = last_loc }) ->
1260   case ctx of
1261         (_:tl) -> POk s{ context = tl } ()
1262         []     -> PFailed last_loc (srcParseErr buf len)
1263
1264 -- Push a new layout context at the indentation of the last token read.
1265 -- This is only used at the outer level of a module when the 'module'
1266 -- keyword is missing.
1267 pushCurrentContext :: P ()
1268 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1269   POk s{ context = Layout (srcSpanStartCol loc) : ctx} ()
1270
1271 getOffside :: SrcLoc -> P Ordering
1272 getOffside loc = P $ \s@PState{context=stk} ->
1273                 let ord = case stk of
1274                         (Layout n:_) -> compare (srcLocCol loc) n
1275                         _            -> GT
1276                 in POk s ord
1277
1278 -- ---------------------------------------------------------------------------
1279 -- Construct a parse error
1280
1281 srcParseErr
1282   :: StringBuffer       -- current buffer (placed just after the last token)
1283   -> Int                -- length of the previous token
1284   -> Message
1285 srcParseErr buf len
1286   = hcat [ if null token
1287              then ptext SLIT("parse error (possibly incorrect indentation)")
1288              else hcat [ptext SLIT("parse error on input "),
1289                         char '`', text token, char '\'']
1290     ]
1291   where token = lexemeToString (stepOnBy (-len) buf) len
1292
1293 -- Report a parse failure, giving the span of the previous token as
1294 -- the location of the error.  This is the entry point for errors
1295 -- detected during parsing.
1296 srcParseFail :: P a
1297 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1298                                 last_loc = last_loc, loc = loc } ->
1299     PFailed last_loc (srcParseErr buf len)
1300
1301 -- A lexical error is reported at a particular position in the source file,
1302 -- not over a token range.  TODO: this is slightly wrong, because we record
1303 -- the error at the character position following the one which caused the
1304 -- error.  We should somehow back up by one character.
1305 lexError :: String -> P a
1306 lexError str = do
1307   loc <- getSrcLoc
1308   i@(end,_) <- getInput
1309   failLocMsgP loc end str
1310
1311 -- -----------------------------------------------------------------------------
1312 -- This is the top-level function: called from the parser each time a
1313 -- new token is to be read from the input.
1314
1315 lexer :: (Located Token -> P a) -> P a
1316 lexer cont = do
1317   tok@(L _ tok__) <- lexToken
1318   --trace ("token: " ++ show tok__) $ do
1319   cont tok
1320
1321 lexToken :: P (Located Token)
1322 lexToken = do
1323   inp@(loc1,buf) <- getInput
1324   sc <- getLexState
1325   exts <- getExts
1326   case alexScanUser exts inp sc of
1327     AlexEOF -> do let span = mkSrcSpan loc1 loc1
1328                   setLastToken span 0
1329                   return (L span ITeof)
1330     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1331     AlexSkip inp2 _ -> do
1332         setInput inp2
1333         lexToken
1334     AlexToken inp2@(end,buf2) len t -> do
1335         setInput inp2
1336         let span = mkSrcSpan loc1 end
1337         span `seq` setLastToken span len
1338         t span buf len
1339 }