ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ForeignCall      ( Safety(..) )
  34 import ErrUtils         ( Message )
  35 import Outputable
  36 import StringBuffer
  37 import FastString
  38 import FastTypes
  39 import SrcLoc
  40 import UniqFM
  41 import CmdLineOpts
  42 import Ctype
  43 import Util             ( maybePrefixMatch )
  44
  45 import DATA_BITS
  46 import Char
  47 import Ratio
  48 import TRACE
  49 }
  50
  51 $whitechar   = [\ \t\n\r\f\v]
  52 $white_no_nl = $whitechar # \n
  53
  54 $ascdigit  = 0-9
  55 $unidigit  = \x01
  56 $digit     = [$ascdigit $unidigit]
  57
  58 $special   = [\(\)\,\;\[\]\`\{\}]
  59 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  60 $unisymbol = \x02
  61 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  62
  63 $unilarge  = \x03
  64 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  65 $large     = [$asclarge $unilarge]
  66
  67 $unismall  = \x04
  68 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  69 $small     = [$ascsmall $unismall \_]
  70
  71 $graphic   = [$small $large $symbol $digit $special \:\"\']
  72
  73 $octit     = 0-7
  74 $hexit     = [$digit A-F a-f]
  75 $symchar   = [$symbol \:]
  76 $nl        = [\n\r]
  77 $idchar    = [$small $large $digit \']
  78
  79 @varid     = $small $idchar*
  80 @conid     = $large $idchar*
  81
  82 @varsym    = $symbol $symchar*
  83 @consym    = \: $symchar*
  84
  85 @decimal     = $digit+
  86 @octal       = $octit+
  87 @hexadecimal = $hexit+
  88 @exponent    = [eE] [\-\+]? @decimal
  89
  90 -- we support the hierarchical module name extension:
  91 @qual = (@conid \.)+
  92
  93 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  94
  95 haskell :-
  96
  97 -- everywhere: skip whitespace and comments
  98 $white_no_nl+                           ;
  99
 100 -- Everywhere: deal with nested comments.  We explicitly rule out
 101 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 102 -- (this can happen even though pragmas will normally take precedence due to
 103 -- longest-match, because pragmas aren't valid in every state, but comments
 104 -- are).
 105 "{-" / { notFollowedBy '#' }            { nested_comment }
 106
 107 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 108 -- more dashes followed by a symbol should be parsed as a varsym, so we
 109 -- have to exclude those.
 110 -- The regex says: "munch all the characters after the dashes, as long as
 111 -- the first one is not a symbol".
 112 "--"\-* ([^$symbol] .*)?                ;
 113
 114 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 115 -- blank lines) until we find a non-whitespace character, then do layout
 116 -- processing.
 117 --
 118 -- One slight wibble here: what if the line begins with {-#? In
 119 -- theory, we have to lex the pragma to see if it's one we recognise,
 120 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 121 -- as a nested comment.  We don't bother with this: if the line begins
 122 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 123 <bol> {
 124   \n                                    ;
 125   ^\# (line)?                           { begin line_prag1 }
 126   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 127   ()                                    { do_bol }
 128 }
 129
 130 -- after a layout keyword (let, where, do, of), we begin a new layout
 131 -- context if the curly brace is missing.
 132 -- Careful! This stuff is quite delicate.
 133 <layout, layout_do> {
 134   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 135         -- we might encounter {-# here, but {- has been handled already
 136   \n                                    ;
 137   ^\# (line)?                           { begin line_prag1 }
 138 }
 139
 140 -- do is treated in a subtly different way, see new_layout_context
 141 <layout>    ()                          { new_layout_context True }
 142 <layout_do> ()                          { new_layout_context False }
 143
 144 -- after a new layout context which was found to be to the left of the
 145 -- previous context, we have generated a '{' token, and we now need to
 146 -- generate a matching '}' token.
 147 <layout_left>  ()                       { do_layout_left }
 148
 149 <0,glaexts> \n                          { begin bol }
 150
 151 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 152
 153 -- single-line line pragmas, of the form
 154 --    # <line> "<file>" <extra-stuff> \n
 155 <line_prag1> $digit+                    { set_line line_prag1a }
 156 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 157 <line_prag1b> .*                        { pop }
 158
 159 -- Haskell-style line pragmas, of the form
 160 --    {-# LINE <line> "<file>" #-}
 161 <line_prag2> $digit+                    { set_line line_prag2a }
 162 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 163 <line_prag2b> "#-}"                     { pop }
 164
 165 <0,glaexts> {
 166   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 167                                         { token ITspecialise_prag }
 168   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 169   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 170   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 171                                         { token ITnoinline_prag }
 172   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 173   "{-#" $whitechar* (DEPRECATED|deprecated)
 174                                         { token ITdeprecated_prag }
 175   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 176   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 177
 178   "{-#"                                 { nested_comment }
 179
 180   -- ToDo: should only be valid inside a pragma:
 181   "#-}"                                 { token ITclose_prag}
 182 }
 183
 184
 185 -- '0' state: ordinary lexemes
 186 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 187
 188 -- "special" symbols
 189
 190 <0,glaexts> {
 191   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 192   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 193 }
 194
 195 <0,glaexts> {
 196   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 197   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 198   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 199   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 200   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 201   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 202   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 203   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 204 }
 205
 206 <0,glaexts> {
 207   "(|" / { ifExtension arrowsEnabled }  { special IToparenbar }
 208   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 209 }
 210
 211 <0,glaexts> {
 212   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 213   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 214 }
 215
 216 <glaexts> {
 217   "(#"                                  { token IToubxparen }
 218   "#)"                                  { token ITcubxparen }
 219   "{|"                                  { token ITocurlybar }
 220   "|}"                                  { token ITccurlybar }
 221 }
 222
 223 <0,glaexts> {
 224   \(                                    { special IToparen }
 225   \)                                    { special ITcparen }
 226   \[                                    { special ITobrack }
 227   \]                                    { special ITcbrack }
 228   \,                                    { special ITcomma }
 229   \;                                    { special ITsemi }
 230   \`                                    { special ITbackquote }
 231
 232   \{                                    { open_brace }
 233   \}                                    { close_brace }
 234 }
 235
 236 <0,glaexts> {
 237   @qual @varid                  { check_qvarid }
 238   @qual @conid                  { idtoken qconid }
 239   @varid                        { varid }
 240   @conid                        { idtoken conid }
 241 }
 242
 243 -- after an illegal qvarid, such as 'M.let',
 244 -- we back up and try again in the bad_qvarid state:
 245 <bad_qvarid> {
 246   @conid                        { pop_and (idtoken conid) }
 247   @qual @conid                  { pop_and (idtoken qconid) }
 248 }
 249
 250 <glaexts> {
 251   @qual @varid "#"+             { idtoken qvarid }
 252   @qual @conid "#"+             { idtoken qconid }
 253   @varid "#"+                   { varid }
 254   @conid "#"+                   { idtoken conid }
 255 }
 256
 257 -- ToDo: M.(,,,)
 258
 259 <0,glaexts> {
 260   @qual @varsym                 { idtoken qvarsym }
 261   @qual @consym                 { idtoken qconsym }
 262   @varsym                       { varsym }
 263   @consym                       { consym }
 264 }
 265
 266 <0,glaexts> {
 267   @decimal                      { tok_decimal }
 268   0[oO] @octal                  { tok_octal }
 269   0[xX] @hexadecimal            { tok_hexadecimal }
 270 }
 271
 272 <glaexts> {
 273   @decimal \#                   { prim_decimal }
 274   0[oO] @octal \#               { prim_octal }
 275   0[xX] @hexadecimal \#         { prim_hexadecimal }
 276 }
 277
 278 <0,glaexts> @floating_point             { strtoken tok_float }
 279 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 280 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 281
 282 -- Strings and chars are lexed by hand-written code.  The reason is
 283 -- that even if we recognise the string or char here in the regex
 284 -- lexer, we would still have to parse the string afterward in order
 285 -- to convert it to a String.
 286 <0,glaexts> {
 287   \'                            { lex_char_tok }
 288   \"                            { lex_string_tok }
 289 }
 290
 291 <glaexts> "``" (([$graphic $whitechar] # \') | \' ([$graphic $whitechar] # \'))*
 292                 "''"            { clitlit }
 293
 294 {
 295 -- work around bug in Alex 2.0
 296 #if __GLASGOW_HASKELL__ < 503
 297 unsafeAt arr i = arr ! i
 298 #endif
 299
 300 -- -----------------------------------------------------------------------------
 301 -- The token type
 302
 303 data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__
 304
 305 data Token__
 306   = ITas                        -- Haskell keywords
 307   | ITcase
 308   | ITclass
 309   | ITdata
 310   | ITdefault
 311   | ITderiving
 312   | ITdo
 313   | ITelse
 314   | IThiding
 315   | ITif
 316   | ITimport
 317   | ITin
 318   | ITinfix
 319   | ITinfixl
 320   | ITinfixr
 321   | ITinstance
 322   | ITlet
 323   | ITmodule
 324   | ITnewtype
 325   | ITof
 326   | ITqualified
 327   | ITthen
 328   | ITtype
 329   | ITwhere
 330   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 331
 332   | ITforall                    -- GHC extension keywords
 333   | ITforeign
 334   | ITexport
 335   | ITlabel
 336   | ITdynamic
 337   | ITsafe
 338   | ITthreadsafe
 339   | ITunsafe
 340   | ITwith
 341   | ITstdcallconv
 342   | ITccallconv
 343   | ITdotnet
 344   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 345   | ITmdo
 346
 347   | ITspecialise_prag           -- Pragmas
 348   | ITsource_prag
 349   | ITinline_prag
 350   | ITnoinline_prag
 351   | ITrules_prag
 352   | ITdeprecated_prag
 353   | ITline_prag
 354   | ITscc_prag
 355   | ITcore_prag                 -- hdaume: core annotations
 356   | ITclose_prag
 357
 358   | ITdotdot                    -- reserved symbols
 359   | ITcolon
 360   | ITdcolon
 361   | ITequal
 362   | ITlam
 363   | ITvbar
 364   | ITlarrow
 365   | ITrarrow
 366   | ITat
 367   | ITtilde
 368   | ITdarrow
 369   | ITminus
 370   | ITbang
 371   | ITstar
 372   | ITdot
 373
 374   | ITbiglam                    -- GHC-extension symbols
 375
 376   | ITocurly                    -- special symbols
 377   | ITccurly
 378   | ITocurlybar                 -- {|, for type applications
 379   | ITccurlybar                 -- |}, for type applications
 380   | ITvocurly
 381   | ITvccurly
 382   | ITobrack
 383   | ITopabrack                  -- [:, for parallel arrays with -fparr
 384   | ITcpabrack                  -- :], for parallel arrays with -fparr
 385   | ITcbrack
 386   | IToparen
 387   | ITcparen
 388   | IToubxparen
 389   | ITcubxparen
 390   | ITsemi
 391   | ITcomma
 392   | ITunderscore
 393   | ITbackquote
 394
 395   | ITvarid   FastString        -- identifiers
 396   | ITconid   FastString
 397   | ITvarsym  FastString
 398   | ITconsym  FastString
 399   | ITqvarid  (FastString,FastString)
 400   | ITqconid  (FastString,FastString)
 401   | ITqvarsym (FastString,FastString)
 402   | ITqconsym (FastString,FastString)
 403
 404   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 405   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 406
 407   | ITpragma StringBuffer
 408
 409   | ITchar       Char
 410   | ITstring     FastString
 411   | ITinteger    Integer
 412   | ITrational   Rational
 413
 414   | ITprimchar   Char
 415   | ITprimstring FastString
 416   | ITprimint    Integer
 417   | ITprimfloat  Rational
 418   | ITprimdouble Rational
 419   | ITlitlit     FastString
 420
 421   -- MetaHaskell extension tokens
 422   | ITopenExpQuote              -- [| or [e|
 423   | ITopenPatQuote              -- [p|
 424   | ITopenDecQuote              -- [d|
 425   | ITopenTypQuote              -- [t|
 426   | ITcloseQuote                -- |]
 427   | ITidEscape   FastString     -- $x
 428   | ITparenEscape               -- $(
 429   | ITreifyType
 430   | ITreifyDecl
 431   | ITreifyFixity
 432
 433   -- Arrow notation extension
 434   | ITproc
 435   | ITrec
 436   | IToparenbar                 -- (|
 437   | ITcparenbar                 -- |)
 438   | ITlarrowtail                -- -<
 439   | ITrarrowtail                -- >-
 440   | ITLarrowtail                -- -<<
 441   | ITRarrowtail                -- >>-
 442
 443   | ITunknown String            -- Used when the lexer can't make sense of it
 444   | ITeof                       -- end of file token
 445 #ifdef DEBUG
 446   deriving Show -- debugging
 447 #endif
 448
 449 isSpecial :: Token__ -> Bool
 450 -- If we see M.x, where x is a keyword, but
 451 -- is special, we treat is as just plain M.x,
 452 -- not as a keyword.
 453 isSpecial ITas          = True
 454 isSpecial IThiding      = True
 455 isSpecial ITqualified   = True
 456 isSpecial ITforall      = True
 457 isSpecial ITexport      = True
 458 isSpecial ITlabel       = True
 459 isSpecial ITdynamic     = True
 460 isSpecial ITsafe        = True
 461 isSpecial ITthreadsafe  = True
 462 isSpecial ITunsafe      = True
 463 isSpecial ITwith        = True
 464 isSpecial ITccallconv   = True
 465 isSpecial ITstdcallconv = True
 466 isSpecial ITmdo         = True
 467 isSpecial _             = False
 468
 469 -- the bitmap provided as the third component indicates whether the
 470 -- corresponding extension keyword is valid under the extension options
 471 -- provided to the compiler; if the extension corresponding to *any* of the
 472 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 473 -- facilitates using a keyword in two different extensions that can be
 474 -- activated independently)
 475 --
 476 reservedWordsFM = listToUFM $
 477         map (\(x, y, z) -> (mkFastString x, (y, z)))
 478        [( "_",          ITunderscore,   0 ),
 479         ( "as",         ITas,           0 ),
 480         ( "case",       ITcase,         0 ),
 481         ( "class",      ITclass,        0 ),
 482         ( "data",       ITdata,         0 ),
 483         ( "default",    ITdefault,      0 ),
 484         ( "deriving",   ITderiving,     0 ),
 485         ( "do",         ITdo,           0 ),
 486         ( "else",       ITelse,         0 ),
 487         ( "hiding",     IThiding,       0 ),
 488         ( "if",         ITif,           0 ),
 489         ( "import",     ITimport,       0 ),
 490         ( "in",         ITin,           0 ),
 491         ( "infix",      ITinfix,        0 ),
 492         ( "infixl",     ITinfixl,       0 ),
 493         ( "infixr",     ITinfixr,       0 ),
 494         ( "instance",   ITinstance,     0 ),
 495         ( "let",        ITlet,          0 ),
 496         ( "module",     ITmodule,       0 ),
 497         ( "newtype",    ITnewtype,      0 ),
 498         ( "of",         ITof,           0 ),
 499         ( "qualified",  ITqualified,    0 ),
 500         ( "then",       ITthen,         0 ),
 501         ( "type",       ITtype,         0 ),
 502         ( "where",      ITwhere,        0 ),
 503         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 504
 505         ( "forall",     ITforall,        bit glaExtsBit),
 506         ( "mdo",        ITmdo,           bit glaExtsBit),
 507         ( "reifyDecl",  ITreifyDecl,     bit thBit),
 508         ( "reifyType",  ITreifyType,     bit thBit),
 509         ( "reifyFixity",ITreifyFixity,   bit thBit),
 510
 511         ( "foreign",    ITforeign,       bit ffiBit),
 512         ( "export",     ITexport,        bit ffiBit),
 513         ( "label",      ITlabel,         bit ffiBit),
 514         ( "dynamic",    ITdynamic,       bit ffiBit),
 515         ( "safe",       ITsafe,          bit ffiBit),
 516         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 517         ( "unsafe",     ITunsafe,        bit ffiBit),
 518         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 519         ( "ccall",      ITccallconv,     bit ffiBit),
 520         ( "dotnet",     ITdotnet,        bit ffiBit),
 521
 522         ( "with",       ITwith,          bit withBit),
 523
 524         ( "rec",        ITrec,           bit arrowsBit),
 525         ( "proc",       ITproc,          bit arrowsBit),
 526
 527         -- On death row
 528         ("_ccall_",     ITccall (False, False, PlayRisky),
 529                                          bit glaExtsBit),
 530         ("_ccall_GC_",  ITccall (False, False, PlaySafe False),
 531                                          bit glaExtsBit),
 532         ("_casm_",      ITccall (False, True,  PlayRisky),
 533                                          bit glaExtsBit),
 534         ("_casm_GC_",   ITccall (False, True,  PlaySafe False),
 535                                          bit glaExtsBit)
 536      ]
 537
 538 reservedSymsFM = listToUFM $
 539         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 540       [ ("..",  ITdotdot,       0)
 541        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 542                                                 -- meaning only list cons
 543        ,("::",  ITdcolon,       0)
 544        ,("=",   ITequal,        0)
 545        ,("\\",  ITlam,          0)
 546        ,("|",   ITvbar,         0)
 547        ,("<-",  ITlarrow,       0)
 548        ,("->",  ITrarrow,       0)
 549        ,("@",   ITat,           0)
 550        ,("~",   ITtilde,        0)
 551        ,("=>",  ITdarrow,       0)
 552        ,("-",   ITminus,        0)
 553        ,("!",   ITbang,         0)
 554
 555        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 556        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 557
 558        ,("-<",  ITlarrowtail,   bit arrowsBit)
 559        ,(">-",  ITrarrowtail,   bit arrowsBit)
 560        ,("-<<", ITLarrowtail,   bit arrowsBit)
 561        ,(">>-", ITRarrowtail,   bit arrowsBit)
 562        ]
 563
 564 -- -----------------------------------------------------------------------------
 565 -- Lexer actions
 566
 567 type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token
 568
 569 special :: Token__ -> Action
 570 special tok loc end _buf len = return (T loc end tok)
 571
 572 token, layout_token :: Token__ -> Action
 573 token t loc end buf len = return (T loc end t)
 574 layout_token t loc end buf len = pushLexState layout >> return (T loc end t)
 575
 576 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 577 idtoken f loc end buf len = return (T loc end $! (f buf len))
 578
 579 skip_one_varid :: (FastString -> Token__) -> Action
 580 skip_one_varid f loc end buf len
 581   = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1)))
 582
 583 strtoken :: (String -> Token__) -> Action
 584 strtoken f loc end buf len =
 585   return (T loc end $! (f $! lexemeToString buf len))
 586
 587 init_strtoken :: Int -> (String -> Token__) -> Action
 588 -- like strtoken, but drops the last N character(s)
 589 init_strtoken drop f loc end buf len =
 590   return (T loc end $! (f $! lexemeToString buf (len-drop)))
 591
 592 begin :: Int -> Action
 593 begin code _loc _end _str _len = do pushLexState code; lexToken
 594
 595 pop :: Action
 596 pop _loc _end _buf _len = do popLexState; lexToken
 597
 598 pop_and :: Action -> Action
 599 pop_and act loc end buf len = do popLexState; act loc end buf len
 600
 601 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 602
 603 ifExtension pred bits _ _ _ = pred bits
 604
 605 {-
 606   nested comments require traversing by hand, they can't be parsed
 607   using regular expressions.
 608 -}
 609 nested_comment :: Action
 610 nested_comment loc _end _str _len = do
 611   input <- getInput
 612   go 1 input
 613   where go 0 input = do setInput input; lexToken
 614         go n input = do
 615           case alexGetChar input of
 616             Nothing  -> err input
 617             Just (c,input) -> do
 618               case c of
 619                 '-' -> do
 620                   case alexGetChar input of
 621                     Nothing  -> err input
 622                     Just ('\125',input) -> go (n-1) input
 623                     Just (c,_)          -> go n input
 624                 '\123' -> do
 625                   case alexGetChar input of
 626                     Nothing  -> err input
 627                     Just ('-',input') -> go (n+1) input'
 628                     Just (c,input)    -> go n input
 629                 c -> go n input
 630
 631         err input = do failLocMsgP loc (fst input) "unterminated `{-'"
 632
 633 open_brace, close_brace :: Action
 634 open_brace  loc end _str _len = do
 635   ctx <- getContext
 636   setContext (NoLayout:ctx)
 637   return (T loc end ITocurly)
 638 close_brace loc end _str _len = do
 639   popContext
 640   return (T loc end ITccurly)
 641
 642 -- We have to be careful not to count M.<varid> as a qualified name
 643 -- when <varid> is a keyword.  We hack around this by catching
 644 -- the offending tokens afterward, and re-lexing in a different state.
 645 check_qvarid loc end buf len = do
 646   case lookupUFM reservedWordsFM var of
 647         Just (keyword,exts)
 648           | not (isSpecial keyword) ->
 649           if exts == 0
 650              then try_again
 651              else do
 652                 b <- extension (\i -> exts .&. i /= 0)
 653                 if b then try_again
 654                      else return token
 655         _other -> return token
 656   where
 657         (mod,var) = splitQualName buf len
 658         token     = T loc end (ITqvarid (mod,var))
 659
 660         try_again = do
 661                 setInput (loc,buf)
 662                 pushLexState bad_qvarid
 663                 lexToken
 664
 665 qvarid buf len = ITqvarid $! splitQualName buf len
 666 qconid buf len = ITqconid $! splitQualName buf len
 667
 668 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 669 -- takes a StringBuffer and a length, and returns the module name
 670 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 671 -- because of hierarchical module names.
 672 splitQualName orig_buf len = split orig_buf 0 0
 673   where
 674     split buf dot_off n
 675         | n == len                = done dot_off
 676         | lookAhead buf n == '.'  = split2 buf n (n+1)
 677         | otherwise               = split buf dot_off (n+1)
 678
 679     -- careful, we might get names like M....
 680     -- so, if the character after the dot is not upper-case, this is
 681     -- the end of the qualifier part.
 682     split2 buf dot_off n
 683         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 684         | otherwise                 = done dot_off
 685
 686     done dot_off =
 687         (lexemeToFastString orig_buf dot_off,
 688          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 689
 690 varid loc end buf len =
 691   case lookupUFM reservedWordsFM fs of
 692         Just (keyword,0)    -> do
 693                 maybe_layout keyword
 694                 return (T loc end keyword)
 695         Just (keyword,exts) -> do
 696                 b <- extension (\i -> exts .&. i /= 0)
 697                 if b then do maybe_layout keyword
 698                              return (T loc end keyword)
 699                      else return (T loc end (ITvarid fs))
 700         _other -> return (T loc end (ITvarid fs))
 701   where
 702         fs = lexemeToFastString buf len
 703
 704 conid buf len = ITconid fs
 705   where fs = lexemeToFastString buf len
 706
 707 qvarsym buf len = ITqvarsym $! splitQualName buf len
 708 qconsym buf len = ITqconsym $! splitQualName buf len
 709
 710 varsym = sym ITvarsym
 711 consym = sym ITconsym
 712
 713 sym con loc end buf len =
 714   case lookupUFM reservedSymsFM fs of
 715         Just (keyword,0)    -> return (T loc end keyword)
 716         Just (keyword,exts) -> do
 717                 b <- extension (\i -> exts .&. i /= 0)
 718                 if b then return (T loc end keyword)
 719                      else return (T loc end $! con fs)
 720         _other -> return (T loc end $! con fs)
 721   where
 722         fs = lexemeToFastString buf len
 723
 724 tok_decimal loc end buf len
 725   = return (T loc end (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 726
 727 tok_octal loc end buf len
 728   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 729
 730 tok_hexadecimal loc end buf len
 731   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 732
 733 prim_decimal loc end buf len
 734   = return (T loc end (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 735
 736 prim_octal loc end buf len
 737   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 738
 739 prim_hexadecimal loc end buf len
 740   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 741
 742 tok_float        str = ITrational $! readRational__ str
 743 prim_float       str = ITprimfloat  $! readRational__ str
 744 prim_double      str = ITprimdouble $! readRational__ str
 745
 746 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 747 parseInteger buf len radix to_int
 748   = go 0 0
 749   where go i x | i == len  = x
 750                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 751
 752 clitlit :: Action
 753 clitlit loc end buf len =
 754   return (T loc end (ITlitlit $! lexemeToFastString (stepOnBy 2 buf) (len-4)))
 755
 756 -- -----------------------------------------------------------------------------
 757 -- Layout processing
 758
 759 -- we're at the first token on a line, insert layout tokens if necessary
 760 do_bol :: Action
 761 do_bol loc end _str _len = do
 762         pos <- getOffside end
 763         case pos of
 764             LT -> do
 765                 --trace "layout: inserting '}'" $ do
 766                 popContext
 767                 -- do NOT pop the lex state, we might have a ';' to insert
 768                 return (T loc end ITvccurly)
 769             EQ -> do
 770                 --trace "layout: inserting ';'" $ do
 771                 popLexState
 772                 return (T loc end ITsemi)
 773             GT -> do
 774                 popLexState
 775                 lexToken
 776
 777 -- certain keywords put us in the "layout" state, where we might
 778 -- add an opening curly brace.
 779 maybe_layout ITdo       = pushLexState layout_do
 780 maybe_layout ITmdo      = pushLexState layout_do
 781 maybe_layout ITof       = pushLexState layout
 782 maybe_layout ITlet      = pushLexState layout
 783 maybe_layout ITwhere    = pushLexState layout
 784 maybe_layout ITrec      = pushLexState layout
 785 maybe_layout _          = return ()
 786
 787 -- Pushing a new implicit layout context.  If the indentation of the
 788 -- next token is not greater than the previous layout context, then
 789 -- Haskell 98 says that the new layout context should be empty; that is
 790 -- the lexer must generate {}.
 791 --
 792 -- We are slightly more lenient than this: when the new context is started
 793 -- by a 'do', then we allow the new context to be at the same indentation as
 794 -- the previous context.  This is what the 'strict' argument is for.
 795 --
 796 new_layout_context strict loc end _buf _len = do
 797     popLexState
 798     let offset = srcLocCol loc
 799     ctx <- getContext
 800     case ctx of
 801         Layout prev_off : _  |
 802            (strict     && prev_off >= offset  ||
 803             not strict && prev_off > offset) -> do
 804                 -- token is indented to the left of the previous context.
 805                 -- we must generate a {} sequence now.
 806                 pushLexState layout_left
 807                 return (T loc end ITvocurly)
 808         other -> do
 809                 setContext (Layout offset : ctx)
 810                 return (T loc end ITvocurly)
 811
 812 do_layout_left loc end _buf _len = do
 813     popLexState
 814     pushLexState bol  -- we must be at the start of a line
 815     return (T loc end ITvccurly)
 816
 817 -- -----------------------------------------------------------------------------
 818 -- LINE pragmas
 819
 820 set_line :: Int -> Action
 821 set_line code loc end buf len = do
 822   let line = parseInteger buf len 10 oct_or_dec
 823   setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0)
 824         -- subtract one: the line number refers to the *following* line
 825   popLexState
 826   pushLexState code
 827   lexToken
 828
 829 set_file :: Int -> Action
 830 set_file code loc end buf len = do
 831   let file = lexemeToFastString (stepOn buf) (len-2)
 832   setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end))
 833   popLexState
 834   pushLexState code
 835   lexToken
 836
 837 -- -----------------------------------------------------------------------------
 838 -- Strings & Chars
 839
 840 -- This stuff is horrible.  I hates it.
 841
 842 lex_string_tok :: Action
 843 lex_string_tok loc end buf len = do
 844   tok <- lex_string ""
 845   end <- getSrcLoc
 846   return (T loc end tok)
 847
 848 lex_string :: String -> P Token__
 849 lex_string s = do
 850   i <- getInput
 851   case alexGetChar i of
 852     Nothing -> lit_error
 853
 854     Just ('"',i)  -> do
 855         setInput i
 856         glaexts <- extension glaExtsEnabled
 857         if glaexts
 858           then do
 859             i <- getInput
 860             case alexGetChar i of
 861               Just ('#',i) -> do
 862                    setInput i
 863                    if any (> '\xFF') s
 864                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 865                     else let s' = mkFastStringNarrow (reverse s) in
 866                          -- always a narrow string/byte array
 867                          return (ITprimstring s')
 868               _other ->
 869                 return (ITstring (mkFastString (reverse s)))
 870           else
 871                 return (ITstring (mkFastString (reverse s)))
 872
 873     Just ('\\',i)
 874         | Just ('&',i) <- next -> do
 875                 setInput i; lex_string s
 876         | Just (c,i) <- next, is_space c -> do
 877                 setInput i; lex_stringgap s
 878         where next = alexGetChar i
 879
 880     Just _ -> do
 881         c <- lex_char
 882         lex_string (c:s)
 883
 884
 885 lex_stringgap s = do
 886   c <- getCharOrFail
 887   case c of
 888     '\\' -> lex_string s
 889     c | is_space c -> lex_stringgap s
 890     _other -> lit_error
 891
 892
 893 lex_char_tok :: Action
 894 lex_char_tok loc _end buf len = do
 895    c <- lex_char
 896    mc <- getCharOrFail
 897    case mc of
 898         '\'' -> do
 899            glaexts <- extension glaExtsEnabled
 900            if glaexts
 901                 then do
 902                    i@(end,_) <- getInput
 903                    case alexGetChar i of
 904                         Just ('#',i@(end,_)) -> do
 905                                 setInput i
 906                                 return (T loc end (ITprimchar c))
 907                         _other ->
 908                                 return (T loc end (ITchar c))
 909                 else do
 910                    end <- getSrcLoc
 911                    return (T loc end (ITchar c))
 912
 913         _other -> lit_error
 914
 915 lex_char :: P Char
 916 lex_char = do
 917   mc <- getCharOrFail
 918   case mc of
 919       '\\' -> lex_escape
 920       c | is_any c -> return c
 921       _other -> lit_error
 922
 923 lex_escape :: P Char
 924 lex_escape = do
 925   c <- getCharOrFail
 926   case c of
 927         'a'   -> return '\a'
 928         'b'   -> return '\b'
 929         'f'   -> return '\f'
 930         'n'   -> return '\n'
 931         'r'   -> return '\r'
 932         't'   -> return '\t'
 933         'v'   -> return '\v'
 934         '\\'  -> return '\\'
 935         '"'   -> return '\"'
 936         '\''  -> return '\''
 937         '^'   -> do c <- getCharOrFail
 938                     if c >= '@' && c <= '_'
 939                         then return (chr (ord c - ord '@'))
 940                         else lit_error
 941
 942         'x'   -> readNum is_hexdigit 16 hex
 943         'o'   -> readNum is_octdigit  8 oct_or_dec
 944         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 945
 946         c1 ->  do
 947            i <- getInput
 948            case alexGetChar i of
 949             Nothing -> lit_error
 950             Just (c2,i2) ->
 951               case alexGetChar i2 of
 952                 Nothing -> lit_error
 953                 Just (c3,i3) ->
 954                    let str = [c1,c2,c3] in
 955                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 956                                      Just rest <- [maybePrefixMatch p str] ] of
 957                           (escape_char,[]):_ -> do
 958                                 setInput i3
 959                                 return escape_char
 960                           (escape_char,_:_):_ -> do
 961                                 setInput i2
 962                                 return escape_char
 963                           [] -> lit_error
 964
 965 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 966 readNum is_digit base conv = do
 967   c <- getCharOrFail
 968   if is_digit c
 969         then readNum2 is_digit base conv (conv c)
 970         else lit_error
 971
 972 readNum2 is_digit base conv i = do
 973   input <- getInput
 974   read i input
 975   where read i input = do
 976           case alexGetChar input of
 977             Just (c,input') | is_digit c -> do
 978                 read (i*base + conv c) input'
 979             _other -> do
 980                 setInput input
 981                 if i >= 0 && i <= 0x10FFFF
 982                    then return (chr i)
 983                    else lit_error
 984
 985 is_hexdigit c
 986         =  is_digit c
 987         || (c >= 'a' && c <= 'f')
 988         || (c >= 'A' && c <= 'F')
 989
 990 hex c | is_digit c = ord c - ord '0'
 991       | otherwise  = ord (to_lower c) - ord 'a' + 10
 992
 993 oct_or_dec c = ord c - ord '0'
 994
 995 is_octdigit c = c >= '0' && c <= '7'
 996
 997 to_lower c
 998   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
 999   | otherwise = c
1000
1001 silly_escape_chars = [
1002         ("NUL", '\NUL'),
1003         ("SOH", '\SOH'),
1004         ("STX", '\STX'),
1005         ("ETX", '\ETX'),
1006         ("EOT", '\EOT'),
1007         ("ENQ", '\ENQ'),
1008         ("ACK", '\ACK'),
1009         ("BEL", '\BEL'),
1010         ("BS", '\BS'),
1011         ("HT", '\HT'),
1012         ("LF", '\LF'),
1013         ("VT", '\VT'),
1014         ("FF", '\FF'),
1015         ("CR", '\CR'),
1016         ("SO", '\SO'),
1017         ("SI", '\SI'),
1018         ("DLE", '\DLE'),
1019         ("DC1", '\DC1'),
1020         ("DC2", '\DC2'),
1021         ("DC3", '\DC3'),
1022         ("DC4", '\DC4'),
1023         ("NAK", '\NAK'),
1024         ("SYN", '\SYN'),
1025         ("ETB", '\ETB'),
1026         ("CAN", '\CAN'),
1027         ("EM", '\EM'),
1028         ("SUB", '\SUB'),
1029         ("ESC", '\ESC'),
1030         ("FS", '\FS'),
1031         ("GS", '\GS'),
1032         ("RS", '\RS'),
1033         ("US", '\US'),
1034         ("SP", '\SP'),
1035         ("DEL", '\DEL')
1036         ]
1037
1038 lit_error = lexError "lexical error in string/character literal"
1039
1040 getCharOrFail :: P Char
1041 getCharOrFail =  do
1042   i <- getInput
1043   case alexGetChar i of
1044         Nothing -> lexError "unexpected end-of-file in string/character literal"
1045         Just (c,i)  -> do setInput i; return c
1046
1047 -- -----------------------------------------------------------------------------
1048 -- Floats
1049
1050 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1051 readRational r = do
1052      (n,d,s) <- readFix r
1053      (k,t)   <- readExp s
1054      return ((n%1)*10^^(k-d), t)
1055  where
1056      readFix r = do
1057         (ds,s)  <- lexDecDigits r
1058         (ds',t) <- lexDotDigits s
1059         return (read (ds++ds'), length ds', t)
1060
1061      readExp (e:s) | e `elem` "eE" = readExp' s
1062      readExp s                     = return (0,s)
1063
1064      readExp' ('+':s) = readDec s
1065      readExp' ('-':s) = do
1066                         (k,t) <- readDec s
1067                         return (-k,t)
1068      readExp' s       = readDec s
1069
1070      readDec s = do
1071         (ds,r) <- nonnull isDigit s
1072         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1073                 r)
1074
1075      lexDecDigits = nonnull isDigit
1076
1077      lexDotDigits ('.':s) = return (span isDigit s)
1078      lexDotDigits s       = return ("",s)
1079
1080      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1081                       return (cs,t)
1082
1083 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1084 readRational__ top_s
1085   = case top_s of
1086       '-' : xs -> - (read_me xs)
1087       xs       -> read_me xs
1088   where
1089     read_me s
1090       = case (do { (x,"") <- readRational s ; return x }) of
1091           [x] -> x
1092           []  -> error ("readRational__: no parse:"        ++ top_s)
1093           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1094
1095 -- -----------------------------------------------------------------------------
1096 -- The Parse Monad
1097
1098 data LayoutContext
1099   = NoLayout
1100   | Layout !Int
1101
1102 data ParseResult a
1103   = POk PState a
1104   | PFailed
1105         SrcLoc SrcLoc   -- The start and end of the text span related to
1106                         -- the error.  Might be used in environments which can
1107                         -- show this span, e.g. by highlighting it.
1108         Message         -- The error message
1109
1110 showPFailed loc1 loc2 err
1111  = showSDoc (hcat [ppr loc1, text ": ", err])
1112
1113 data PState = PState {
1114         buffer     :: StringBuffer,
1115         last_loc   :: SrcLoc,           -- pos of previous token
1116         last_len   :: !Int,             -- len of previous token
1117         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1118         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1119         context    :: [LayoutContext],
1120         lex_state  :: [Int]
1121      }
1122         -- last_loc and last_len are used when generating error messages,
1123         -- and in pushCurrentContext only.
1124
1125 newtype P a = P { unP :: PState -> ParseResult a }
1126
1127 instance Monad P where
1128   return = returnP
1129   (>>=) = thenP
1130   fail = failP
1131
1132 returnP :: a -> P a
1133 returnP a = P $ \s -> POk s a
1134
1135 thenP :: P a -> (a -> P b) -> P b
1136 (P m) `thenP` k = P $ \ s ->
1137         case m s of
1138                 POk s1 a          -> (unP (k a)) s1
1139                 PFailed l1 l2 err -> PFailed l1 l2 err
1140
1141 failP :: String -> P a
1142 failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1143
1144 failMsgP :: String -> P a
1145 failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1146
1147 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1148 failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str)
1149
1150 extension :: (Int -> Bool) -> P Bool
1151 extension p = P $ \s -> POk s (p $! extsBitmap s)
1152
1153 getExts :: P Int
1154 getExts = P $ \s -> POk s (extsBitmap s)
1155
1156 setSrcLoc :: SrcLoc -> P ()
1157 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1158
1159 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1160 -- any calls to the lexer, because it assumes things about the SrcLoc.
1161 setSrcLocFor :: SrcLoc -> P a -> P a
1162 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1163   case unP scope s{loc=new_loc} of
1164         PFailed l1 l2 msg -> PFailed l1 l2 msg
1165         POk _ r -> POk s r
1166
1167 getSrcLoc :: P SrcLoc
1168 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1169
1170 setLastToken :: SrcLoc -> Int -> P ()
1171 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1172
1173 type AlexInput = (SrcLoc,StringBuffer)
1174
1175 alexInputPrevChar :: AlexInput -> Char
1176 alexInputPrevChar (_,s) = prevChar s '\n'
1177
1178 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1179 alexGetChar (loc,s)
1180   | atEnd s   = Nothing
1181   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1182   where c = currentChar s
1183         loc' = advanceSrcLoc loc c
1184         s'   = stepOn s
1185
1186 getInput :: P AlexInput
1187 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1188
1189 setInput :: AlexInput -> P ()
1190 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1191
1192 pushLexState :: Int -> P ()
1193 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1194
1195 popLexState :: P Int
1196 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1197
1198 getLexState :: P Int
1199 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1200
1201 -- for reasons of efficiency, flags indicating language extensions (eg,
1202 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1203 -- integer
1204
1205 glaExtsBit, ffiBit, parrBit :: Int
1206 glaExtsBit = 0
1207 ffiBit     = 1
1208 parrBit    = 2
1209 withBit    = 3
1210 arrowsBit  = 4
1211 thBit      = 5
1212 ipBit      = 6
1213
1214 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1215 glaExtsEnabled flags = testBit flags glaExtsBit
1216 ffiEnabled     flags = testBit flags ffiBit
1217 withEnabled    flags = testBit flags withBit
1218 parrEnabled    flags = testBit flags parrBit
1219 arrowsEnabled  flags = testBit flags arrowsBit
1220 thEnabled      flags = testBit flags thBit
1221 ipEnabled      flags = testBit flags ipBit
1222
1223 -- create a parse state
1224 --
1225 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1226 mkPState buf loc flags  =
1227   PState {
1228       buffer     = buf,
1229       last_loc   = loc,
1230       last_len   = 0,
1231       loc        = loc,
1232       extsBitmap = fromIntegral bitmap,
1233       context    = [],
1234       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1235         -- we begin in the layout state if toplev_layout is set
1236     }
1237     where
1238       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1239                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1240                .|. withBit    `setBitIf` dopt Opt_With        flags
1241                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1242                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1243                .|. thBit      `setBitIf` dopt Opt_TH          flags
1244                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1245       --
1246       setBitIf :: Int -> Bool -> Int
1247       b `setBitIf` cond | cond      = bit b
1248                         | otherwise = 0
1249
1250 getContext :: P [LayoutContext]
1251 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1252
1253 setContext :: [LayoutContext] -> P ()
1254 setContext ctx = P $ \s -> POk s{context=ctx} ()
1255
1256 popContext :: P ()
1257 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1258                            loc = loc, last_len = len, last_loc = last_loc }) ->
1259   case ctx of
1260         (_:tl) -> POk s{ context = tl } ()
1261         []     -> PFailed last_loc loc (srcParseErr buf len)
1262
1263 -- Push a new layout context at the indentation of the last token read.
1264 -- This is only used at the outer level of a module when the 'module'
1265 -- keyword is missing.
1266 pushCurrentContext :: P ()
1267 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1268   POk s{ context = Layout (srcLocCol loc) : ctx} ()
1269
1270 getOffside :: SrcLoc -> P Ordering
1271 getOffside loc = P $ \s@PState{context=stk} ->
1272                 let ord = case stk of
1273                         (Layout n:_) -> compare (srcLocCol loc) n
1274                         _            -> GT
1275                 in POk s ord
1276
1277 -- ---------------------------------------------------------------------------
1278 -- Construct a parse error
1279
1280 srcParseErr
1281   :: StringBuffer       -- current buffer (placed just after the last token)
1282   -> Int                -- length of the previous token
1283   -> Message
1284 srcParseErr buf len
1285   = hcat [ if null token
1286              then ptext SLIT("parse error (possibly incorrect indentation)")
1287              else hcat [ptext SLIT("parse error on input "),
1288                         char '`', text token, char '\'']
1289     ]
1290   where token = lexemeToString (stepOnBy (-len) buf) len
1291
1292 -- Report a parse failure, giving the span of the previous token as
1293 -- the location of the error.  This is the entry point for errors
1294 -- detected during parsing.
1295 srcParseFail :: P a
1296 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1297                                 last_loc = last_loc, loc = loc } ->
1298     PFailed last_loc loc (srcParseErr buf len)
1299
1300 -- A lexical error is reported at a particular position in the source file,
1301 -- not over a token range.  TODO: this is slightly wrong, because we record
1302 -- the error at the character position following the one which caused the
1303 -- error.  We should somehow back up by one character.
1304 lexError :: String -> P a
1305 lexError str = do
1306   loc <- getSrcLoc
1307   failLocMsgP loc loc str
1308
1309 -- -----------------------------------------------------------------------------
1310 -- This is the top-level function: called from the parser each time a
1311 -- new token is to be read from the input.
1312
1313 lexer :: (Token -> P a) -> P a
1314 lexer cont = do
1315   tok@(T _ _ tok__) <- lexToken
1316   --trace ("token: " ++ show tok__) $ do
1317   cont tok
1318
1319 lexToken :: P Token
1320 lexToken = do
1321   inp@(loc1,buf) <- getInput
1322   sc <- getLexState
1323   exts <- getExts
1324   case alexScanUser exts inp sc of
1325     AlexEOF -> do setLastToken loc1 0
1326                   return (T loc1 loc1 ITeof)
1327     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1328     AlexSkip inp2 _ -> do
1329         setInput inp2
1330         lexToken
1331     AlexToken inp2@(end,buf2) len t -> do
1332         setInput inp2
1333         setLastToken loc1 len
1334         t loc1 end buf len
1335 }