ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, ExtFlags(..), mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ForeignCall      ( Safety(..) )
  34 import ErrUtils         ( Message )
  35 import Outputable
  36 import StringBuffer
  37 import FastString
  38 import FastTypes
  39 import SrcLoc
  40 import UniqFM
  41 import Ctype
  42 import Util             ( maybePrefixMatch )
  43
  44 import DATA_BITS
  45 import Char
  46 import Ratio
  47 import TRACE
  48 }
  49
  50 $whitechar   = [\ \t\n\r\f\v]
  51 $white_no_nl = $whitechar # \n
  52
  53 $ascdigit  = 0-9
  54 $unidigit  = \x01
  55 $digit     = [$ascdigit $unidigit]
  56
  57 $special   = [\(\)\,\;\[\]\`\{\}]
  58 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  59 $unisymbol = \x02
  60 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  61
  62 $unilarge  = \x03
  63 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  64 $large     = [$asclarge $unilarge]
  65
  66 $unismall  = \x04
  67 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  68 $small     = [$ascsmall $unismall \_]
  69
  70 $graphic   = [$small $large $symbol $digit $special \:\"\']
  71
  72 $octit     = 0-7
  73 $hexit     = [$digit A-F a-f]
  74 $symchar   = [$symbol \:]
  75 $nl        = [\n\r]
  76 $idchar    = [$small $large $digit \']
  77
  78 @varid     = $small $idchar*
  79 @conid     = $large $idchar*
  80
  81 @varsym    = $symbol $symchar*
  82 @consym    = \: $symchar*
  83
  84 @decimal     = $digit+
  85 @octal       = $octit+
  86 @hexadecimal = $hexit+
  87 @exponent    = [eE] [\-\+]? @decimal
  88
  89 -- we support the hierarchical module name extension:
  90 @qual = (@conid \.)+
  91
  92 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  93
  94 haskell :-
  95
  96 -- everywhere: skip whitespace and comments
  97 $white_no_nl+                           ;
  98
  99 -- Everywhere: deal with nested comments.  We explicitly rule out
 100 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 101 -- (this can happen even though pragmas will normally take precedence due to
 102 -- longest-match, because pragmas aren't valid in every state, but comments
 103 -- are).
 104 "{-" / { notFollowedBy '#' }            { nested_comment }
 105
 106 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 107 -- more dashes followed by a symbol should be parsed as a varsym, so we
 108 -- have to exclude those.
 109 -- The regex says: "munch all the characters after the dashes, as long as
 110 -- the first one is not a symbol".
 111 "--"\-* ([^$symbol] .*)?                ;
 112
 113 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 114 -- blank lines) until we find a non-whitespace character, then do layout
 115 -- processing.
 116 --
 117 -- One slight wibble here: what if the line begins with {-#? In
 118 -- theory, we have to lex the pragma to see if it's one we recognise,
 119 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 120 -- as a nested comment.  We don't bother with this: if the line begins
 121 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 122 <bol> {
 123   \n                                    ;
 124   ^\# (line)?                           { begin line_prag1 }
 125   ()                                    { do_bol }
 126 }
 127
 128 -- after a layout keyword (let, where, do, of), we begin a new layout
 129 -- context if the curly brace is missing.
 130 -- Careful! This stuff is quite delicate.
 131 <layout, layout_do> {
 132   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 133         -- we might encounter {-# here, but {- has been handled already
 134   \n                                    ;
 135   ^\# (line)?                           { begin line_prag1 }
 136 }
 137
 138 -- do is treated in a subtly different way, see new_layout_context
 139 <layout>    ()                          { new_layout_context True }
 140 <layout_do> ()                          { new_layout_context False }
 141
 142 -- after a new layout context which was found to be to the left of the
 143 -- previous context, we have generated a '{' token, and we now need to
 144 -- generate a matching '}' token.
 145 <layout_left>  ()                       { do_layout_left }
 146
 147 <0,glaexts> \n                          { begin bol }
 148
 149 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 150
 151 -- single-line line pragmas, of the form
 152 --    # <line> "<file>" <extra-stuff> \n
 153 <line_prag1> $digit+                    { set_line line_prag1a }
 154 <line_prag1a> \" $graphic* \"           { set_file line_prag1b }
 155 <line_prag1b> .*                        { pop }
 156
 157 -- Haskell-style line pragmas, of the form
 158 --    {-# LINE <line> "<file>" #-}
 159 <line_prag2> $digit+                    { set_line line_prag2a }
 160 <line_prag2a> \" $graphic* \"           { set_file line_prag2b }
 161 <line_prag2b> "#-}"                     { pop }
 162
 163 <0,glaexts> {
 164   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 165                                         { token ITspecialise_prag }
 166   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 167   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 168   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 169                                         { token ITnoinline_prag }
 170   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 171   "{-#" $whitechar* (DEPRECATED|deprecated)
 172                                         { token ITdeprecated_prag }
 173   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 174   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 175
 176   "{-#"                                 { nested_comment }
 177
 178   -- ToDo: should only be valid inside a pragma:
 179   "#-}"                                 { token ITclose_prag}
 180 }
 181
 182
 183 -- '0' state: ordinary lexemes
 184 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 185
 186 -- "special" symbols
 187
 188 <glaexts> {
 189   "(#"                                  { token IToubxparen }
 190   "#)"                                  { token ITcubxparen }
 191
 192   "[:"                                  { token ITopabrack }
 193   ":]"                                  { token ITcpabrack }
 194
 195   "{|"                                  { token ITocurlybar }
 196   "|}"                                  { token ITccurlybar }
 197
 198   "[|"                                  { token ITopenExpQuote }
 199   "[e|"                                 { token ITopenExpQuote }
 200   "[p|"                                 { token ITopenPatQuote }
 201   "[d|"                                 { token ITopenDecQuote }
 202   "[t|"                                 { token ITopenTypQuote }
 203   "|]"                                  { token ITcloseQuote }
 204 }
 205
 206 <0,glaexts> {
 207   "(|" / { \b _ _ _ -> arrowsEnabled b} { special IToparenbar }
 208   "|)" / { \b _ _ _ -> arrowsEnabled b} { special ITcparenbar }
 209   \(                                    { special IToparen }
 210   \)                                    { special ITcparen }
 211   \[                                    { special ITobrack }
 212   \]                                    { special ITcbrack }
 213   \,                                    { special ITcomma }
 214   \;                                    { special ITsemi }
 215   \`                                    { special ITbackquote }
 216
 217   \{                                    { open_brace }
 218   \}                                    { close_brace }
 219 }
 220
 221 <glaexts> {
 222   \? @varid                     { skip_one_varid ITdupipvarid }
 223   \% @varid                     { skip_one_varid ITsplitipvarid }
 224   \$ @varid                     { skip_one_varid ITidEscape }
 225   "$("                          { token ITparenEscape }
 226 }
 227
 228 <0,glaexts> {
 229   @qual @varid                  { check_qvarid }
 230   @qual @conid                  { idtoken qconid }
 231   @varid                        { varid }
 232   @conid                        { idtoken conid }
 233 }
 234
 235 -- after an illegal qvarid, such as 'M.let',
 236 -- we back up and try again in the bad_qvarid state:
 237 <bad_qvarid> {
 238   @conid                        { pop_and (idtoken conid) }
 239   @qual @conid                  { pop_and (idtoken qconid) }
 240 }
 241
 242 <glaexts> {
 243   @qual @varid "#"+             { idtoken qvarid }
 244   @qual @conid "#"+             { idtoken qconid }
 245   @varid "#"+                   { varid }
 246   @conid "#"+                   { idtoken conid }
 247 }
 248
 249 -- ToDo: M.(,,,)
 250
 251 <0,glaexts> {
 252   @qual @varsym                 { idtoken qvarsym }
 253   @qual @consym                 { idtoken qconsym }
 254   @varsym                       { varsym }
 255   @consym                       { consym }
 256 }
 257
 258 <0,glaexts> {
 259   @decimal                      { tok_decimal }
 260   0[oO] @octal                  { tok_octal }
 261   0[xX] @hexadecimal            { tok_hexadecimal }
 262 }
 263
 264 <glaexts> {
 265   @decimal \#                   { prim_decimal }
 266   0[oO] @octal \#               { prim_octal }
 267   0[xX] @hexadecimal \#         { prim_hexadecimal }
 268 }
 269
 270 <0,glaexts> @floating_point             { strtoken tok_float }
 271 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 272 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 273
 274 -- Strings and chars are lexed by hand-written code.  The reason is
 275 -- that even if we recognise the string or char here in the regex
 276 -- lexer, we would still have to parse the string afterward in order
 277 -- to convert it to a String.
 278 <0,glaexts> {
 279   \'                            { lex_char_tok }
 280   \"                            { lex_string_tok }
 281 }
 282
 283 <glaexts> "``" (([$graphic $whitechar] # \') | \' ([$graphic $whitechar] # \'))*
 284                 "''"            { clitlit }
 285
 286 {
 287 -- work around bug in Alex 2.0
 288 #if __GLASGOW_HASKELL__ < 503
 289 unsafeAt arr i = arr ! i
 290 #endif
 291
 292 -- -----------------------------------------------------------------------------
 293 -- The token type
 294
 295 data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__
 296
 297 data Token__
 298   = ITas                        -- Haskell keywords
 299   | ITcase
 300   | ITclass
 301   | ITdata
 302   | ITdefault
 303   | ITderiving
 304   | ITdo
 305   | ITelse
 306   | IThiding
 307   | ITif
 308   | ITimport
 309   | ITin
 310   | ITinfix
 311   | ITinfixl
 312   | ITinfixr
 313   | ITinstance
 314   | ITlet
 315   | ITmodule
 316   | ITnewtype
 317   | ITof
 318   | ITqualified
 319   | ITthen
 320   | ITtype
 321   | ITwhere
 322   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 323
 324   | ITforall                    -- GHC extension keywords
 325   | ITforeign
 326   | ITexport
 327   | ITlabel
 328   | ITdynamic
 329   | ITsafe
 330   | ITthreadsafe
 331   | ITunsafe
 332   | ITwith
 333   | ITstdcallconv
 334   | ITccallconv
 335   | ITdotnet
 336   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 337   | ITmdo
 338
 339   | ITspecialise_prag           -- Pragmas
 340   | ITsource_prag
 341   | ITinline_prag
 342   | ITnoinline_prag
 343   | ITrules_prag
 344   | ITdeprecated_prag
 345   | ITline_prag
 346   | ITscc_prag
 347   | ITcore_prag                 -- hdaume: core annotations
 348   | ITclose_prag
 349
 350   | ITdotdot                    -- reserved symbols
 351   | ITcolon
 352   | ITdcolon
 353   | ITequal
 354   | ITlam
 355   | ITvbar
 356   | ITlarrow
 357   | ITrarrow
 358   | ITat
 359   | ITtilde
 360   | ITdarrow
 361   | ITminus
 362   | ITbang
 363   | ITstar
 364   | ITdot
 365
 366   | ITbiglam                    -- GHC-extension symbols
 367
 368   | ITocurly                    -- special symbols
 369   | ITccurly
 370   | ITocurlybar                 -- {|, for type applications
 371   | ITccurlybar                 -- |}, for type applications
 372   | ITvocurly
 373   | ITvccurly
 374   | ITobrack
 375   | ITopabrack                  -- [:, for parallel arrays with -fparr
 376   | ITcpabrack                  -- :], for parallel arrays with -fparr
 377   | ITcbrack
 378   | IToparen
 379   | ITcparen
 380   | IToubxparen
 381   | ITcubxparen
 382   | ITsemi
 383   | ITcomma
 384   | ITunderscore
 385   | ITbackquote
 386
 387   | ITvarid   FastString        -- identifiers
 388   | ITconid   FastString
 389   | ITvarsym  FastString
 390   | ITconsym  FastString
 391   | ITqvarid  (FastString,FastString)
 392   | ITqconid  (FastString,FastString)
 393   | ITqvarsym (FastString,FastString)
 394   | ITqconsym (FastString,FastString)
 395
 396   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 397   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 398
 399   | ITpragma StringBuffer
 400
 401   | ITchar       Char
 402   | ITstring     FastString
 403   | ITinteger    Integer
 404   | ITrational   Rational
 405
 406   | ITprimchar   Char
 407   | ITprimstring FastString
 408   | ITprimint    Integer
 409   | ITprimfloat  Rational
 410   | ITprimdouble Rational
 411   | ITlitlit     FastString
 412
 413   -- MetaHaskell extension tokens
 414   | ITopenExpQuote              -- [| or [e|
 415   | ITopenPatQuote              -- [p|
 416   | ITopenDecQuote              -- [d|
 417   | ITopenTypQuote              -- [t|
 418   | ITcloseQuote                -- |]
 419   | ITidEscape   FastString     -- $x
 420   | ITparenEscape               -- $(
 421   | ITreifyType
 422   | ITreifyDecl
 423   | ITreifyFixity
 424
 425   -- Arrow notation extension
 426   | ITproc
 427   | ITrec
 428   | IToparenbar                 -- (|
 429   | ITcparenbar                 -- |)
 430   | ITlarrowtail                -- -<
 431   | ITrarrowtail                -- >-
 432   | ITLarrowtail                -- -<<
 433   | ITRarrowtail                -- >>-
 434
 435   | ITunknown String            -- Used when the lexer can't make sense of it
 436   | ITeof                       -- end of file token
 437 #ifdef DEBUG
 438   deriving Show -- debugging
 439 #endif
 440
 441 isSpecial :: Token__ -> Bool
 442 -- If we see M.x, where x is a keyword, but
 443 -- is special, we treat is as just plain M.x,
 444 -- not as a keyword.
 445 isSpecial ITas          = True
 446 isSpecial IThiding      = True
 447 isSpecial ITqualified   = True
 448 isSpecial ITforall      = True
 449 isSpecial ITexport      = True
 450 isSpecial ITlabel       = True
 451 isSpecial ITdynamic     = True
 452 isSpecial ITsafe        = True
 453 isSpecial ITthreadsafe  = True
 454 isSpecial ITunsafe      = True
 455 isSpecial ITwith        = True
 456 isSpecial ITccallconv   = True
 457 isSpecial ITstdcallconv = True
 458 isSpecial ITmdo         = True
 459 isSpecial _             = False
 460
 461 -- the bitmap provided as the third component indicates whether the
 462 -- corresponding extension keyword is valid under the extension options
 463 -- provided to the compiler; if the extension corresponding to *any* of the
 464 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 465 -- facilitates using a keyword in two different extensions that can be
 466 -- activated independently)
 467 --
 468 reservedWordsFM = listToUFM $
 469         map (\(x, y, z) -> (mkFastString x, (y, z)))
 470        [( "_",          ITunderscore,   0 ),
 471         ( "as",         ITas,           0 ),
 472         ( "case",       ITcase,         0 ),
 473         ( "class",      ITclass,        0 ),
 474         ( "data",       ITdata,         0 ),
 475         ( "default",    ITdefault,      0 ),
 476         ( "deriving",   ITderiving,     0 ),
 477         ( "do",         ITdo,           0 ),
 478         ( "else",       ITelse,         0 ),
 479         ( "hiding",     IThiding,       0 ),
 480         ( "if",         ITif,           0 ),
 481         ( "import",     ITimport,       0 ),
 482         ( "in",         ITin,           0 ),
 483         ( "infix",      ITinfix,        0 ),
 484         ( "infixl",     ITinfixl,       0 ),
 485         ( "infixr",     ITinfixr,       0 ),
 486         ( "instance",   ITinstance,     0 ),
 487         ( "let",        ITlet,          0 ),
 488         ( "module",     ITmodule,       0 ),
 489         ( "newtype",    ITnewtype,      0 ),
 490         ( "of",         ITof,           0 ),
 491         ( "qualified",  ITqualified,    0 ),
 492         ( "then",       ITthen,         0 ),
 493         ( "type",       ITtype,         0 ),
 494         ( "where",      ITwhere,        0 ),
 495         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 496
 497         ( "forall",     ITforall,        bit glaExtsBit),
 498         ( "mdo",        ITmdo,           bit glaExtsBit),
 499         ( "reifyDecl",  ITreifyDecl,     bit glaExtsBit),
 500         ( "reifyType",  ITreifyType,     bit glaExtsBit),
 501         ( "reifyFixity",ITreifyFixity,   bit glaExtsBit),
 502
 503         ( "foreign",    ITforeign,       bit ffiBit),
 504         ( "export",     ITexport,        bit ffiBit),
 505         ( "label",      ITlabel,         bit ffiBit),
 506         ( "dynamic",    ITdynamic,       bit ffiBit),
 507         ( "safe",       ITsafe,          bit ffiBit),
 508         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 509         ( "unsafe",     ITunsafe,        bit ffiBit),
 510         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 511         ( "ccall",      ITccallconv,     bit ffiBit),
 512         ( "dotnet",     ITdotnet,        bit ffiBit),
 513
 514         ( "with",       ITwith,          bit withBit),
 515
 516         ( "rec",        ITrec,           bit arrowsBit),
 517         ( "proc",       ITproc,          bit arrowsBit),
 518
 519         -- On death row
 520         ("_ccall_",     ITccall (False, False, PlayRisky),
 521                                          bit glaExtsBit),
 522         ("_ccall_GC_",  ITccall (False, False, PlaySafe False),
 523                                          bit glaExtsBit),
 524         ("_casm_",      ITccall (False, True,  PlayRisky),
 525                                          bit glaExtsBit),
 526         ("_casm_GC_",   ITccall (False, True,  PlaySafe False),
 527                                          bit glaExtsBit)
 528      ]
 529
 530 reservedSymsFM = listToUFM $
 531         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 532       [ ("..",  ITdotdot,       0)
 533        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 534                                                 -- meaning only list cons
 535        ,("::",  ITdcolon,       0)
 536        ,("=",   ITequal,        0)
 537        ,("\\",  ITlam,          0)
 538        ,("|",   ITvbar,         0)
 539        ,("<-",  ITlarrow,       0)
 540        ,("->",  ITrarrow,       0)
 541        ,("@",   ITat,           0)
 542        ,("~",   ITtilde,        0)
 543        ,("=>",  ITdarrow,       0)
 544        ,("-",   ITminus,        0)
 545        ,("!",   ITbang,         0)
 546
 547        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 548        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 549
 550        ,("-<",  ITlarrowtail,   bit arrowsBit)
 551        ,(">-",  ITrarrowtail,   bit arrowsBit)
 552        ,("-<<", ITLarrowtail,   bit arrowsBit)
 553        ,(">>-", ITRarrowtail,   bit arrowsBit)
 554        ]
 555
 556 -- -----------------------------------------------------------------------------
 557 -- Lexer actions
 558
 559 type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token
 560
 561 special :: Token__ -> Action
 562 special tok loc end _buf len = return (T loc end tok)
 563
 564 token :: Token__ -> Action
 565 token t loc end buf len = return (T loc end t)
 566
 567 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 568 idtoken f loc end buf len = return (T loc end $! (f buf len))
 569
 570 skip_one_varid :: (FastString -> Token__) -> Action
 571 skip_one_varid f loc end buf len
 572   = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1)))
 573
 574 strtoken :: (String -> Token__) -> Action
 575 strtoken f loc end buf len =
 576   return (T loc end $! (f $! lexemeToString buf len))
 577
 578 init_strtoken :: Int -> (String -> Token__) -> Action
 579 -- like strtoken, but drops the last N character(s)
 580 init_strtoken drop f loc end buf len =
 581   return (T loc end $! (f $! lexemeToString buf (len-drop)))
 582
 583 begin :: Int -> Action
 584 begin code _loc _end _str _len = do pushLexState code; lexToken
 585
 586 pop :: Action
 587 pop _loc _end _buf _len = do popLexState; lexToken
 588
 589 pop_and :: Action -> Action
 590 pop_and act loc end buf len = do popLexState; act loc end buf len
 591
 592 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 593
 594 {-
 595   nested comments require traversing by hand, they can't be parsed
 596   using regular expressions.
 597 -}
 598 nested_comment :: Action
 599 nested_comment loc _end _str _len = do
 600   input <- getInput
 601   go 1 input
 602   where go 0 input = do setInput input; lexToken
 603         go n input = do
 604           case alexGetChar input of
 605             Nothing  -> err input
 606             Just (c,input) -> do
 607               case c of
 608                 '-' -> do
 609                   case alexGetChar input of
 610                     Nothing  -> err input
 611                     Just ('\125',input) -> go (n-1) input
 612                     Just (c,_)          -> go n input
 613                 '\123' -> do
 614                   case alexGetChar input of
 615                     Nothing  -> err input
 616                     Just ('-',input') -> go (n+1) input'
 617                     Just (c,input)    -> go n input
 618                 c -> go n input
 619
 620         err input = do failLocMsgP loc (fst input) "unterminated `{-'"
 621
 622 open_brace, close_brace :: Action
 623 open_brace  loc end _str _len = do
 624   ctx <- getContext
 625   setContext (NoLayout:ctx)
 626   return (T loc end ITocurly)
 627 close_brace loc end _str _len = do
 628   popContext
 629   return (T loc end ITccurly)
 630
 631 -- We have to be careful not to count M.<varid> as a qualified name
 632 -- when <varid> is a keyword.  We hack around this by catching
 633 -- the offending tokens afterward, and re-lexing in a different state.
 634 check_qvarid loc end buf len = do
 635   case lookupUFM reservedWordsFM var of
 636         Just (keyword,exts)
 637           | not (isSpecial keyword) ->
 638           if exts == 0
 639              then try_again
 640              else do
 641                 b <- extension (\i -> exts .&. i /= 0)
 642                 if b then try_again
 643                      else return token
 644         _other -> return token
 645   where
 646         (mod,var) = splitQualName buf len
 647         token     = T loc end (ITqvarid (mod,var))
 648
 649         try_again = do
 650                 setInput (loc,buf)
 651                 pushLexState bad_qvarid
 652                 lexToken
 653
 654 qvarid buf len = ITqvarid $! splitQualName buf len
 655 qconid buf len = ITqconid $! splitQualName buf len
 656
 657 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 658 -- takes a StringBuffer and a length, and returns the module name
 659 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 660 -- because of hierarchical module names.
 661 splitQualName orig_buf len = split orig_buf 0 0
 662   where
 663     split buf dot_off n
 664         | n == len                = done dot_off
 665         | lookAhead buf n == '.'  = split2 buf n (n+1)
 666         | otherwise               = split buf dot_off (n+1)
 667
 668     -- careful, we might get names like M....
 669     -- so, if the character after the dot is not upper-case, this is
 670     -- the end of the qualifier part.
 671     split2 buf dot_off n
 672         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 673         | otherwise                 = done dot_off
 674
 675     done dot_off =
 676         (lexemeToFastString orig_buf dot_off,
 677          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 678
 679 varid loc end buf len =
 680   case lookupUFM reservedWordsFM fs of
 681         Just (keyword,0)    -> do
 682                 maybe_layout keyword
 683                 return (T loc end keyword)
 684         Just (keyword,exts) -> do
 685                 b <- extension (\i -> exts .&. i /= 0)
 686                 if b then do maybe_layout keyword
 687                              return (T loc end keyword)
 688                      else return (T loc end (ITvarid fs))
 689         _other -> return (T loc end (ITvarid fs))
 690   where
 691         fs = lexemeToFastString buf len
 692
 693 conid buf len = ITconid fs
 694   where fs = lexemeToFastString buf len
 695
 696 qvarsym buf len = ITqvarsym $! splitQualName buf len
 697 qconsym buf len = ITqconsym $! splitQualName buf len
 698
 699 varsym = sym ITvarsym
 700 consym = sym ITconsym
 701
 702 sym con loc end buf len =
 703   case lookupUFM reservedSymsFM fs of
 704         Just (keyword,0)    -> return (T loc end keyword)
 705         Just (keyword,exts) -> do
 706                 b <- extension (\i -> exts .&. i /= 0)
 707                 if b then return (T loc end keyword)
 708                      else return (T loc end $! con fs)
 709         _other -> return (T loc end $! con fs)
 710   where
 711         fs = lexemeToFastString buf len
 712
 713 tok_decimal loc end buf len
 714   = return (T loc end (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 715
 716 tok_octal loc end buf len
 717   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 718
 719 tok_hexadecimal loc end buf len
 720   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 721
 722 prim_decimal loc end buf len
 723   = return (T loc end (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 724
 725 prim_octal loc end buf len
 726   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 727
 728 prim_hexadecimal loc end buf len
 729   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 730
 731 tok_float        str = ITrational $! readRational__ str
 732 prim_float       str = ITprimfloat  $! readRational__ str
 733 prim_double      str = ITprimdouble $! readRational__ str
 734
 735 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 736 parseInteger buf len radix to_int
 737   = go 0 0
 738   where go i x | i == len  = x
 739                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 740
 741 clitlit :: Action
 742 clitlit loc end buf len =
 743   return (T loc end (ITlitlit $! lexemeToFastString (stepOnBy 2 buf) (len-4)))
 744
 745 -- -----------------------------------------------------------------------------
 746 -- Layout processing
 747
 748 -- we're at the first token on a line, insert layout tokens if necessary
 749 do_bol :: Action
 750 do_bol loc end _str _len = do
 751         pos <- getOffside end
 752         case pos of
 753             LT -> do
 754                 --trace "layout: inserting '}'" $ do
 755                 popContext
 756                 -- do NOT pop the lex state, we might have a ';' to insert
 757                 return (T loc end ITvccurly)
 758             EQ -> do
 759                 --trace "layout: inserting ';'" $ do
 760                 popLexState
 761                 return (T loc end ITsemi)
 762             GT -> do
 763                 popLexState
 764                 lexToken
 765
 766 -- certain keywords put us in the "layout" state, where we might
 767 -- add an opening curly brace.
 768 maybe_layout ITdo       = pushLexState layout_do
 769 maybe_layout ITof       = pushLexState layout
 770 maybe_layout ITlet      = pushLexState layout
 771 maybe_layout ITwhere    = pushLexState layout
 772 maybe_layout _          = return ()
 773
 774 -- Pushing a new implicit layout context.  If the indentation of the
 775 -- next token is not greater than the previous layout context, then
 776 -- Haskell 98 says that the new layout context should be empty; that is
 777 -- the lexer must generate {}.
 778 --
 779 -- We are slightly more lenient than this: when the new context is started
 780 -- by a 'do', then we allow the new context to be at the same indentation as
 781 -- the previous context.  This is what the 'strict' argument is for.
 782 --
 783 new_layout_context strict loc end _buf _len = do
 784     popLexState
 785     let offset = srcLocCol loc
 786     ctx <- getContext
 787     case ctx of
 788         Layout prev_off : _  |
 789            (strict     && prev_off >= offset  ||
 790             not strict && prev_off > offset) -> do
 791                 -- token is indented to the left of the previous context.
 792                 -- we must generate a {} sequence now.
 793                 pushLexState layout_left
 794                 return (T loc end ITvocurly)
 795         other -> do
 796                 setContext (Layout offset : ctx)
 797                 return (T loc end ITvocurly)
 798
 799 do_layout_left loc end _buf _len = do
 800     popLexState
 801     pushLexState bol  -- we must be at the start of a line
 802     return (T loc end ITvccurly)
 803
 804 -- -----------------------------------------------------------------------------
 805 -- LINE pragmas
 806
 807 set_line :: Int -> Action
 808 set_line code loc end buf len = do
 809   let line = parseInteger buf len 10 oct_or_dec
 810   setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0)
 811         -- subtract one: the line number refers to the *following* line
 812   popLexState
 813   pushLexState code
 814   lexToken
 815
 816 set_file :: Int -> Action
 817 set_file code loc end buf len = do
 818   let file = lexemeToFastString (stepOn buf) (len-2)
 819   setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end))
 820   popLexState
 821   pushLexState code
 822   lexToken
 823
 824 -- -----------------------------------------------------------------------------
 825 -- Strings & Chars
 826
 827 -- This stuff is horrible.  I hates it.
 828
 829 lex_string_tok :: Action
 830 lex_string_tok loc end buf len = do
 831   tok <- lex_string ""
 832   end <- getSrcLoc
 833   return (T loc end tok)
 834
 835 lex_string :: String -> P Token__
 836 lex_string s = do
 837   i <- getInput
 838   case alexGetChar i of
 839     Nothing -> lit_error
 840
 841     Just ('"',i)  -> do
 842         setInput i
 843         glaexts <- extension glaExtsEnabled
 844         if glaexts
 845           then do
 846             i <- getInput
 847             case alexGetChar i of
 848               Just ('#',i) -> do
 849                    setInput i
 850                    if any (> '\xFF') s
 851                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 852                     else let s' = mkFastStringNarrow (reverse s) in
 853                          -- always a narrow string/byte array
 854                          return (ITprimstring s')
 855               _other ->
 856                 return (ITstring (mkFastString (reverse s)))
 857           else
 858                 return (ITstring (mkFastString (reverse s)))
 859
 860     Just ('\\',i)
 861         | Just ('&',i) <- next -> do
 862                 setInput i; lex_string s
 863         | Just (c,i) <- next, is_space c -> do
 864                 setInput i; lex_stringgap s
 865         where next = alexGetChar i
 866
 867     Just _ -> do
 868         c <- lex_char
 869         lex_string (c:s)
 870
 871
 872 lex_stringgap s = do
 873   c <- getCharOrFail
 874   case c of
 875     '\\' -> lex_string s
 876     c | is_space c -> lex_stringgap s
 877     _other -> lit_error
 878
 879
 880 lex_char_tok :: Action
 881 lex_char_tok loc _end buf len = do
 882    c <- lex_char
 883    mc <- getCharOrFail
 884    case mc of
 885         '\'' -> do
 886            glaexts <- extension glaExtsEnabled
 887            if glaexts
 888                 then do
 889                    i@(end,_) <- getInput
 890                    case alexGetChar i of
 891                         Just ('#',i@(end,_)) -> do
 892                                 setInput i
 893                                 return (T loc end (ITprimchar c))
 894                         _other ->
 895                                 return (T loc end (ITchar c))
 896                 else do
 897                    end <- getSrcLoc
 898                    return (T loc end (ITchar c))
 899
 900         _other -> lit_error
 901
 902 lex_char :: P Char
 903 lex_char = do
 904   mc <- getCharOrFail
 905   case mc of
 906       '\\' -> lex_escape
 907       c | is_any c -> return c
 908       _other -> lit_error
 909
 910 lex_escape :: P Char
 911 lex_escape = do
 912   c <- getCharOrFail
 913   case c of
 914         'a'   -> return '\a'
 915         'b'   -> return '\b'
 916         'f'   -> return '\f'
 917         'n'   -> return '\n'
 918         'r'   -> return '\r'
 919         't'   -> return '\t'
 920         'v'   -> return '\v'
 921         '\\'  -> return '\\'
 922         '"'   -> return '\"'
 923         '\''  -> return '\''
 924         '^'   -> do c <- getCharOrFail
 925                     if c >= '@' && c <= '_'
 926                         then return (chr (ord c - ord '@'))
 927                         else lit_error
 928
 929         'x'   -> readNum is_hexdigit 16 hex
 930         'o'   -> readNum is_octdigit  8 oct_or_dec
 931         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 932
 933         c1 ->  do
 934            i <- getInput
 935            case alexGetChar i of
 936             Nothing -> lit_error
 937             Just (c2,i2) ->
 938               case alexGetChar i2 of
 939                 Nothing -> lit_error
 940                 Just (c3,i3) ->
 941                    let str = [c1,c2,c3] in
 942                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 943                                      Just rest <- [maybePrefixMatch p str] ] of
 944                           (escape_char,[]):_ -> do
 945                                 setInput i3
 946                                 return escape_char
 947                           (escape_char,_:_):_ -> do
 948                                 setInput i2
 949                                 return escape_char
 950                           [] -> lit_error
 951
 952 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 953 readNum is_digit base conv = do
 954   c <- getCharOrFail
 955   if is_digit c
 956         then readNum2 is_digit base conv (conv c)
 957         else lit_error
 958
 959 readNum2 is_digit base conv i = do
 960   input <- getInput
 961   read i input
 962   where read i input = do
 963           case alexGetChar input of
 964             Just (c,input') | is_digit c -> do
 965                 read (i*base + conv c) input'
 966             _other -> do
 967                 setInput input
 968                 if i >= 0 && i <= 0x10FFFF
 969                    then return (chr i)
 970                    else lit_error
 971
 972 is_hexdigit c
 973         =  is_digit c
 974         || (c >= 'a' && c <= 'f')
 975         || (c >= 'A' && c <= 'F')
 976
 977 hex c | is_digit c = ord c - ord '0'
 978       | otherwise  = ord (to_lower c) - ord 'a' + 10
 979
 980 oct_or_dec c = ord c - ord '0'
 981
 982 is_octdigit c = c >= '0' && c <= '7'
 983
 984 to_lower c
 985   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
 986   | otherwise = c
 987
 988 silly_escape_chars = [
 989         ("NUL", '\NUL'),
 990         ("SOH", '\SOH'),
 991         ("STX", '\STX'),
 992         ("ETX", '\ETX'),
 993         ("EOT", '\EOT'),
 994         ("ENQ", '\ENQ'),
 995         ("ACK", '\ACK'),
 996         ("BEL", '\BEL'),
 997         ("BS", '\BS'),
 998         ("HT", '\HT'),
 999         ("LF", '\LF'),
1000         ("VT", '\VT'),
1001         ("FF", '\FF'),
1002         ("CR", '\CR'),
1003         ("SO", '\SO'),
1004         ("SI", '\SI'),
1005         ("DLE", '\DLE'),
1006         ("DC1", '\DC1'),
1007         ("DC2", '\DC2'),
1008         ("DC3", '\DC3'),
1009         ("DC4", '\DC4'),
1010         ("NAK", '\NAK'),
1011         ("SYN", '\SYN'),
1012         ("ETB", '\ETB'),
1013         ("CAN", '\CAN'),
1014         ("EM", '\EM'),
1015         ("SUB", '\SUB'),
1016         ("ESC", '\ESC'),
1017         ("FS", '\FS'),
1018         ("GS", '\GS'),
1019         ("RS", '\RS'),
1020         ("US", '\US'),
1021         ("SP", '\SP'),
1022         ("DEL", '\DEL')
1023         ]
1024
1025 lit_error = lexError "lexical error in string/character literal"
1026
1027 getCharOrFail :: P Char
1028 getCharOrFail =  do
1029   i <- getInput
1030   case alexGetChar i of
1031         Nothing -> lexError "unexpected end-of-file in string/character literal"
1032         Just (c,i)  -> do setInput i; return c
1033
1034 -- -----------------------------------------------------------------------------
1035 -- Floats
1036
1037 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1038 readRational r = do
1039      (n,d,s) <- readFix r
1040      (k,t)   <- readExp s
1041      return ((n%1)*10^^(k-d), t)
1042  where
1043      readFix r = do
1044         (ds,s)  <- lexDecDigits r
1045         (ds',t) <- lexDotDigits s
1046         return (read (ds++ds'), length ds', t)
1047
1048      readExp (e:s) | e `elem` "eE" = readExp' s
1049      readExp s                     = return (0,s)
1050
1051      readExp' ('+':s) = readDec s
1052      readExp' ('-':s) = do
1053                         (k,t) <- readDec s
1054                         return (-k,t)
1055      readExp' s       = readDec s
1056
1057      readDec s = do
1058         (ds,r) <- nonnull isDigit s
1059         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1060                 r)
1061
1062      lexDecDigits = nonnull isDigit
1063
1064      lexDotDigits ('.':s) = return (span isDigit s)
1065      lexDotDigits s       = return ("",s)
1066
1067      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1068                       return (cs,t)
1069
1070 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1071 readRational__ top_s
1072   = case top_s of
1073       '-' : xs -> - (read_me xs)
1074       xs       -> read_me xs
1075   where
1076     read_me s
1077       = case (do { (x,"") <- readRational s ; return x }) of
1078           [x] -> x
1079           []  -> error ("readRational__: no parse:"        ++ top_s)
1080           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1081
1082 -- -----------------------------------------------------------------------------
1083 -- The Parse Monad
1084
1085 data LayoutContext
1086   = NoLayout
1087   | Layout !Int
1088
1089 data ParseResult a
1090   = POk PState a
1091   | PFailed
1092         SrcLoc SrcLoc   -- The start and end of the text span related to
1093                         -- the error.  Might be used in environments which can
1094                         -- show this span, e.g. by highlighting it.
1095         Message         -- The error message
1096
1097 showPFailed loc1 loc2 err
1098  = showSDoc (hcat [ppr loc1, text ": ", err])
1099
1100 data PState = PState {
1101         buffer     :: StringBuffer,
1102         last_loc   :: SrcLoc,           -- pos of previous token
1103         last_len   :: !Int,             -- len of previous token
1104         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1105         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1106         context    :: [LayoutContext],
1107         lex_state  :: [Int]
1108      }
1109         -- last_loc and last_len are used when generating error messages,
1110         -- and in pushCurrentContext only.
1111
1112 newtype P a = P { unP :: PState -> ParseResult a }
1113
1114 instance Monad P where
1115   return = returnP
1116   (>>=) = thenP
1117   fail = failP
1118
1119 returnP :: a -> P a
1120 returnP a = P $ \s -> POk s a
1121
1122 thenP :: P a -> (a -> P b) -> P b
1123 (P m) `thenP` k = P $ \ s ->
1124         case m s of
1125                 POk s1 a          -> (unP (k a)) s1
1126                 PFailed l1 l2 err -> PFailed l1 l2 err
1127
1128 failP :: String -> P a
1129 failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1130
1131 failMsgP :: String -> P a
1132 failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1133
1134 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1135 failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str)
1136
1137 extension :: (Int -> Bool) -> P Bool
1138 extension p = P $ \s -> POk s (p $! extsBitmap s)
1139
1140 getExts :: P Int
1141 getExts = P $ \s -> POk s (extsBitmap s)
1142
1143 setSrcLoc :: SrcLoc -> P ()
1144 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1145
1146 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1147 -- any calls to the lexer, because it assumes things about the SrcLoc.
1148 setSrcLocFor :: SrcLoc -> P a -> P a
1149 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1150   case unP scope s{loc=new_loc} of
1151         PFailed l1 l2 msg -> PFailed l1 l2 msg
1152         POk _ r -> POk s r
1153
1154 getSrcLoc :: P SrcLoc
1155 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1156
1157 setLastToken :: SrcLoc -> Int -> P ()
1158 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1159
1160 type AlexInput = (SrcLoc,StringBuffer)
1161
1162 alexInputPrevChar :: AlexInput -> Char
1163 alexInputPrevChar (_,s) = prevChar s '\n'
1164
1165 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1166 alexGetChar (loc,s)
1167   | atEnd s   = Nothing
1168   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1169   where c = currentChar s
1170         loc' = advanceSrcLoc loc c
1171         s'   = stepOn s
1172
1173 getInput :: P AlexInput
1174 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1175
1176 setInput :: AlexInput -> P ()
1177 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1178
1179 pushLexState :: Int -> P ()
1180 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1181
1182 popLexState :: P Int
1183 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1184
1185 getLexState :: P Int
1186 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1187
1188 -- for reasons of efficiency, flags indicating language extensions (eg,
1189 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1190 -- integer
1191
1192 glaExtsBit, ffiBit, parrBit :: Int
1193 glaExtsBit = 0
1194 ffiBit     = 1
1195 parrBit    = 2
1196 withBit    = 3
1197 arrowsBit  = 4
1198
1199 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1200 glaExtsEnabled flags = testBit flags glaExtsBit
1201 ffiEnabled     flags = testBit flags ffiBit
1202 withEnabled    flags = testBit flags withBit
1203 parrEnabled    flags = testBit flags parrBit
1204 arrowsEnabled  flags = testBit flags arrowsBit
1205
1206 -- convenient record-based bitmap for the interface to the rest of the world
1207 --
1208 -- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below)
1209 --
1210 data ExtFlags = ExtFlags {
1211                   glasgowExtsEF :: Bool,
1212                   ffiEF         :: Bool,
1213                   withEF        :: Bool,
1214                   parrEF        :: Bool,
1215                   arrowsEF      :: Bool
1216                 }
1217
1218 -- create a parse state
1219 --
1220 mkPState :: StringBuffer -> SrcLoc -> ExtFlags -> PState
1221 mkPState buf loc exts  =
1222   PState {
1223       buffer     = buf,
1224       last_loc   = loc,
1225       last_len   = 0,
1226       loc        = loc,
1227       extsBitmap = fromIntegral bitmap,
1228       context    = [],
1229       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1230         -- we begin in the layout state if toplev_layout is set
1231     }
1232     where
1233       bitmap =     glaExtsBit `setBitIf` glasgowExtsEF     exts
1234                .|. ffiBit     `setBitIf` (ffiEF            exts
1235                                           || glasgowExtsEF exts)
1236                .|. withBit    `setBitIf` withEF            exts
1237                .|. parrBit    `setBitIf` parrEF            exts
1238                .|. arrowsBit  `setBitIf` arrowsEF          exts
1239       --
1240       setBitIf :: Int -> Bool -> Int
1241       b `setBitIf` cond | cond      = bit b
1242                         | otherwise = 0
1243
1244 getContext :: P [LayoutContext]
1245 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1246
1247 setContext :: [LayoutContext] -> P ()
1248 setContext ctx = P $ \s -> POk s{context=ctx} ()
1249
1250 popContext :: P ()
1251 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1252                            loc = loc, last_len = len, last_loc = last_loc }) ->
1253   case ctx of
1254         (_:tl) -> POk s{ context = tl } ()
1255         []     -> PFailed last_loc loc (srcParseErr buf len)
1256
1257 -- Push a new layout context at the indentation of the last token read.
1258 -- This is only used at the outer level of a module when the 'module'
1259 -- keyword is missing.
1260 pushCurrentContext :: P ()
1261 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1262   POk s{ context = Layout (srcLocCol loc) : ctx} ()
1263
1264 getOffside :: SrcLoc -> P Ordering
1265 getOffside loc = P $ \s@PState{context=stk} ->
1266                 let ord = case stk of
1267                         (Layout n:_) -> compare (srcLocCol loc) n
1268                         _            -> GT
1269                 in POk s ord
1270
1271 -- ---------------------------------------------------------------------------
1272 -- Construct a parse error
1273
1274 srcParseErr
1275   :: StringBuffer       -- current buffer (placed just after the last token)
1276   -> Int                -- length of the previous token
1277   -> Message
1278 srcParseErr buf len
1279   = hcat [ if null token
1280              then ptext SLIT("parse error (possibly incorrect indentation)")
1281              else hcat [ptext SLIT("parse error on input "),
1282                         char '`', text token, char '\'']
1283     ]
1284   where token = lexemeToString (stepOnBy (-len) buf) len
1285
1286 -- Report a parse failure, giving the span of the previous token as
1287 -- the location of the error.  This is the entry point for errors
1288 -- detected during parsing.
1289 srcParseFail :: P a
1290 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1291                                 last_loc = last_loc, loc = loc } ->
1292     PFailed last_loc loc (srcParseErr buf len)
1293
1294 -- A lexical error is reported at a particular position in the source file,
1295 -- not over a token range.  TODO: this is slightly wrong, because we record
1296 -- the error at the character position following the one which caused the
1297 -- error.  We should somehow back up by one character.
1298 lexError :: String -> P a
1299 lexError str = do
1300   loc <- getSrcLoc
1301   failLocMsgP loc loc str
1302
1303 -- -----------------------------------------------------------------------------
1304 -- This is the top-level function: called from the parser each time a
1305 -- new token is to be read from the input.
1306
1307 lexer :: (Token -> P a) -> P a
1308 lexer cont = do
1309   tok@(T _ _ tok__) <- lexToken
1310   --trace ("token: " ++ show tok__) $ do
1311   cont tok
1312
1313 lexToken :: P Token
1314 lexToken = do
1315   inp@(loc1,buf) <- getInput
1316   sc <- getLexState
1317   exts <- getExts
1318   case alexScanUser exts inp sc of
1319     AlexEOF -> do setLastToken loc1 0
1320                   return (T loc1 loc1 ITeof)
1321     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1322     AlexSkip inp2 _ -> do
1323         setInput inp2
1324         lexToken
1325     AlexToken inp2@(end,buf2) len t -> do
1326         setInput inp2
1327         setLastToken loc1 len
1328         t loc1 end buf len
1329 }