ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ForeignCall      ( Safety(..) )
  34 import ErrUtils         ( Message )
  35 import Outputable
  36 import StringBuffer
  37 import FastString
  38 import FastTypes
  39 import SrcLoc
  40 import UniqFM
  41 import CmdLineOpts
  42 import Ctype
  43 import Util             ( maybePrefixMatch )
  44
  45 import DATA_BITS
  46 import Char
  47 import Ratio
  48 import TRACE
  49 }
  50
  51 $whitechar   = [\ \t\n\r\f\v]
  52 $white_no_nl = $whitechar # \n
  53
  54 $ascdigit  = 0-9
  55 $unidigit  = \x01
  56 $digit     = [$ascdigit $unidigit]
  57
  58 $special   = [\(\)\,\;\[\]\`\{\}]
  59 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  60 $unisymbol = \x02
  61 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  62
  63 $unilarge  = \x03
  64 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  65 $large     = [$asclarge $unilarge]
  66
  67 $unismall  = \x04
  68 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  69 $small     = [$ascsmall $unismall \_]
  70
  71 $graphic   = [$small $large $symbol $digit $special \:\"\']
  72
  73 $octit     = 0-7
  74 $hexit     = [$digit A-F a-f]
  75 $symchar   = [$symbol \:]
  76 $nl        = [\n\r]
  77 $idchar    = [$small $large $digit \']
  78
  79 @varid     = $small $idchar*
  80 @conid     = $large $idchar*
  81
  82 @varsym    = $symbol $symchar*
  83 @consym    = \: $symchar*
  84
  85 @decimal     = $digit+
  86 @octal       = $octit+
  87 @hexadecimal = $hexit+
  88 @exponent    = [eE] [\-\+]? @decimal
  89
  90 -- we support the hierarchical module name extension:
  91 @qual = (@conid \.)+
  92
  93 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  94
  95 haskell :-
  96
  97 -- everywhere: skip whitespace and comments
  98 $white_no_nl+                           ;
  99
 100 -- Everywhere: deal with nested comments.  We explicitly rule out
 101 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 102 -- (this can happen even though pragmas will normally take precedence due to
 103 -- longest-match, because pragmas aren't valid in every state, but comments
 104 -- are).
 105 "{-" / { notFollowedBy '#' }            { nested_comment }
 106
 107 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 108 -- more dashes followed by a symbol should be parsed as a varsym, so we
 109 -- have to exclude those.
 110 -- The regex says: "munch all the characters after the dashes, as long as
 111 -- the first one is not a symbol".
 112 "--"\-* ([^$symbol] .*)?                ;
 113
 114 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 115 -- blank lines) until we find a non-whitespace character, then do layout
 116 -- processing.
 117 --
 118 -- One slight wibble here: what if the line begins with {-#? In
 119 -- theory, we have to lex the pragma to see if it's one we recognise,
 120 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 121 -- as a nested comment.  We don't bother with this: if the line begins
 122 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 123 <bol> {
 124   \n                                    ;
 125   ^\# (line)?                           { begin line_prag1 }
 126   ()                                    { do_bol }
 127 }
 128
 129 -- after a layout keyword (let, where, do, of), we begin a new layout
 130 -- context if the curly brace is missing.
 131 -- Careful! This stuff is quite delicate.
 132 <layout, layout_do> {
 133   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 134         -- we might encounter {-# here, but {- has been handled already
 135   \n                                    ;
 136   ^\# (line)?                           { begin line_prag1 }
 137 }
 138
 139 -- do is treated in a subtly different way, see new_layout_context
 140 <layout>    ()                          { new_layout_context True }
 141 <layout_do> ()                          { new_layout_context False }
 142
 143 -- after a new layout context which was found to be to the left of the
 144 -- previous context, we have generated a '{' token, and we now need to
 145 -- generate a matching '}' token.
 146 <layout_left>  ()                       { do_layout_left }
 147
 148 <0,glaexts> \n                          { begin bol }
 149
 150 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 151
 152 -- single-line line pragmas, of the form
 153 --    # <line> "<file>" <extra-stuff> \n
 154 <line_prag1> $digit+                    { set_line line_prag1a }
 155 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 156 <line_prag1b> .*                        { pop }
 157
 158 -- Haskell-style line pragmas, of the form
 159 --    {-# LINE <line> "<file>" #-}
 160 <line_prag2> $digit+                    { set_line line_prag2a }
 161 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 162 <line_prag2b> "#-}"                     { pop }
 163
 164 <0,glaexts> {
 165   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 166                                         { token ITspecialise_prag }
 167   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 168   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 169   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 170                                         { token ITnoinline_prag }
 171   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 172   "{-#" $whitechar* (DEPRECATED|deprecated)
 173                                         { token ITdeprecated_prag }
 174   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 175   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 176
 177   "{-#"                                 { nested_comment }
 178
 179   -- ToDo: should only be valid inside a pragma:
 180   "#-}"                                 { token ITclose_prag}
 181 }
 182
 183
 184 -- '0' state: ordinary lexemes
 185 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 186
 187 -- "special" symbols
 188
 189 <0,glaexts> {
 190   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 191   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 192 }
 193
 194 <0,glaexts> {
 195   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 196   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 197   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 198   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 199   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 200   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 201   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 202   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 203 }
 204
 205 <0,glaexts> {
 206   "(|" / { ifExtension arrowsEnabled }  { special IToparenbar }
 207   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 208 }
 209
 210 <0,glaexts> {
 211   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 212   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 213 }
 214
 215 <glaexts> {
 216   "(#"                                  { token IToubxparen }
 217   "#)"                                  { token ITcubxparen }
 218   "{|"                                  { token ITocurlybar }
 219   "|}"                                  { token ITccurlybar }
 220 }
 221
 222 <0,glaexts> {
 223   \(                                    { special IToparen }
 224   \)                                    { special ITcparen }
 225   \[                                    { special ITobrack }
 226   \]                                    { special ITcbrack }
 227   \,                                    { special ITcomma }
 228   \;                                    { special ITsemi }
 229   \`                                    { special ITbackquote }
 230
 231   \{                                    { open_brace }
 232   \}                                    { close_brace }
 233 }
 234
 235 <0,glaexts> {
 236   @qual @varid                  { check_qvarid }
 237   @qual @conid                  { idtoken qconid }
 238   @varid                        { varid }
 239   @conid                        { idtoken conid }
 240 }
 241
 242 -- after an illegal qvarid, such as 'M.let',
 243 -- we back up and try again in the bad_qvarid state:
 244 <bad_qvarid> {
 245   @conid                        { pop_and (idtoken conid) }
 246   @qual @conid                  { pop_and (idtoken qconid) }
 247 }
 248
 249 <glaexts> {
 250   @qual @varid "#"+             { idtoken qvarid }
 251   @qual @conid "#"+             { idtoken qconid }
 252   @varid "#"+                   { varid }
 253   @conid "#"+                   { idtoken conid }
 254 }
 255
 256 -- ToDo: M.(,,,)
 257
 258 <0,glaexts> {
 259   @qual @varsym                 { idtoken qvarsym }
 260   @qual @consym                 { idtoken qconsym }
 261   @varsym                       { varsym }
 262   @consym                       { consym }
 263 }
 264
 265 <0,glaexts> {
 266   @decimal                      { tok_decimal }
 267   0[oO] @octal                  { tok_octal }
 268   0[xX] @hexadecimal            { tok_hexadecimal }
 269 }
 270
 271 <glaexts> {
 272   @decimal \#                   { prim_decimal }
 273   0[oO] @octal \#               { prim_octal }
 274   0[xX] @hexadecimal \#         { prim_hexadecimal }
 275 }
 276
 277 <0,glaexts> @floating_point             { strtoken tok_float }
 278 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 279 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 280
 281 -- Strings and chars are lexed by hand-written code.  The reason is
 282 -- that even if we recognise the string or char here in the regex
 283 -- lexer, we would still have to parse the string afterward in order
 284 -- to convert it to a String.
 285 <0,glaexts> {
 286   \'                            { lex_char_tok }
 287   \"                            { lex_string_tok }
 288 }
 289
 290 <glaexts> "``" (([$graphic $whitechar] # \') | \' ([$graphic $whitechar] # \'))*
 291                 "''"            { clitlit }
 292
 293 {
 294 -- work around bug in Alex 2.0
 295 #if __GLASGOW_HASKELL__ < 503
 296 unsafeAt arr i = arr ! i
 297 #endif
 298
 299 -- -----------------------------------------------------------------------------
 300 -- The token type
 301
 302 data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__
 303
 304 data Token__
 305   = ITas                        -- Haskell keywords
 306   | ITcase
 307   | ITclass
 308   | ITdata
 309   | ITdefault
 310   | ITderiving
 311   | ITdo
 312   | ITelse
 313   | IThiding
 314   | ITif
 315   | ITimport
 316   | ITin
 317   | ITinfix
 318   | ITinfixl
 319   | ITinfixr
 320   | ITinstance
 321   | ITlet
 322   | ITmodule
 323   | ITnewtype
 324   | ITof
 325   | ITqualified
 326   | ITthen
 327   | ITtype
 328   | ITwhere
 329   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 330
 331   | ITforall                    -- GHC extension keywords
 332   | ITforeign
 333   | ITexport
 334   | ITlabel
 335   | ITdynamic
 336   | ITsafe
 337   | ITthreadsafe
 338   | ITunsafe
 339   | ITwith
 340   | ITstdcallconv
 341   | ITccallconv
 342   | ITdotnet
 343   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 344   | ITmdo
 345
 346   | ITspecialise_prag           -- Pragmas
 347   | ITsource_prag
 348   | ITinline_prag
 349   | ITnoinline_prag
 350   | ITrules_prag
 351   | ITdeprecated_prag
 352   | ITline_prag
 353   | ITscc_prag
 354   | ITcore_prag                 -- hdaume: core annotations
 355   | ITclose_prag
 356
 357   | ITdotdot                    -- reserved symbols
 358   | ITcolon
 359   | ITdcolon
 360   | ITequal
 361   | ITlam
 362   | ITvbar
 363   | ITlarrow
 364   | ITrarrow
 365   | ITat
 366   | ITtilde
 367   | ITdarrow
 368   | ITminus
 369   | ITbang
 370   | ITstar
 371   | ITdot
 372
 373   | ITbiglam                    -- GHC-extension symbols
 374
 375   | ITocurly                    -- special symbols
 376   | ITccurly
 377   | ITocurlybar                 -- {|, for type applications
 378   | ITccurlybar                 -- |}, for type applications
 379   | ITvocurly
 380   | ITvccurly
 381   | ITobrack
 382   | ITopabrack                  -- [:, for parallel arrays with -fparr
 383   | ITcpabrack                  -- :], for parallel arrays with -fparr
 384   | ITcbrack
 385   | IToparen
 386   | ITcparen
 387   | IToubxparen
 388   | ITcubxparen
 389   | ITsemi
 390   | ITcomma
 391   | ITunderscore
 392   | ITbackquote
 393
 394   | ITvarid   FastString        -- identifiers
 395   | ITconid   FastString
 396   | ITvarsym  FastString
 397   | ITconsym  FastString
 398   | ITqvarid  (FastString,FastString)
 399   | ITqconid  (FastString,FastString)
 400   | ITqvarsym (FastString,FastString)
 401   | ITqconsym (FastString,FastString)
 402
 403   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 404   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 405
 406   | ITpragma StringBuffer
 407
 408   | ITchar       Char
 409   | ITstring     FastString
 410   | ITinteger    Integer
 411   | ITrational   Rational
 412
 413   | ITprimchar   Char
 414   | ITprimstring FastString
 415   | ITprimint    Integer
 416   | ITprimfloat  Rational
 417   | ITprimdouble Rational
 418   | ITlitlit     FastString
 419
 420   -- MetaHaskell extension tokens
 421   | ITopenExpQuote              -- [| or [e|
 422   | ITopenPatQuote              -- [p|
 423   | ITopenDecQuote              -- [d|
 424   | ITopenTypQuote              -- [t|
 425   | ITcloseQuote                -- |]
 426   | ITidEscape   FastString     -- $x
 427   | ITparenEscape               -- $(
 428   | ITreifyType
 429   | ITreifyDecl
 430   | ITreifyFixity
 431
 432   -- Arrow notation extension
 433   | ITproc
 434   | ITrec
 435   | IToparenbar                 -- (|
 436   | ITcparenbar                 -- |)
 437   | ITlarrowtail                -- -<
 438   | ITrarrowtail                -- >-
 439   | ITLarrowtail                -- -<<
 440   | ITRarrowtail                -- >>-
 441
 442   | ITunknown String            -- Used when the lexer can't make sense of it
 443   | ITeof                       -- end of file token
 444 #ifdef DEBUG
 445   deriving Show -- debugging
 446 #endif
 447
 448 isSpecial :: Token__ -> Bool
 449 -- If we see M.x, where x is a keyword, but
 450 -- is special, we treat is as just plain M.x,
 451 -- not as a keyword.
 452 isSpecial ITas          = True
 453 isSpecial IThiding      = True
 454 isSpecial ITqualified   = True
 455 isSpecial ITforall      = True
 456 isSpecial ITexport      = True
 457 isSpecial ITlabel       = True
 458 isSpecial ITdynamic     = True
 459 isSpecial ITsafe        = True
 460 isSpecial ITthreadsafe  = True
 461 isSpecial ITunsafe      = True
 462 isSpecial ITwith        = True
 463 isSpecial ITccallconv   = True
 464 isSpecial ITstdcallconv = True
 465 isSpecial ITmdo         = True
 466 isSpecial _             = False
 467
 468 -- the bitmap provided as the third component indicates whether the
 469 -- corresponding extension keyword is valid under the extension options
 470 -- provided to the compiler; if the extension corresponding to *any* of the
 471 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 472 -- facilitates using a keyword in two different extensions that can be
 473 -- activated independently)
 474 --
 475 reservedWordsFM = listToUFM $
 476         map (\(x, y, z) -> (mkFastString x, (y, z)))
 477        [( "_",          ITunderscore,   0 ),
 478         ( "as",         ITas,           0 ),
 479         ( "case",       ITcase,         0 ),
 480         ( "class",      ITclass,        0 ),
 481         ( "data",       ITdata,         0 ),
 482         ( "default",    ITdefault,      0 ),
 483         ( "deriving",   ITderiving,     0 ),
 484         ( "do",         ITdo,           0 ),
 485         ( "else",       ITelse,         0 ),
 486         ( "hiding",     IThiding,       0 ),
 487         ( "if",         ITif,           0 ),
 488         ( "import",     ITimport,       0 ),
 489         ( "in",         ITin,           0 ),
 490         ( "infix",      ITinfix,        0 ),
 491         ( "infixl",     ITinfixl,       0 ),
 492         ( "infixr",     ITinfixr,       0 ),
 493         ( "instance",   ITinstance,     0 ),
 494         ( "let",        ITlet,          0 ),
 495         ( "module",     ITmodule,       0 ),
 496         ( "newtype",    ITnewtype,      0 ),
 497         ( "of",         ITof,           0 ),
 498         ( "qualified",  ITqualified,    0 ),
 499         ( "then",       ITthen,         0 ),
 500         ( "type",       ITtype,         0 ),
 501         ( "where",      ITwhere,        0 ),
 502         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 503
 504         ( "forall",     ITforall,        bit glaExtsBit),
 505         ( "mdo",        ITmdo,           bit glaExtsBit),
 506         ( "reifyDecl",  ITreifyDecl,     bit thBit),
 507         ( "reifyType",  ITreifyType,     bit thBit),
 508         ( "reifyFixity",ITreifyFixity,   bit thBit),
 509
 510         ( "foreign",    ITforeign,       bit ffiBit),
 511         ( "export",     ITexport,        bit ffiBit),
 512         ( "label",      ITlabel,         bit ffiBit),
 513         ( "dynamic",    ITdynamic,       bit ffiBit),
 514         ( "safe",       ITsafe,          bit ffiBit),
 515         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 516         ( "unsafe",     ITunsafe,        bit ffiBit),
 517         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 518         ( "ccall",      ITccallconv,     bit ffiBit),
 519         ( "dotnet",     ITdotnet,        bit ffiBit),
 520
 521         ( "with",       ITwith,          bit withBit),
 522
 523         ( "rec",        ITrec,           bit arrowsBit),
 524         ( "proc",       ITproc,          bit arrowsBit),
 525
 526         -- On death row
 527         ("_ccall_",     ITccall (False, False, PlayRisky),
 528                                          bit glaExtsBit),
 529         ("_ccall_GC_",  ITccall (False, False, PlaySafe False),
 530                                          bit glaExtsBit),
 531         ("_casm_",      ITccall (False, True,  PlayRisky),
 532                                          bit glaExtsBit),
 533         ("_casm_GC_",   ITccall (False, True,  PlaySafe False),
 534                                          bit glaExtsBit)
 535      ]
 536
 537 reservedSymsFM = listToUFM $
 538         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 539       [ ("..",  ITdotdot,       0)
 540        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 541                                                 -- meaning only list cons
 542        ,("::",  ITdcolon,       0)
 543        ,("=",   ITequal,        0)
 544        ,("\\",  ITlam,          0)
 545        ,("|",   ITvbar,         0)
 546        ,("<-",  ITlarrow,       0)
 547        ,("->",  ITrarrow,       0)
 548        ,("@",   ITat,           0)
 549        ,("~",   ITtilde,        0)
 550        ,("=>",  ITdarrow,       0)
 551        ,("-",   ITminus,        0)
 552        ,("!",   ITbang,         0)
 553
 554        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 555        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 556
 557        ,("-<",  ITlarrowtail,   bit arrowsBit)
 558        ,(">-",  ITrarrowtail,   bit arrowsBit)
 559        ,("-<<", ITLarrowtail,   bit arrowsBit)
 560        ,(">>-", ITRarrowtail,   bit arrowsBit)
 561        ]
 562
 563 -- -----------------------------------------------------------------------------
 564 -- Lexer actions
 565
 566 type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token
 567
 568 special :: Token__ -> Action
 569 special tok loc end _buf len = return (T loc end tok)
 570
 571 token, layout_token :: Token__ -> Action
 572 token t loc end buf len = return (T loc end t)
 573 layout_token t loc end buf len = pushLexState layout >> return (T loc end t)
 574
 575 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 576 idtoken f loc end buf len = return (T loc end $! (f buf len))
 577
 578 skip_one_varid :: (FastString -> Token__) -> Action
 579 skip_one_varid f loc end buf len
 580   = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1)))
 581
 582 strtoken :: (String -> Token__) -> Action
 583 strtoken f loc end buf len =
 584   return (T loc end $! (f $! lexemeToString buf len))
 585
 586 init_strtoken :: Int -> (String -> Token__) -> Action
 587 -- like strtoken, but drops the last N character(s)
 588 init_strtoken drop f loc end buf len =
 589   return (T loc end $! (f $! lexemeToString buf (len-drop)))
 590
 591 begin :: Int -> Action
 592 begin code _loc _end _str _len = do pushLexState code; lexToken
 593
 594 pop :: Action
 595 pop _loc _end _buf _len = do popLexState; lexToken
 596
 597 pop_and :: Action -> Action
 598 pop_and act loc end buf len = do popLexState; act loc end buf len
 599
 600 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 601
 602 ifExtension pred bits _ _ _ = pred bits
 603
 604 {-
 605   nested comments require traversing by hand, they can't be parsed
 606   using regular expressions.
 607 -}
 608 nested_comment :: Action
 609 nested_comment loc _end _str _len = do
 610   input <- getInput
 611   go 1 input
 612   where go 0 input = do setInput input; lexToken
 613         go n input = do
 614           case alexGetChar input of
 615             Nothing  -> err input
 616             Just (c,input) -> do
 617               case c of
 618                 '-' -> do
 619                   case alexGetChar input of
 620                     Nothing  -> err input
 621                     Just ('\125',input) -> go (n-1) input
 622                     Just (c,_)          -> go n input
 623                 '\123' -> do
 624                   case alexGetChar input of
 625                     Nothing  -> err input
 626                     Just ('-',input') -> go (n+1) input'
 627                     Just (c,input)    -> go n input
 628                 c -> go n input
 629
 630         err input = do failLocMsgP loc (fst input) "unterminated `{-'"
 631
 632 open_brace, close_brace :: Action
 633 open_brace  loc end _str _len = do
 634   ctx <- getContext
 635   setContext (NoLayout:ctx)
 636   return (T loc end ITocurly)
 637 close_brace loc end _str _len = do
 638   popContext
 639   return (T loc end ITccurly)
 640
 641 -- We have to be careful not to count M.<varid> as a qualified name
 642 -- when <varid> is a keyword.  We hack around this by catching
 643 -- the offending tokens afterward, and re-lexing in a different state.
 644 check_qvarid loc end buf len = do
 645   case lookupUFM reservedWordsFM var of
 646         Just (keyword,exts)
 647           | not (isSpecial keyword) ->
 648           if exts == 0
 649              then try_again
 650              else do
 651                 b <- extension (\i -> exts .&. i /= 0)
 652                 if b then try_again
 653                      else return token
 654         _other -> return token
 655   where
 656         (mod,var) = splitQualName buf len
 657         token     = T loc end (ITqvarid (mod,var))
 658
 659         try_again = do
 660                 setInput (loc,buf)
 661                 pushLexState bad_qvarid
 662                 lexToken
 663
 664 qvarid buf len = ITqvarid $! splitQualName buf len
 665 qconid buf len = ITqconid $! splitQualName buf len
 666
 667 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 668 -- takes a StringBuffer and a length, and returns the module name
 669 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 670 -- because of hierarchical module names.
 671 splitQualName orig_buf len = split orig_buf 0 0
 672   where
 673     split buf dot_off n
 674         | n == len                = done dot_off
 675         | lookAhead buf n == '.'  = split2 buf n (n+1)
 676         | otherwise               = split buf dot_off (n+1)
 677
 678     -- careful, we might get names like M....
 679     -- so, if the character after the dot is not upper-case, this is
 680     -- the end of the qualifier part.
 681     split2 buf dot_off n
 682         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 683         | otherwise                 = done dot_off
 684
 685     done dot_off =
 686         (lexemeToFastString orig_buf dot_off,
 687          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 688
 689 varid loc end buf len =
 690   case lookupUFM reservedWordsFM fs of
 691         Just (keyword,0)    -> do
 692                 maybe_layout keyword
 693                 return (T loc end keyword)
 694         Just (keyword,exts) -> do
 695                 b <- extension (\i -> exts .&. i /= 0)
 696                 if b then do maybe_layout keyword
 697                              return (T loc end keyword)
 698                      else return (T loc end (ITvarid fs))
 699         _other -> return (T loc end (ITvarid fs))
 700   where
 701         fs = lexemeToFastString buf len
 702
 703 conid buf len = ITconid fs
 704   where fs = lexemeToFastString buf len
 705
 706 qvarsym buf len = ITqvarsym $! splitQualName buf len
 707 qconsym buf len = ITqconsym $! splitQualName buf len
 708
 709 varsym = sym ITvarsym
 710 consym = sym ITconsym
 711
 712 sym con loc end buf len =
 713   case lookupUFM reservedSymsFM fs of
 714         Just (keyword,0)    -> return (T loc end keyword)
 715         Just (keyword,exts) -> do
 716                 b <- extension (\i -> exts .&. i /= 0)
 717                 if b then return (T loc end keyword)
 718                      else return (T loc end $! con fs)
 719         _other -> return (T loc end $! con fs)
 720   where
 721         fs = lexemeToFastString buf len
 722
 723 tok_decimal loc end buf len
 724   = return (T loc end (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 725
 726 tok_octal loc end buf len
 727   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 728
 729 tok_hexadecimal loc end buf len
 730   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 731
 732 prim_decimal loc end buf len
 733   = return (T loc end (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 734
 735 prim_octal loc end buf len
 736   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 737
 738 prim_hexadecimal loc end buf len
 739   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 740
 741 tok_float        str = ITrational $! readRational__ str
 742 prim_float       str = ITprimfloat  $! readRational__ str
 743 prim_double      str = ITprimdouble $! readRational__ str
 744
 745 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 746 parseInteger buf len radix to_int
 747   = go 0 0
 748   where go i x | i == len  = x
 749                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 750
 751 clitlit :: Action
 752 clitlit loc end buf len =
 753   return (T loc end (ITlitlit $! lexemeToFastString (stepOnBy 2 buf) (len-4)))
 754
 755 -- -----------------------------------------------------------------------------
 756 -- Layout processing
 757
 758 -- we're at the first token on a line, insert layout tokens if necessary
 759 do_bol :: Action
 760 do_bol loc end _str _len = do
 761         pos <- getOffside end
 762         case pos of
 763             LT -> do
 764                 --trace "layout: inserting '}'" $ do
 765                 popContext
 766                 -- do NOT pop the lex state, we might have a ';' to insert
 767                 return (T loc end ITvccurly)
 768             EQ -> do
 769                 --trace "layout: inserting ';'" $ do
 770                 popLexState
 771                 return (T loc end ITsemi)
 772             GT -> do
 773                 popLexState
 774                 lexToken
 775
 776 -- certain keywords put us in the "layout" state, where we might
 777 -- add an opening curly brace.
 778 maybe_layout ITdo       = pushLexState layout_do
 779 maybe_layout ITmdo      = pushLexState layout_do
 780 maybe_layout ITof       = pushLexState layout
 781 maybe_layout ITlet      = pushLexState layout
 782 maybe_layout ITwhere    = pushLexState layout
 783 maybe_layout ITrec      = pushLexState layout
 784 maybe_layout _          = return ()
 785
 786 -- Pushing a new implicit layout context.  If the indentation of the
 787 -- next token is not greater than the previous layout context, then
 788 -- Haskell 98 says that the new layout context should be empty; that is
 789 -- the lexer must generate {}.
 790 --
 791 -- We are slightly more lenient than this: when the new context is started
 792 -- by a 'do', then we allow the new context to be at the same indentation as
 793 -- the previous context.  This is what the 'strict' argument is for.
 794 --
 795 new_layout_context strict loc end _buf _len = do
 796     popLexState
 797     let offset = srcLocCol loc
 798     ctx <- getContext
 799     case ctx of
 800         Layout prev_off : _  |
 801            (strict     && prev_off >= offset  ||
 802             not strict && prev_off > offset) -> do
 803                 -- token is indented to the left of the previous context.
 804                 -- we must generate a {} sequence now.
 805                 pushLexState layout_left
 806                 return (T loc end ITvocurly)
 807         other -> do
 808                 setContext (Layout offset : ctx)
 809                 return (T loc end ITvocurly)
 810
 811 do_layout_left loc end _buf _len = do
 812     popLexState
 813     pushLexState bol  -- we must be at the start of a line
 814     return (T loc end ITvccurly)
 815
 816 -- -----------------------------------------------------------------------------
 817 -- LINE pragmas
 818
 819 set_line :: Int -> Action
 820 set_line code loc end buf len = do
 821   let line = parseInteger buf len 10 oct_or_dec
 822   setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0)
 823         -- subtract one: the line number refers to the *following* line
 824   popLexState
 825   pushLexState code
 826   lexToken
 827
 828 set_file :: Int -> Action
 829 set_file code loc end buf len = do
 830   let file = lexemeToFastString (stepOn buf) (len-2)
 831   setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end))
 832   popLexState
 833   pushLexState code
 834   lexToken
 835
 836 -- -----------------------------------------------------------------------------
 837 -- Strings & Chars
 838
 839 -- This stuff is horrible.  I hates it.
 840
 841 lex_string_tok :: Action
 842 lex_string_tok loc end buf len = do
 843   tok <- lex_string ""
 844   end <- getSrcLoc
 845   return (T loc end tok)
 846
 847 lex_string :: String -> P Token__
 848 lex_string s = do
 849   i <- getInput
 850   case alexGetChar i of
 851     Nothing -> lit_error
 852
 853     Just ('"',i)  -> do
 854         setInput i
 855         glaexts <- extension glaExtsEnabled
 856         if glaexts
 857           then do
 858             i <- getInput
 859             case alexGetChar i of
 860               Just ('#',i) -> do
 861                    setInput i
 862                    if any (> '\xFF') s
 863                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 864                     else let s' = mkFastStringNarrow (reverse s) in
 865                          -- always a narrow string/byte array
 866                          return (ITprimstring s')
 867               _other ->
 868                 return (ITstring (mkFastString (reverse s)))
 869           else
 870                 return (ITstring (mkFastString (reverse s)))
 871
 872     Just ('\\',i)
 873         | Just ('&',i) <- next -> do
 874                 setInput i; lex_string s
 875         | Just (c,i) <- next, is_space c -> do
 876                 setInput i; lex_stringgap s
 877         where next = alexGetChar i
 878
 879     Just _ -> do
 880         c <- lex_char
 881         lex_string (c:s)
 882
 883
 884 lex_stringgap s = do
 885   c <- getCharOrFail
 886   case c of
 887     '\\' -> lex_string s
 888     c | is_space c -> lex_stringgap s
 889     _other -> lit_error
 890
 891
 892 lex_char_tok :: Action
 893 lex_char_tok loc _end buf len = do
 894    c <- lex_char
 895    mc <- getCharOrFail
 896    case mc of
 897         '\'' -> do
 898            glaexts <- extension glaExtsEnabled
 899            if glaexts
 900                 then do
 901                    i@(end,_) <- getInput
 902                    case alexGetChar i of
 903                         Just ('#',i@(end,_)) -> do
 904                                 setInput i
 905                                 return (T loc end (ITprimchar c))
 906                         _other ->
 907                                 return (T loc end (ITchar c))
 908                 else do
 909                    end <- getSrcLoc
 910                    return (T loc end (ITchar c))
 911
 912         _other -> lit_error
 913
 914 lex_char :: P Char
 915 lex_char = do
 916   mc <- getCharOrFail
 917   case mc of
 918       '\\' -> lex_escape
 919       c | is_any c -> return c
 920       _other -> lit_error
 921
 922 lex_escape :: P Char
 923 lex_escape = do
 924   c <- getCharOrFail
 925   case c of
 926         'a'   -> return '\a'
 927         'b'   -> return '\b'
 928         'f'   -> return '\f'
 929         'n'   -> return '\n'
 930         'r'   -> return '\r'
 931         't'   -> return '\t'
 932         'v'   -> return '\v'
 933         '\\'  -> return '\\'
 934         '"'   -> return '\"'
 935         '\''  -> return '\''
 936         '^'   -> do c <- getCharOrFail
 937                     if c >= '@' && c <= '_'
 938                         then return (chr (ord c - ord '@'))
 939                         else lit_error
 940
 941         'x'   -> readNum is_hexdigit 16 hex
 942         'o'   -> readNum is_octdigit  8 oct_or_dec
 943         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 944
 945         c1 ->  do
 946            i <- getInput
 947            case alexGetChar i of
 948             Nothing -> lit_error
 949             Just (c2,i2) ->
 950               case alexGetChar i2 of
 951                 Nothing -> lit_error
 952                 Just (c3,i3) ->
 953                    let str = [c1,c2,c3] in
 954                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 955                                      Just rest <- [maybePrefixMatch p str] ] of
 956                           (escape_char,[]):_ -> do
 957                                 setInput i3
 958                                 return escape_char
 959                           (escape_char,_:_):_ -> do
 960                                 setInput i2
 961                                 return escape_char
 962                           [] -> lit_error
 963
 964 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 965 readNum is_digit base conv = do
 966   c <- getCharOrFail
 967   if is_digit c
 968         then readNum2 is_digit base conv (conv c)
 969         else lit_error
 970
 971 readNum2 is_digit base conv i = do
 972   input <- getInput
 973   read i input
 974   where read i input = do
 975           case alexGetChar input of
 976             Just (c,input') | is_digit c -> do
 977                 read (i*base + conv c) input'
 978             _other -> do
 979                 setInput input
 980                 if i >= 0 && i <= 0x10FFFF
 981                    then return (chr i)
 982                    else lit_error
 983
 984 is_hexdigit c
 985         =  is_digit c
 986         || (c >= 'a' && c <= 'f')
 987         || (c >= 'A' && c <= 'F')
 988
 989 hex c | is_digit c = ord c - ord '0'
 990       | otherwise  = ord (to_lower c) - ord 'a' + 10
 991
 992 oct_or_dec c = ord c - ord '0'
 993
 994 is_octdigit c = c >= '0' && c <= '7'
 995
 996 to_lower c
 997   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
 998   | otherwise = c
 999
1000 silly_escape_chars = [
1001         ("NUL", '\NUL'),
1002         ("SOH", '\SOH'),
1003         ("STX", '\STX'),
1004         ("ETX", '\ETX'),
1005         ("EOT", '\EOT'),
1006         ("ENQ", '\ENQ'),
1007         ("ACK", '\ACK'),
1008         ("BEL", '\BEL'),
1009         ("BS", '\BS'),
1010         ("HT", '\HT'),
1011         ("LF", '\LF'),
1012         ("VT", '\VT'),
1013         ("FF", '\FF'),
1014         ("CR", '\CR'),
1015         ("SO", '\SO'),
1016         ("SI", '\SI'),
1017         ("DLE", '\DLE'),
1018         ("DC1", '\DC1'),
1019         ("DC2", '\DC2'),
1020         ("DC3", '\DC3'),
1021         ("DC4", '\DC4'),
1022         ("NAK", '\NAK'),
1023         ("SYN", '\SYN'),
1024         ("ETB", '\ETB'),
1025         ("CAN", '\CAN'),
1026         ("EM", '\EM'),
1027         ("SUB", '\SUB'),
1028         ("ESC", '\ESC'),
1029         ("FS", '\FS'),
1030         ("GS", '\GS'),
1031         ("RS", '\RS'),
1032         ("US", '\US'),
1033         ("SP", '\SP'),
1034         ("DEL", '\DEL')
1035         ]
1036
1037 lit_error = lexError "lexical error in string/character literal"
1038
1039 getCharOrFail :: P Char
1040 getCharOrFail =  do
1041   i <- getInput
1042   case alexGetChar i of
1043         Nothing -> lexError "unexpected end-of-file in string/character literal"
1044         Just (c,i)  -> do setInput i; return c
1045
1046 -- -----------------------------------------------------------------------------
1047 -- Floats
1048
1049 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1050 readRational r = do
1051      (n,d,s) <- readFix r
1052      (k,t)   <- readExp s
1053      return ((n%1)*10^^(k-d), t)
1054  where
1055      readFix r = do
1056         (ds,s)  <- lexDecDigits r
1057         (ds',t) <- lexDotDigits s
1058         return (read (ds++ds'), length ds', t)
1059
1060      readExp (e:s) | e `elem` "eE" = readExp' s
1061      readExp s                     = return (0,s)
1062
1063      readExp' ('+':s) = readDec s
1064      readExp' ('-':s) = do
1065                         (k,t) <- readDec s
1066                         return (-k,t)
1067      readExp' s       = readDec s
1068
1069      readDec s = do
1070         (ds,r) <- nonnull isDigit s
1071         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1072                 r)
1073
1074      lexDecDigits = nonnull isDigit
1075
1076      lexDotDigits ('.':s) = return (span isDigit s)
1077      lexDotDigits s       = return ("",s)
1078
1079      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1080                       return (cs,t)
1081
1082 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1083 readRational__ top_s
1084   = case top_s of
1085       '-' : xs -> - (read_me xs)
1086       xs       -> read_me xs
1087   where
1088     read_me s
1089       = case (do { (x,"") <- readRational s ; return x }) of
1090           [x] -> x
1091           []  -> error ("readRational__: no parse:"        ++ top_s)
1092           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1093
1094 -- -----------------------------------------------------------------------------
1095 -- The Parse Monad
1096
1097 data LayoutContext
1098   = NoLayout
1099   | Layout !Int
1100
1101 data ParseResult a
1102   = POk PState a
1103   | PFailed
1104         SrcLoc SrcLoc   -- The start and end of the text span related to
1105                         -- the error.  Might be used in environments which can
1106                         -- show this span, e.g. by highlighting it.
1107         Message         -- The error message
1108
1109 showPFailed loc1 loc2 err
1110  = showSDoc (hcat [ppr loc1, text ": ", err])
1111
1112 data PState = PState {
1113         buffer     :: StringBuffer,
1114         last_loc   :: SrcLoc,           -- pos of previous token
1115         last_len   :: !Int,             -- len of previous token
1116         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1117         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1118         context    :: [LayoutContext],
1119         lex_state  :: [Int]
1120      }
1121         -- last_loc and last_len are used when generating error messages,
1122         -- and in pushCurrentContext only.
1123
1124 newtype P a = P { unP :: PState -> ParseResult a }
1125
1126 instance Monad P where
1127   return = returnP
1128   (>>=) = thenP
1129   fail = failP
1130
1131 returnP :: a -> P a
1132 returnP a = P $ \s -> POk s a
1133
1134 thenP :: P a -> (a -> P b) -> P b
1135 (P m) `thenP` k = P $ \ s ->
1136         case m s of
1137                 POk s1 a          -> (unP (k a)) s1
1138                 PFailed l1 l2 err -> PFailed l1 l2 err
1139
1140 failP :: String -> P a
1141 failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1142
1143 failMsgP :: String -> P a
1144 failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1145
1146 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1147 failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str)
1148
1149 extension :: (Int -> Bool) -> P Bool
1150 extension p = P $ \s -> POk s (p $! extsBitmap s)
1151
1152 getExts :: P Int
1153 getExts = P $ \s -> POk s (extsBitmap s)
1154
1155 setSrcLoc :: SrcLoc -> P ()
1156 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1157
1158 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1159 -- any calls to the lexer, because it assumes things about the SrcLoc.
1160 setSrcLocFor :: SrcLoc -> P a -> P a
1161 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1162   case unP scope s{loc=new_loc} of
1163         PFailed l1 l2 msg -> PFailed l1 l2 msg
1164         POk _ r -> POk s r
1165
1166 getSrcLoc :: P SrcLoc
1167 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1168
1169 setLastToken :: SrcLoc -> Int -> P ()
1170 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1171
1172 type AlexInput = (SrcLoc,StringBuffer)
1173
1174 alexInputPrevChar :: AlexInput -> Char
1175 alexInputPrevChar (_,s) = prevChar s '\n'
1176
1177 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1178 alexGetChar (loc,s)
1179   | atEnd s   = Nothing
1180   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1181   where c = currentChar s
1182         loc' = advanceSrcLoc loc c
1183         s'   = stepOn s
1184
1185 getInput :: P AlexInput
1186 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1187
1188 setInput :: AlexInput -> P ()
1189 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1190
1191 pushLexState :: Int -> P ()
1192 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1193
1194 popLexState :: P Int
1195 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1196
1197 getLexState :: P Int
1198 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1199
1200 -- for reasons of efficiency, flags indicating language extensions (eg,
1201 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1202 -- integer
1203
1204 glaExtsBit, ffiBit, parrBit :: Int
1205 glaExtsBit = 0
1206 ffiBit     = 1
1207 parrBit    = 2
1208 withBit    = 3
1209 arrowsBit  = 4
1210 thBit      = 5
1211 ipBit      = 6
1212
1213 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1214 glaExtsEnabled flags = testBit flags glaExtsBit
1215 ffiEnabled     flags = testBit flags ffiBit
1216 withEnabled    flags = testBit flags withBit
1217 parrEnabled    flags = testBit flags parrBit
1218 arrowsEnabled  flags = testBit flags arrowsBit
1219 thEnabled      flags = testBit flags thBit
1220 ipEnabled      flags = testBit flags ipBit
1221
1222 -- create a parse state
1223 --
1224 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1225 mkPState buf loc flags  =
1226   PState {
1227       buffer     = buf,
1228       last_loc   = loc,
1229       last_len   = 0,
1230       loc        = loc,
1231       extsBitmap = fromIntegral bitmap,
1232       context    = [],
1233       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1234         -- we begin in the layout state if toplev_layout is set
1235     }
1236     where
1237       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1238                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1239                .|. withBit    `setBitIf` dopt Opt_With        flags
1240                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1241                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1242                .|. thBit      `setBitIf` dopt Opt_TH          flags
1243                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1244       --
1245       setBitIf :: Int -> Bool -> Int
1246       b `setBitIf` cond | cond      = bit b
1247                         | otherwise = 0
1248
1249 getContext :: P [LayoutContext]
1250 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1251
1252 setContext :: [LayoutContext] -> P ()
1253 setContext ctx = P $ \s -> POk s{context=ctx} ()
1254
1255 popContext :: P ()
1256 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1257                            loc = loc, last_len = len, last_loc = last_loc }) ->
1258   case ctx of
1259         (_:tl) -> POk s{ context = tl } ()
1260         []     -> PFailed last_loc loc (srcParseErr buf len)
1261
1262 -- Push a new layout context at the indentation of the last token read.
1263 -- This is only used at the outer level of a module when the 'module'
1264 -- keyword is missing.
1265 pushCurrentContext :: P ()
1266 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1267   POk s{ context = Layout (srcLocCol loc) : ctx} ()
1268
1269 getOffside :: SrcLoc -> P Ordering
1270 getOffside loc = P $ \s@PState{context=stk} ->
1271                 let ord = case stk of
1272                         (Layout n:_) -> compare (srcLocCol loc) n
1273                         _            -> GT
1274                 in POk s ord
1275
1276 -- ---------------------------------------------------------------------------
1277 -- Construct a parse error
1278
1279 srcParseErr
1280   :: StringBuffer       -- current buffer (placed just after the last token)
1281   -> Int                -- length of the previous token
1282   -> Message
1283 srcParseErr buf len
1284   = hcat [ if null token
1285              then ptext SLIT("parse error (possibly incorrect indentation)")
1286              else hcat [ptext SLIT("parse error on input "),
1287                         char '`', text token, char '\'']
1288     ]
1289   where token = lexemeToString (stepOnBy (-len) buf) len
1290
1291 -- Report a parse failure, giving the span of the previous token as
1292 -- the location of the error.  This is the entry point for errors
1293 -- detected during parsing.
1294 srcParseFail :: P a
1295 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1296                                 last_loc = last_loc, loc = loc } ->
1297     PFailed last_loc loc (srcParseErr buf len)
1298
1299 -- A lexical error is reported at a particular position in the source file,
1300 -- not over a token range.  TODO: this is slightly wrong, because we record
1301 -- the error at the character position following the one which caused the
1302 -- error.  We should somehow back up by one character.
1303 lexError :: String -> P a
1304 lexError str = do
1305   loc <- getSrcLoc
1306   failLocMsgP loc loc str
1307
1308 -- -----------------------------------------------------------------------------
1309 -- This is the top-level function: called from the parser each time a
1310 -- new token is to be read from the input.
1311
1312 lexer :: (Token -> P a) -> P a
1313 lexer cont = do
1314   tok@(T _ _ tok__) <- lexToken
1315   --trace ("token: " ++ show tok__) $ do
1316   cont tok
1317
1318 lexToken :: P Token
1319 lexToken = do
1320   inp@(loc1,buf) <- getInput
1321   sc <- getLexState
1322   exts <- getExts
1323   case alexScanUser exts inp sc of
1324     AlexEOF -> do setLastToken loc1 0
1325                   return (T loc1 loc1 ITeof)
1326     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1327     AlexSkip inp2 _ -> do
1328         setInput inp2
1329         lexToken
1330     AlexToken inp2@(end,buf2) len t -> do
1331         setInput inp2
1332         setLastToken loc1 len
1333         t loc1 end buf len
1334 }