ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, ExtFlags(..), mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ForeignCall      ( Safety(..) )
  34 import ErrUtils         ( Message )
  35 import Outputable
  36 import StringBuffer
  37 import FastString
  38 import FastTypes
  39 import SrcLoc
  40 import UniqFM
  41 import Ctype
  42 import Util             ( maybePrefixMatch )
  43
  44 import DATA_BITS
  45 import Char
  46 import Ratio
  47 import TRACE
  48 }
  49
  50 $whitechar   = [\ \t\n\r\f\v]
  51 $white_no_nl = $whitechar # \n
  52
  53 $ascdigit  = 0-9
  54 $unidigit  = \x01
  55 $digit     = [$ascdigit $unidigit]
  56
  57 $special   = [\(\)\,\;\[\]\`\{\}]
  58 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  59 $unisymbol = \x02
  60 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  61
  62 $unilarge  = \x03
  63 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  64 $large     = [$asclarge $unilarge]
  65
  66 $unismall  = \x04
  67 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  68 $small     = [$ascsmall $unismall \_]
  69
  70 $graphic   = [$small $large $symbol $digit $special \:\"\']
  71
  72 $octit     = 0-7
  73 $hexit     = [$digit A-F a-f]
  74 $symchar   = [$symbol \:]
  75 $nl        = [\n\r]
  76 $idchar    = [$small $large $digit \']
  77
  78 @varid     = $small $idchar*
  79 @conid     = $large $idchar*
  80
  81 @varsym    = $symbol $symchar*
  82 @consym    = \: $symchar*
  83
  84 @decimal     = $digit+
  85 @octal       = $octit+
  86 @hexadecimal = $hexit+
  87 @exponent    = [eE] [\-\+]? @decimal
  88
  89 -- we support the hierarchical module name extension:
  90 @qual = (@conid \.)+
  91
  92 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  93
  94 haskell :-
  95
  96 -- everywhere: skip whitespace and comments
  97 $white_no_nl+                           ;
  98
  99 -- Everywhere: deal with nested comments.  We explicitly rule out
 100 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 101 -- (this can happen even though pragmas will normally take precedence due to
 102 -- longest-match, because pragmas aren't valid in every state, but comments
 103 -- are).
 104 "{-" / { notFollowedBy '#' }            { nested_comment }
 105
 106 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 107 -- more dashes followed by a symbol should be parsed as a varsym, so we
 108 -- have to exclude those.
 109 -- The regex says: "munch all the characters after the dashes, as long as
 110 -- the first one is not a symbol".
 111 "--"\-* ([^$symbol] .*)?                ;
 112
 113 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 114 -- blank lines) until we find a non-whitespace character, then do layout
 115 -- processing.
 116 --
 117 -- One slight wibble here: what if the line begins with {-#? In
 118 -- theory, we have to lex the pragma to see if it's one we recognise,
 119 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 120 -- as a nested comment.  We don't bother with this: if the line begins
 121 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 122 <bol> {
 123   \n                                    ;
 124   ^\# (line)?                           { begin line_prag1 }
 125   ()                                    { do_bol }
 126 }
 127
 128 -- after a layout keyword (let, where, do, of), we begin a new layout
 129 -- context if the curly brace is missing.
 130 -- Careful! This stuff is quite delicate.
 131 <layout, layout_do> {
 132   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 133         -- we might encounter {-# here, but {- has been handled already
 134   \n                                    ;
 135   ^\# (line)?                           { begin line_prag1 }
 136 }
 137
 138 -- do is treated in a subtly different way, see new_layout_context
 139 <layout>    ()                          { new_layout_context True }
 140 <layout_do> ()                          { new_layout_context False }
 141
 142 -- after a new layout context which was found to be to the left of the
 143 -- previous context, we have generated a '{' token, and we now need to
 144 -- generate a matching '}' token.
 145 <layout_left>  ()                       { do_layout_left }
 146
 147 <0,glaexts> \n                          { begin bol }
 148
 149 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 150
 151 -- single-line line pragmas, of the form
 152 --    # <line> "<file>" <extra-stuff> \n
 153 <line_prag1> $digit+                    { set_line line_prag1a }
 154 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 155 <line_prag1b> .*                        { pop }
 156
 157 -- Haskell-style line pragmas, of the form
 158 --    {-# LINE <line> "<file>" #-}
 159 <line_prag2> $digit+                    { set_line line_prag2a }
 160 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 161 <line_prag2b> "#-}"                     { pop }
 162
 163 <0,glaexts> {
 164   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 165                                         { token ITspecialise_prag }
 166   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 167   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 168   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 169                                         { token ITnoinline_prag }
 170   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 171   "{-#" $whitechar* (DEPRECATED|deprecated)
 172                                         { token ITdeprecated_prag }
 173   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 174   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 175
 176   "{-#"                                 { nested_comment }
 177
 178   -- ToDo: should only be valid inside a pragma:
 179   "#-}"                                 { token ITclose_prag}
 180 }
 181
 182
 183 -- '0' state: ordinary lexemes
 184 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 185
 186 -- "special" symbols
 187
 188 <glaexts> {
 189   "(#"                                  { token IToubxparen }
 190   "#)"                                  { token ITcubxparen }
 191
 192   "[:"                                  { token ITopabrack }
 193   ":]"                                  { token ITcpabrack }
 194
 195   "{|"                                  { token ITocurlybar }
 196   "|}"                                  { token ITccurlybar }
 197
 198   "[|"                                  { token ITopenExpQuote }
 199   "[e|"                                 { token ITopenExpQuote }
 200   "[p|"                                 { token ITopenPatQuote }
 201   "[d|"                                 { layout_token ITopenDecQuote }
 202   "[t|"                                 { token ITopenTypQuote }
 203   "|]"                                  { token ITcloseQuote }
 204 }
 205
 206 <0,glaexts> {
 207   "(|" / { \b _ _ _ -> arrowsEnabled b} { special IToparenbar }
 208   "|)" / { \b _ _ _ -> arrowsEnabled b} { special ITcparenbar }
 209   \(                                    { special IToparen }
 210   \)                                    { special ITcparen }
 211   \[                                    { special ITobrack }
 212   \]                                    { special ITcbrack }
 213   \,                                    { special ITcomma }
 214   \;                                    { special ITsemi }
 215   \`                                    { special ITbackquote }
 216
 217   \{                                    { open_brace }
 218   \}                                    { close_brace }
 219 }
 220
 221 <glaexts> {
 222   \? @varid                     { skip_one_varid ITdupipvarid }
 223   \% @varid                     { skip_one_varid ITsplitipvarid }
 224   \$ @varid                     { skip_one_varid ITidEscape }
 225   "$("                          { token ITparenEscape }
 226 }
 227
 228 <0,glaexts> {
 229   @qual @varid                  { check_qvarid }
 230   @qual @conid                  { idtoken qconid }
 231   @varid                        { varid }
 232   @conid                        { idtoken conid }
 233 }
 234
 235 -- after an illegal qvarid, such as 'M.let',
 236 -- we back up and try again in the bad_qvarid state:
 237 <bad_qvarid> {
 238   @conid                        { pop_and (idtoken conid) }
 239   @qual @conid                  { pop_and (idtoken qconid) }
 240 }
 241
 242 <glaexts> {
 243   @qual @varid "#"+             { idtoken qvarid }
 244   @qual (@conid "#"+ | "()")    { idtoken qconid }
 245   @varid "#"+                   { varid }
 246   @conid "#"+                   { idtoken conid }
 247 }
 248
 249 -- ToDo: M.(,,,)
 250
 251 <0,glaexts> {
 252   @qual @varsym                 { idtoken qvarsym }
 253   @qual @consym                 { idtoken qconsym }
 254   @varsym                       { varsym }
 255   @consym                       { consym }
 256 }
 257
 258 <0,glaexts> {
 259   @decimal                      { tok_decimal }
 260   0[oO] @octal                  { tok_octal }
 261   0[xX] @hexadecimal            { tok_hexadecimal }
 262 }
 263
 264 <glaexts> {
 265   @decimal \#                   { prim_decimal }
 266   0[oO] @octal \#               { prim_octal }
 267   0[xX] @hexadecimal \#         { prim_hexadecimal }
 268 }
 269
 270 <0,glaexts> @floating_point             { strtoken tok_float }
 271 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 272 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 273
 274 -- Strings and chars are lexed by hand-written code.  The reason is
 275 -- that even if we recognise the string or char here in the regex
 276 -- lexer, we would still have to parse the string afterward in order
 277 -- to convert it to a String.
 278 <0,glaexts> {
 279   \'                            { lex_char_tok }
 280   \"                            { lex_string_tok }
 281 }
 282
 283 <glaexts> "``" (([$graphic $whitechar] # \') | \' ([$graphic $whitechar] # \'))*
 284                 "''"            { clitlit }
 285
 286 {
 287 -- work around bug in Alex 2.0
 288 #if __GLASGOW_HASKELL__ < 503
 289 unsafeAt arr i = arr ! i
 290 #endif
 291
 292 -- -----------------------------------------------------------------------------
 293 -- The token type
 294
 295 data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__
 296
 297 data Token__
 298   = ITas                        -- Haskell keywords
 299   | ITcase
 300   | ITclass
 301   | ITdata
 302   | ITdefault
 303   | ITderiving
 304   | ITdo
 305   | ITelse
 306   | IThiding
 307   | ITif
 308   | ITimport
 309   | ITin
 310   | ITinfix
 311   | ITinfixl
 312   | ITinfixr
 313   | ITinstance
 314   | ITlet
 315   | ITmodule
 316   | ITnewtype
 317   | ITof
 318   | ITqualified
 319   | ITthen
 320   | ITtype
 321   | ITwhere
 322   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 323
 324   | ITforall                    -- GHC extension keywords
 325   | ITforeign
 326   | ITexport
 327   | ITlabel
 328   | ITdynamic
 329   | ITsafe
 330   | ITthreadsafe
 331   | ITunsafe
 332   | ITwith
 333   | ITstdcallconv
 334   | ITccallconv
 335   | ITdotnet
 336   | ITccall (Bool,Bool,Safety)  -- (is_dyn, is_casm, may_gc)
 337   | ITmdo
 338
 339   | ITspecialise_prag           -- Pragmas
 340   | ITsource_prag
 341   | ITinline_prag
 342   | ITnoinline_prag
 343   | ITrules_prag
 344   | ITdeprecated_prag
 345   | ITline_prag
 346   | ITscc_prag
 347   | ITcore_prag                 -- hdaume: core annotations
 348   | ITclose_prag
 349
 350   | ITdotdot                    -- reserved symbols
 351   | ITcolon
 352   | ITdcolon
 353   | ITequal
 354   | ITlam
 355   | ITvbar
 356   | ITlarrow
 357   | ITrarrow
 358   | ITat
 359   | ITtilde
 360   | ITdarrow
 361   | ITminus
 362   | ITbang
 363   | ITstar
 364   | ITdot
 365
 366   | ITbiglam                    -- GHC-extension symbols
 367
 368   | ITocurly                    -- special symbols
 369   | ITccurly
 370   | ITocurlybar                 -- {|, for type applications
 371   | ITccurlybar                 -- |}, for type applications
 372   | ITvocurly
 373   | ITvccurly
 374   | ITobrack
 375   | ITopabrack                  -- [:, for parallel arrays with -fparr
 376   | ITcpabrack                  -- :], for parallel arrays with -fparr
 377   | ITcbrack
 378   | IToparen
 379   | ITcparen
 380   | IToubxparen
 381   | ITcubxparen
 382   | ITsemi
 383   | ITcomma
 384   | ITunderscore
 385   | ITbackquote
 386
 387   | ITvarid   FastString        -- identifiers
 388   | ITconid   FastString
 389   | ITvarsym  FastString
 390   | ITconsym  FastString
 391   | ITqvarid  (FastString,FastString)
 392   | ITqconid  (FastString,FastString)
 393   | ITqvarsym (FastString,FastString)
 394   | ITqconsym (FastString,FastString)
 395
 396   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 397   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 398
 399   | ITpragma StringBuffer
 400
 401   | ITchar       Char
 402   | ITstring     FastString
 403   | ITinteger    Integer
 404   | ITrational   Rational
 405
 406   | ITprimchar   Char
 407   | ITprimstring FastString
 408   | ITprimint    Integer
 409   | ITprimfloat  Rational
 410   | ITprimdouble Rational
 411   | ITlitlit     FastString
 412
 413   -- MetaHaskell extension tokens
 414   | ITopenExpQuote              -- [| or [e|
 415   | ITopenPatQuote              -- [p|
 416   | ITopenDecQuote              -- [d|
 417   | ITopenTypQuote              -- [t|
 418   | ITcloseQuote                -- |]
 419   | ITidEscape   FastString     -- $x
 420   | ITparenEscape               -- $(
 421   | ITreifyType
 422   | ITreifyDecl
 423   | ITreifyFixity
 424
 425   -- Arrow notation extension
 426   | ITproc
 427   | ITrec
 428   | IToparenbar                 -- (|
 429   | ITcparenbar                 -- |)
 430   | ITlarrowtail                -- -<
 431   | ITrarrowtail                -- >-
 432   | ITLarrowtail                -- -<<
 433   | ITRarrowtail                -- >>-
 434
 435   | ITunknown String            -- Used when the lexer can't make sense of it
 436   | ITeof                       -- end of file token
 437 #ifdef DEBUG
 438   deriving Show -- debugging
 439 #endif
 440
 441 isSpecial :: Token__ -> Bool
 442 -- If we see M.x, where x is a keyword, but
 443 -- is special, we treat is as just plain M.x,
 444 -- not as a keyword.
 445 isSpecial ITas          = True
 446 isSpecial IThiding      = True
 447 isSpecial ITqualified   = True
 448 isSpecial ITforall      = True
 449 isSpecial ITexport      = True
 450 isSpecial ITlabel       = True
 451 isSpecial ITdynamic     = True
 452 isSpecial ITsafe        = True
 453 isSpecial ITthreadsafe  = True
 454 isSpecial ITunsafe      = True
 455 isSpecial ITwith        = True
 456 isSpecial ITccallconv   = True
 457 isSpecial ITstdcallconv = True
 458 isSpecial ITmdo         = True
 459 isSpecial _             = False
 460
 461 -- the bitmap provided as the third component indicates whether the
 462 -- corresponding extension keyword is valid under the extension options
 463 -- provided to the compiler; if the extension corresponding to *any* of the
 464 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 465 -- facilitates using a keyword in two different extensions that can be
 466 -- activated independently)
 467 --
 468 reservedWordsFM = listToUFM $
 469         map (\(x, y, z) -> (mkFastString x, (y, z)))
 470        [( "_",          ITunderscore,   0 ),
 471         ( "as",         ITas,           0 ),
 472         ( "case",       ITcase,         0 ),
 473         ( "class",      ITclass,        0 ),
 474         ( "data",       ITdata,         0 ),
 475         ( "default",    ITdefault,      0 ),
 476         ( "deriving",   ITderiving,     0 ),
 477         ( "do",         ITdo,           0 ),
 478         ( "else",       ITelse,         0 ),
 479         ( "hiding",     IThiding,       0 ),
 480         ( "if",         ITif,           0 ),
 481         ( "import",     ITimport,       0 ),
 482         ( "in",         ITin,           0 ),
 483         ( "infix",      ITinfix,        0 ),
 484         ( "infixl",     ITinfixl,       0 ),
 485         ( "infixr",     ITinfixr,       0 ),
 486         ( "instance",   ITinstance,     0 ),
 487         ( "let",        ITlet,          0 ),
 488         ( "module",     ITmodule,       0 ),
 489         ( "newtype",    ITnewtype,      0 ),
 490         ( "of",         ITof,           0 ),
 491         ( "qualified",  ITqualified,    0 ),
 492         ( "then",       ITthen,         0 ),
 493         ( "type",       ITtype,         0 ),
 494         ( "where",      ITwhere,        0 ),
 495         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 496
 497         ( "forall",     ITforall,        bit glaExtsBit),
 498         ( "mdo",        ITmdo,           bit glaExtsBit),
 499         ( "reifyDecl",  ITreifyDecl,     bit glaExtsBit),
 500         ( "reifyType",  ITreifyType,     bit glaExtsBit),
 501         ( "reifyFixity",ITreifyFixity,   bit glaExtsBit),
 502
 503         ( "foreign",    ITforeign,       bit ffiBit),
 504         ( "export",     ITexport,        bit ffiBit),
 505         ( "label",      ITlabel,         bit ffiBit),
 506         ( "dynamic",    ITdynamic,       bit ffiBit),
 507         ( "safe",       ITsafe,          bit ffiBit),
 508         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 509         ( "unsafe",     ITunsafe,        bit ffiBit),
 510         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 511         ( "ccall",      ITccallconv,     bit ffiBit),
 512         ( "dotnet",     ITdotnet,        bit ffiBit),
 513
 514         ( "with",       ITwith,          bit withBit),
 515
 516         ( "rec",        ITrec,           bit arrowsBit),
 517         ( "proc",       ITproc,          bit arrowsBit),
 518
 519         -- On death row
 520         ("_ccall_",     ITccall (False, False, PlayRisky),
 521                                          bit glaExtsBit),
 522         ("_ccall_GC_",  ITccall (False, False, PlaySafe False),
 523                                          bit glaExtsBit),
 524         ("_casm_",      ITccall (False, True,  PlayRisky),
 525                                          bit glaExtsBit),
 526         ("_casm_GC_",   ITccall (False, True,  PlaySafe False),
 527                                          bit glaExtsBit)
 528      ]
 529
 530 reservedSymsFM = listToUFM $
 531         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 532       [ ("..",  ITdotdot,       0)
 533        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 534                                                 -- meaning only list cons
 535        ,("::",  ITdcolon,       0)
 536        ,("=",   ITequal,        0)
 537        ,("\\",  ITlam,          0)
 538        ,("|",   ITvbar,         0)
 539        ,("<-",  ITlarrow,       0)
 540        ,("->",  ITrarrow,       0)
 541        ,("@",   ITat,           0)
 542        ,("~",   ITtilde,        0)
 543        ,("=>",  ITdarrow,       0)
 544        ,("-",   ITminus,        0)
 545        ,("!",   ITbang,         0)
 546
 547        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 548        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 549
 550        ,("-<",  ITlarrowtail,   bit arrowsBit)
 551        ,(">-",  ITrarrowtail,   bit arrowsBit)
 552        ,("-<<", ITLarrowtail,   bit arrowsBit)
 553        ,(">>-", ITRarrowtail,   bit arrowsBit)
 554        ]
 555
 556 -- -----------------------------------------------------------------------------
 557 -- Lexer actions
 558
 559 type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token
 560
 561 special :: Token__ -> Action
 562 special tok loc end _buf len = return (T loc end tok)
 563
 564 token, layout_token :: Token__ -> Action
 565 token t loc end buf len = return (T loc end t)
 566 layout_token t loc end buf len = pushLexState layout >> return (T loc end t)
 567
 568 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 569 idtoken f loc end buf len = return (T loc end $! (f buf len))
 570
 571 skip_one_varid :: (FastString -> Token__) -> Action
 572 skip_one_varid f loc end buf len
 573   = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1)))
 574
 575 strtoken :: (String -> Token__) -> Action
 576 strtoken f loc end buf len =
 577   return (T loc end $! (f $! lexemeToString buf len))
 578
 579 init_strtoken :: Int -> (String -> Token__) -> Action
 580 -- like strtoken, but drops the last N character(s)
 581 init_strtoken drop f loc end buf len =
 582   return (T loc end $! (f $! lexemeToString buf (len-drop)))
 583
 584 begin :: Int -> Action
 585 begin code _loc _end _str _len = do pushLexState code; lexToken
 586
 587 pop :: Action
 588 pop _loc _end _buf _len = do popLexState; lexToken
 589
 590 pop_and :: Action -> Action
 591 pop_and act loc end buf len = do popLexState; act loc end buf len
 592
 593 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 594
 595 {-
 596   nested comments require traversing by hand, they can't be parsed
 597   using regular expressions.
 598 -}
 599 nested_comment :: Action
 600 nested_comment loc _end _str _len = do
 601   input <- getInput
 602   go 1 input
 603   where go 0 input = do setInput input; lexToken
 604         go n input = do
 605           case alexGetChar input of
 606             Nothing  -> err input
 607             Just (c,input) -> do
 608               case c of
 609                 '-' -> do
 610                   case alexGetChar input of
 611                     Nothing  -> err input
 612                     Just ('\125',input) -> go (n-1) input
 613                     Just (c,_)          -> go n input
 614                 '\123' -> do
 615                   case alexGetChar input of
 616                     Nothing  -> err input
 617                     Just ('-',input') -> go (n+1) input'
 618                     Just (c,input)    -> go n input
 619                 c -> go n input
 620
 621         err input = do failLocMsgP loc (fst input) "unterminated `{-'"
 622
 623 open_brace, close_brace :: Action
 624 open_brace  loc end _str _len = do
 625   ctx <- getContext
 626   setContext (NoLayout:ctx)
 627   return (T loc end ITocurly)
 628 close_brace loc end _str _len = do
 629   popContext
 630   return (T loc end ITccurly)
 631
 632 -- We have to be careful not to count M.<varid> as a qualified name
 633 -- when <varid> is a keyword.  We hack around this by catching
 634 -- the offending tokens afterward, and re-lexing in a different state.
 635 check_qvarid loc end buf len = do
 636   case lookupUFM reservedWordsFM var of
 637         Just (keyword,exts)
 638           | not (isSpecial keyword) ->
 639           if exts == 0
 640              then try_again
 641              else do
 642                 b <- extension (\i -> exts .&. i /= 0)
 643                 if b then try_again
 644                      else return token
 645         _other -> return token
 646   where
 647         (mod,var) = splitQualName buf len
 648         token     = T loc end (ITqvarid (mod,var))
 649
 650         try_again = do
 651                 setInput (loc,buf)
 652                 pushLexState bad_qvarid
 653                 lexToken
 654
 655 qvarid buf len = ITqvarid $! splitQualName buf len
 656 qconid buf len = ITqconid $! splitQualName buf len
 657
 658 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 659 -- takes a StringBuffer and a length, and returns the module name
 660 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 661 -- because of hierarchical module names.
 662 splitQualName orig_buf len = split orig_buf 0 0
 663   where
 664     split buf dot_off n
 665         | n == len                = done dot_off
 666         | lookAhead buf n == '.'  = split2 buf n (n+1)
 667         | otherwise               = split buf dot_off (n+1)
 668
 669     -- careful, we might get names like M....
 670     -- so, if the character after the dot is not upper-case, this is
 671     -- the end of the qualifier part.
 672     split2 buf dot_off n
 673         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 674         | otherwise                 = done dot_off
 675
 676     done dot_off =
 677         (lexemeToFastString orig_buf dot_off,
 678          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 679
 680 varid loc end buf len =
 681   case lookupUFM reservedWordsFM fs of
 682         Just (keyword,0)    -> do
 683                 maybe_layout keyword
 684                 return (T loc end keyword)
 685         Just (keyword,exts) -> do
 686                 b <- extension (\i -> exts .&. i /= 0)
 687                 if b then do maybe_layout keyword
 688                              return (T loc end keyword)
 689                      else return (T loc end (ITvarid fs))
 690         _other -> return (T loc end (ITvarid fs))
 691   where
 692         fs = lexemeToFastString buf len
 693
 694 conid buf len = ITconid fs
 695   where fs = lexemeToFastString buf len
 696
 697 qvarsym buf len = ITqvarsym $! splitQualName buf len
 698 qconsym buf len = ITqconsym $! splitQualName buf len
 699
 700 varsym = sym ITvarsym
 701 consym = sym ITconsym
 702
 703 sym con loc end buf len =
 704   case lookupUFM reservedSymsFM fs of
 705         Just (keyword,0)    -> return (T loc end keyword)
 706         Just (keyword,exts) -> do
 707                 b <- extension (\i -> exts .&. i /= 0)
 708                 if b then return (T loc end keyword)
 709                      else return (T loc end $! con fs)
 710         _other -> return (T loc end $! con fs)
 711   where
 712         fs = lexemeToFastString buf len
 713
 714 tok_decimal loc end buf len
 715   = return (T loc end (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 716
 717 tok_octal loc end buf len
 718   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 719
 720 tok_hexadecimal loc end buf len
 721   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 722
 723 prim_decimal loc end buf len
 724   = return (T loc end (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 725
 726 prim_octal loc end buf len
 727   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 728
 729 prim_hexadecimal loc end buf len
 730   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 731
 732 tok_float        str = ITrational $! readRational__ str
 733 prim_float       str = ITprimfloat  $! readRational__ str
 734 prim_double      str = ITprimdouble $! readRational__ str
 735
 736 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 737 parseInteger buf len radix to_int
 738   = go 0 0
 739   where go i x | i == len  = x
 740                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 741
 742 clitlit :: Action
 743 clitlit loc end buf len =
 744   return (T loc end (ITlitlit $! lexemeToFastString (stepOnBy 2 buf) (len-4)))
 745
 746 -- -----------------------------------------------------------------------------
 747 -- Layout processing
 748
 749 -- we're at the first token on a line, insert layout tokens if necessary
 750 do_bol :: Action
 751 do_bol loc end _str _len = do
 752         pos <- getOffside end
 753         case pos of
 754             LT -> do
 755                 --trace "layout: inserting '}'" $ do
 756                 popContext
 757                 -- do NOT pop the lex state, we might have a ';' to insert
 758                 return (T loc end ITvccurly)
 759             EQ -> do
 760                 --trace "layout: inserting ';'" $ do
 761                 popLexState
 762                 return (T loc end ITsemi)
 763             GT -> do
 764                 popLexState
 765                 lexToken
 766
 767 -- certain keywords put us in the "layout" state, where we might
 768 -- add an opening curly brace.
 769 maybe_layout ITdo       = pushLexState layout_do
 770 maybe_layout ITmdo      = pushLexState layout_do
 771 maybe_layout ITof       = pushLexState layout
 772 maybe_layout ITlet      = pushLexState layout
 773 maybe_layout ITwhere    = pushLexState layout
 774 maybe_layout ITrec      = pushLexState layout
 775 maybe_layout _          = return ()
 776
 777 -- Pushing a new implicit layout context.  If the indentation of the
 778 -- next token is not greater than the previous layout context, then
 779 -- Haskell 98 says that the new layout context should be empty; that is
 780 -- the lexer must generate {}.
 781 --
 782 -- We are slightly more lenient than this: when the new context is started
 783 -- by a 'do', then we allow the new context to be at the same indentation as
 784 -- the previous context.  This is what the 'strict' argument is for.
 785 --
 786 new_layout_context strict loc end _buf _len = do
 787     popLexState
 788     let offset = srcLocCol loc
 789     ctx <- getContext
 790     case ctx of
 791         Layout prev_off : _  |
 792            (strict     && prev_off >= offset  ||
 793             not strict && prev_off > offset) -> do
 794                 -- token is indented to the left of the previous context.
 795                 -- we must generate a {} sequence now.
 796                 pushLexState layout_left
 797                 return (T loc end ITvocurly)
 798         other -> do
 799                 setContext (Layout offset : ctx)
 800                 return (T loc end ITvocurly)
 801
 802 do_layout_left loc end _buf _len = do
 803     popLexState
 804     pushLexState bol  -- we must be at the start of a line
 805     return (T loc end ITvccurly)
 806
 807 -- -----------------------------------------------------------------------------
 808 -- LINE pragmas
 809
 810 set_line :: Int -> Action
 811 set_line code loc end buf len = do
 812   let line = parseInteger buf len 10 oct_or_dec
 813   setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0)
 814         -- subtract one: the line number refers to the *following* line
 815   popLexState
 816   pushLexState code
 817   lexToken
 818
 819 set_file :: Int -> Action
 820 set_file code loc end buf len = do
 821   let file = lexemeToFastString (stepOn buf) (len-2)
 822   setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end))
 823   popLexState
 824   pushLexState code
 825   lexToken
 826
 827 -- -----------------------------------------------------------------------------
 828 -- Strings & Chars
 829
 830 -- This stuff is horrible.  I hates it.
 831
 832 lex_string_tok :: Action
 833 lex_string_tok loc end buf len = do
 834   tok <- lex_string ""
 835   end <- getSrcLoc
 836   return (T loc end tok)
 837
 838 lex_string :: String -> P Token__
 839 lex_string s = do
 840   i <- getInput
 841   case alexGetChar i of
 842     Nothing -> lit_error
 843
 844     Just ('"',i)  -> do
 845         setInput i
 846         glaexts <- extension glaExtsEnabled
 847         if glaexts
 848           then do
 849             i <- getInput
 850             case alexGetChar i of
 851               Just ('#',i) -> do
 852                    setInput i
 853                    if any (> '\xFF') s
 854                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 855                     else let s' = mkFastStringNarrow (reverse s) in
 856                          -- always a narrow string/byte array
 857                          return (ITprimstring s')
 858               _other ->
 859                 return (ITstring (mkFastString (reverse s)))
 860           else
 861                 return (ITstring (mkFastString (reverse s)))
 862
 863     Just ('\\',i)
 864         | Just ('&',i) <- next -> do
 865                 setInput i; lex_string s
 866         | Just (c,i) <- next, is_space c -> do
 867                 setInput i; lex_stringgap s
 868         where next = alexGetChar i
 869
 870     Just _ -> do
 871         c <- lex_char
 872         lex_string (c:s)
 873
 874
 875 lex_stringgap s = do
 876   c <- getCharOrFail
 877   case c of
 878     '\\' -> lex_string s
 879     c | is_space c -> lex_stringgap s
 880     _other -> lit_error
 881
 882
 883 lex_char_tok :: Action
 884 lex_char_tok loc _end buf len = do
 885    c <- lex_char
 886    mc <- getCharOrFail
 887    case mc of
 888         '\'' -> do
 889            glaexts <- extension glaExtsEnabled
 890            if glaexts
 891                 then do
 892                    i@(end,_) <- getInput
 893                    case alexGetChar i of
 894                         Just ('#',i@(end,_)) -> do
 895                                 setInput i
 896                                 return (T loc end (ITprimchar c))
 897                         _other ->
 898                                 return (T loc end (ITchar c))
 899                 else do
 900                    end <- getSrcLoc
 901                    return (T loc end (ITchar c))
 902
 903         _other -> lit_error
 904
 905 lex_char :: P Char
 906 lex_char = do
 907   mc <- getCharOrFail
 908   case mc of
 909       '\\' -> lex_escape
 910       c | is_any c -> return c
 911       _other -> lit_error
 912
 913 lex_escape :: P Char
 914 lex_escape = do
 915   c <- getCharOrFail
 916   case c of
 917         'a'   -> return '\a'
 918         'b'   -> return '\b'
 919         'f'   -> return '\f'
 920         'n'   -> return '\n'
 921         'r'   -> return '\r'
 922         't'   -> return '\t'
 923         'v'   -> return '\v'
 924         '\\'  -> return '\\'
 925         '"'   -> return '\"'
 926         '\''  -> return '\''
 927         '^'   -> do c <- getCharOrFail
 928                     if c >= '@' && c <= '_'
 929                         then return (chr (ord c - ord '@'))
 930                         else lit_error
 931
 932         'x'   -> readNum is_hexdigit 16 hex
 933         'o'   -> readNum is_octdigit  8 oct_or_dec
 934         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 935
 936         c1 ->  do
 937            i <- getInput
 938            case alexGetChar i of
 939             Nothing -> lit_error
 940             Just (c2,i2) ->
 941               case alexGetChar i2 of
 942                 Nothing -> lit_error
 943                 Just (c3,i3) ->
 944                    let str = [c1,c2,c3] in
 945                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 946                                      Just rest <- [maybePrefixMatch p str] ] of
 947                           (escape_char,[]):_ -> do
 948                                 setInput i3
 949                                 return escape_char
 950                           (escape_char,_:_):_ -> do
 951                                 setInput i2
 952                                 return escape_char
 953                           [] -> lit_error
 954
 955 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 956 readNum is_digit base conv = do
 957   c <- getCharOrFail
 958   if is_digit c
 959         then readNum2 is_digit base conv (conv c)
 960         else lit_error
 961
 962 readNum2 is_digit base conv i = do
 963   input <- getInput
 964   read i input
 965   where read i input = do
 966           case alexGetChar input of
 967             Just (c,input') | is_digit c -> do
 968                 read (i*base + conv c) input'
 969             _other -> do
 970                 setInput input
 971                 if i >= 0 && i <= 0x10FFFF
 972                    then return (chr i)
 973                    else lit_error
 974
 975 is_hexdigit c
 976         =  is_digit c
 977         || (c >= 'a' && c <= 'f')
 978         || (c >= 'A' && c <= 'F')
 979
 980 hex c | is_digit c = ord c - ord '0'
 981       | otherwise  = ord (to_lower c) - ord 'a' + 10
 982
 983 oct_or_dec c = ord c - ord '0'
 984
 985 is_octdigit c = c >= '0' && c <= '7'
 986
 987 to_lower c
 988   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
 989   | otherwise = c
 990
 991 silly_escape_chars = [
 992         ("NUL", '\NUL'),
 993         ("SOH", '\SOH'),
 994         ("STX", '\STX'),
 995         ("ETX", '\ETX'),
 996         ("EOT", '\EOT'),
 997         ("ENQ", '\ENQ'),
 998         ("ACK", '\ACK'),
 999         ("BEL", '\BEL'),
1000         ("BS", '\BS'),
1001         ("HT", '\HT'),
1002         ("LF", '\LF'),
1003         ("VT", '\VT'),
1004         ("FF", '\FF'),
1005         ("CR", '\CR'),
1006         ("SO", '\SO'),
1007         ("SI", '\SI'),
1008         ("DLE", '\DLE'),
1009         ("DC1", '\DC1'),
1010         ("DC2", '\DC2'),
1011         ("DC3", '\DC3'),
1012         ("DC4", '\DC4'),
1013         ("NAK", '\NAK'),
1014         ("SYN", '\SYN'),
1015         ("ETB", '\ETB'),
1016         ("CAN", '\CAN'),
1017         ("EM", '\EM'),
1018         ("SUB", '\SUB'),
1019         ("ESC", '\ESC'),
1020         ("FS", '\FS'),
1021         ("GS", '\GS'),
1022         ("RS", '\RS'),
1023         ("US", '\US'),
1024         ("SP", '\SP'),
1025         ("DEL", '\DEL')
1026         ]
1027
1028 lit_error = lexError "lexical error in string/character literal"
1029
1030 getCharOrFail :: P Char
1031 getCharOrFail =  do
1032   i <- getInput
1033   case alexGetChar i of
1034         Nothing -> lexError "unexpected end-of-file in string/character literal"
1035         Just (c,i)  -> do setInput i; return c
1036
1037 -- -----------------------------------------------------------------------------
1038 -- Floats
1039
1040 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1041 readRational r = do
1042      (n,d,s) <- readFix r
1043      (k,t)   <- readExp s
1044      return ((n%1)*10^^(k-d), t)
1045  where
1046      readFix r = do
1047         (ds,s)  <- lexDecDigits r
1048         (ds',t) <- lexDotDigits s
1049         return (read (ds++ds'), length ds', t)
1050
1051      readExp (e:s) | e `elem` "eE" = readExp' s
1052      readExp s                     = return (0,s)
1053
1054      readExp' ('+':s) = readDec s
1055      readExp' ('-':s) = do
1056                         (k,t) <- readDec s
1057                         return (-k,t)
1058      readExp' s       = readDec s
1059
1060      readDec s = do
1061         (ds,r) <- nonnull isDigit s
1062         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1063                 r)
1064
1065      lexDecDigits = nonnull isDigit
1066
1067      lexDotDigits ('.':s) = return (span isDigit s)
1068      lexDotDigits s       = return ("",s)
1069
1070      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1071                       return (cs,t)
1072
1073 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1074 readRational__ top_s
1075   = case top_s of
1076       '-' : xs -> - (read_me xs)
1077       xs       -> read_me xs
1078   where
1079     read_me s
1080       = case (do { (x,"") <- readRational s ; return x }) of
1081           [x] -> x
1082           []  -> error ("readRational__: no parse:"        ++ top_s)
1083           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1084
1085 -- -----------------------------------------------------------------------------
1086 -- The Parse Monad
1087
1088 data LayoutContext
1089   = NoLayout
1090   | Layout !Int
1091
1092 data ParseResult a
1093   = POk PState a
1094   | PFailed
1095         SrcLoc SrcLoc   -- The start and end of the text span related to
1096                         -- the error.  Might be used in environments which can
1097                         -- show this span, e.g. by highlighting it.
1098         Message         -- The error message
1099
1100 showPFailed loc1 loc2 err
1101  = showSDoc (hcat [ppr loc1, text ": ", err])
1102
1103 data PState = PState {
1104         buffer     :: StringBuffer,
1105         last_loc   :: SrcLoc,           -- pos of previous token
1106         last_len   :: !Int,             -- len of previous token
1107         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1108         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1109         context    :: [LayoutContext],
1110         lex_state  :: [Int]
1111      }
1112         -- last_loc and last_len are used when generating error messages,
1113         -- and in pushCurrentContext only.
1114
1115 newtype P a = P { unP :: PState -> ParseResult a }
1116
1117 instance Monad P where
1118   return = returnP
1119   (>>=) = thenP
1120   fail = failP
1121
1122 returnP :: a -> P a
1123 returnP a = P $ \s -> POk s a
1124
1125 thenP :: P a -> (a -> P b) -> P b
1126 (P m) `thenP` k = P $ \ s ->
1127         case m s of
1128                 POk s1 a          -> (unP (k a)) s1
1129                 PFailed l1 l2 err -> PFailed l1 l2 err
1130
1131 failP :: String -> P a
1132 failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1133
1134 failMsgP :: String -> P a
1135 failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1136
1137 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1138 failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str)
1139
1140 extension :: (Int -> Bool) -> P Bool
1141 extension p = P $ \s -> POk s (p $! extsBitmap s)
1142
1143 getExts :: P Int
1144 getExts = P $ \s -> POk s (extsBitmap s)
1145
1146 setSrcLoc :: SrcLoc -> P ()
1147 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1148
1149 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1150 -- any calls to the lexer, because it assumes things about the SrcLoc.
1151 setSrcLocFor :: SrcLoc -> P a -> P a
1152 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1153   case unP scope s{loc=new_loc} of
1154         PFailed l1 l2 msg -> PFailed l1 l2 msg
1155         POk _ r -> POk s r
1156
1157 getSrcLoc :: P SrcLoc
1158 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1159
1160 setLastToken :: SrcLoc -> Int -> P ()
1161 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1162
1163 type AlexInput = (SrcLoc,StringBuffer)
1164
1165 alexInputPrevChar :: AlexInput -> Char
1166 alexInputPrevChar (_,s) = prevChar s '\n'
1167
1168 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1169 alexGetChar (loc,s)
1170   | atEnd s   = Nothing
1171   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1172   where c = currentChar s
1173         loc' = advanceSrcLoc loc c
1174         s'   = stepOn s
1175
1176 getInput :: P AlexInput
1177 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1178
1179 setInput :: AlexInput -> P ()
1180 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1181
1182 pushLexState :: Int -> P ()
1183 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1184
1185 popLexState :: P Int
1186 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1187
1188 getLexState :: P Int
1189 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1190
1191 -- for reasons of efficiency, flags indicating language extensions (eg,
1192 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1193 -- integer
1194
1195 glaExtsBit, ffiBit, parrBit :: Int
1196 glaExtsBit = 0
1197 ffiBit     = 1
1198 parrBit    = 2
1199 withBit    = 3
1200 arrowsBit  = 4
1201
1202 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1203 glaExtsEnabled flags = testBit flags glaExtsBit
1204 ffiEnabled     flags = testBit flags ffiBit
1205 withEnabled    flags = testBit flags withBit
1206 parrEnabled    flags = testBit flags parrBit
1207 arrowsEnabled  flags = testBit flags arrowsBit
1208
1209 -- convenient record-based bitmap for the interface to the rest of the world
1210 --
1211 -- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below)
1212 --
1213 data ExtFlags = ExtFlags {
1214                   glasgowExtsEF :: Bool,
1215                   ffiEF         :: Bool,
1216                   withEF        :: Bool,
1217                   parrEF        :: Bool,
1218                   arrowsEF      :: Bool
1219                 }
1220
1221 -- create a parse state
1222 --
1223 mkPState :: StringBuffer -> SrcLoc -> ExtFlags -> PState
1224 mkPState buf loc exts  =
1225   PState {
1226       buffer     = buf,
1227       last_loc   = loc,
1228       last_len   = 0,
1229       loc        = loc,
1230       extsBitmap = fromIntegral bitmap,
1231       context    = [],
1232       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1233         -- we begin in the layout state if toplev_layout is set
1234     }
1235     where
1236       bitmap =     glaExtsBit `setBitIf` glasgowExtsEF     exts
1237                .|. ffiBit     `setBitIf` (ffiEF            exts
1238                                           || glasgowExtsEF exts)
1239                .|. withBit    `setBitIf` withEF            exts
1240                .|. parrBit    `setBitIf` parrEF            exts
1241                .|. arrowsBit  `setBitIf` arrowsEF          exts
1242       --
1243       setBitIf :: Int -> Bool -> Int
1244       b `setBitIf` cond | cond      = bit b
1245                         | otherwise = 0
1246
1247 getContext :: P [LayoutContext]
1248 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1249
1250 setContext :: [LayoutContext] -> P ()
1251 setContext ctx = P $ \s -> POk s{context=ctx} ()
1252
1253 popContext :: P ()
1254 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1255                            loc = loc, last_len = len, last_loc = last_loc }) ->
1256   case ctx of
1257         (_:tl) -> POk s{ context = tl } ()
1258         []     -> PFailed last_loc loc (srcParseErr buf len)
1259
1260 -- Push a new layout context at the indentation of the last token read.
1261 -- This is only used at the outer level of a module when the 'module'
1262 -- keyword is missing.
1263 pushCurrentContext :: P ()
1264 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1265   POk s{ context = Layout (srcLocCol loc) : ctx} ()
1266
1267 getOffside :: SrcLoc -> P Ordering
1268 getOffside loc = P $ \s@PState{context=stk} ->
1269                 let ord = case stk of
1270                         (Layout n:_) -> compare (srcLocCol loc) n
1271                         _            -> GT
1272                 in POk s ord
1273
1274 -- ---------------------------------------------------------------------------
1275 -- Construct a parse error
1276
1277 srcParseErr
1278   :: StringBuffer       -- current buffer (placed just after the last token)
1279   -> Int                -- length of the previous token
1280   -> Message
1281 srcParseErr buf len
1282   = hcat [ if null token
1283              then ptext SLIT("parse error (possibly incorrect indentation)")
1284              else hcat [ptext SLIT("parse error on input "),
1285                         char '`', text token, char '\'']
1286     ]
1287   where token = lexemeToString (stepOnBy (-len) buf) len
1288
1289 -- Report a parse failure, giving the span of the previous token as
1290 -- the location of the error.  This is the entry point for errors
1291 -- detected during parsing.
1292 srcParseFail :: P a
1293 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1294                                 last_loc = last_loc, loc = loc } ->
1295     PFailed last_loc loc (srcParseErr buf len)
1296
1297 -- A lexical error is reported at a particular position in the source file,
1298 -- not over a token range.  TODO: this is slightly wrong, because we record
1299 -- the error at the character position following the one which caused the
1300 -- error.  We should somehow back up by one character.
1301 lexError :: String -> P a
1302 lexError str = do
1303   loc <- getSrcLoc
1304   failLocMsgP loc loc str
1305
1306 -- -----------------------------------------------------------------------------
1307 -- This is the top-level function: called from the parser each time a
1308 -- new token is to be read from the input.
1309
1310 lexer :: (Token -> P a) -> P a
1311 lexer cont = do
1312   tok@(T _ _ tok__) <- lexToken
1313   --trace ("token: " ++ show tok__) $ do
1314   cont tok
1315
1316 lexToken :: P Token
1317 lexToken = do
1318   inp@(loc1,buf) <- getInput
1319   sc <- getLexState
1320   exts <- getExts
1321   case alexScanUser exts inp sc of
1322     AlexEOF -> do setLastToken loc1 0
1323                   return (T loc1 loc1 ITeof)
1324     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1325     AlexSkip inp2 _ -> do
1326         setInput inp2
1327         lexToken
1328     AlexToken inp2@(end,buf2) len t -> do
1329         setInput inp2
1330         setLastToken loc1 len
1331         t loc1 end buf len
1332 }