ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ForeignCall      ( Safety(..) )
  34 import ErrUtils         ( Message )
  35 import Outputable
  36 import StringBuffer
  37 import FastString
  38 import FastTypes
  39 import SrcLoc
  40 import UniqFM
  41 import CmdLineOpts
  42 import Ctype
  43 import Util             ( maybePrefixMatch )
  44
  45 import DATA_BITS
  46 import Char
  47 import Ratio
  48 import TRACE
  49 }
  50
  51 $whitechar   = [\ \t\n\r\f\v]
  52 $white_no_nl = $whitechar # \n
  53
  54 $ascdigit  = 0-9
  55 $unidigit  = \x01
  56 $digit     = [$ascdigit $unidigit]
  57
  58 $special   = [\(\)\,\;\[\]\`\{\}]
  59 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  60 $unisymbol = \x02
  61 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  62
  63 $unilarge  = \x03
  64 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  65 $large     = [$asclarge $unilarge]
  66
  67 $unismall  = \x04
  68 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  69 $small     = [$ascsmall $unismall \_]
  70
  71 $graphic   = [$small $large $symbol $digit $special \:\"\']
  72
  73 $octit     = 0-7
  74 $hexit     = [$digit A-F a-f]
  75 $symchar   = [$symbol \:]
  76 $nl        = [\n\r]
  77 $idchar    = [$small $large $digit \']
  78
  79 @varid     = $small $idchar*
  80 @conid     = $large $idchar*
  81
  82 @varsym    = $symbol $symchar*
  83 @consym    = \: $symchar*
  84
  85 @decimal     = $digit+
  86 @octal       = $octit+
  87 @hexadecimal = $hexit+
  88 @exponent    = [eE] [\-\+]? @decimal
  89
  90 -- we support the hierarchical module name extension:
  91 @qual = (@conid \.)+
  92
  93 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  94
  95 haskell :-
  96
  97 -- everywhere: skip whitespace and comments
  98 $white_no_nl+                           ;
  99
 100 -- Everywhere: deal with nested comments.  We explicitly rule out
 101 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 102 -- (this can happen even though pragmas will normally take precedence due to
 103 -- longest-match, because pragmas aren't valid in every state, but comments
 104 -- are).
 105 "{-" / { notFollowedBy '#' }            { nested_comment }
 106
 107 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 108 -- more dashes followed by a symbol should be parsed as a varsym, so we
 109 -- have to exclude those.
 110 -- The regex says: "munch all the characters after the dashes, as long as
 111 -- the first one is not a symbol".
 112 "--"\-* ([^$symbol] .*)?                ;
 113
 114 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 115 -- blank lines) until we find a non-whitespace character, then do layout
 116 -- processing.
 117 --
 118 -- One slight wibble here: what if the line begins with {-#? In
 119 -- theory, we have to lex the pragma to see if it's one we recognise,
 120 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 121 -- as a nested comment.  We don't bother with this: if the line begins
 122 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 123 <bol> {
 124   \n                                    ;
 125   ^\# (line)?                           { begin line_prag1 }
 126   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 127   ()                                    { do_bol }
 128 }
 129
 130 -- after a layout keyword (let, where, do, of), we begin a new layout
 131 -- context if the curly brace is missing.
 132 -- Careful! This stuff is quite delicate.
 133 <layout, layout_do> {
 134   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 135         -- we might encounter {-# here, but {- has been handled already
 136   \n                                    ;
 137   ^\# (line)?                           { begin line_prag1 }
 138 }
 139
 140 -- do is treated in a subtly different way, see new_layout_context
 141 <layout>    ()                          { new_layout_context True }
 142 <layout_do> ()                          { new_layout_context False }
 143
 144 -- after a new layout context which was found to be to the left of the
 145 -- previous context, we have generated a '{' token, and we now need to
 146 -- generate a matching '}' token.
 147 <layout_left>  ()                       { do_layout_left }
 148
 149 <0,glaexts> \n                          { begin bol }
 150
 151 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 152
 153 -- single-line line pragmas, of the form
 154 --    # <line> "<file>" <extra-stuff> \n
 155 <line_prag1> $digit+                    { set_line line_prag1a }
 156 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 157 <line_prag1b> .*                        { pop }
 158
 159 -- Haskell-style line pragmas, of the form
 160 --    {-# LINE <line> "<file>" #-}
 161 <line_prag2> $digit+                    { set_line line_prag2a }
 162 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 163 <line_prag2b> "#-}"                     { pop }
 164
 165 <0,glaexts> {
 166   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 167                                         { token ITspecialise_prag }
 168   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 169   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 170   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 171                                         { token ITnoinline_prag }
 172   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 173   "{-#" $whitechar* (DEPRECATED|deprecated)
 174                                         { token ITdeprecated_prag }
 175   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 176   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 177
 178   "{-#"                                 { nested_comment }
 179
 180   -- ToDo: should only be valid inside a pragma:
 181   "#-}"                                 { token ITclose_prag}
 182 }
 183
 184
 185 -- '0' state: ordinary lexemes
 186 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 187
 188 -- "special" symbols
 189
 190 <0,glaexts> {
 191   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 192   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 193 }
 194
 195 <0,glaexts> {
 196   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 197   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 198   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 199   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 200   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 201   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 202   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 203   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 204 }
 205
 206 <0,glaexts> {
 207   "(|" / { ifExtension arrowsEnabled }  { special IToparenbar }
 208   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 209 }
 210
 211 <0,glaexts> {
 212   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 213   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 214 }
 215
 216 <glaexts> {
 217   "(#"                                  { token IToubxparen }
 218   "#)"                                  { token ITcubxparen }
 219   "{|"                                  { token ITocurlybar }
 220   "|}"                                  { token ITccurlybar }
 221 }
 222
 223 <0,glaexts> {
 224   \(                                    { special IToparen }
 225   \)                                    { special ITcparen }
 226   \[                                    { special ITobrack }
 227   \]                                    { special ITcbrack }
 228   \,                                    { special ITcomma }
 229   \;                                    { special ITsemi }
 230   \`                                    { special ITbackquote }
 231
 232   \{                                    { open_brace }
 233   \}                                    { close_brace }
 234 }
 235
 236 <0,glaexts> {
 237   @qual @varid                  { check_qvarid }
 238   @qual @conid                  { idtoken qconid }
 239   @varid                        { varid }
 240   @conid                        { idtoken conid }
 241 }
 242
 243 -- after an illegal qvarid, such as 'M.let',
 244 -- we back up and try again in the bad_qvarid state:
 245 <bad_qvarid> {
 246   @conid                        { pop_and (idtoken conid) }
 247   @qual @conid                  { pop_and (idtoken qconid) }
 248 }
 249
 250 <glaexts> {
 251   @qual @varid "#"+             { idtoken qvarid }
 252   @qual @conid "#"+             { idtoken qconid }
 253   @varid "#"+                   { varid }
 254   @conid "#"+                   { idtoken conid }
 255 }
 256
 257 -- ToDo: M.(,,,)
 258
 259 <0,glaexts> {
 260   @qual @varsym                 { idtoken qvarsym }
 261   @qual @consym                 { idtoken qconsym }
 262   @varsym                       { varsym }
 263   @consym                       { consym }
 264 }
 265
 266 <0,glaexts> {
 267   @decimal                      { tok_decimal }
 268   0[oO] @octal                  { tok_octal }
 269   0[xX] @hexadecimal            { tok_hexadecimal }
 270 }
 271
 272 <glaexts> {
 273   @decimal \#                   { prim_decimal }
 274   0[oO] @octal \#               { prim_octal }
 275   0[xX] @hexadecimal \#         { prim_hexadecimal }
 276 }
 277
 278 <0,glaexts> @floating_point             { strtoken tok_float }
 279 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 280 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 281
 282 -- Strings and chars are lexed by hand-written code.  The reason is
 283 -- that even if we recognise the string or char here in the regex
 284 -- lexer, we would still have to parse the string afterward in order
 285 -- to convert it to a String.
 286 <0,glaexts> {
 287   \'                            { lex_char_tok }
 288   \"                            { lex_string_tok }
 289 }
 290
 291 {
 292 -- work around bug in Alex 2.0
 293 #if __GLASGOW_HASKELL__ < 503
 294 unsafeAt arr i = arr ! i
 295 #endif
 296
 297 -- -----------------------------------------------------------------------------
 298 -- The token type
 299
 300 data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__
 301
 302 data Token__
 303   = ITas                        -- Haskell keywords
 304   | ITcase
 305   | ITclass
 306   | ITdata
 307   | ITdefault
 308   | ITderiving
 309   | ITdo
 310   | ITelse
 311   | IThiding
 312   | ITif
 313   | ITimport
 314   | ITin
 315   | ITinfix
 316   | ITinfixl
 317   | ITinfixr
 318   | ITinstance
 319   | ITlet
 320   | ITmodule
 321   | ITnewtype
 322   | ITof
 323   | ITqualified
 324   | ITthen
 325   | ITtype
 326   | ITwhere
 327   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 328
 329   | ITforall                    -- GHC extension keywords
 330   | ITforeign
 331   | ITexport
 332   | ITlabel
 333   | ITdynamic
 334   | ITsafe
 335   | ITthreadsafe
 336   | ITunsafe
 337   | ITwith
 338   | ITstdcallconv
 339   | ITccallconv
 340   | ITdotnet
 341   | ITmdo
 342
 343   | ITspecialise_prag           -- Pragmas
 344   | ITsource_prag
 345   | ITinline_prag
 346   | ITnoinline_prag
 347   | ITrules_prag
 348   | ITdeprecated_prag
 349   | ITline_prag
 350   | ITscc_prag
 351   | ITcore_prag                 -- hdaume: core annotations
 352   | ITclose_prag
 353
 354   | ITdotdot                    -- reserved symbols
 355   | ITcolon
 356   | ITdcolon
 357   | ITequal
 358   | ITlam
 359   | ITvbar
 360   | ITlarrow
 361   | ITrarrow
 362   | ITat
 363   | ITtilde
 364   | ITdarrow
 365   | ITminus
 366   | ITbang
 367   | ITstar
 368   | ITdot
 369
 370   | ITbiglam                    -- GHC-extension symbols
 371
 372   | ITocurly                    -- special symbols
 373   | ITccurly
 374   | ITocurlybar                 -- {|, for type applications
 375   | ITccurlybar                 -- |}, for type applications
 376   | ITvocurly
 377   | ITvccurly
 378   | ITobrack
 379   | ITopabrack                  -- [:, for parallel arrays with -fparr
 380   | ITcpabrack                  -- :], for parallel arrays with -fparr
 381   | ITcbrack
 382   | IToparen
 383   | ITcparen
 384   | IToubxparen
 385   | ITcubxparen
 386   | ITsemi
 387   | ITcomma
 388   | ITunderscore
 389   | ITbackquote
 390
 391   | ITvarid   FastString        -- identifiers
 392   | ITconid   FastString
 393   | ITvarsym  FastString
 394   | ITconsym  FastString
 395   | ITqvarid  (FastString,FastString)
 396   | ITqconid  (FastString,FastString)
 397   | ITqvarsym (FastString,FastString)
 398   | ITqconsym (FastString,FastString)
 399
 400   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 401   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 402
 403   | ITpragma StringBuffer
 404
 405   | ITchar       Char
 406   | ITstring     FastString
 407   | ITinteger    Integer
 408   | ITrational   Rational
 409
 410   | ITprimchar   Char
 411   | ITprimstring FastString
 412   | ITprimint    Integer
 413   | ITprimfloat  Rational
 414   | ITprimdouble Rational
 415
 416   -- MetaHaskell extension tokens
 417   | ITopenExpQuote              -- [| or [e|
 418   | ITopenPatQuote              -- [p|
 419   | ITopenDecQuote              -- [d|
 420   | ITopenTypQuote              -- [t|
 421   | ITcloseQuote                -- |]
 422   | ITidEscape   FastString     -- $x
 423   | ITparenEscape               -- $(
 424   | ITreifyType
 425   | ITreifyDecl
 426   | ITreifyFixity
 427
 428   -- Arrow notation extension
 429   | ITproc
 430   | ITrec
 431   | IToparenbar                 -- (|
 432   | ITcparenbar                 -- |)
 433   | ITlarrowtail                -- -<
 434   | ITrarrowtail                -- >-
 435   | ITLarrowtail                -- -<<
 436   | ITRarrowtail                -- >>-
 437
 438   | ITunknown String            -- Used when the lexer can't make sense of it
 439   | ITeof                       -- end of file token
 440 #ifdef DEBUG
 441   deriving Show -- debugging
 442 #endif
 443
 444 isSpecial :: Token__ -> Bool
 445 -- If we see M.x, where x is a keyword, but
 446 -- is special, we treat is as just plain M.x,
 447 -- not as a keyword.
 448 isSpecial ITas          = True
 449 isSpecial IThiding      = True
 450 isSpecial ITqualified   = True
 451 isSpecial ITforall      = True
 452 isSpecial ITexport      = True
 453 isSpecial ITlabel       = True
 454 isSpecial ITdynamic     = True
 455 isSpecial ITsafe        = True
 456 isSpecial ITthreadsafe  = True
 457 isSpecial ITunsafe      = True
 458 isSpecial ITwith        = True
 459 isSpecial ITccallconv   = True
 460 isSpecial ITstdcallconv = True
 461 isSpecial ITmdo         = True
 462 isSpecial _             = False
 463
 464 -- the bitmap provided as the third component indicates whether the
 465 -- corresponding extension keyword is valid under the extension options
 466 -- provided to the compiler; if the extension corresponding to *any* of the
 467 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 468 -- facilitates using a keyword in two different extensions that can be
 469 -- activated independently)
 470 --
 471 reservedWordsFM = listToUFM $
 472         map (\(x, y, z) -> (mkFastString x, (y, z)))
 473        [( "_",          ITunderscore,   0 ),
 474         ( "as",         ITas,           0 ),
 475         ( "case",       ITcase,         0 ),
 476         ( "class",      ITclass,        0 ),
 477         ( "data",       ITdata,         0 ),
 478         ( "default",    ITdefault,      0 ),
 479         ( "deriving",   ITderiving,     0 ),
 480         ( "do",         ITdo,           0 ),
 481         ( "else",       ITelse,         0 ),
 482         ( "hiding",     IThiding,       0 ),
 483         ( "if",         ITif,           0 ),
 484         ( "import",     ITimport,       0 ),
 485         ( "in",         ITin,           0 ),
 486         ( "infix",      ITinfix,        0 ),
 487         ( "infixl",     ITinfixl,       0 ),
 488         ( "infixr",     ITinfixr,       0 ),
 489         ( "instance",   ITinstance,     0 ),
 490         ( "let",        ITlet,          0 ),
 491         ( "module",     ITmodule,       0 ),
 492         ( "newtype",    ITnewtype,      0 ),
 493         ( "of",         ITof,           0 ),
 494         ( "qualified",  ITqualified,    0 ),
 495         ( "then",       ITthen,         0 ),
 496         ( "type",       ITtype,         0 ),
 497         ( "where",      ITwhere,        0 ),
 498         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 499
 500         ( "forall",     ITforall,        bit glaExtsBit),
 501         ( "mdo",        ITmdo,           bit glaExtsBit),
 502         ( "reifyDecl",  ITreifyDecl,     bit thBit),
 503         ( "reifyType",  ITreifyType,     bit thBit),
 504         ( "reifyFixity",ITreifyFixity,   bit thBit),
 505
 506         ( "foreign",    ITforeign,       bit ffiBit),
 507         ( "export",     ITexport,        bit ffiBit),
 508         ( "label",      ITlabel,         bit ffiBit),
 509         ( "dynamic",    ITdynamic,       bit ffiBit),
 510         ( "safe",       ITsafe,          bit ffiBit),
 511         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 512         ( "unsafe",     ITunsafe,        bit ffiBit),
 513         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 514         ( "ccall",      ITccallconv,     bit ffiBit),
 515         ( "dotnet",     ITdotnet,        bit ffiBit),
 516
 517         ( "with",       ITwith,          bit withBit),
 518
 519         ( "rec",        ITrec,           bit arrowsBit),
 520         ( "proc",       ITproc,          bit arrowsBit)
 521      ]
 522
 523 reservedSymsFM = listToUFM $
 524         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 525       [ ("..",  ITdotdot,       0)
 526        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 527                                                 -- meaning only list cons
 528        ,("::",  ITdcolon,       0)
 529        ,("=",   ITequal,        0)
 530        ,("\\",  ITlam,          0)
 531        ,("|",   ITvbar,         0)
 532        ,("<-",  ITlarrow,       0)
 533        ,("->",  ITrarrow,       0)
 534        ,("@",   ITat,           0)
 535        ,("~",   ITtilde,        0)
 536        ,("=>",  ITdarrow,       0)
 537        ,("-",   ITminus,        0)
 538        ,("!",   ITbang,         0)
 539
 540        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 541        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 542
 543        ,("-<",  ITlarrowtail,   bit arrowsBit)
 544        ,(">-",  ITrarrowtail,   bit arrowsBit)
 545        ,("-<<", ITLarrowtail,   bit arrowsBit)
 546        ,(">>-", ITRarrowtail,   bit arrowsBit)
 547        ]
 548
 549 -- -----------------------------------------------------------------------------
 550 -- Lexer actions
 551
 552 type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token
 553
 554 special :: Token__ -> Action
 555 special tok loc end _buf len = return (T loc end tok)
 556
 557 token, layout_token :: Token__ -> Action
 558 token t loc end buf len = return (T loc end t)
 559 layout_token t loc end buf len = pushLexState layout >> return (T loc end t)
 560
 561 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 562 idtoken f loc end buf len = return (T loc end $! (f buf len))
 563
 564 skip_one_varid :: (FastString -> Token__) -> Action
 565 skip_one_varid f loc end buf len
 566   = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1)))
 567
 568 strtoken :: (String -> Token__) -> Action
 569 strtoken f loc end buf len =
 570   return (T loc end $! (f $! lexemeToString buf len))
 571
 572 init_strtoken :: Int -> (String -> Token__) -> Action
 573 -- like strtoken, but drops the last N character(s)
 574 init_strtoken drop f loc end buf len =
 575   return (T loc end $! (f $! lexemeToString buf (len-drop)))
 576
 577 begin :: Int -> Action
 578 begin code _loc _end _str _len = do pushLexState code; lexToken
 579
 580 pop :: Action
 581 pop _loc _end _buf _len = do popLexState; lexToken
 582
 583 pop_and :: Action -> Action
 584 pop_and act loc end buf len = do popLexState; act loc end buf len
 585
 586 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 587
 588 ifExtension pred bits _ _ _ = pred bits
 589
 590 {-
 591   nested comments require traversing by hand, they can't be parsed
 592   using regular expressions.
 593 -}
 594 nested_comment :: Action
 595 nested_comment loc _end _str _len = do
 596   input <- getInput
 597   go 1 input
 598   where go 0 input = do setInput input; lexToken
 599         go n input = do
 600           case alexGetChar input of
 601             Nothing  -> err input
 602             Just (c,input) -> do
 603               case c of
 604                 '-' -> do
 605                   case alexGetChar input of
 606                     Nothing  -> err input
 607                     Just ('\125',input) -> go (n-1) input
 608                     Just (c,_)          -> go n input
 609                 '\123' -> do
 610                   case alexGetChar input of
 611                     Nothing  -> err input
 612                     Just ('-',input') -> go (n+1) input'
 613                     Just (c,input)    -> go n input
 614                 c -> go n input
 615
 616         err input = do failLocMsgP loc (fst input) "unterminated `{-'"
 617
 618 open_brace, close_brace :: Action
 619 open_brace  loc end _str _len = do
 620   ctx <- getContext
 621   setContext (NoLayout:ctx)
 622   return (T loc end ITocurly)
 623 close_brace loc end _str _len = do
 624   popContext
 625   return (T loc end ITccurly)
 626
 627 -- We have to be careful not to count M.<varid> as a qualified name
 628 -- when <varid> is a keyword.  We hack around this by catching
 629 -- the offending tokens afterward, and re-lexing in a different state.
 630 check_qvarid loc end buf len = do
 631   case lookupUFM reservedWordsFM var of
 632         Just (keyword,exts)
 633           | not (isSpecial keyword) ->
 634           if exts == 0
 635              then try_again
 636              else do
 637                 b <- extension (\i -> exts .&. i /= 0)
 638                 if b then try_again
 639                      else return token
 640         _other -> return token
 641   where
 642         (mod,var) = splitQualName buf len
 643         token     = T loc end (ITqvarid (mod,var))
 644
 645         try_again = do
 646                 setInput (loc,buf)
 647                 pushLexState bad_qvarid
 648                 lexToken
 649
 650 qvarid buf len = ITqvarid $! splitQualName buf len
 651 qconid buf len = ITqconid $! splitQualName buf len
 652
 653 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 654 -- takes a StringBuffer and a length, and returns the module name
 655 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 656 -- because of hierarchical module names.
 657 splitQualName orig_buf len = split orig_buf 0 0
 658   where
 659     split buf dot_off n
 660         | n == len                = done dot_off
 661         | lookAhead buf n == '.'  = split2 buf n (n+1)
 662         | otherwise               = split buf dot_off (n+1)
 663
 664     -- careful, we might get names like M....
 665     -- so, if the character after the dot is not upper-case, this is
 666     -- the end of the qualifier part.
 667     split2 buf dot_off n
 668         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 669         | otherwise                 = done dot_off
 670
 671     done dot_off =
 672         (lexemeToFastString orig_buf dot_off,
 673          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 674
 675 varid loc end buf len =
 676   case lookupUFM reservedWordsFM fs of
 677         Just (keyword,0)    -> do
 678                 maybe_layout keyword
 679                 return (T loc end keyword)
 680         Just (keyword,exts) -> do
 681                 b <- extension (\i -> exts .&. i /= 0)
 682                 if b then do maybe_layout keyword
 683                              return (T loc end keyword)
 684                      else return (T loc end (ITvarid fs))
 685         _other -> return (T loc end (ITvarid fs))
 686   where
 687         fs = lexemeToFastString buf len
 688
 689 conid buf len = ITconid fs
 690   where fs = lexemeToFastString buf len
 691
 692 qvarsym buf len = ITqvarsym $! splitQualName buf len
 693 qconsym buf len = ITqconsym $! splitQualName buf len
 694
 695 varsym = sym ITvarsym
 696 consym = sym ITconsym
 697
 698 sym con loc end buf len =
 699   case lookupUFM reservedSymsFM fs of
 700         Just (keyword,0)    -> return (T loc end keyword)
 701         Just (keyword,exts) -> do
 702                 b <- extension (\i -> exts .&. i /= 0)
 703                 if b then return (T loc end keyword)
 704                      else return (T loc end $! con fs)
 705         _other -> return (T loc end $! con fs)
 706   where
 707         fs = lexemeToFastString buf len
 708
 709 tok_decimal loc end buf len
 710   = return (T loc end (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 711
 712 tok_octal loc end buf len
 713   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 714
 715 tok_hexadecimal loc end buf len
 716   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 717
 718 prim_decimal loc end buf len
 719   = return (T loc end (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 720
 721 prim_octal loc end buf len
 722   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 723
 724 prim_hexadecimal loc end buf len
 725   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 726
 727 tok_float        str = ITrational $! readRational__ str
 728 prim_float       str = ITprimfloat  $! readRational__ str
 729 prim_double      str = ITprimdouble $! readRational__ str
 730
 731 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 732 parseInteger buf len radix to_int
 733   = go 0 0
 734   where go i x | i == len  = x
 735                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 736
 737 -- -----------------------------------------------------------------------------
 738 -- Layout processing
 739
 740 -- we're at the first token on a line, insert layout tokens if necessary
 741 do_bol :: Action
 742 do_bol loc end _str _len = do
 743         pos <- getOffside end
 744         case pos of
 745             LT -> do
 746                 --trace "layout: inserting '}'" $ do
 747                 popContext
 748                 -- do NOT pop the lex state, we might have a ';' to insert
 749                 return (T loc end ITvccurly)
 750             EQ -> do
 751                 --trace "layout: inserting ';'" $ do
 752                 popLexState
 753                 return (T loc end ITsemi)
 754             GT -> do
 755                 popLexState
 756                 lexToken
 757
 758 -- certain keywords put us in the "layout" state, where we might
 759 -- add an opening curly brace.
 760 maybe_layout ITdo       = pushLexState layout_do
 761 maybe_layout ITmdo      = pushLexState layout_do
 762 maybe_layout ITof       = pushLexState layout
 763 maybe_layout ITlet      = pushLexState layout
 764 maybe_layout ITwhere    = pushLexState layout
 765 maybe_layout ITrec      = pushLexState layout
 766 maybe_layout _          = return ()
 767
 768 -- Pushing a new implicit layout context.  If the indentation of the
 769 -- next token is not greater than the previous layout context, then
 770 -- Haskell 98 says that the new layout context should be empty; that is
 771 -- the lexer must generate {}.
 772 --
 773 -- We are slightly more lenient than this: when the new context is started
 774 -- by a 'do', then we allow the new context to be at the same indentation as
 775 -- the previous context.  This is what the 'strict' argument is for.
 776 --
 777 new_layout_context strict loc end _buf _len = do
 778     popLexState
 779     let offset = srcLocCol loc
 780     ctx <- getContext
 781     case ctx of
 782         Layout prev_off : _  |
 783            (strict     && prev_off >= offset  ||
 784             not strict && prev_off > offset) -> do
 785                 -- token is indented to the left of the previous context.
 786                 -- we must generate a {} sequence now.
 787                 pushLexState layout_left
 788                 return (T loc end ITvocurly)
 789         other -> do
 790                 setContext (Layout offset : ctx)
 791                 return (T loc end ITvocurly)
 792
 793 do_layout_left loc end _buf _len = do
 794     popLexState
 795     pushLexState bol  -- we must be at the start of a line
 796     return (T loc end ITvccurly)
 797
 798 -- -----------------------------------------------------------------------------
 799 -- LINE pragmas
 800
 801 set_line :: Int -> Action
 802 set_line code loc end buf len = do
 803   let line = parseInteger buf len 10 oct_or_dec
 804   setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0)
 805         -- subtract one: the line number refers to the *following* line
 806   popLexState
 807   pushLexState code
 808   lexToken
 809
 810 set_file :: Int -> Action
 811 set_file code loc end buf len = do
 812   let file = lexemeToFastString (stepOn buf) (len-2)
 813   setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end))
 814   popLexState
 815   pushLexState code
 816   lexToken
 817
 818 -- -----------------------------------------------------------------------------
 819 -- Strings & Chars
 820
 821 -- This stuff is horrible.  I hates it.
 822
 823 lex_string_tok :: Action
 824 lex_string_tok loc end buf len = do
 825   tok <- lex_string ""
 826   end <- getSrcLoc
 827   return (T loc end tok)
 828
 829 lex_string :: String -> P Token__
 830 lex_string s = do
 831   i <- getInput
 832   case alexGetChar i of
 833     Nothing -> lit_error
 834
 835     Just ('"',i)  -> do
 836         setInput i
 837         glaexts <- extension glaExtsEnabled
 838         if glaexts
 839           then do
 840             i <- getInput
 841             case alexGetChar i of
 842               Just ('#',i) -> do
 843                    setInput i
 844                    if any (> '\xFF') s
 845                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 846                     else let s' = mkFastStringNarrow (reverse s) in
 847                          -- always a narrow string/byte array
 848                          return (ITprimstring s')
 849               _other ->
 850                 return (ITstring (mkFastString (reverse s)))
 851           else
 852                 return (ITstring (mkFastString (reverse s)))
 853
 854     Just ('\\',i)
 855         | Just ('&',i) <- next -> do
 856                 setInput i; lex_string s
 857         | Just (c,i) <- next, is_space c -> do
 858                 setInput i; lex_stringgap s
 859         where next = alexGetChar i
 860
 861     Just _ -> do
 862         c <- lex_char
 863         lex_string (c:s)
 864
 865
 866 lex_stringgap s = do
 867   c <- getCharOrFail
 868   case c of
 869     '\\' -> lex_string s
 870     c | is_space c -> lex_stringgap s
 871     _other -> lit_error
 872
 873
 874 lex_char_tok :: Action
 875 lex_char_tok loc _end buf len = do
 876    c <- lex_char
 877    mc <- getCharOrFail
 878    case mc of
 879         '\'' -> do
 880            glaexts <- extension glaExtsEnabled
 881            if glaexts
 882                 then do
 883                    i@(end,_) <- getInput
 884                    case alexGetChar i of
 885                         Just ('#',i@(end,_)) -> do
 886                                 setInput i
 887                                 return (T loc end (ITprimchar c))
 888                         _other ->
 889                                 return (T loc end (ITchar c))
 890                 else do
 891                    end <- getSrcLoc
 892                    return (T loc end (ITchar c))
 893
 894         _other -> lit_error
 895
 896 lex_char :: P Char
 897 lex_char = do
 898   mc <- getCharOrFail
 899   case mc of
 900       '\\' -> lex_escape
 901       c | is_any c -> return c
 902       _other -> lit_error
 903
 904 lex_escape :: P Char
 905 lex_escape = do
 906   c <- getCharOrFail
 907   case c of
 908         'a'   -> return '\a'
 909         'b'   -> return '\b'
 910         'f'   -> return '\f'
 911         'n'   -> return '\n'
 912         'r'   -> return '\r'
 913         't'   -> return '\t'
 914         'v'   -> return '\v'
 915         '\\'  -> return '\\'
 916         '"'   -> return '\"'
 917         '\''  -> return '\''
 918         '^'   -> do c <- getCharOrFail
 919                     if c >= '@' && c <= '_'
 920                         then return (chr (ord c - ord '@'))
 921                         else lit_error
 922
 923         'x'   -> readNum is_hexdigit 16 hex
 924         'o'   -> readNum is_octdigit  8 oct_or_dec
 925         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 926
 927         c1 ->  do
 928            i <- getInput
 929            case alexGetChar i of
 930             Nothing -> lit_error
 931             Just (c2,i2) ->
 932               case alexGetChar i2 of
 933                 Nothing -> lit_error
 934                 Just (c3,i3) ->
 935                    let str = [c1,c2,c3] in
 936                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 937                                      Just rest <- [maybePrefixMatch p str] ] of
 938                           (escape_char,[]):_ -> do
 939                                 setInput i3
 940                                 return escape_char
 941                           (escape_char,_:_):_ -> do
 942                                 setInput i2
 943                                 return escape_char
 944                           [] -> lit_error
 945
 946 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 947 readNum is_digit base conv = do
 948   c <- getCharOrFail
 949   if is_digit c
 950         then readNum2 is_digit base conv (conv c)
 951         else lit_error
 952
 953 readNum2 is_digit base conv i = do
 954   input <- getInput
 955   read i input
 956   where read i input = do
 957           case alexGetChar input of
 958             Just (c,input') | is_digit c -> do
 959                 read (i*base + conv c) input'
 960             _other -> do
 961                 setInput input
 962                 if i >= 0 && i <= 0x10FFFF
 963                    then return (chr i)
 964                    else lit_error
 965
 966 is_hexdigit c
 967         =  is_digit c
 968         || (c >= 'a' && c <= 'f')
 969         || (c >= 'A' && c <= 'F')
 970
 971 hex c | is_digit c = ord c - ord '0'
 972       | otherwise  = ord (to_lower c) - ord 'a' + 10
 973
 974 oct_or_dec c = ord c - ord '0'
 975
 976 is_octdigit c = c >= '0' && c <= '7'
 977
 978 to_lower c
 979   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
 980   | otherwise = c
 981
 982 silly_escape_chars = [
 983         ("NUL", '\NUL'),
 984         ("SOH", '\SOH'),
 985         ("STX", '\STX'),
 986         ("ETX", '\ETX'),
 987         ("EOT", '\EOT'),
 988         ("ENQ", '\ENQ'),
 989         ("ACK", '\ACK'),
 990         ("BEL", '\BEL'),
 991         ("BS", '\BS'),
 992         ("HT", '\HT'),
 993         ("LF", '\LF'),
 994         ("VT", '\VT'),
 995         ("FF", '\FF'),
 996         ("CR", '\CR'),
 997         ("SO", '\SO'),
 998         ("SI", '\SI'),
 999         ("DLE", '\DLE'),
1000         ("DC1", '\DC1'),
1001         ("DC2", '\DC2'),
1002         ("DC3", '\DC3'),
1003         ("DC4", '\DC4'),
1004         ("NAK", '\NAK'),
1005         ("SYN", '\SYN'),
1006         ("ETB", '\ETB'),
1007         ("CAN", '\CAN'),
1008         ("EM", '\EM'),
1009         ("SUB", '\SUB'),
1010         ("ESC", '\ESC'),
1011         ("FS", '\FS'),
1012         ("GS", '\GS'),
1013         ("RS", '\RS'),
1014         ("US", '\US'),
1015         ("SP", '\SP'),
1016         ("DEL", '\DEL')
1017         ]
1018
1019 lit_error = lexError "lexical error in string/character literal"
1020
1021 getCharOrFail :: P Char
1022 getCharOrFail =  do
1023   i <- getInput
1024   case alexGetChar i of
1025         Nothing -> lexError "unexpected end-of-file in string/character literal"
1026         Just (c,i)  -> do setInput i; return c
1027
1028 -- -----------------------------------------------------------------------------
1029 -- Floats
1030
1031 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1032 readRational r = do
1033      (n,d,s) <- readFix r
1034      (k,t)   <- readExp s
1035      return ((n%1)*10^^(k-d), t)
1036  where
1037      readFix r = do
1038         (ds,s)  <- lexDecDigits r
1039         (ds',t) <- lexDotDigits s
1040         return (read (ds++ds'), length ds', t)
1041
1042      readExp (e:s) | e `elem` "eE" = readExp' s
1043      readExp s                     = return (0,s)
1044
1045      readExp' ('+':s) = readDec s
1046      readExp' ('-':s) = do
1047                         (k,t) <- readDec s
1048                         return (-k,t)
1049      readExp' s       = readDec s
1050
1051      readDec s = do
1052         (ds,r) <- nonnull isDigit s
1053         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1054                 r)
1055
1056      lexDecDigits = nonnull isDigit
1057
1058      lexDotDigits ('.':s) = return (span isDigit s)
1059      lexDotDigits s       = return ("",s)
1060
1061      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1062                       return (cs,t)
1063
1064 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1065 readRational__ top_s
1066   = case top_s of
1067       '-' : xs -> - (read_me xs)
1068       xs       -> read_me xs
1069   where
1070     read_me s
1071       = case (do { (x,"") <- readRational s ; return x }) of
1072           [x] -> x
1073           []  -> error ("readRational__: no parse:"        ++ top_s)
1074           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1075
1076 -- -----------------------------------------------------------------------------
1077 -- The Parse Monad
1078
1079 data LayoutContext
1080   = NoLayout
1081   | Layout !Int
1082
1083 data ParseResult a
1084   = POk PState a
1085   | PFailed
1086         SrcLoc SrcLoc   -- The start and end of the text span related to
1087                         -- the error.  Might be used in environments which can
1088                         -- show this span, e.g. by highlighting it.
1089         Message         -- The error message
1090
1091 showPFailed loc1 loc2 err
1092  = showSDoc (hcat [ppr loc1, text ": ", err])
1093
1094 data PState = PState {
1095         buffer     :: StringBuffer,
1096         last_loc   :: SrcLoc,           -- pos of previous token
1097         last_len   :: !Int,             -- len of previous token
1098         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1099         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1100         context    :: [LayoutContext],
1101         lex_state  :: [Int]
1102      }
1103         -- last_loc and last_len are used when generating error messages,
1104         -- and in pushCurrentContext only.
1105
1106 newtype P a = P { unP :: PState -> ParseResult a }
1107
1108 instance Monad P where
1109   return = returnP
1110   (>>=) = thenP
1111   fail = failP
1112
1113 returnP :: a -> P a
1114 returnP a = P $ \s -> POk s a
1115
1116 thenP :: P a -> (a -> P b) -> P b
1117 (P m) `thenP` k = P $ \ s ->
1118         case m s of
1119                 POk s1 a          -> (unP (k a)) s1
1120                 PFailed l1 l2 err -> PFailed l1 l2 err
1121
1122 failP :: String -> P a
1123 failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1124
1125 failMsgP :: String -> P a
1126 failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1127
1128 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1129 failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str)
1130
1131 extension :: (Int -> Bool) -> P Bool
1132 extension p = P $ \s -> POk s (p $! extsBitmap s)
1133
1134 getExts :: P Int
1135 getExts = P $ \s -> POk s (extsBitmap s)
1136
1137 setSrcLoc :: SrcLoc -> P ()
1138 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1139
1140 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1141 -- any calls to the lexer, because it assumes things about the SrcLoc.
1142 setSrcLocFor :: SrcLoc -> P a -> P a
1143 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1144   case unP scope s{loc=new_loc} of
1145         PFailed l1 l2 msg -> PFailed l1 l2 msg
1146         POk _ r -> POk s r
1147
1148 getSrcLoc :: P SrcLoc
1149 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1150
1151 setLastToken :: SrcLoc -> Int -> P ()
1152 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1153
1154 type AlexInput = (SrcLoc,StringBuffer)
1155
1156 alexInputPrevChar :: AlexInput -> Char
1157 alexInputPrevChar (_,s) = prevChar s '\n'
1158
1159 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1160 alexGetChar (loc,s)
1161   | atEnd s   = Nothing
1162   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1163   where c = currentChar s
1164         loc' = advanceSrcLoc loc c
1165         s'   = stepOn s
1166
1167 getInput :: P AlexInput
1168 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1169
1170 setInput :: AlexInput -> P ()
1171 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1172
1173 pushLexState :: Int -> P ()
1174 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1175
1176 popLexState :: P Int
1177 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1178
1179 getLexState :: P Int
1180 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1181
1182 -- for reasons of efficiency, flags indicating language extensions (eg,
1183 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1184 -- integer
1185
1186 glaExtsBit, ffiBit, parrBit :: Int
1187 glaExtsBit = 0
1188 ffiBit     = 1
1189 parrBit    = 2
1190 withBit    = 3
1191 arrowsBit  = 4
1192 thBit      = 5
1193 ipBit      = 6
1194
1195 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1196 glaExtsEnabled flags = testBit flags glaExtsBit
1197 ffiEnabled     flags = testBit flags ffiBit
1198 withEnabled    flags = testBit flags withBit
1199 parrEnabled    flags = testBit flags parrBit
1200 arrowsEnabled  flags = testBit flags arrowsBit
1201 thEnabled      flags = testBit flags thBit
1202 ipEnabled      flags = testBit flags ipBit
1203
1204 -- create a parse state
1205 --
1206 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1207 mkPState buf loc flags  =
1208   PState {
1209       buffer     = buf,
1210       last_loc   = loc,
1211       last_len   = 0,
1212       loc        = loc,
1213       extsBitmap = fromIntegral bitmap,
1214       context    = [],
1215       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1216         -- we begin in the layout state if toplev_layout is set
1217     }
1218     where
1219       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1220                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1221                .|. withBit    `setBitIf` dopt Opt_With        flags
1222                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1223                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1224                .|. thBit      `setBitIf` dopt Opt_TH          flags
1225                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1226       --
1227       setBitIf :: Int -> Bool -> Int
1228       b `setBitIf` cond | cond      = bit b
1229                         | otherwise = 0
1230
1231 getContext :: P [LayoutContext]
1232 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1233
1234 setContext :: [LayoutContext] -> P ()
1235 setContext ctx = P $ \s -> POk s{context=ctx} ()
1236
1237 popContext :: P ()
1238 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1239                            loc = loc, last_len = len, last_loc = last_loc }) ->
1240   case ctx of
1241         (_:tl) -> POk s{ context = tl } ()
1242         []     -> PFailed last_loc loc (srcParseErr buf len)
1243
1244 -- Push a new layout context at the indentation of the last token read.
1245 -- This is only used at the outer level of a module when the 'module'
1246 -- keyword is missing.
1247 pushCurrentContext :: P ()
1248 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1249   POk s{ context = Layout (srcLocCol loc) : ctx} ()
1250
1251 getOffside :: SrcLoc -> P Ordering
1252 getOffside loc = P $ \s@PState{context=stk} ->
1253                 let ord = case stk of
1254                         (Layout n:_) -> compare (srcLocCol loc) n
1255                         _            -> GT
1256                 in POk s ord
1257
1258 -- ---------------------------------------------------------------------------
1259 -- Construct a parse error
1260
1261 srcParseErr
1262   :: StringBuffer       -- current buffer (placed just after the last token)
1263   -> Int                -- length of the previous token
1264   -> Message
1265 srcParseErr buf len
1266   = hcat [ if null token
1267              then ptext SLIT("parse error (possibly incorrect indentation)")
1268              else hcat [ptext SLIT("parse error on input "),
1269                         char '`', text token, char '\'']
1270     ]
1271   where token = lexemeToString (stepOnBy (-len) buf) len
1272
1273 -- Report a parse failure, giving the span of the previous token as
1274 -- the location of the error.  This is the entry point for errors
1275 -- detected during parsing.
1276 srcParseFail :: P a
1277 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1278                                 last_loc = last_loc, loc = loc } ->
1279     PFailed last_loc loc (srcParseErr buf len)
1280
1281 -- A lexical error is reported at a particular position in the source file,
1282 -- not over a token range.  TODO: this is slightly wrong, because we record
1283 -- the error at the character position following the one which caused the
1284 -- error.  We should somehow back up by one character.
1285 lexError :: String -> P a
1286 lexError str = do
1287   loc <- getSrcLoc
1288   failLocMsgP loc loc str
1289
1290 -- -----------------------------------------------------------------------------
1291 -- This is the top-level function: called from the parser each time a
1292 -- new token is to be read from the input.
1293
1294 lexer :: (Token -> P a) -> P a
1295 lexer cont = do
1296   tok@(T _ _ tok__) <- lexToken
1297   --trace ("token: " ++ show tok__) $ do
1298   cont tok
1299
1300 lexToken :: P Token
1301 lexToken = do
1302   inp@(loc1,buf) <- getInput
1303   sc <- getLexState
1304   exts <- getExts
1305   case alexScanUser exts inp sc of
1306     AlexEOF -> do setLastToken loc1 0
1307                   return (T loc1 loc1 ITeof)
1308     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1309     AlexSkip inp2 _ -> do
1310         setInput inp2
1311         lexToken
1312     AlexToken inp2@(end,buf2) len t -> do
1313         setInput inp2
1314         setLastToken loc1 len
1315         t loc1 end buf len
1316 }