ghc/compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2003
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - Unicode
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 {
  24 module Lexer (
  25    Token(..), Token__(..), lexer, mkPState, showPFailed,
  26    P(..), ParseResult(..), setSrcLocFor, getSrcLoc,
  27    failMsgP, failLocMsgP, srcParseFail,
  28    popContext, pushCurrentContext,
  29   ) where
  30
  31 #include "HsVersions.h"
  32
  33 import ForeignCall      ( Safety(..) )
  34 import ErrUtils         ( Message )
  35 import Outputable
  36 import StringBuffer
  37 import FastString
  38 import FastTypes
  39 import SrcLoc
  40 import UniqFM
  41 import CmdLineOpts
  42 import Ctype
  43 import Util             ( maybePrefixMatch )
  44
  45 import DATA_BITS
  46 import Char
  47 import Ratio
  48 import TRACE
  49 }
  50
  51 $whitechar   = [\ \t\n\r\f\v]
  52 $white_no_nl = $whitechar # \n
  53
  54 $ascdigit  = 0-9
  55 $unidigit  = \x01
  56 $digit     = [$ascdigit $unidigit]
  57
  58 $special   = [\(\)\,\;\[\]\`\{\}]
  59 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  60 $unisymbol = \x02
  61 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  62
  63 $unilarge  = \x03
  64 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  65 $large     = [$asclarge $unilarge]
  66
  67 $unismall  = \x04
  68 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  69 $small     = [$ascsmall $unismall \_]
  70
  71 $graphic   = [$small $large $symbol $digit $special \:\"\']
  72
  73 $octit     = 0-7
  74 $hexit     = [$digit A-F a-f]
  75 $symchar   = [$symbol \:]
  76 $nl        = [\n\r]
  77 $idchar    = [$small $large $digit \']
  78
  79 @varid     = $small $idchar*
  80 @conid     = $large $idchar*
  81
  82 @varsym    = $symbol $symchar*
  83 @consym    = \: $symchar*
  84
  85 @decimal     = $digit+
  86 @octal       = $octit+
  87 @hexadecimal = $hexit+
  88 @exponent    = [eE] [\-\+]? @decimal
  89
  90 -- we support the hierarchical module name extension:
  91 @qual = (@conid \.)+
  92
  93 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  94
  95 haskell :-
  96
  97 -- everywhere: skip whitespace and comments
  98 $white_no_nl+                           ;
  99
 100 -- Everywhere: deal with nested comments.  We explicitly rule out
 101 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 102 -- (this can happen even though pragmas will normally take precedence due to
 103 -- longest-match, because pragmas aren't valid in every state, but comments
 104 -- are).
 105 "{-" / { notFollowedBy '#' }            { nested_comment }
 106
 107 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 108 -- more dashes followed by a symbol should be parsed as a varsym, so we
 109 -- have to exclude those.
 110 -- The regex says: "munch all the characters after the dashes, as long as
 111 -- the first one is not a symbol".
 112 "--"\-* ([^$symbol] .*)?                ;
 113
 114 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 115 -- blank lines) until we find a non-whitespace character, then do layout
 116 -- processing.
 117 --
 118 -- One slight wibble here: what if the line begins with {-#? In
 119 -- theory, we have to lex the pragma to see if it's one we recognise,
 120 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 121 -- as a nested comment.  We don't bother with this: if the line begins
 122 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 123 <bol> {
 124   \n                                    ;
 125   ^\# (line)?                           { begin line_prag1 }
 126   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 127   ()                                    { do_bol }
 128 }
 129
 130 -- after a layout keyword (let, where, do, of), we begin a new layout
 131 -- context if the curly brace is missing.
 132 -- Careful! This stuff is quite delicate.
 133 <layout, layout_do> {
 134   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 135         -- we might encounter {-# here, but {- has been handled already
 136   \n                                    ;
 137   ^\# (line)?                           { begin line_prag1 }
 138 }
 139
 140 -- do is treated in a subtly different way, see new_layout_context
 141 <layout>    ()                          { new_layout_context True }
 142 <layout_do> ()                          { new_layout_context False }
 143
 144 -- after a new layout context which was found to be to the left of the
 145 -- previous context, we have generated a '{' token, and we now need to
 146 -- generate a matching '}' token.
 147 <layout_left>  ()                       { do_layout_left }
 148
 149 <0,glaexts> \n                          { begin bol }
 150
 151 "{-#" $whitechar* (line|LINE)           { begin line_prag2 }
 152
 153 -- single-line line pragmas, of the form
 154 --    # <line> "<file>" <extra-stuff> \n
 155 <line_prag1> $digit+                    { set_line line_prag1a }
 156 <line_prag1a> \" [$graphic \ ]* \"      { set_file line_prag1b }
 157 <line_prag1b> .*                        { pop }
 158
 159 -- Haskell-style line pragmas, of the form
 160 --    {-# LINE <line> "<file>" #-}
 161 <line_prag2> $digit+                    { set_line line_prag2a }
 162 <line_prag2a> \" [$graphic \ ]* \"      { set_file line_prag2b }
 163 <line_prag2b> "#-}"                     { pop }
 164
 165 <0,glaexts> {
 166   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
 167                                         { token ITspecialise_prag }
 168   "{-#" $whitechar* (SOURCE|source)     { token ITsource_prag }
 169   "{-#" $whitechar* (INLINE|inline)     { token ITinline_prag }
 170   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
 171                                         { token ITnoinline_prag }
 172   "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
 173   "{-#" $whitechar* (DEPRECATED|deprecated)
 174                                         { token ITdeprecated_prag }
 175   "{-#" $whitechar* (SCC|scc)           { token ITscc_prag }
 176   "{-#" $whitechar* (CORE|core)         { token ITcore_prag }
 177
 178   "{-#"                                 { nested_comment }
 179
 180   -- ToDo: should only be valid inside a pragma:
 181   "#-}"                                 { token ITclose_prag}
 182 }
 183
 184
 185 -- '0' state: ordinary lexemes
 186 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 187
 188 -- "special" symbols
 189
 190 <0,glaexts> {
 191   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 192   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 193 }
 194
 195 <0,glaexts> {
 196   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 197   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 198   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 199   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 200   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 201   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 202   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 203   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 204 }
 205
 206 <0,glaexts> {
 207   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 208                                         { special IToparenbar }
 209   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 210 }
 211
 212 <0,glaexts> {
 213   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 214   \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
 215 }
 216
 217 <glaexts> {
 218   "(#" / { notFollowedBySymbol }        { token IToubxparen }
 219   "#)"                                  { token ITcubxparen }
 220   "{|"                                  { token ITocurlybar }
 221   "|}"                                  { token ITccurlybar }
 222 }
 223
 224 <0,glaexts> {
 225   \(                                    { special IToparen }
 226   \)                                    { special ITcparen }
 227   \[                                    { special ITobrack }
 228   \]                                    { special ITcbrack }
 229   \,                                    { special ITcomma }
 230   \;                                    { special ITsemi }
 231   \`                                    { special ITbackquote }
 232
 233   \{                                    { open_brace }
 234   \}                                    { close_brace }
 235 }
 236
 237 <0,glaexts> {
 238   @qual @varid                  { check_qvarid }
 239   @qual @conid                  { idtoken qconid }
 240   @varid                        { varid }
 241   @conid                        { idtoken conid }
 242 }
 243
 244 -- after an illegal qvarid, such as 'M.let',
 245 -- we back up and try again in the bad_qvarid state:
 246 <bad_qvarid> {
 247   @conid                        { pop_and (idtoken conid) }
 248   @qual @conid                  { pop_and (idtoken qconid) }
 249 }
 250
 251 <glaexts> {
 252   @qual @varid "#"+             { idtoken qvarid }
 253   @qual @conid "#"+             { idtoken qconid }
 254   @varid "#"+                   { varid }
 255   @conid "#"+                   { idtoken conid }
 256 }
 257
 258 -- ToDo: M.(,,,)
 259
 260 <0,glaexts> {
 261   @qual @varsym                 { idtoken qvarsym }
 262   @qual @consym                 { idtoken qconsym }
 263   @varsym                       { varsym }
 264   @consym                       { consym }
 265 }
 266
 267 <0,glaexts> {
 268   @decimal                      { tok_decimal }
 269   0[oO] @octal                  { tok_octal }
 270   0[xX] @hexadecimal            { tok_hexadecimal }
 271 }
 272
 273 <glaexts> {
 274   @decimal \#                   { prim_decimal }
 275   0[oO] @octal \#               { prim_octal }
 276   0[xX] @hexadecimal \#         { prim_hexadecimal }
 277 }
 278
 279 <0,glaexts> @floating_point             { strtoken tok_float }
 280 <glaexts>   @floating_point \#          { init_strtoken 1 prim_float }
 281 <glaexts>   @floating_point \# \#       { init_strtoken 2 prim_double }
 282
 283 -- Strings and chars are lexed by hand-written code.  The reason is
 284 -- that even if we recognise the string or char here in the regex
 285 -- lexer, we would still have to parse the string afterward in order
 286 -- to convert it to a String.
 287 <0,glaexts> {
 288   \'                            { lex_char_tok }
 289   \"                            { lex_string_tok }
 290 }
 291
 292 {
 293 -- work around bug in Alex 2.0
 294 #if __GLASGOW_HASKELL__ < 503
 295 unsafeAt arr i = arr ! i
 296 #endif
 297
 298 -- -----------------------------------------------------------------------------
 299 -- The token type
 300
 301 data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__
 302
 303 data Token__
 304   = ITas                        -- Haskell keywords
 305   | ITcase
 306   | ITclass
 307   | ITdata
 308   | ITdefault
 309   | ITderiving
 310   | ITdo
 311   | ITelse
 312   | IThiding
 313   | ITif
 314   | ITimport
 315   | ITin
 316   | ITinfix
 317   | ITinfixl
 318   | ITinfixr
 319   | ITinstance
 320   | ITlet
 321   | ITmodule
 322   | ITnewtype
 323   | ITof
 324   | ITqualified
 325   | ITthen
 326   | ITtype
 327   | ITwhere
 328   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 329
 330   | ITforall                    -- GHC extension keywords
 331   | ITforeign
 332   | ITexport
 333   | ITlabel
 334   | ITdynamic
 335   | ITsafe
 336   | ITthreadsafe
 337   | ITunsafe
 338   | ITstdcallconv
 339   | ITccallconv
 340   | ITdotnet
 341   | ITmdo
 342
 343   | ITspecialise_prag           -- Pragmas
 344   | ITsource_prag
 345   | ITinline_prag
 346   | ITnoinline_prag
 347   | ITrules_prag
 348   | ITdeprecated_prag
 349   | ITline_prag
 350   | ITscc_prag
 351   | ITcore_prag                 -- hdaume: core annotations
 352   | ITclose_prag
 353
 354   | ITdotdot                    -- reserved symbols
 355   | ITcolon
 356   | ITdcolon
 357   | ITequal
 358   | ITlam
 359   | ITvbar
 360   | ITlarrow
 361   | ITrarrow
 362   | ITat
 363   | ITtilde
 364   | ITdarrow
 365   | ITminus
 366   | ITbang
 367   | ITstar
 368   | ITdot
 369
 370   | ITbiglam                    -- GHC-extension symbols
 371
 372   | ITocurly                    -- special symbols
 373   | ITccurly
 374   | ITocurlybar                 -- {|, for type applications
 375   | ITccurlybar                 -- |}, for type applications
 376   | ITvocurly
 377   | ITvccurly
 378   | ITobrack
 379   | ITopabrack                  -- [:, for parallel arrays with -fparr
 380   | ITcpabrack                  -- :], for parallel arrays with -fparr
 381   | ITcbrack
 382   | IToparen
 383   | ITcparen
 384   | IToubxparen
 385   | ITcubxparen
 386   | ITsemi
 387   | ITcomma
 388   | ITunderscore
 389   | ITbackquote
 390
 391   | ITvarid   FastString        -- identifiers
 392   | ITconid   FastString
 393   | ITvarsym  FastString
 394   | ITconsym  FastString
 395   | ITqvarid  (FastString,FastString)
 396   | ITqconid  (FastString,FastString)
 397   | ITqvarsym (FastString,FastString)
 398   | ITqconsym (FastString,FastString)
 399
 400   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 401   | ITsplitipvarid FastString   -- GHC extension: implicit param: %x
 402
 403   | ITpragma StringBuffer
 404
 405   | ITchar       Char
 406   | ITstring     FastString
 407   | ITinteger    Integer
 408   | ITrational   Rational
 409
 410   | ITprimchar   Char
 411   | ITprimstring FastString
 412   | ITprimint    Integer
 413   | ITprimfloat  Rational
 414   | ITprimdouble Rational
 415
 416   -- MetaHaskell extension tokens
 417   | ITopenExpQuote              -- [| or [e|
 418   | ITopenPatQuote              -- [p|
 419   | ITopenDecQuote              -- [d|
 420   | ITopenTypQuote              -- [t|
 421   | ITcloseQuote                -- |]
 422   | ITidEscape   FastString     -- $x
 423   | ITparenEscape               -- $(
 424   | ITreifyType
 425   | ITreifyDecl
 426   | ITreifyFixity
 427
 428   -- Arrow notation extension
 429   | ITproc
 430   | ITrec
 431   | IToparenbar                 -- (|
 432   | ITcparenbar                 -- |)
 433   | ITlarrowtail                -- -<
 434   | ITrarrowtail                -- >-
 435   | ITLarrowtail                -- -<<
 436   | ITRarrowtail                -- >>-
 437
 438   | ITunknown String            -- Used when the lexer can't make sense of it
 439   | ITeof                       -- end of file token
 440 #ifdef DEBUG
 441   deriving Show -- debugging
 442 #endif
 443
 444 isSpecial :: Token__ -> Bool
 445 -- If we see M.x, where x is a keyword, but
 446 -- is special, we treat is as just plain M.x,
 447 -- not as a keyword.
 448 isSpecial ITas          = True
 449 isSpecial IThiding      = True
 450 isSpecial ITqualified   = True
 451 isSpecial ITforall      = True
 452 isSpecial ITexport      = True
 453 isSpecial ITlabel       = True
 454 isSpecial ITdynamic     = True
 455 isSpecial ITsafe        = True
 456 isSpecial ITthreadsafe  = True
 457 isSpecial ITunsafe      = True
 458 isSpecial ITccallconv   = True
 459 isSpecial ITstdcallconv = True
 460 isSpecial ITmdo         = True
 461 isSpecial _             = False
 462
 463 -- the bitmap provided as the third component indicates whether the
 464 -- corresponding extension keyword is valid under the extension options
 465 -- provided to the compiler; if the extension corresponding to *any* of the
 466 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 467 -- facilitates using a keyword in two different extensions that can be
 468 -- activated independently)
 469 --
 470 reservedWordsFM = listToUFM $
 471         map (\(x, y, z) -> (mkFastString x, (y, z)))
 472        [( "_",          ITunderscore,   0 ),
 473         ( "as",         ITas,           0 ),
 474         ( "case",       ITcase,         0 ),
 475         ( "class",      ITclass,        0 ),
 476         ( "data",       ITdata,         0 ),
 477         ( "default",    ITdefault,      0 ),
 478         ( "deriving",   ITderiving,     0 ),
 479         ( "do",         ITdo,           0 ),
 480         ( "else",       ITelse,         0 ),
 481         ( "hiding",     IThiding,       0 ),
 482         ( "if",         ITif,           0 ),
 483         ( "import",     ITimport,       0 ),
 484         ( "in",         ITin,           0 ),
 485         ( "infix",      ITinfix,        0 ),
 486         ( "infixl",     ITinfixl,       0 ),
 487         ( "infixr",     ITinfixr,       0 ),
 488         ( "instance",   ITinstance,     0 ),
 489         ( "let",        ITlet,          0 ),
 490         ( "module",     ITmodule,       0 ),
 491         ( "newtype",    ITnewtype,      0 ),
 492         ( "of",         ITof,           0 ),
 493         ( "qualified",  ITqualified,    0 ),
 494         ( "then",       ITthen,         0 ),
 495         ( "type",       ITtype,         0 ),
 496         ( "where",      ITwhere,        0 ),
 497         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 498
 499         ( "forall",     ITforall,        bit glaExtsBit),
 500         ( "mdo",        ITmdo,           bit glaExtsBit),
 501         ( "reifyDecl",  ITreifyDecl,     bit thBit),
 502         ( "reifyType",  ITreifyType,     bit thBit),
 503         ( "reifyFixity",ITreifyFixity,   bit thBit),
 504
 505         ( "foreign",    ITforeign,       bit ffiBit),
 506         ( "export",     ITexport,        bit ffiBit),
 507         ( "label",      ITlabel,         bit ffiBit),
 508         ( "dynamic",    ITdynamic,       bit ffiBit),
 509         ( "safe",       ITsafe,          bit ffiBit),
 510         ( "threadsafe", ITthreadsafe,    bit ffiBit),
 511         ( "unsafe",     ITunsafe,        bit ffiBit),
 512         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 513         ( "ccall",      ITccallconv,     bit ffiBit),
 514         ( "dotnet",     ITdotnet,        bit ffiBit),
 515
 516         ( "rec",        ITrec,           bit arrowsBit),
 517         ( "proc",       ITproc,          bit arrowsBit)
 518      ]
 519
 520 reservedSymsFM = listToUFM $
 521         map (\ (x,y,z) -> (mkFastString x,(y,z)))
 522       [ ("..",  ITdotdot,       0)
 523        ,(":",   ITcolon,        0)      -- (:) is a reserved op,
 524                                                 -- meaning only list cons
 525        ,("::",  ITdcolon,       0)
 526        ,("=",   ITequal,        0)
 527        ,("\\",  ITlam,          0)
 528        ,("|",   ITvbar,         0)
 529        ,("<-",  ITlarrow,       0)
 530        ,("->",  ITrarrow,       0)
 531        ,("@",   ITat,           0)
 532        ,("~",   ITtilde,        0)
 533        ,("=>",  ITdarrow,       0)
 534        ,("-",   ITminus,        0)
 535        ,("!",   ITbang,         0)
 536
 537        ,("*",   ITstar,         bit glaExtsBit) -- For data T (a::*) = MkT
 538        ,(".",   ITdot,          bit glaExtsBit) -- For 'forall a . t'
 539
 540        ,("-<",  ITlarrowtail,   bit arrowsBit)
 541        ,(">-",  ITrarrowtail,   bit arrowsBit)
 542        ,("-<<", ITLarrowtail,   bit arrowsBit)
 543        ,(">>-", ITRarrowtail,   bit arrowsBit)
 544        ]
 545
 546 -- -----------------------------------------------------------------------------
 547 -- Lexer actions
 548
 549 type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token
 550
 551 special :: Token__ -> Action
 552 special tok loc end _buf len = return (T loc end tok)
 553
 554 token, layout_token :: Token__ -> Action
 555 token t loc end buf len = return (T loc end t)
 556 layout_token t loc end buf len = pushLexState layout >> return (T loc end t)
 557
 558 idtoken :: (StringBuffer -> Int -> Token__) -> Action
 559 idtoken f loc end buf len = return (T loc end $! (f buf len))
 560
 561 skip_one_varid :: (FastString -> Token__) -> Action
 562 skip_one_varid f loc end buf len
 563   = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1)))
 564
 565 strtoken :: (String -> Token__) -> Action
 566 strtoken f loc end buf len =
 567   return (T loc end $! (f $! lexemeToString buf len))
 568
 569 init_strtoken :: Int -> (String -> Token__) -> Action
 570 -- like strtoken, but drops the last N character(s)
 571 init_strtoken drop f loc end buf len =
 572   return (T loc end $! (f $! lexemeToString buf (len-drop)))
 573
 574 begin :: Int -> Action
 575 begin code _loc _end _str _len = do pushLexState code; lexToken
 576
 577 pop :: Action
 578 pop _loc _end _buf _len = do popLexState; lexToken
 579
 580 pop_and :: Action -> Action
 581 pop_and act loc end buf len = do popLexState; act loc end buf len
 582
 583 notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
 584
 585 notFollowedBySymbol _ _ _ (_,buf)
 586   = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
 587
 588 ifExtension pred bits _ _ _ = pred bits
 589
 590 {-
 591   nested comments require traversing by hand, they can't be parsed
 592   using regular expressions.
 593 -}
 594 nested_comment :: Action
 595 nested_comment loc _end _str _len = do
 596   input <- getInput
 597   go 1 input
 598   where go 0 input = do setInput input; lexToken
 599         go n input = do
 600           case alexGetChar input of
 601             Nothing  -> err input
 602             Just (c,input) -> do
 603               case c of
 604                 '-' -> do
 605                   case alexGetChar input of
 606                     Nothing  -> err input
 607                     Just ('\125',input) -> go (n-1) input
 608                     Just (c,_)          -> go n input
 609                 '\123' -> do
 610                   case alexGetChar input of
 611                     Nothing  -> err input
 612                     Just ('-',input') -> go (n+1) input'
 613                     Just (c,input)    -> go n input
 614                 c -> go n input
 615
 616         err input = do failLocMsgP loc (fst input) "unterminated `{-'"
 617
 618 open_brace, close_brace :: Action
 619 open_brace  loc end _str _len = do
 620   ctx <- getContext
 621   setContext (NoLayout:ctx)
 622   return (T loc end ITocurly)
 623 close_brace loc end _str _len = do
 624   popContext
 625   return (T loc end ITccurly)
 626
 627 -- We have to be careful not to count M.<varid> as a qualified name
 628 -- when <varid> is a keyword.  We hack around this by catching
 629 -- the offending tokens afterward, and re-lexing in a different state.
 630 check_qvarid loc end buf len = do
 631   case lookupUFM reservedWordsFM var of
 632         Just (keyword,exts)
 633           | not (isSpecial keyword) ->
 634           if exts == 0
 635              then try_again
 636              else do
 637                 b <- extension (\i -> exts .&. i /= 0)
 638                 if b then try_again
 639                      else return token
 640         _other -> return token
 641   where
 642         (mod,var) = splitQualName buf len
 643         token     = T loc end (ITqvarid (mod,var))
 644
 645         try_again = do
 646                 setInput (loc,buf)
 647                 pushLexState bad_qvarid
 648                 lexToken
 649
 650 qvarid buf len = ITqvarid $! splitQualName buf len
 651 qconid buf len = ITqconid $! splitQualName buf len
 652
 653 splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 654 -- takes a StringBuffer and a length, and returns the module name
 655 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 656 -- because of hierarchical module names.
 657 splitQualName orig_buf len = split orig_buf 0 0
 658   where
 659     split buf dot_off n
 660         | n == len                = done dot_off
 661         | lookAhead buf n == '.'  = split2 buf n (n+1)
 662         | otherwise               = split buf dot_off (n+1)
 663
 664     -- careful, we might get names like M....
 665     -- so, if the character after the dot is not upper-case, this is
 666     -- the end of the qualifier part.
 667     split2 buf dot_off n
 668         | isUpper (lookAhead buf n) = split buf dot_off (n+1)
 669         | otherwise                 = done dot_off
 670
 671     done dot_off =
 672         (lexemeToFastString orig_buf dot_off,
 673          lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
 674
 675 varid loc end buf len =
 676   case lookupUFM reservedWordsFM fs of
 677         Just (keyword,0)    -> do
 678                 maybe_layout keyword
 679                 return (T loc end keyword)
 680         Just (keyword,exts) -> do
 681                 b <- extension (\i -> exts .&. i /= 0)
 682                 if b then do maybe_layout keyword
 683                              return (T loc end keyword)
 684                      else return (T loc end (ITvarid fs))
 685         _other -> return (T loc end (ITvarid fs))
 686   where
 687         fs = lexemeToFastString buf len
 688
 689 conid buf len = ITconid fs
 690   where fs = lexemeToFastString buf len
 691
 692 qvarsym buf len = ITqvarsym $! splitQualName buf len
 693 qconsym buf len = ITqconsym $! splitQualName buf len
 694
 695 varsym = sym ITvarsym
 696 consym = sym ITconsym
 697
 698 sym con loc end buf len =
 699   case lookupUFM reservedSymsFM fs of
 700         Just (keyword,0)    -> return (T loc end keyword)
 701         Just (keyword,exts) -> do
 702                 b <- extension (\i -> exts .&. i /= 0)
 703                 if b then return (T loc end keyword)
 704                      else return (T loc end $! con fs)
 705         _other -> return (T loc end $! con fs)
 706   where
 707         fs = lexemeToFastString buf len
 708
 709 tok_decimal loc end buf len
 710   = return (T loc end (ITinteger  $! parseInteger buf len 10 oct_or_dec))
 711
 712 tok_octal loc end buf len
 713   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec))
 714
 715 tok_hexadecimal loc end buf len
 716   = return (T loc end (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex))
 717
 718 prim_decimal loc end buf len
 719   = return (T loc end (ITprimint  $! parseInteger buf (len-1) 10 oct_or_dec))
 720
 721 prim_octal loc end buf len
 722   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec))
 723
 724 prim_hexadecimal loc end buf len
 725   = return (T loc end (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex))
 726
 727 tok_float        str = ITrational $! readRational__ str
 728 prim_float       str = ITprimfloat  $! readRational__ str
 729 prim_double      str = ITprimdouble $! readRational__ str
 730
 731 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 732 parseInteger buf len radix to_int
 733   = go 0 0
 734   where go i x | i == len  = x
 735                | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
 736
 737 -- -----------------------------------------------------------------------------
 738 -- Layout processing
 739
 740 -- we're at the first token on a line, insert layout tokens if necessary
 741 do_bol :: Action
 742 do_bol loc end _str _len = do
 743         pos <- getOffside end
 744         case pos of
 745             LT -> do
 746                 --trace "layout: inserting '}'" $ do
 747                 popContext
 748                 -- do NOT pop the lex state, we might have a ';' to insert
 749                 return (T loc end ITvccurly)
 750             EQ -> do
 751                 --trace "layout: inserting ';'" $ do
 752                 popLexState
 753                 return (T loc end ITsemi)
 754             GT -> do
 755                 popLexState
 756                 lexToken
 757
 758 -- certain keywords put us in the "layout" state, where we might
 759 -- add an opening curly brace.
 760 maybe_layout ITdo       = pushLexState layout_do
 761 maybe_layout ITmdo      = pushLexState layout_do
 762 maybe_layout ITof       = pushLexState layout
 763 maybe_layout ITlet      = pushLexState layout
 764 maybe_layout ITwhere    = pushLexState layout
 765 maybe_layout ITrec      = pushLexState layout
 766 maybe_layout _          = return ()
 767
 768 -- Pushing a new implicit layout context.  If the indentation of the
 769 -- next token is not greater than the previous layout context, then
 770 -- Haskell 98 says that the new layout context should be empty; that is
 771 -- the lexer must generate {}.
 772 --
 773 -- We are slightly more lenient than this: when the new context is started
 774 -- by a 'do', then we allow the new context to be at the same indentation as
 775 -- the previous context.  This is what the 'strict' argument is for.
 776 --
 777 new_layout_context strict loc end _buf _len = do
 778     popLexState
 779     let offset = srcLocCol loc
 780     ctx <- getContext
 781     case ctx of
 782         Layout prev_off : _  |
 783            (strict     && prev_off >= offset  ||
 784             not strict && prev_off > offset) -> do
 785                 -- token is indented to the left of the previous context.
 786                 -- we must generate a {} sequence now.
 787                 pushLexState layout_left
 788                 return (T loc end ITvocurly)
 789         other -> do
 790                 setContext (Layout offset : ctx)
 791                 return (T loc end ITvocurly)
 792
 793 do_layout_left loc end _buf _len = do
 794     popLexState
 795     pushLexState bol  -- we must be at the start of a line
 796     return (T loc end ITvccurly)
 797
 798 -- -----------------------------------------------------------------------------
 799 -- LINE pragmas
 800
 801 set_line :: Int -> Action
 802 set_line code loc end buf len = do
 803   let line = parseInteger buf len 10 oct_or_dec
 804   setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0)
 805         -- subtract one: the line number refers to the *following* line
 806   popLexState
 807   pushLexState code
 808   lexToken
 809
 810 set_file :: Int -> Action
 811 set_file code loc end buf len = do
 812   let file = lexemeToFastString (stepOn buf) (len-2)
 813   setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end))
 814   popLexState
 815   pushLexState code
 816   lexToken
 817
 818 -- -----------------------------------------------------------------------------
 819 -- Strings & Chars
 820
 821 -- This stuff is horrible.  I hates it.
 822
 823 lex_string_tok :: Action
 824 lex_string_tok loc end buf len = do
 825   tok <- lex_string ""
 826   end <- getSrcLoc
 827   return (T loc end tok)
 828
 829 lex_string :: String -> P Token__
 830 lex_string s = do
 831   i <- getInput
 832   case alexGetChar i of
 833     Nothing -> lit_error
 834
 835     Just ('"',i)  -> do
 836         setInput i
 837         glaexts <- extension glaExtsEnabled
 838         if glaexts
 839           then do
 840             i <- getInput
 841             case alexGetChar i of
 842               Just ('#',i) -> do
 843                    setInput i
 844                    if any (> '\xFF') s
 845                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
 846                     else let s' = mkFastStringNarrow (reverse s) in
 847                          -- always a narrow string/byte array
 848                          return (ITprimstring s')
 849               _other ->
 850                 return (ITstring (mkFastString (reverse s)))
 851           else
 852                 return (ITstring (mkFastString (reverse s)))
 853
 854     Just ('\\',i)
 855         | Just ('&',i) <- next -> do
 856                 setInput i; lex_string s
 857         | Just (c,i) <- next, is_space c -> do
 858                 setInput i; lex_stringgap s
 859         where next = alexGetChar i
 860
 861     Just _ -> do
 862         c <- lex_char
 863         lex_string (c:s)
 864
 865
 866 lex_stringgap s = do
 867   c <- getCharOrFail
 868   case c of
 869     '\\' -> lex_string s
 870     c | is_space c -> lex_stringgap s
 871     _other -> lit_error
 872
 873
 874 lex_char_tok :: Action
 875 lex_char_tok loc _end buf len = do
 876    c <- lex_char
 877    mc <- getCharOrFail
 878    case mc of
 879         '\'' -> do
 880            glaexts <- extension glaExtsEnabled
 881            if glaexts
 882                 then do
 883                    i@(end,_) <- getInput
 884                    case alexGetChar i of
 885                         Just ('#',i@(end,_)) -> do
 886                                 setInput i
 887                                 return (T loc end (ITprimchar c))
 888                         _other ->
 889                                 return (T loc end (ITchar c))
 890                 else do
 891                    end <- getSrcLoc
 892                    return (T loc end (ITchar c))
 893
 894         _other -> lit_error
 895
 896 lex_char :: P Char
 897 lex_char = do
 898   mc <- getCharOrFail
 899   case mc of
 900       '\\' -> lex_escape
 901       c | is_any c -> return c
 902       _other -> lit_error
 903
 904 lex_escape :: P Char
 905 lex_escape = do
 906   c <- getCharOrFail
 907   case c of
 908         'a'   -> return '\a'
 909         'b'   -> return '\b'
 910         'f'   -> return '\f'
 911         'n'   -> return '\n'
 912         'r'   -> return '\r'
 913         't'   -> return '\t'
 914         'v'   -> return '\v'
 915         '\\'  -> return '\\'
 916         '"'   -> return '\"'
 917         '\''  -> return '\''
 918         '^'   -> do c <- getCharOrFail
 919                     if c >= '@' && c <= '_'
 920                         then return (chr (ord c - ord '@'))
 921                         else lit_error
 922
 923         'x'   -> readNum is_hexdigit 16 hex
 924         'o'   -> readNum is_octdigit  8 oct_or_dec
 925         x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x)
 926
 927         c1 ->  do
 928            i <- getInput
 929            case alexGetChar i of
 930             Nothing -> lit_error
 931             Just (c2,i2) ->
 932               case alexGetChar i2 of
 933                 Nothing -> lit_error
 934                 Just (c3,i3) ->
 935                    let str = [c1,c2,c3] in
 936                    case [ (c,rest) | (p,c) <- silly_escape_chars,
 937                                      Just rest <- [maybePrefixMatch p str] ] of
 938                           (escape_char,[]):_ -> do
 939                                 setInput i3
 940                                 return escape_char
 941                           (escape_char,_:_):_ -> do
 942                                 setInput i2
 943                                 return escape_char
 944                           [] -> lit_error
 945
 946 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 947 readNum is_digit base conv = do
 948   c <- getCharOrFail
 949   if is_digit c
 950         then readNum2 is_digit base conv (conv c)
 951         else lit_error
 952
 953 readNum2 is_digit base conv i = do
 954   input <- getInput
 955   read i input
 956   where read i input = do
 957           case alexGetChar input of
 958             Just (c,input') | is_digit c -> do
 959                 read (i*base + conv c) input'
 960             _other -> do
 961                 setInput input
 962                 if i >= 0 && i <= 0x10FFFF
 963                    then return (chr i)
 964                    else lit_error
 965
 966 is_hexdigit c
 967         =  is_digit c
 968         || (c >= 'a' && c <= 'f')
 969         || (c >= 'A' && c <= 'F')
 970
 971 hex c | is_digit c = ord c - ord '0'
 972       | otherwise  = ord (to_lower c) - ord 'a' + 10
 973
 974 oct_or_dec c = ord c - ord '0'
 975
 976 is_octdigit c = c >= '0' && c <= '7'
 977
 978 to_lower c
 979   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
 980   | otherwise = c
 981
 982 silly_escape_chars = [
 983         ("NUL", '\NUL'),
 984         ("SOH", '\SOH'),
 985         ("STX", '\STX'),
 986         ("ETX", '\ETX'),
 987         ("EOT", '\EOT'),
 988         ("ENQ", '\ENQ'),
 989         ("ACK", '\ACK'),
 990         ("BEL", '\BEL'),
 991         ("BS", '\BS'),
 992         ("HT", '\HT'),
 993         ("LF", '\LF'),
 994         ("VT", '\VT'),
 995         ("FF", '\FF'),
 996         ("CR", '\CR'),
 997         ("SO", '\SO'),
 998         ("SI", '\SI'),
 999         ("DLE", '\DLE'),
1000         ("DC1", '\DC1'),
1001         ("DC2", '\DC2'),
1002         ("DC3", '\DC3'),
1003         ("DC4", '\DC4'),
1004         ("NAK", '\NAK'),
1005         ("SYN", '\SYN'),
1006         ("ETB", '\ETB'),
1007         ("CAN", '\CAN'),
1008         ("EM", '\EM'),
1009         ("SUB", '\SUB'),
1010         ("ESC", '\ESC'),
1011         ("FS", '\FS'),
1012         ("GS", '\GS'),
1013         ("RS", '\RS'),
1014         ("US", '\US'),
1015         ("SP", '\SP'),
1016         ("DEL", '\DEL')
1017         ]
1018
1019 lit_error = lexError "lexical error in string/character literal"
1020
1021 getCharOrFail :: P Char
1022 getCharOrFail =  do
1023   i <- getInput
1024   case alexGetChar i of
1025         Nothing -> lexError "unexpected end-of-file in string/character literal"
1026         Just (c,i)  -> do setInput i; return c
1027
1028 -- -----------------------------------------------------------------------------
1029 -- Floats
1030
1031 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
1032 readRational r = do
1033      (n,d,s) <- readFix r
1034      (k,t)   <- readExp s
1035      return ((n%1)*10^^(k-d), t)
1036  where
1037      readFix r = do
1038         (ds,s)  <- lexDecDigits r
1039         (ds',t) <- lexDotDigits s
1040         return (read (ds++ds'), length ds', t)
1041
1042      readExp (e:s) | e `elem` "eE" = readExp' s
1043      readExp s                     = return (0,s)
1044
1045      readExp' ('+':s) = readDec s
1046      readExp' ('-':s) = do
1047                         (k,t) <- readDec s
1048                         return (-k,t)
1049      readExp' s       = readDec s
1050
1051      readDec s = do
1052         (ds,r) <- nonnull isDigit s
1053         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
1054                 r)
1055
1056      lexDecDigits = nonnull isDigit
1057
1058      lexDotDigits ('.':s) = return (span isDigit s)
1059      lexDotDigits s       = return ("",s)
1060
1061      nonnull p s = do (cs@(_:_),t) <- return (span p s)
1062                       return (cs,t)
1063
1064 readRational__ :: String -> Rational -- NB: *does* handle a leading "-"
1065 readRational__ top_s
1066   = case top_s of
1067       '-' : xs -> - (read_me xs)
1068       xs       -> read_me xs
1069   where
1070     read_me s
1071       = case (do { (x,"") <- readRational s ; return x }) of
1072           [x] -> x
1073           []  -> error ("readRational__: no parse:"        ++ top_s)
1074           _   -> error ("readRational__: ambiguous parse:" ++ top_s)
1075
1076 -- -----------------------------------------------------------------------------
1077 -- The Parse Monad
1078
1079 data LayoutContext
1080   = NoLayout
1081   | Layout !Int
1082
1083 data ParseResult a
1084   = POk PState a
1085   | PFailed
1086         SrcLoc SrcLoc   -- The start and end of the text span related to
1087                         -- the error.  Might be used in environments which can
1088                         -- show this span, e.g. by highlighting it.
1089         Message         -- The error message
1090
1091 showPFailed loc1 loc2 err = hcat [ppr loc1, text ": ", err]
1092
1093 data PState = PState {
1094         buffer     :: StringBuffer,
1095         last_loc   :: SrcLoc,           -- pos of previous token
1096         last_len   :: !Int,             -- len of previous token
1097         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1098         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1099         context    :: [LayoutContext],
1100         lex_state  :: [Int]
1101      }
1102         -- last_loc and last_len are used when generating error messages,
1103         -- and in pushCurrentContext only.
1104
1105 newtype P a = P { unP :: PState -> ParseResult a }
1106
1107 instance Monad P where
1108   return = returnP
1109   (>>=) = thenP
1110   fail = failP
1111
1112 returnP :: a -> P a
1113 returnP a = P $ \s -> POk s a
1114
1115 thenP :: P a -> (a -> P b) -> P b
1116 (P m) `thenP` k = P $ \ s ->
1117         case m s of
1118                 POk s1 a          -> (unP (k a)) s1
1119                 PFailed l1 l2 err -> PFailed l1 l2 err
1120
1121 failP :: String -> P a
1122 failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1123
1124 failMsgP :: String -> P a
1125 failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg)
1126
1127 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1128 failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str)
1129
1130 extension :: (Int -> Bool) -> P Bool
1131 extension p = P $ \s -> POk s (p $! extsBitmap s)
1132
1133 getExts :: P Int
1134 getExts = P $ \s -> POk s (extsBitmap s)
1135
1136 setSrcLoc :: SrcLoc -> P ()
1137 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1138
1139 -- tmp, for supporting stuff in RdrHsSyn.  The scope better not include
1140 -- any calls to the lexer, because it assumes things about the SrcLoc.
1141 setSrcLocFor :: SrcLoc -> P a -> P a
1142 setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } ->
1143   case unP scope s{loc=new_loc} of
1144         PFailed l1 l2 msg -> PFailed l1 l2 msg
1145         POk _ r -> POk s r
1146
1147 getSrcLoc :: P SrcLoc
1148 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1149
1150 setLastToken :: SrcLoc -> Int -> P ()
1151 setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
1152
1153 type AlexInput = (SrcLoc,StringBuffer)
1154
1155 alexInputPrevChar :: AlexInput -> Char
1156 alexInputPrevChar (_,s) = prevChar s '\n'
1157
1158 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1159 alexGetChar (loc,s)
1160   | atEnd s   = Nothing
1161   | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
1162   where c = currentChar s
1163         loc' = advanceSrcLoc loc c
1164         s'   = stepOn s
1165
1166 getInput :: P AlexInput
1167 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
1168
1169 setInput :: AlexInput -> P ()
1170 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1171
1172 pushLexState :: Int -> P ()
1173 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1174
1175 popLexState :: P Int
1176 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1177
1178 getLexState :: P Int
1179 getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
1180
1181 -- for reasons of efficiency, flags indicating language extensions (eg,
1182 -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
1183 -- integer
1184
1185 glaExtsBit, ffiBit, parrBit :: Int
1186 glaExtsBit = 0
1187 ffiBit     = 1
1188 parrBit    = 2
1189 arrowsBit  = 4
1190 thBit      = 5
1191 ipBit      = 6
1192
1193 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
1194 glaExtsEnabled flags = testBit flags glaExtsBit
1195 ffiEnabled     flags = testBit flags ffiBit
1196 parrEnabled    flags = testBit flags parrBit
1197 arrowsEnabled  flags = testBit flags arrowsBit
1198 thEnabled      flags = testBit flags thBit
1199 ipEnabled      flags = testBit flags ipBit
1200
1201 -- create a parse state
1202 --
1203 mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
1204 mkPState buf loc flags  =
1205   PState {
1206       buffer     = buf,
1207       last_loc   = loc,
1208       last_len   = 0,
1209       loc        = loc,
1210       extsBitmap = fromIntegral bitmap,
1211       context    = [],
1212       lex_state  = [bol, if glaExtsEnabled bitmap then glaexts else 0]
1213         -- we begin in the layout state if toplev_layout is set
1214     }
1215     where
1216       bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
1217                .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
1218                .|. parrBit    `setBitIf` dopt Opt_PArr        flags
1219                .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
1220                .|. thBit      `setBitIf` dopt Opt_TH          flags
1221                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
1222       --
1223       setBitIf :: Int -> Bool -> Int
1224       b `setBitIf` cond | cond      = bit b
1225                         | otherwise = 0
1226
1227 getContext :: P [LayoutContext]
1228 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1229
1230 setContext :: [LayoutContext] -> P ()
1231 setContext ctx = P $ \s -> POk s{context=ctx} ()
1232
1233 popContext :: P ()
1234 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1235                            loc = loc, last_len = len, last_loc = last_loc }) ->
1236   case ctx of
1237         (_:tl) -> POk s{ context = tl } ()
1238         []     -> PFailed last_loc loc (srcParseErr buf len)
1239
1240 -- Push a new layout context at the indentation of the last token read.
1241 -- This is only used at the outer level of a module when the 'module'
1242 -- keyword is missing.
1243 pushCurrentContext :: P ()
1244 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1245   POk s{ context = Layout (srcLocCol loc) : ctx} ()
1246
1247 getOffside :: SrcLoc -> P Ordering
1248 getOffside loc = P $ \s@PState{context=stk} ->
1249                 let ord = case stk of
1250                         (Layout n:_) -> compare (srcLocCol loc) n
1251                         _            -> GT
1252                 in POk s ord
1253
1254 -- ---------------------------------------------------------------------------
1255 -- Construct a parse error
1256
1257 srcParseErr
1258   :: StringBuffer       -- current buffer (placed just after the last token)
1259   -> Int                -- length of the previous token
1260   -> Message
1261 srcParseErr buf len
1262   = hcat [ if null token
1263              then ptext SLIT("parse error (possibly incorrect indentation)")
1264              else hcat [ptext SLIT("parse error on input "),
1265                         char '`', text token, char '\'']
1266     ]
1267   where token = lexemeToString (stepOnBy (-len) buf) len
1268
1269 -- Report a parse failure, giving the span of the previous token as
1270 -- the location of the error.  This is the entry point for errors
1271 -- detected during parsing.
1272 srcParseFail :: P a
1273 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1274                                 last_loc = last_loc, loc = loc } ->
1275     PFailed last_loc loc (srcParseErr buf len)
1276
1277 -- A lexical error is reported at a particular position in the source file,
1278 -- not over a token range.  TODO: this is slightly wrong, because we record
1279 -- the error at the character position following the one which caused the
1280 -- error.  We should somehow back up by one character.
1281 lexError :: String -> P a
1282 lexError str = do
1283   loc <- getSrcLoc
1284   failLocMsgP loc loc str
1285
1286 -- -----------------------------------------------------------------------------
1287 -- This is the top-level function: called from the parser each time a
1288 -- new token is to be read from the input.
1289
1290 lexer :: (Token -> P a) -> P a
1291 lexer cont = do
1292   tok@(T _ _ tok__) <- lexToken
1293   --trace ("token: " ++ show tok__) $ do
1294   cont tok
1295
1296 lexToken :: P Token
1297 lexToken = do
1298   inp@(loc1,buf) <- getInput
1299   sc <- getLexState
1300   exts <- getExts
1301   case alexScanUser exts inp sc of
1302     AlexEOF -> do setLastToken loc1 0
1303                   return (T loc1 loc1 ITeof)
1304     AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
1305     AlexSkip inp2 _ -> do
1306         setInput inp2
1307         lexToken
1308     AlexToken inp2@(end,buf2) len t -> do
1309         setInput inp2
1310         setLastToken loc1 len
1311         t loc1 end buf len
1312 }