compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# LANGUAGE BangPatterns #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  38 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  39 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  40 -- But alex still generates some code that causes the "lazy unlifted bindings"
  41 -- warning, and old compilers don't know about it so we can't easily turn
  42 -- it off, so for now we use the sledge hammer:
  43 {-# OPTIONS_GHC -w #-}
  44
  45 {-# OPTIONS_GHC -funbox-strict-fields #-}
  46
  47 module Lexer (
  48    Token(..), lexer, pragState, mkPState, PState(..),
  49    P(..), ParseResult(..), getSrcLoc,
  50    getPState, getDynFlags, withThisPackage,
  51    failLocMsgP, failSpanMsgP, srcParseFail,
  52    getMessages,
  53    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  54    activeContext, nextIsEOF,
  55    getLexState, popLexState, pushLexState,
  56    extension, bangPatEnabled, datatypeContextsEnabled,
  57    addWarning,
  58    lexTokenStream
  59   ) where
  60
  61 import Bag
  62 import ErrUtils
  63 import Outputable
  64 import StringBuffer
  65 import FastString
  66 import SrcLoc
  67 import UniqFM
  68 import DynFlags
  69 import Module
  70 import Ctype
  71 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..) )
  72 import Util             ( readRational )
  73
  74 import Control.Monad
  75 import Data.Bits
  76 import Data.Char
  77 import Data.List
  78 import Data.Maybe
  79 import Data.Map (Map)
  80 import qualified Data.Map as Map
  81 import Data.Ratio
  82 }
  83
  84 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  85 $whitechar   = [\ \n\r\f\v $unispace]
  86 $white_no_nl = $whitechar # \n
  87 $tab         = \t
  88
  89 $ascdigit  = 0-9
  90 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  91 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  92 $digit     = [$ascdigit $unidigit]
  93
  94 $special   = [\(\)\,\;\[\]\`\{\}]
  95 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  96 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  97 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  98
  99 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
 100 $asclarge  = [A-Z]
 101 $large     = [$asclarge $unilarge]
 102
 103 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 104 $ascsmall  = [a-z]
 105 $small     = [$ascsmall $unismall \_]
 106
 107 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 108 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 109
 110 $octit     = 0-7
 111 $hexit     = [$decdigit A-F a-f]
 112 $symchar   = [$symbol \:]
 113 $nl        = [\n\r]
 114 $idchar    = [$small $large $digit \']
 115
 116 $pragmachar = [$small $large $digit]
 117
 118 $docsym    = [\| \^ \* \$]
 119
 120 @varid     = $small $idchar*
 121 @conid     = $large $idchar*
 122
 123 @varsym    = $symbol $symchar*
 124 @consym    = \: $symchar*
 125
 126 @decimal     = $decdigit+
 127 @octal       = $octit+
 128 @hexadecimal = $hexit+
 129 @exponent    = [eE] [\-\+]? @decimal
 130
 131 -- we support the hierarchical module name extension:
 132 @qual = (@conid \.)+
 133
 134 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 135
 136 -- normal signed numerical literals can only be explicitly negative,
 137 -- not explicitly positive (contrast @exponent)
 138 @negative = \-
 139 @signed = @negative ?
 140
 141 haskell :-
 142
 143 -- everywhere: skip whitespace and comments
 144 $white_no_nl+                           ;
 145 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 146
 147 -- Everywhere: deal with nested comments.  We explicitly rule out
 148 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 149 -- (this can happen even though pragmas will normally take precedence due to
 150 -- longest-match, because pragmas aren't valid in every state, but comments
 151 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 152 -- set.
 153
 154 "{-" / { isNormalComment } { nested_comment lexToken }
 155
 156 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 157 -- more dashes followed by a symbol should be parsed as a varsym, so we
 158 -- have to exclude those.
 159
 160 -- Since Haddock comments aren't valid in every state, we need to rule them
 161 -- out here.
 162
 163 -- The following two rules match comments that begin with two dashes, but
 164 -- continue with a different character. The rules test that this character
 165 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 166 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 167 -- have a Haddock comment). The rules then munch the rest of the line.
 168
 169 "-- " ~[$docsym \#] .* { lineCommentToken }
 170 "--" [^$symbol : \ ] .* { lineCommentToken }
 171
 172 -- Next, match Haddock comments if no -haddock flag
 173
 174 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 175
 176 -- Now, when we've matched comments that begin with 2 dashes and continue
 177 -- with a different character, we need to match comments that begin with three
 178 -- or more dashes (which clearly can't be Haddock comments). We only need to
 179 -- make sure that the first non-dash character isn't a symbol, and munch the
 180 -- rest of the line.
 181
 182 "---"\-* [^$symbol :] .* { lineCommentToken }
 183
 184 -- Since the previous rules all match dashes followed by at least one
 185 -- character, we also need to match a whole line filled with just dashes.
 186
 187 "--"\-* / { atEOL } { lineCommentToken }
 188
 189 -- We need this rule since none of the other single line comment rules
 190 -- actually match this case.
 191
 192 "-- " / { atEOL } { lineCommentToken }
 193
 194 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 195 -- blank lines) until we find a non-whitespace character, then do layout
 196 -- processing.
 197 --
 198 -- One slight wibble here: what if the line begins with {-#? In
 199 -- theory, we have to lex the pragma to see if it's one we recognise,
 200 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 201 -- as a nested comment.  We don't bother with this: if the line begins
 202 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 203 <bol> {
 204   \n                                    ;
 205   ^\# (line)?                           { begin line_prag1 }
 206   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 207   ^\# \! .* \n                          ; -- #!, for scripts
 208   ()                                    { do_bol }
 209 }
 210
 211 -- after a layout keyword (let, where, do, of), we begin a new layout
 212 -- context if the curly brace is missing.
 213 -- Careful! This stuff is quite delicate.
 214 <layout, layout_do> {
 215   \{ / { notFollowedBy '-' }            { hopefully_open_brace }
 216         -- we might encounter {-# here, but {- has been handled already
 217   \n                                    ;
 218   ^\# (line)?                           { begin line_prag1 }
 219 }
 220
 221 -- do is treated in a subtly different way, see new_layout_context
 222 <layout>    ()                          { new_layout_context True }
 223 <layout_do> ()                          { new_layout_context False }
 224
 225 -- after a new layout context which was found to be to the left of the
 226 -- previous context, we have generated a '{' token, and we now need to
 227 -- generate a matching '}' token.
 228 <layout_left>  ()                       { do_layout_left }
 229
 230 <0,option_prags> \n                             { begin bol }
 231
 232 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 233                                 { dispatch_pragmas linePrags }
 234
 235 -- single-line line pragmas, of the form
 236 --    # <line> "<file>" <extra-stuff> \n
 237 <line_prag1> $decdigit+                 { setLine line_prag1a }
 238 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 239 <line_prag1b> .*                        { pop }
 240
 241 -- Haskell-style line pragmas, of the form
 242 --    {-# LINE <line> "<file>" #-}
 243 <line_prag2> $decdigit+                 { setLine line_prag2a }
 244 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 245 <line_prag2b> "#-}"|"-}"                { pop }
 246    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 247    -- with older versions of GHC which generated these.
 248
 249 <0,option_prags> {
 250   "{-#" $whitechar* $pragmachar+
 251         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 252                                  { dispatch_pragmas twoWordPrags }
 253
 254   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 255                                  { dispatch_pragmas oneWordPrags }
 256
 257   -- We ignore all these pragmas, but don't generate a warning for them
 258   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 259                                  { dispatch_pragmas ignoredPrags }
 260
 261   -- ToDo: should only be valid inside a pragma:
 262   "#-}"                                 { endPrag }
 263 }
 264
 265 <option_prags> {
 266   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 267                                    { dispatch_pragmas fileHeaderPrags }
 268
 269   "-- #"                                 { multiline_doc_comment }
 270 }
 271
 272 <0> {
 273   -- In the "0" mode we ignore these pragmas
 274   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 275                      { nested_comment lexToken }
 276 }
 277
 278 <0> {
 279   "-- #" .* { lineCommentToken }
 280 }
 281
 282 <0,option_prags> {
 283   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 284                     (nested_comment lexToken) }
 285 }
 286
 287 -- '0' state: ordinary lexemes
 288
 289 -- Haddock comments
 290
 291 <0,option_prags> {
 292   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 293   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 294 }
 295
 296 -- "special" symbols
 297
 298 <0> {
 299   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 300   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 301 }
 302
 303 <0> {
 304   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 305   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 306   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 307   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 308   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 309   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 310   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 311   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 312
 313 -- For backward compatibility, accept the old dollar syntax
 314   "[$" @varid "|"  / { ifExtension qqEnabled }
 315                      { lex_quasiquote_tok }
 316
 317   "[" @varid "|"  / { ifExtension qqEnabled }
 318                      { lex_quasiquote_tok }
 319 }
 320
 321 <0> {
 322   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 323                                         { special IToparenbar }
 324   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 325 }
 326
 327 <0> {
 328   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 329 }
 330
 331 <0> {
 332   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 333          { token IToubxparen }
 334   "#)" / { ifExtension unboxedTuplesEnabled }
 335          { token ITcubxparen }
 336 }
 337
 338 <0> {
 339   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 340   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 341 }
 342
 343 <0,option_prags> {
 344   \(                                    { special IToparen }
 345   \)                                    { special ITcparen }
 346   \[                                    { special ITobrack }
 347   \]                                    { special ITcbrack }
 348   \,                                    { special ITcomma }
 349   \;                                    { special ITsemi }
 350   \`                                    { special ITbackquote }
 351
 352   \{                                    { open_brace }
 353   \}                                    { close_brace }
 354 }
 355
 356 <0,option_prags> {
 357   @qual @varid                  { idtoken qvarid }
 358   @qual @conid                  { idtoken qconid }
 359   @varid                        { varid }
 360   @conid                        { idtoken conid }
 361 }
 362
 363 <0> {
 364   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 365   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 366   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 367   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 368 }
 369
 370 -- ToDo: - move `var` and (sym) into lexical syntax?
 371 --       - remove backquote from $special?
 372 <0> {
 373   @qual @varsym                                    { idtoken qvarsym }
 374   @qual @consym                                    { idtoken qconsym }
 375   @varsym                                          { varsym }
 376   @consym                                          { consym }
 377 }
 378
 379 -- For the normal boxed literals we need to be careful
 380 -- when trying to be close to Haskell98
 381 <0> {
 382   -- Normal integral literals (:: Num a => a, from Integer)
 383   @decimal           { tok_num positive 0 0 decimal }
 384   0[oO] @octal       { tok_num positive 2 2 octal }
 385   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 386
 387   -- Normal rational literals (:: Fractional a => a, from Rational)
 388   @floating_point    { strtoken tok_float }
 389 }
 390
 391 <0> {
 392   -- Unboxed ints (:: Int#) and words (:: Word#)
 393   -- It's simpler (and faster?) to give separate cases to the negatives,
 394   -- especially considering octal/hexadecimal prefixes.
 395   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 396   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 397   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 398   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 399   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 400   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 401
 402   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 403   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 404   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 405
 406   -- Unboxed floats and doubles (:: Float#, :: Double#)
 407   -- prim_{float,double} work with signed literals
 408   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 409   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 410 }
 411
 412 -- Strings and chars are lexed by hand-written code.  The reason is
 413 -- that even if we recognise the string or char here in the regex
 414 -- lexer, we would still have to parse the string afterward in order
 415 -- to convert it to a String.
 416 <0> {
 417   \'                            { lex_char_tok }
 418   \"                            { lex_string_tok }
 419 }
 420
 421 {
 422 -- -----------------------------------------------------------------------------
 423 -- The token type
 424
 425 data Token
 426   = ITas                        -- Haskell keywords
 427   | ITcase
 428   | ITclass
 429   | ITdata
 430   | ITdefault
 431   | ITderiving
 432   | ITdo
 433   | ITelse
 434   | IThiding
 435   | ITif
 436   | ITimport
 437   | ITin
 438   | ITinfix
 439   | ITinfixl
 440   | ITinfixr
 441   | ITinstance
 442   | ITlet
 443   | ITmodule
 444   | ITnewtype
 445   | ITof
 446   | ITqualified
 447   | ITthen
 448   | ITtype
 449   | ITwhere
 450   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 451
 452   | ITforall                    -- GHC extension keywords
 453   | ITforeign
 454   | ITexport
 455   | ITlabel
 456   | ITdynamic
 457   | ITsafe
 458   | ITthreadsafe
 459   | ITinterruptible
 460   | ITunsafe
 461   | ITstdcallconv
 462   | ITccallconv
 463   | ITprimcallconv
 464   | ITmdo
 465   | ITfamily
 466   | ITgroup
 467   | ITby
 468   | ITusing
 469
 470         -- Pragmas
 471   | ITinline_prag InlineSpec RuleMatchInfo
 472   | ITspec_prag                 -- SPECIALISE
 473   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 474   | ITsource_prag
 475   | ITrules_prag
 476   | ITwarning_prag
 477   | ITdeprecated_prag
 478   | ITline_prag
 479   | ITscc_prag
 480   | ITgenerated_prag
 481   | ITcore_prag                 -- hdaume: core annotations
 482   | ITunpack_prag
 483   | ITann_prag
 484   | ITclose_prag
 485   | IToptions_prag String
 486   | ITinclude_prag String
 487   | ITlanguage_prag
 488
 489   | ITdotdot                    -- reserved symbols
 490   | ITcolon
 491   | ITdcolon
 492   | ITequal
 493   | ITlam
 494   | ITvbar
 495   | ITlarrow
 496   | ITrarrow
 497   | ITat
 498   | ITtilde
 499   | ITdarrow
 500   | ITminus
 501   | ITbang
 502   | ITstar
 503   | ITdot
 504
 505   | ITbiglam                    -- GHC-extension symbols
 506
 507   | ITocurly                    -- special symbols
 508   | ITccurly
 509   | ITocurlybar                 -- {|, for type applications
 510   | ITccurlybar                 -- |}, for type applications
 511   | ITvocurly
 512   | ITvccurly
 513   | ITobrack
 514   | ITopabrack                  -- [:, for parallel arrays with -XParallelArrays
 515   | ITcpabrack                  -- :], for parallel arrays with -XParallelArrays
 516   | ITcbrack
 517   | IToparen
 518   | ITcparen
 519   | IToubxparen
 520   | ITcubxparen
 521   | ITsemi
 522   | ITcomma
 523   | ITunderscore
 524   | ITbackquote
 525
 526   | ITvarid   FastString        -- identifiers
 527   | ITconid   FastString
 528   | ITvarsym  FastString
 529   | ITconsym  FastString
 530   | ITqvarid  (FastString,FastString)
 531   | ITqconid  (FastString,FastString)
 532   | ITqvarsym (FastString,FastString)
 533   | ITqconsym (FastString,FastString)
 534   | ITprefixqvarsym (FastString,FastString)
 535   | ITprefixqconsym (FastString,FastString)
 536
 537   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 538
 539   | ITchar       Char
 540   | ITstring     FastString
 541   | ITinteger    Integer
 542   | ITrational   Rational
 543
 544   | ITprimchar   Char
 545   | ITprimstring FastString
 546   | ITprimint    Integer
 547   | ITprimword   Integer
 548   | ITprimfloat  Rational
 549   | ITprimdouble Rational
 550
 551   -- Template Haskell extension tokens
 552   | ITopenExpQuote              --  [| or [e|
 553   | ITopenPatQuote              --  [p|
 554   | ITopenDecQuote              --  [d|
 555   | ITopenTypQuote              --  [t|
 556   | ITcloseQuote                --  |]
 557   | ITidEscape   FastString     --  $x
 558   | ITparenEscape               --  $(
 559   | ITvarQuote                  --  '
 560   | ITtyQuote                   --  ''
 561   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 562
 563   -- Arrow notation extension
 564   | ITproc
 565   | ITrec
 566   | IToparenbar                 --  (|
 567   | ITcparenbar                 --  |)
 568   | ITlarrowtail                --  -<
 569   | ITrarrowtail                --  >-
 570   | ITLarrowtail                --  -<<
 571   | ITRarrowtail                --  >>-
 572
 573   | ITunknown String            -- Used when the lexer can't make sense of it
 574   | ITeof                       -- end of file token
 575
 576   -- Documentation annotations
 577   | ITdocCommentNext  String     -- something beginning '-- |'
 578   | ITdocCommentPrev  String     -- something beginning '-- ^'
 579   | ITdocCommentNamed String     -- something beginning '-- $'
 580   | ITdocSection      Int String -- a section heading
 581   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 582   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 583   | ITlineComment     String     -- comment starting by "--"
 584   | ITblockComment    String     -- comment in {- -}
 585
 586 #ifdef DEBUG
 587   deriving Show -- debugging
 588 #endif
 589
 590 {-
 591 isSpecial :: Token -> Bool
 592 -- If we see M.x, where x is a keyword, but
 593 -- is special, we treat is as just plain M.x,
 594 -- not as a keyword.
 595 isSpecial ITas          = True
 596 isSpecial IThiding      = True
 597 isSpecial ITqualified   = True
 598 isSpecial ITforall      = True
 599 isSpecial ITexport      = True
 600 isSpecial ITlabel       = True
 601 isSpecial ITdynamic     = True
 602 isSpecial ITsafe        = True
 603 isSpecial ITthreadsafe  = True
 604 isSpecial ITinterruptible = True
 605 isSpecial ITunsafe      = True
 606 isSpecial ITccallconv   = True
 607 isSpecial ITstdcallconv = True
 608 isSpecial ITprimcallconv = True
 609 isSpecial ITmdo         = True
 610 isSpecial ITfamily      = True
 611 isSpecial ITgroup   = True
 612 isSpecial ITby      = True
 613 isSpecial ITusing   = True
 614 isSpecial _             = False
 615 -}
 616
 617 -- the bitmap provided as the third component indicates whether the
 618 -- corresponding extension keyword is valid under the extension options
 619 -- provided to the compiler; if the extension corresponding to *any* of the
 620 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 621 -- facilitates using a keyword in two different extensions that can be
 622 -- activated independently)
 623 --
 624 reservedWordsFM :: UniqFM (Token, Int)
 625 reservedWordsFM = listToUFM $
 626         map (\(x, y, z) -> (mkFastString x, (y, z)))
 627        [( "_",          ITunderscore,   0 ),
 628         ( "as",         ITas,           0 ),
 629         ( "case",       ITcase,         0 ),
 630         ( "class",      ITclass,        0 ),
 631         ( "data",       ITdata,         0 ),
 632         ( "default",    ITdefault,      0 ),
 633         ( "deriving",   ITderiving,     0 ),
 634         ( "do",         ITdo,           0 ),
 635         ( "else",       ITelse,         0 ),
 636         ( "hiding",     IThiding,       0 ),
 637         ( "if",         ITif,           0 ),
 638         ( "import",     ITimport,       0 ),
 639         ( "in",         ITin,           0 ),
 640         ( "infix",      ITinfix,        0 ),
 641         ( "infixl",     ITinfixl,       0 ),
 642         ( "infixr",     ITinfixr,       0 ),
 643         ( "instance",   ITinstance,     0 ),
 644         ( "let",        ITlet,          0 ),
 645         ( "module",     ITmodule,       0 ),
 646         ( "newtype",    ITnewtype,      0 ),
 647         ( "of",         ITof,           0 ),
 648         ( "qualified",  ITqualified,    0 ),
 649         ( "then",       ITthen,         0 ),
 650         ( "type",       ITtype,         0 ),
 651         ( "where",      ITwhere,        0 ),
 652         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 653
 654     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 655         ( "mdo",        ITmdo,           bit recursiveDoBit),
 656         ( "family",     ITfamily,        bit tyFamBit),
 657     ( "group",  ITgroup,     bit transformComprehensionsBit),
 658     ( "by",     ITby,        bit transformComprehensionsBit),
 659     ( "using",  ITusing,     bit transformComprehensionsBit),
 660
 661         ( "foreign",    ITforeign,       bit ffiBit),
 662         ( "export",     ITexport,        bit ffiBit),
 663         ( "label",      ITlabel,         bit ffiBit),
 664         ( "dynamic",    ITdynamic,       bit ffiBit),
 665         ( "safe",       ITsafe,          bit ffiBit),
 666         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 667         ( "interruptible", ITinterruptible, bit ffiBit),
 668         ( "unsafe",     ITunsafe,        bit ffiBit),
 669         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 670         ( "ccall",      ITccallconv,     bit ffiBit),
 671         ( "prim",       ITprimcallconv,  bit ffiBit),
 672
 673         ( "rec",        ITrec,           bit recBit),
 674         ( "proc",       ITproc,          bit arrowsBit)
 675      ]
 676
 677 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 678 reservedSymsFM = listToUFM $
 679     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 680       [ ("..",  ITdotdot,   always)
 681         -- (:) is a reserved op, meaning only list cons
 682        ,(":",   ITcolon,    always)
 683        ,("::",  ITdcolon,   always)
 684        ,("=",   ITequal,    always)
 685        ,("\\",  ITlam,      always)
 686        ,("|",   ITvbar,     always)
 687        ,("<-",  ITlarrow,   always)
 688        ,("->",  ITrarrow,   always)
 689        ,("@",   ITat,       always)
 690        ,("~",   ITtilde,    always)
 691        ,("=>",  ITdarrow,   always)
 692        ,("-",   ITminus,    always)
 693        ,("!",   ITbang,     always)
 694
 695         -- For data T (a::*) = MkT
 696        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 697         -- For 'forall a . t'
 698        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 699
 700        ,("-<",  ITlarrowtail, arrowsEnabled)
 701        ,(">-",  ITrarrowtail, arrowsEnabled)
 702        ,("-<<", ITLarrowtail, arrowsEnabled)
 703        ,(">>-", ITRarrowtail, arrowsEnabled)
 704
 705        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 706        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 707        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 708                                 explicitForallEnabled i)
 709        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 710        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 711
 712        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 713        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 714        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 716
 717        ,("★", ITstar, unicodeSyntaxEnabled)
 718
 719         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 720         -- form part of a large operator.  This would let us have a better
 721         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 722        ]
 723
 724 -- -----------------------------------------------------------------------------
 725 -- Lexer actions
 726
 727 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 728
 729 special :: Token -> Action
 730 special tok span _buf _len = return (L span tok)
 731
 732 token, layout_token :: Token -> Action
 733 token t span _buf _len = return (L span t)
 734 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 735
 736 idtoken :: (StringBuffer -> Int -> Token) -> Action
 737 idtoken f span buf len = return (L span $! (f buf len))
 738
 739 skip_one_varid :: (FastString -> Token) -> Action
 740 skip_one_varid f span buf len
 741   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 742
 743 strtoken :: (String -> Token) -> Action
 744 strtoken f span buf len =
 745   return (L span $! (f $! lexemeToString buf len))
 746
 747 init_strtoken :: Int -> (String -> Token) -> Action
 748 -- like strtoken, but drops the last N character(s)
 749 init_strtoken drop f span buf len =
 750   return (L span $! (f $! lexemeToString buf (len-drop)))
 751
 752 begin :: Int -> Action
 753 begin code _span _str _len = do pushLexState code; lexToken
 754
 755 pop :: Action
 756 pop _span _buf _len = do _ <- popLexState
 757                          lexToken
 758
 759 hopefully_open_brace :: Action
 760 hopefully_open_brace span buf len
 761  = do relaxed <- extension relaxedLayout
 762       ctx <- getContext
 763       (AI l _) <- getInput
 764       let offset = srcLocCol l
 765           isOK = relaxed ||
 766                  case ctx of
 767                  Layout prev_off : _ -> prev_off < offset
 768                  _                   -> True
 769       if isOK then pop_and open_brace span buf len
 770               else failSpanMsgP span (text "Missing block")
 771
 772 pop_and :: Action -> Action
 773 pop_and act span buf len = do _ <- popLexState
 774                               act span buf len
 775
 776 {-# INLINE nextCharIs #-}
 777 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 778 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 779
 780 notFollowedBy :: Char -> AlexAccPred Int
 781 notFollowedBy char _ _ _ (AI _ buf)
 782   = nextCharIs buf (/=char)
 783
 784 notFollowedBySymbol :: AlexAccPred Int
 785 notFollowedBySymbol _ _ _ (AI _ buf)
 786   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 787
 788 -- We must reject doc comments as being ordinary comments everywhere.
 789 -- In some cases the doc comment will be selected as the lexeme due to
 790 -- maximal munch, but not always, because the nested comment rule is
 791 -- valid in all states, but the doc-comment rules are only valid in
 792 -- the non-layout states.
 793 isNormalComment :: AlexAccPred Int
 794 isNormalComment bits _ _ (AI _ buf)
 795   | haddockEnabled bits = notFollowedByDocOrPragma
 796   | otherwise           = nextCharIs buf (/='#')
 797   where
 798     notFollowedByDocOrPragma
 799        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 800
 801 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 802 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 803
 804 {-
 805 haddockDisabledAnd p bits _ _ (AI _ buf)
 806   = if haddockEnabled bits then False else (p buf)
 807 -}
 808
 809 atEOL :: AlexAccPred Int
 810 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 811
 812 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 813 ifExtension pred bits _ _ _ = pred bits
 814
 815 multiline_doc_comment :: Action
 816 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 817   where
 818     worker commentAcc input docType oneLine = case alexGetChar input of
 819       Just ('\n', input')
 820         | oneLine -> docCommentEnd input commentAcc docType buf span
 821         | otherwise -> case checkIfCommentLine input' of
 822           Just input -> worker ('\n':commentAcc) input docType False
 823           Nothing -> docCommentEnd input commentAcc docType buf span
 824       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 825       Nothing -> docCommentEnd input commentAcc docType buf span
 826
 827     checkIfCommentLine input = check (dropNonNewlineSpace input)
 828       where
 829         check input = case alexGetChar input of
 830           Just ('-', input) -> case alexGetChar input of
 831             Just ('-', input) -> case alexGetChar input of
 832               Just (c, _) | c /= '-' -> Just input
 833               _ -> Nothing
 834             _ -> Nothing
 835           _ -> Nothing
 836
 837         dropNonNewlineSpace input = case alexGetChar input of
 838           Just (c, input')
 839             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 840             | otherwise -> input
 841           Nothing -> input
 842
 843 lineCommentToken :: Action
 844 lineCommentToken span buf len = do
 845   b <- extension rawTokenStreamEnabled
 846   if b then strtoken ITlineComment span buf len else lexToken
 847
 848 {-
 849   nested comments require traversing by hand, they can't be parsed
 850   using regular expressions.
 851 -}
 852 nested_comment :: P (Located Token) -> Action
 853 nested_comment cont span _str _len = do
 854   input <- getInput
 855   go "" (1::Int) input
 856   where
 857     go commentAcc 0 input = do setInput input
 858                                b <- extension rawTokenStreamEnabled
 859                                if b
 860                                  then docCommentEnd input commentAcc ITblockComment _str span
 861                                  else cont
 862     go commentAcc n input = case alexGetChar input of
 863       Nothing -> errBrace input span
 864       Just ('-',input) -> case alexGetChar input of
 865         Nothing  -> errBrace input span
 866         Just ('\125',input) -> go commentAcc (n-1) input
 867         Just (_,_)          -> go ('-':commentAcc) n input
 868       Just ('\123',input) -> case alexGetChar input of
 869         Nothing  -> errBrace input span
 870         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 871         Just (_,_)       -> go ('\123':commentAcc) n input
 872       Just (c,input) -> go (c:commentAcc) n input
 873
 874 nested_doc_comment :: Action
 875 nested_doc_comment span buf _len = withLexedDocType (go "")
 876   where
 877     go commentAcc input docType _ = case alexGetChar input of
 878       Nothing -> errBrace input span
 879       Just ('-',input) -> case alexGetChar input of
 880         Nothing -> errBrace input span
 881         Just ('\125',input) ->
 882           docCommentEnd input commentAcc docType buf span
 883         Just (_,_) -> go ('-':commentAcc) input docType False
 884       Just ('\123', input) -> case alexGetChar input of
 885         Nothing  -> errBrace input span
 886         Just ('-',input) -> do
 887           setInput input
 888           let cont = do input <- getInput; go commentAcc input docType False
 889           nested_comment cont span buf _len
 890         Just (_,_) -> go ('\123':commentAcc) input docType False
 891       Just (c,input) -> go (c:commentAcc) input docType False
 892
 893 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 894                  -> P (Located Token)
 895 withLexedDocType lexDocComment = do
 896   input@(AI _ buf) <- getInput
 897   case prevChar buf ' ' of
 898     '|' -> lexDocComment input ITdocCommentNext False
 899     '^' -> lexDocComment input ITdocCommentPrev False
 900     '$' -> lexDocComment input ITdocCommentNamed False
 901     '*' -> lexDocSection 1 input
 902     '#' -> lexDocComment input ITdocOptionsOld False
 903     _ -> panic "withLexedDocType: Bad doc type"
 904  where
 905     lexDocSection n input = case alexGetChar input of
 906       Just ('*', input) -> lexDocSection (n+1) input
 907       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 908       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 909
 910 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 911 -- off again at the end of the pragma.
 912 rulePrag :: Action
 913 rulePrag span _buf _len = do
 914   setExts (.|. bit inRulePragBit)
 915   return (L span ITrules_prag)
 916
 917 endPrag :: Action
 918 endPrag span _buf _len = do
 919   setExts (.&. complement (bit inRulePragBit))
 920   return (L span ITclose_prag)
 921
 922 -- docCommentEnd
 923 -------------------------------------------------------------------------------
 924 -- This function is quite tricky. We can't just return a new token, we also
 925 -- need to update the state of the parser. Why? Because the token is longer
 926 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 927 -- it writes the wrong token length to the parser state. This function is
 928 -- called afterwards, so it can just update the state.
 929
 930 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 931                  SrcSpan -> P (Located Token)
 932 docCommentEnd input commentAcc docType buf span = do
 933   setInput input
 934   let (AI loc nextBuf) = input
 935       comment = reverse commentAcc
 936       span' = mkSrcSpan (srcSpanStart span) loc
 937       last_len = byteDiff buf nextBuf
 938
 939   span `seq` setLastToken span' last_len
 940   return (L span' (docType comment))
 941
 942 errBrace :: AlexInput -> SrcSpan -> P a
 943 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 944
 945 open_brace, close_brace :: Action
 946 open_brace span _str _len = do
 947   ctx <- getContext
 948   setContext (NoLayout:ctx)
 949   return (L span ITocurly)
 950 close_brace span _str _len = do
 951   popContext
 952   return (L span ITccurly)
 953
 954 qvarid, qconid :: StringBuffer -> Int -> Token
 955 qvarid buf len = ITqvarid $! splitQualName buf len False
 956 qconid buf len = ITqconid $! splitQualName buf len False
 957
 958 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 959 -- takes a StringBuffer and a length, and returns the module name
 960 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 961 -- because of hierarchical module names.
 962 splitQualName orig_buf len parens = split orig_buf orig_buf
 963   where
 964     split buf dot_buf
 965         | orig_buf `byteDiff` buf >= len  = done dot_buf
 966         | c == '.'                        = found_dot buf'
 967         | otherwise                       = split buf' dot_buf
 968       where
 969        (c,buf') = nextChar buf
 970
 971     -- careful, we might get names like M....
 972     -- so, if the character after the dot is not upper-case, this is
 973     -- the end of the qualifier part.
 974     found_dot buf -- buf points after the '.'
 975         | isUpper c    = split buf' buf
 976         | otherwise    = done buf
 977       where
 978        (c,buf') = nextChar buf
 979
 980     done dot_buf =
 981         (lexemeToFastString orig_buf (qual_size - 1),
 982          if parens -- Prelude.(+)
 983             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 984             else lexemeToFastString dot_buf (len - qual_size))
 985       where
 986         qual_size = orig_buf `byteDiff` dot_buf
 987
 988 varid :: Action
 989 varid span buf len =
 990   fs `seq`
 991   case lookupUFM reservedWordsFM fs of
 992         Just (keyword,0)    -> do
 993                 maybe_layout keyword
 994                 return (L span keyword)
 995         Just (keyword,exts) -> do
 996                 b <- extension (\i -> exts .&. i /= 0)
 997                 if b then do maybe_layout keyword
 998                              return (L span keyword)
 999                      else return (L span (ITvarid fs))
1000         _other -> return (L span (ITvarid fs))
1001   where
1002         fs = lexemeToFastString buf len
1003
1004 conid :: StringBuffer -> Int -> Token
1005 conid buf len = ITconid fs
1006   where fs = lexemeToFastString buf len
1007
1008 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
1009 qvarsym buf len = ITqvarsym $! splitQualName buf len False
1010 qconsym buf len = ITqconsym $! splitQualName buf len False
1011 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
1012 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1013
1014 varsym, consym :: Action
1015 varsym = sym ITvarsym
1016 consym = sym ITconsym
1017
1018 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1019     -> P (Located Token)
1020 sym con span buf len =
1021   case lookupUFM reservedSymsFM fs of
1022         Just (keyword,exts) -> do
1023                 b <- extension exts
1024                 if b then return (L span keyword)
1025                      else return (L span $! con fs)
1026         _other -> return (L span $! con fs)
1027   where
1028         fs = lexemeToFastString buf len
1029
1030 -- Variations on the integral numeric literal.
1031 tok_integral :: (Integer -> Token)
1032      -> (Integer -> Integer)
1033  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1034      -> Int -> Int
1035      -> (Integer, (Char->Int)) -> Action
1036 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1037   return $ L span $ itint $! transint $ parseUnsignedInteger
1038      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1039
1040 -- some conveniences for use with tok_integral
1041 tok_num :: (Integer -> Integer)
1042         -> Int -> Int
1043         -> (Integer, (Char->Int)) -> Action
1044 tok_num = tok_integral ITinteger
1045 tok_primint :: (Integer -> Integer)
1046             -> Int -> Int
1047             -> (Integer, (Char->Int)) -> Action
1048 tok_primint = tok_integral ITprimint
1049 tok_primword :: Int -> Int
1050              -> (Integer, (Char->Int)) -> Action
1051 tok_primword = tok_integral ITprimword positive
1052 positive, negative :: (Integer -> Integer)
1053 positive = id
1054 negative = negate
1055 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1056 decimal = (10,octDecDigit)
1057 octal = (8,octDecDigit)
1058 hexadecimal = (16,hexDigit)
1059
1060 -- readRational can understand negative rationals, exponents, everything.
1061 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1062 tok_float        str = ITrational   $! readRational str
1063 tok_primfloat    str = ITprimfloat  $! readRational str
1064 tok_primdouble   str = ITprimdouble $! readRational str
1065
1066 -- -----------------------------------------------------------------------------
1067 -- Layout processing
1068
1069 -- we're at the first token on a line, insert layout tokens if necessary
1070 do_bol :: Action
1071 do_bol span _str _len = do
1072         pos <- getOffside
1073         case pos of
1074             LT -> do
1075                 --trace "layout: inserting '}'" $ do
1076                 popContext
1077                 -- do NOT pop the lex state, we might have a ';' to insert
1078                 return (L span ITvccurly)
1079             EQ -> do
1080                 --trace "layout: inserting ';'" $ do
1081                 _ <- popLexState
1082                 return (L span ITsemi)
1083             GT -> do
1084                 _ <- popLexState
1085                 lexToken
1086
1087 -- certain keywords put us in the "layout" state, where we might
1088 -- add an opening curly brace.
1089 maybe_layout :: Token -> P ()
1090 maybe_layout t = do -- If the alternative layout rule is enabled then
1091                     -- we never create an implicit layout context here.
1092                     -- Layout is handled XXX instead.
1093                     -- The code for closing implicit contexts, or
1094                     -- inserting implicit semi-colons, is therefore
1095                     -- irrelevant as it only applies in an implicit
1096                     -- context.
1097                     alr <- extension alternativeLayoutRule
1098                     unless alr $ f t
1099     where f ITdo    = pushLexState layout_do
1100           f ITmdo   = pushLexState layout_do
1101           f ITof    = pushLexState layout
1102           f ITlet   = pushLexState layout
1103           f ITwhere = pushLexState layout
1104           f ITrec   = pushLexState layout
1105           f _       = return ()
1106
1107 -- Pushing a new implicit layout context.  If the indentation of the
1108 -- next token is not greater than the previous layout context, then
1109 -- Haskell 98 says that the new layout context should be empty; that is
1110 -- the lexer must generate {}.
1111 --
1112 -- We are slightly more lenient than this: when the new context is started
1113 -- by a 'do', then we allow the new context to be at the same indentation as
1114 -- the previous context.  This is what the 'strict' argument is for.
1115 --
1116 new_layout_context :: Bool -> Action
1117 new_layout_context strict span _buf _len = do
1118     _ <- popLexState
1119     (AI l _) <- getInput
1120     let offset = srcLocCol l
1121     ctx <- getContext
1122     nondecreasing <- extension nondecreasingIndentation
1123     let strict' = strict || not nondecreasing
1124     case ctx of
1125         Layout prev_off : _  |
1126            (strict'     && prev_off >= offset  ||
1127             not strict' && prev_off > offset) -> do
1128                 -- token is indented to the left of the previous context.
1129                 -- we must generate a {} sequence now.
1130                 pushLexState layout_left
1131                 return (L span ITvocurly)
1132         _ -> do
1133                 setContext (Layout offset : ctx)
1134                 return (L span ITvocurly)
1135
1136 do_layout_left :: Action
1137 do_layout_left span _buf _len = do
1138     _ <- popLexState
1139     pushLexState bol  -- we must be at the start of a line
1140     return (L span ITvccurly)
1141
1142 -- -----------------------------------------------------------------------------
1143 -- LINE pragmas
1144
1145 setLine :: Int -> Action
1146 setLine code span buf len = do
1147   let line = parseUnsignedInteger buf len 10 octDecDigit
1148   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1149         -- subtract one: the line number refers to the *following* line
1150   _ <- popLexState
1151   pushLexState code
1152   lexToken
1153
1154 setFile :: Int -> Action
1155 setFile code span buf len = do
1156   let file = lexemeToFastString (stepOn buf) (len-2)
1157   setAlrLastLoc noSrcSpan
1158   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1159   _ <- popLexState
1160   pushLexState code
1161   lexToken
1162
1163
1164 -- -----------------------------------------------------------------------------
1165 -- Options, includes and language pragmas.
1166
1167 lex_string_prag :: (String -> Token) -> Action
1168 lex_string_prag mkTok span _buf _len
1169     = do input <- getInput
1170          start <- getSrcLoc
1171          tok <- go [] input
1172          end <- getSrcLoc
1173          return (L (mkSrcSpan start end) tok)
1174     where go acc input
1175               = if isString input "#-}"
1176                    then do setInput input
1177                            return (mkTok (reverse acc))
1178                    else case alexGetChar input of
1179                           Just (c,i) -> go (c:acc) i
1180                           Nothing -> err input
1181           isString _ [] = True
1182           isString i (x:xs)
1183               = case alexGetChar i of
1184                   Just (c,i') | c == x    -> isString i' xs
1185                   _other -> False
1186           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1187
1188
1189 -- -----------------------------------------------------------------------------
1190 -- Strings & Chars
1191
1192 -- This stuff is horrible.  I hates it.
1193
1194 lex_string_tok :: Action
1195 lex_string_tok span _buf _len = do
1196   tok <- lex_string ""
1197   end <- getSrcLoc
1198   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1199
1200 lex_string :: String -> P Token
1201 lex_string s = do
1202   i <- getInput
1203   case alexGetChar' i of
1204     Nothing -> lit_error i
1205
1206     Just ('"',i)  -> do
1207         setInput i
1208         magicHash <- extension magicHashEnabled
1209         if magicHash
1210           then do
1211             i <- getInput
1212             case alexGetChar' i of
1213               Just ('#',i) -> do
1214                    setInput i
1215                    if any (> '\xFF') s
1216                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1217                     else let s' = mkZFastString (reverse s) in
1218                          return (ITprimstring s')
1219                         -- mkZFastString is a hack to avoid encoding the
1220                         -- string in UTF-8.  We just want the exact bytes.
1221               _other ->
1222                 return (ITstring (mkFastString (reverse s)))
1223           else
1224                 return (ITstring (mkFastString (reverse s)))
1225
1226     Just ('\\',i)
1227         | Just ('&',i) <- next -> do
1228                 setInput i; lex_string s
1229         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1230                            -- is_space only works for <= '\x7f' (#3751)
1231                 setInput i; lex_stringgap s
1232         where next = alexGetChar' i
1233
1234     Just (c, i1) -> do
1235         case c of
1236           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1237           c | isAny c -> do setInput i1; lex_string (c:s)
1238           _other -> lit_error i
1239
1240 lex_stringgap :: String -> P Token
1241 lex_stringgap s = do
1242   i <- getInput
1243   c <- getCharOrFail i
1244   case c of
1245     '\\' -> lex_string s
1246     c | is_space c -> lex_stringgap s
1247     _other -> lit_error i
1248
1249
1250 lex_char_tok :: Action
1251 -- Here we are basically parsing character literals, such as 'x' or '\n'
1252 -- but, when Template Haskell is on, we additionally spot
1253 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1254 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1255 -- So we have to do two characters of lookahead: when we see 'x we need to
1256 -- see if there's a trailing quote
1257 lex_char_tok span _buf _len = do        -- We've seen '
1258    i1 <- getInput       -- Look ahead to first character
1259    let loc = srcSpanStart span
1260    case alexGetChar' i1 of
1261         Nothing -> lit_error  i1
1262
1263         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1264                   th_exts <- extension thEnabled
1265                   if th_exts then do
1266                         setInput i2
1267                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1268                    else lit_error i1
1269
1270         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1271                   setInput i2
1272                   lit_ch <- lex_escape
1273                   i3 <- getInput
1274                   mc <- getCharOrFail i3 -- Trailing quote
1275                   if mc == '\'' then finish_char_tok loc lit_ch
1276                                 else lit_error i3
1277
1278         Just (c, i2@(AI _end2 _))
1279                 | not (isAny c) -> lit_error i1
1280                 | otherwise ->
1281
1282                 -- We've seen 'x, where x is a valid character
1283                 --  (i.e. not newline etc) but not a quote or backslash
1284            case alexGetChar' i2 of      -- Look ahead one more character
1285                 Just ('\'', i3) -> do   -- We've seen 'x'
1286                         setInput i3
1287                         finish_char_tok loc c
1288                 _other -> do            -- We've seen 'x not followed by quote
1289                                         -- (including the possibility of EOF)
1290                                         -- If TH is on, just parse the quote only
1291                         th_exts <- extension thEnabled
1292                         let (AI end _) = i1
1293                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1294                                    else lit_error i2
1295
1296 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1297 finish_char_tok loc ch  -- We've already seen the closing quote
1298                         -- Just need to check for trailing #
1299   = do  magicHash <- extension magicHashEnabled
1300         i@(AI end _) <- getInput
1301         if magicHash then do
1302                 case alexGetChar' i of
1303                         Just ('#',i@(AI end _)) -> do
1304                                 setInput i
1305                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1306                         _other ->
1307                                 return (L (mkSrcSpan loc end) (ITchar ch))
1308             else do
1309                    return (L (mkSrcSpan loc end) (ITchar ch))
1310
1311 isAny :: Char -> Bool
1312 isAny c | c > '\x7f' = isPrint c
1313         | otherwise  = is_any c
1314
1315 lex_escape :: P Char
1316 lex_escape = do
1317   i0 <- getInput
1318   c <- getCharOrFail i0
1319   case c of
1320         'a'   -> return '\a'
1321         'b'   -> return '\b'
1322         'f'   -> return '\f'
1323         'n'   -> return '\n'
1324         'r'   -> return '\r'
1325         't'   -> return '\t'
1326         'v'   -> return '\v'
1327         '\\'  -> return '\\'
1328         '"'   -> return '\"'
1329         '\''  -> return '\''
1330         '^'   -> do i1 <- getInput
1331                     c <- getCharOrFail i1
1332                     if c >= '@' && c <= '_'
1333                         then return (chr (ord c - ord '@'))
1334                         else lit_error i1
1335
1336         'x'   -> readNum is_hexdigit 16 hexDigit
1337         'o'   -> readNum is_octdigit  8 octDecDigit
1338         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1339
1340         c1 ->  do
1341            i <- getInput
1342            case alexGetChar' i of
1343             Nothing -> lit_error i0
1344             Just (c2,i2) ->
1345               case alexGetChar' i2 of
1346                 Nothing -> do lit_error i0
1347                 Just (c3,i3) ->
1348                    let str = [c1,c2,c3] in
1349                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1350                                      Just rest <- [stripPrefix p str] ] of
1351                           (escape_char,[]):_ -> do
1352                                 setInput i3
1353                                 return escape_char
1354                           (escape_char,_:_):_ -> do
1355                                 setInput i2
1356                                 return escape_char
1357                           [] -> lit_error i0
1358
1359 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1360 readNum is_digit base conv = do
1361   i <- getInput
1362   c <- getCharOrFail i
1363   if is_digit c
1364         then readNum2 is_digit base conv (conv c)
1365         else lit_error i
1366
1367 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1368 readNum2 is_digit base conv i = do
1369   input <- getInput
1370   read i input
1371   where read i input = do
1372           case alexGetChar' input of
1373             Just (c,input') | is_digit c -> do
1374                let i' = i*base + conv c
1375                if i' > 0x10ffff
1376                   then setInput input >> lexError "numeric escape sequence out of range"
1377                   else read i' input'
1378             _other -> do
1379               setInput input; return (chr i)
1380
1381
1382 silly_escape_chars :: [(String, Char)]
1383 silly_escape_chars = [
1384         ("NUL", '\NUL'),
1385         ("SOH", '\SOH'),
1386         ("STX", '\STX'),
1387         ("ETX", '\ETX'),
1388         ("EOT", '\EOT'),
1389         ("ENQ", '\ENQ'),
1390         ("ACK", '\ACK'),
1391         ("BEL", '\BEL'),
1392         ("BS", '\BS'),
1393         ("HT", '\HT'),
1394         ("LF", '\LF'),
1395         ("VT", '\VT'),
1396         ("FF", '\FF'),
1397         ("CR", '\CR'),
1398         ("SO", '\SO'),
1399         ("SI", '\SI'),
1400         ("DLE", '\DLE'),
1401         ("DC1", '\DC1'),
1402         ("DC2", '\DC2'),
1403         ("DC3", '\DC3'),
1404         ("DC4", '\DC4'),
1405         ("NAK", '\NAK'),
1406         ("SYN", '\SYN'),
1407         ("ETB", '\ETB'),
1408         ("CAN", '\CAN'),
1409         ("EM", '\EM'),
1410         ("SUB", '\SUB'),
1411         ("ESC", '\ESC'),
1412         ("FS", '\FS'),
1413         ("GS", '\GS'),
1414         ("RS", '\RS'),
1415         ("US", '\US'),
1416         ("SP", '\SP'),
1417         ("DEL", '\DEL')
1418         ]
1419
1420 -- before calling lit_error, ensure that the current input is pointing to
1421 -- the position of the error in the buffer.  This is so that we can report
1422 -- a correct location to the user, but also so we can detect UTF-8 decoding
1423 -- errors if they occur.
1424 lit_error :: AlexInput -> P a
1425 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1426
1427 getCharOrFail :: AlexInput -> P Char
1428 getCharOrFail i =  do
1429   case alexGetChar' i of
1430         Nothing -> lexError "unexpected end-of-file in string/character literal"
1431         Just (c,i)  -> do setInput i; return c
1432
1433 -- -----------------------------------------------------------------------------
1434 -- QuasiQuote
1435
1436 lex_quasiquote_tok :: Action
1437 lex_quasiquote_tok span buf len = do
1438   let quoter = tail (lexemeToString buf (len - 1))
1439                 -- 'tail' drops the initial '[',
1440                 -- while the -1 drops the trailing '|'
1441   quoteStart <- getSrcLoc
1442   quote <- lex_quasiquote ""
1443   end <- getSrcLoc
1444   return (L (mkSrcSpan (srcSpanStart span) end)
1445            (ITquasiQuote (mkFastString quoter,
1446                           mkFastString (reverse quote),
1447                           mkSrcSpan quoteStart end)))
1448
1449 lex_quasiquote :: String -> P String
1450 lex_quasiquote s = do
1451   i <- getInput
1452   case alexGetChar' i of
1453     Nothing -> lit_error i
1454
1455     Just ('\\',i)
1456         | Just ('|',i) <- next -> do
1457                 setInput i; lex_quasiquote ('|' : s)
1458         | Just (']',i) <- next -> do
1459                 setInput i; lex_quasiquote (']' : s)
1460         where next = alexGetChar' i
1461
1462     Just ('|',i)
1463         | Just (']',i) <- next -> do
1464                 setInput i; return s
1465         where next = alexGetChar' i
1466
1467     Just (c, i) -> do
1468          setInput i; lex_quasiquote (c : s)
1469
1470 -- -----------------------------------------------------------------------------
1471 -- Warnings
1472
1473 warn :: DynFlag -> SDoc -> Action
1474 warn option warning srcspan _buf _len = do
1475     addWarning option srcspan warning
1476     lexToken
1477
1478 warnThen :: DynFlag -> SDoc -> Action -> Action
1479 warnThen option warning action srcspan buf len = do
1480     addWarning option srcspan warning
1481     action srcspan buf len
1482
1483 -- -----------------------------------------------------------------------------
1484 -- The Parse Monad
1485
1486 data LayoutContext
1487   = NoLayout
1488   | Layout !Int
1489   deriving Show
1490
1491 data ParseResult a
1492   = POk PState a
1493   | PFailed
1494         SrcSpan         -- The start and end of the text span related to
1495                         -- the error.  Might be used in environments which can
1496                         -- show this span, e.g. by highlighting it.
1497         Message         -- The error message
1498
1499 data PState = PState {
1500         buffer     :: StringBuffer,
1501         dflags     :: DynFlags,
1502         messages   :: Messages,
1503         last_loc   :: SrcSpan,  -- pos of previous token
1504         last_len   :: !Int,     -- len of previous token
1505         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1506         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1507         context    :: [LayoutContext],
1508         lex_state  :: [Int],
1509         -- Used in the alternative layout rule:
1510         -- These tokens are the next ones to be sent out. They are
1511         -- just blindly emitted, without the rule looking at them again:
1512         alr_pending_implicit_tokens :: [Located Token],
1513         -- This is the next token to be considered or, if it is Nothing,
1514         -- we need to get the next token from the input stream:
1515         alr_next_token :: Maybe (Located Token),
1516         -- This is what we consider to be the locatino of the last token
1517         -- emitted:
1518         alr_last_loc :: SrcSpan,
1519         -- The stack of layout contexts:
1520         alr_context :: [ALRContext],
1521         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1522         -- us what sort of layout the '{' will open:
1523         alr_expecting_ocurly :: Maybe ALRLayout,
1524         -- Have we just had the '}' for a let block? If so, than an 'in'
1525         -- token doesn't need to close anything:
1526         alr_justClosedExplicitLetBlock :: Bool
1527      }
1528         -- last_loc and last_len are used when generating error messages,
1529         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1530         -- current token to happyError, we could at least get rid of last_len.
1531         -- Getting rid of last_loc would require finding another way to
1532         -- implement pushCurrentContext (which is only called from one place).
1533
1534 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1535                               Bool{- is it a 'let' block? -}
1536                 | ALRLayout ALRLayout Int
1537 data ALRLayout = ALRLayoutLet
1538                | ALRLayoutWhere
1539                | ALRLayoutOf
1540                | ALRLayoutDo
1541
1542 newtype P a = P { unP :: PState -> ParseResult a }
1543
1544 instance Monad P where
1545   return = returnP
1546   (>>=) = thenP
1547   fail = failP
1548
1549 returnP :: a -> P a
1550 returnP a = a `seq` (P $ \s -> POk s a)
1551
1552 thenP :: P a -> (a -> P b) -> P b
1553 (P m) `thenP` k = P $ \ s ->
1554         case m s of
1555                 POk s1 a         -> (unP (k a)) s1
1556                 PFailed span err -> PFailed span err
1557
1558 failP :: String -> P a
1559 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1560
1561 failMsgP :: String -> P a
1562 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1563
1564 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1565 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1566
1567 failSpanMsgP :: SrcSpan -> SDoc -> P a
1568 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1569
1570 getPState :: P PState
1571 getPState = P $ \s -> POk s s
1572
1573 getDynFlags :: P DynFlags
1574 getDynFlags = P $ \s -> POk s (dflags s)
1575
1576 withThisPackage :: (PackageId -> a) -> P a
1577 withThisPackage f
1578  = do   pkg     <- liftM thisPackage getDynFlags
1579         return  $ f pkg
1580
1581 extension :: (Int -> Bool) -> P Bool
1582 extension p = P $ \s -> POk s (p $! extsBitmap s)
1583
1584 getExts :: P Int
1585 getExts = P $ \s -> POk s (extsBitmap s)
1586
1587 setExts :: (Int -> Int) -> P ()
1588 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1589
1590 setSrcLoc :: SrcLoc -> P ()
1591 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1592
1593 getSrcLoc :: P SrcLoc
1594 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1595
1596 setLastToken :: SrcSpan -> Int -> P ()
1597 setLastToken loc len = P $ \s -> POk s {
1598   last_loc=loc,
1599   last_len=len
1600   } ()
1601
1602 data AlexInput = AI SrcLoc StringBuffer
1603
1604 alexInputPrevChar :: AlexInput -> Char
1605 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1606
1607 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1608 alexGetChar (AI loc s)
1609   | atEnd s   = Nothing
1610   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1611                 --trace (show (ord c)) $
1612                 Just (adj_c, (AI loc' s'))
1613   where (c,s') = nextChar s
1614         loc'   = advanceSrcLoc loc c
1615
1616         non_graphic     = '\x0'
1617         upper           = '\x1'
1618         lower           = '\x2'
1619         digit           = '\x3'
1620         symbol          = '\x4'
1621         space           = '\x5'
1622         other_graphic   = '\x6'
1623
1624         adj_c
1625           | c <= '\x06' = non_graphic
1626           | c <= '\x7f' = c
1627           -- Alex doesn't handle Unicode, so when Unicode
1628           -- character is encountered we output these values
1629           -- with the actual character value hidden in the state.
1630           | otherwise =
1631                 case generalCategory c of
1632                   UppercaseLetter       -> upper
1633                   LowercaseLetter       -> lower
1634                   TitlecaseLetter       -> upper
1635                   ModifierLetter        -> other_graphic
1636                   OtherLetter           -> lower -- see #1103
1637                   NonSpacingMark        -> other_graphic
1638                   SpacingCombiningMark  -> other_graphic
1639                   EnclosingMark         -> other_graphic
1640                   DecimalNumber         -> digit
1641                   LetterNumber          -> other_graphic
1642                   OtherNumber           -> digit -- see #4373
1643                   ConnectorPunctuation  -> symbol
1644                   DashPunctuation       -> symbol
1645                   OpenPunctuation       -> other_graphic
1646                   ClosePunctuation      -> other_graphic
1647                   InitialQuote          -> other_graphic
1648                   FinalQuote            -> other_graphic
1649                   OtherPunctuation      -> symbol
1650                   MathSymbol            -> symbol
1651                   CurrencySymbol        -> symbol
1652                   ModifierSymbol        -> symbol
1653                   OtherSymbol           -> symbol
1654                   Space                 -> space
1655                   _other                -> non_graphic
1656
1657 -- This version does not squash unicode characters, it is used when
1658 -- lexing strings.
1659 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1660 alexGetChar' (AI loc s)
1661   | atEnd s   = Nothing
1662   | otherwise = c `seq` loc' `seq` s' `seq`
1663                 --trace (show (ord c)) $
1664                 Just (c, (AI loc' s'))
1665   where (c,s') = nextChar s
1666         loc'   = advanceSrcLoc loc c
1667
1668 getInput :: P AlexInput
1669 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1670
1671 setInput :: AlexInput -> P ()
1672 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1673
1674 nextIsEOF :: P Bool
1675 nextIsEOF = do
1676   AI _ s <- getInput
1677   return $ atEnd s
1678
1679 pushLexState :: Int -> P ()
1680 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1681
1682 popLexState :: P Int
1683 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1684
1685 getLexState :: P Int
1686 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1687
1688 popNextToken :: P (Maybe (Located Token))
1689 popNextToken
1690     = P $ \s@PState{ alr_next_token = m } ->
1691               POk (s {alr_next_token = Nothing}) m
1692
1693 activeContext :: P Bool
1694 activeContext = do
1695   ctxt <- getALRContext
1696   expc <- getAlrExpectingOCurly
1697   impt <- implicitTokenPending
1698   case (ctxt,expc) of
1699     ([],Nothing) -> return impt
1700     _other       -> return True
1701
1702 setAlrLastLoc :: SrcSpan -> P ()
1703 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1704
1705 getAlrLastLoc :: P SrcSpan
1706 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1707
1708 getALRContext :: P [ALRContext]
1709 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1710
1711 setALRContext :: [ALRContext] -> P ()
1712 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1713
1714 getJustClosedExplicitLetBlock :: P Bool
1715 getJustClosedExplicitLetBlock
1716  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1717
1718 setJustClosedExplicitLetBlock :: Bool -> P ()
1719 setJustClosedExplicitLetBlock b
1720  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1721
1722 setNextToken :: Located Token -> P ()
1723 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1724
1725 implicitTokenPending :: P Bool
1726 implicitTokenPending
1727     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1728               case ts of
1729               [] -> POk s False
1730               _  -> POk s True
1731
1732 popPendingImplicitToken :: P (Maybe (Located Token))
1733 popPendingImplicitToken
1734     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1735               case ts of
1736               [] -> POk s Nothing
1737               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1738
1739 setPendingImplicitTokens :: [Located Token] -> P ()
1740 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1741
1742 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1743 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1744
1745 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1746 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1747
1748 -- for reasons of efficiency, flags indicating language extensions (eg,
1749 -- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
1750 -- integer
1751
1752 genericsBit :: Int
1753 genericsBit = 0 -- {| and |}
1754 ffiBit :: Int
1755 ffiBit     = 1
1756 parrBit :: Int
1757 parrBit    = 2
1758 arrowsBit :: Int
1759 arrowsBit  = 4
1760 thBit :: Int
1761 thBit      = 5
1762 ipBit :: Int
1763 ipBit      = 6
1764 explicitForallBit :: Int
1765 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1766 bangPatBit :: Int
1767 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1768                 -- (doesn't affect the lexer)
1769 tyFamBit :: Int
1770 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1771 haddockBit :: Int
1772 haddockBit = 10 -- Lex and parse Haddock comments
1773 magicHashBit :: Int
1774 magicHashBit = 11 -- "#" in both functions and operators
1775 kindSigsBit :: Int
1776 kindSigsBit = 12 -- Kind signatures on type variables
1777 recursiveDoBit :: Int
1778 recursiveDoBit = 13 -- mdo
1779 unicodeSyntaxBit :: Int
1780 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1781 unboxedTuplesBit :: Int
1782 unboxedTuplesBit = 15 -- (# and #)
1783 datatypeContextsBit :: Int
1784 datatypeContextsBit = 16
1785 transformComprehensionsBit :: Int
1786 transformComprehensionsBit = 17
1787 qqBit :: Int
1788 qqBit      = 18 -- enable quasiquoting
1789 inRulePragBit :: Int
1790 inRulePragBit = 19
1791 rawTokenStreamBit :: Int
1792 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1793 recBit :: Int
1794 recBit = 22 -- rec
1795 alternativeLayoutRuleBit :: Int
1796 alternativeLayoutRuleBit = 23
1797 relaxedLayoutBit :: Int
1798 relaxedLayoutBit = 24
1799 nondecreasingIndentationBit :: Int
1800 nondecreasingIndentationBit = 25
1801
1802 always :: Int -> Bool
1803 always           _     = True
1804 genericsEnabled :: Int -> Bool
1805 genericsEnabled  flags = testBit flags genericsBit
1806 parrEnabled :: Int -> Bool
1807 parrEnabled      flags = testBit flags parrBit
1808 arrowsEnabled :: Int -> Bool
1809 arrowsEnabled    flags = testBit flags arrowsBit
1810 thEnabled :: Int -> Bool
1811 thEnabled        flags = testBit flags thBit
1812 ipEnabled :: Int -> Bool
1813 ipEnabled        flags = testBit flags ipBit
1814 explicitForallEnabled :: Int -> Bool
1815 explicitForallEnabled flags = testBit flags explicitForallBit
1816 bangPatEnabled :: Int -> Bool
1817 bangPatEnabled   flags = testBit flags bangPatBit
1818 -- tyFamEnabled :: Int -> Bool
1819 -- tyFamEnabled     flags = testBit flags tyFamBit
1820 haddockEnabled :: Int -> Bool
1821 haddockEnabled   flags = testBit flags haddockBit
1822 magicHashEnabled :: Int -> Bool
1823 magicHashEnabled flags = testBit flags magicHashBit
1824 -- kindSigsEnabled :: Int -> Bool
1825 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1826 unicodeSyntaxEnabled :: Int -> Bool
1827 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1828 unboxedTuplesEnabled :: Int -> Bool
1829 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1830 datatypeContextsEnabled :: Int -> Bool
1831 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1832 qqEnabled :: Int -> Bool
1833 qqEnabled        flags = testBit flags qqBit
1834 -- inRulePrag :: Int -> Bool
1835 -- inRulePrag       flags = testBit flags inRulePragBit
1836 rawTokenStreamEnabled :: Int -> Bool
1837 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1838 alternativeLayoutRule :: Int -> Bool
1839 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1840 relaxedLayout :: Int -> Bool
1841 relaxedLayout flags = testBit flags relaxedLayoutBit
1842 nondecreasingIndentation :: Int -> Bool
1843 nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
1844
1845 -- PState for parsing options pragmas
1846 --
1847 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1848 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1849                                  lex_state = [bol, option_prags, 0]
1850                              }
1851
1852 -- create a parse state
1853 --
1854 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1855 mkPState flags buf loc =
1856   PState {
1857       buffer          = buf,
1858       dflags        = flags,
1859       messages      = emptyMessages,
1860       last_loc      = mkSrcSpan loc loc,
1861       last_len      = 0,
1862       loc           = loc,
1863       extsBitmap    = fromIntegral bitmap,
1864       context       = [],
1865       lex_state     = [bol, 0],
1866       alr_pending_implicit_tokens = [],
1867       alr_next_token = Nothing,
1868       alr_last_loc = noSrcSpan,
1869       alr_context = [],
1870       alr_expecting_ocurly = Nothing,
1871       alr_justClosedExplicitLetBlock = False
1872     }
1873     where
1874       bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
1875                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1876                .|. parrBit           `setBitIf` xopt Opt_ParallelArrays  flags
1877                .|. arrowsBit         `setBitIf` xopt Opt_Arrows          flags
1878                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1879                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes     flags
1880                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams  flags
1881                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll  flags
1882                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
1883                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
1884                .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
1885                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
1886                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
1887                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
1888                .|. recBit            `setBitIf` xopt Opt_DoRec  flags
1889                .|. recBit            `setBitIf` xopt Opt_Arrows flags
1890                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
1891                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
1892                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1893                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1894                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1895                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1896                .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
1897                .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
1898       --
1899       setBitIf :: Int -> Bool -> Int
1900       b `setBitIf` cond | cond      = bit b
1901                         | otherwise = 0
1902
1903 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1904 addWarning option srcspan warning
1905  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1906        let warning' = mkWarnMsg srcspan alwaysQualify warning
1907            ws' = if dopt option d then ws `snocBag` warning' else ws
1908        in POk s{messages=(ws', es)} ()
1909
1910 getMessages :: PState -> Messages
1911 getMessages PState{messages=ms} = ms
1912
1913 getContext :: P [LayoutContext]
1914 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1915
1916 setContext :: [LayoutContext] -> P ()
1917 setContext ctx = P $ \s -> POk s{context=ctx} ()
1918
1919 popContext :: P ()
1920 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1921                               last_len = len, last_loc = last_loc }) ->
1922   case ctx of
1923         (_:tl) -> POk s{ context = tl } ()
1924         []     -> PFailed last_loc (srcParseErr buf len)
1925
1926 -- Push a new layout context at the indentation of the last token read.
1927 -- This is only used at the outer level of a module when the 'module'
1928 -- keyword is missing.
1929 pushCurrentContext :: P ()
1930 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1931     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1932
1933 getOffside :: P Ordering
1934 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1935                 let offs = srcSpanStartCol loc in
1936                 let ord = case stk of
1937                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1938                                         compare offs n
1939                         _            -> GT
1940                 in POk s ord
1941
1942 -- ---------------------------------------------------------------------------
1943 -- Construct a parse error
1944
1945 srcParseErr
1946   :: StringBuffer       -- current buffer (placed just after the last token)
1947   -> Int                -- length of the previous token
1948   -> Message
1949 srcParseErr buf len
1950   = hcat [ if null token
1951              then ptext (sLit "parse error (possibly incorrect indentation)")
1952              else hcat [ptext (sLit "parse error on input "),
1953                         char '`', text token, char '\'']
1954     ]
1955   where token = lexemeToString (offsetBytes (-len) buf) len
1956
1957 -- Report a parse failure, giving the span of the previous token as
1958 -- the location of the error.  This is the entry point for errors
1959 -- detected during parsing.
1960 srcParseFail :: P a
1961 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1962                             last_loc = last_loc } ->
1963     PFailed last_loc (srcParseErr buf len)
1964
1965 -- A lexical error is reported at a particular position in the source file,
1966 -- not over a token range.
1967 lexError :: String -> P a
1968 lexError str = do
1969   loc <- getSrcLoc
1970   (AI end buf) <- getInput
1971   reportLexError loc end buf str
1972
1973 -- -----------------------------------------------------------------------------
1974 -- This is the top-level function: called from the parser each time a
1975 -- new token is to be read from the input.
1976
1977 lexer :: (Located Token -> P a) -> P a
1978 lexer cont = do
1979   alr <- extension alternativeLayoutRule
1980   let lexTokenFun = if alr then lexTokenAlr else lexToken
1981   tok@(L _span _tok__) <- lexTokenFun
1982   --trace ("token: " ++ show _tok__) $ do
1983   cont tok
1984
1985 lexTokenAlr :: P (Located Token)
1986 lexTokenAlr = do mPending <- popPendingImplicitToken
1987                  t <- case mPending of
1988                       Nothing ->
1989                           do mNext <- popNextToken
1990                              t <- case mNext of
1991                                   Nothing -> lexToken
1992                                   Just next -> return next
1993                              alternativeLayoutRuleToken t
1994                       Just t ->
1995                           return t
1996                  setAlrLastLoc (getLoc t)
1997                  case unLoc t of
1998                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
1999                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
2000                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
2001                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
2002                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
2003                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
2004                      _       -> return ()
2005                  return t
2006
2007 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
2008 alternativeLayoutRuleToken t
2009     = do context <- getALRContext
2010          lastLoc <- getAlrLastLoc
2011          mExpectingOCurly <- getAlrExpectingOCurly
2012          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
2013          setJustClosedExplicitLetBlock False
2014          dflags <- getDynFlags
2015          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
2016              thisLoc = getLoc t
2017              thisCol = srcSpanStartCol thisLoc
2018              newLine = (lastLoc == noSrcSpan)
2019                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
2020          case (unLoc t, context, mExpectingOCurly) of
2021              -- This case handles a GHC extension to the original H98
2022              -- layout rule...
2023              (ITocurly, _, Just alrLayout) ->
2024                  do setAlrExpectingOCurly Nothing
2025                     let isLet = case alrLayout of
2026                                 ALRLayoutLet -> True
2027                                 _ -> False
2028                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
2029                     return t
2030              -- ...and makes this case unnecessary
2031              {-
2032              -- I think our implicit open-curly handling is slightly
2033              -- different to John's, in how it interacts with newlines
2034              -- and "in"
2035              (ITocurly, _, Just _) ->
2036                  do setAlrExpectingOCurly Nothing
2037                     setNextToken t
2038                     lexTokenAlr
2039              -}
2040              (_, ALRLayout _ col : ls, Just expectingOCurly)
2041               | (thisCol > col) ||
2042                 (thisCol == col &&
2043                  isNonDecreasingIntentation expectingOCurly) ->
2044                  do setAlrExpectingOCurly Nothing
2045                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2046                     setNextToken t
2047                     return (L thisLoc ITocurly)
2048               | otherwise ->
2049                  do setAlrExpectingOCurly Nothing
2050                     setPendingImplicitTokens [L lastLoc ITccurly]
2051                     setNextToken t
2052                     return (L lastLoc ITocurly)
2053              (_, _, Just expectingOCurly) ->
2054                  do setAlrExpectingOCurly Nothing
2055                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2056                     setNextToken t
2057                     return (L thisLoc ITocurly)
2058              -- We do the [] cases earlier than in the spec, as we
2059              -- have an actual EOF token
2060              (ITeof, ALRLayout _ _ : ls, _) ->
2061                  do setALRContext ls
2062                     setNextToken t
2063                     return (L thisLoc ITccurly)
2064              (ITeof, _, _) ->
2065                  return t
2066              -- the other ITeof case omitted; general case below covers it
2067              (ITin, _, _)
2068               | justClosedExplicitLetBlock ->
2069                  return t
2070              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2071               | newLine ->
2072                  do setPendingImplicitTokens [t]
2073                     setALRContext ls
2074                     return (L thisLoc ITccurly)
2075              -- This next case is to handle a transitional issue:
2076              (ITwhere, ALRLayout _ col : ls, _)
2077               | newLine && thisCol == col && transitional ->
2078                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2079                                thisLoc
2080                                (transitionalAlternativeLayoutWarning
2081                                     "`where' clause at the same depth as implicit layout block")
2082                     setALRContext ls
2083                     setNextToken t
2084                     -- Note that we use lastLoc, as we may need to close
2085                     -- more layouts, or give a semicolon
2086                     return (L lastLoc ITccurly)
2087              -- This next case is to handle a transitional issue:
2088              (ITvbar, ALRLayout _ col : ls, _)
2089               | newLine && thisCol == col && transitional ->
2090                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2091                                thisLoc
2092                                (transitionalAlternativeLayoutWarning
2093                                     "`|' at the same depth as implicit layout block")
2094                     setALRContext ls
2095                     setNextToken t
2096                     -- Note that we use lastLoc, as we may need to close
2097                     -- more layouts, or give a semicolon
2098                     return (L lastLoc ITccurly)
2099              (_, ALRLayout _ col : ls, _)
2100               | newLine && thisCol == col ->
2101                  do setNextToken t
2102                     return (L thisLoc ITsemi)
2103               | newLine && thisCol < col ->
2104                  do setALRContext ls
2105                     setNextToken t
2106                     -- Note that we use lastLoc, as we may need to close
2107                     -- more layouts, or give a semicolon
2108                     return (L lastLoc ITccurly)
2109              -- We need to handle close before open, as 'then' is both
2110              -- an open and a close
2111              (u, _, _)
2112               | isALRclose u ->
2113                  case context of
2114                  ALRLayout _ _ : ls ->
2115                      do setALRContext ls
2116                         setNextToken t
2117                         return (L thisLoc ITccurly)
2118                  ALRNoLayout _ isLet : ls ->
2119                      do let ls' = if isALRopen u
2120                                      then ALRNoLayout (containsCommas u) False : ls
2121                                      else ls
2122                         setALRContext ls'
2123                         when isLet $ setJustClosedExplicitLetBlock True
2124                         return t
2125                  [] ->
2126                      do let ls = if isALRopen u
2127                                     then [ALRNoLayout (containsCommas u) False]
2128                                     else ls
2129                         setALRContext ls
2130                         -- XXX This is an error in John's code, but
2131                         -- it looks reachable to me at first glance
2132                         return t
2133              (u, _, _)
2134               | isALRopen u ->
2135                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2136                     return t
2137              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2138                  do setALRContext ls
2139                     setPendingImplicitTokens [t]
2140                     return (L thisLoc ITccurly)
2141              (ITin, ALRLayout _ _ : ls, _) ->
2142                  do setALRContext ls
2143                     setNextToken t
2144                     return (L thisLoc ITccurly)
2145              -- the other ITin case omitted; general case below covers it
2146              (ITcomma, ALRLayout _ _ : ls, _)
2147               | topNoLayoutContainsCommas ls ->
2148                  do setALRContext ls
2149                     setNextToken t
2150                     return (L thisLoc ITccurly)
2151              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2152                  do setALRContext ls
2153                     setPendingImplicitTokens [t]
2154                     return (L thisLoc ITccurly)
2155              -- the other ITwhere case omitted; general case below covers it
2156              (_, _, _) -> return t
2157
2158 transitionalAlternativeLayoutWarning :: String -> SDoc
2159 transitionalAlternativeLayoutWarning msg
2160     = text "transitional layout will not be accepted in the future:"
2161    $$ text msg
2162
2163 isALRopen :: Token -> Bool
2164 isALRopen ITcase        = True
2165 isALRopen ITif          = True
2166 isALRopen ITthen        = True
2167 isALRopen IToparen      = True
2168 isALRopen ITobrack      = True
2169 isALRopen ITocurly      = True
2170 -- GHC Extensions:
2171 isALRopen IToubxparen   = True
2172 isALRopen ITparenEscape = True
2173 isALRopen _             = False
2174
2175 isALRclose :: Token -> Bool
2176 isALRclose ITof     = True
2177 isALRclose ITthen   = True
2178 isALRclose ITelse   = True
2179 isALRclose ITcparen = True
2180 isALRclose ITcbrack = True
2181 isALRclose ITccurly = True
2182 -- GHC Extensions:
2183 isALRclose ITcubxparen = True
2184 isALRclose _        = False
2185
2186 isNonDecreasingIntentation :: ALRLayout -> Bool
2187 isNonDecreasingIntentation ALRLayoutDo = True
2188 isNonDecreasingIntentation _           = False
2189
2190 containsCommas :: Token -> Bool
2191 containsCommas IToparen = True
2192 containsCommas ITobrack = True
2193 -- John doesn't have {} as containing commas, but records contain them,
2194 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2195 -- (defaultInstallDirs).
2196 containsCommas ITocurly = True
2197 -- GHC Extensions:
2198 containsCommas IToubxparen = True
2199 containsCommas _        = False
2200
2201 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2202 topNoLayoutContainsCommas [] = False
2203 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2204 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2205
2206 lexToken :: P (Located Token)
2207 lexToken = do
2208   inp@(AI loc1 buf) <- getInput
2209   sc <- getLexState
2210   exts <- getExts
2211   case alexScanUser exts inp sc of
2212     AlexEOF -> do
2213         let span = mkSrcSpan loc1 loc1
2214         setLastToken span 0
2215         return (L span ITeof)
2216     AlexError (AI loc2 buf) ->
2217         reportLexError loc1 loc2 buf "lexical error"
2218     AlexSkip inp2 _ -> do
2219         setInput inp2
2220         lexToken
2221     AlexToken inp2@(AI end buf2) _ t -> do
2222         setInput inp2
2223         let span = mkSrcSpan loc1 end
2224         let bytes = byteDiff buf buf2
2225         span `seq` setLastToken span bytes
2226         t span buf bytes
2227
2228 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2229 reportLexError loc1 loc2 buf str
2230   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2231   | otherwise =
2232   let
2233         c = fst (nextChar buf)
2234   in
2235   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2236     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2237     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2238
2239 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2240 lexTokenStream buf loc dflags = unP go initState
2241     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2242           initState = mkPState dflags' buf loc
2243           go = do
2244             ltok <- lexer return
2245             case ltok of
2246               L _ ITeof -> return []
2247               _ -> liftM (ltok:) go
2248
2249 linePrags = Map.singleton "line" (begin line_prag2)
2250
2251 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2252                                  ("options_ghc", lex_string_prag IToptions_prag),
2253                                  ("options_haddock", lex_string_prag ITdocOptions),
2254                                  ("language", token ITlanguage_prag),
2255                                  ("include", lex_string_prag ITinclude_prag)])
2256
2257 ignoredPrags = Map.fromList (map ignored pragmas)
2258                where ignored opt = (opt, nested_comment lexToken)
2259                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2260                      options_pragmas = map ("options_" ++) impls
2261                      -- CFILES is a hugs-only thing.
2262                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2263
2264 oneWordPrags = Map.fromList([("rules", rulePrag),
2265                            ("inline", token (ITinline_prag Inline FunLike)),
2266                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2267                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2268                                           -- Spelling variant
2269                            ("notinline", token (ITinline_prag NoInline FunLike)),
2270                            ("specialize", token ITspec_prag),
2271                            ("source", token ITsource_prag),
2272                            ("warning", token ITwarning_prag),
2273                            ("deprecated", token ITdeprecated_prag),
2274                            ("scc", token ITscc_prag),
2275                            ("generated", token ITgenerated_prag),
2276                            ("core", token ITcore_prag),
2277                            ("unpack", token ITunpack_prag),
2278                            ("ann", token ITann_prag)])
2279
2280 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2281                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2282                              ("specialize inline", token (ITspec_inline_prag True)),
2283                              ("specialize notinline", token (ITspec_inline_prag False))])
2284
2285
2286 dispatch_pragmas :: Map String Action -> Action
2287 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2288                                        Just found -> found span buf len
2289                                        Nothing -> lexError "unknown pragma"
2290
2291 known_pragma :: Map String Action -> AlexAccPred Int
2292 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2293                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2294
2295 clean_pragma :: String -> String
2296 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2297                     where unprefix prag' = case stripPrefix "{-#" prag' of
2298                                              Just rest -> rest
2299                                              Nothing -> prag'
2300                           canonical prag' = case prag' of
2301                                               "noinline" -> "notinline"
2302                                               "specialise" -> "specialize"
2303                                               "constructorlike" -> "conlike"
2304                                               _ -> prag'
2305                           canon_ws s = unwords (map canonical (words s))
2306 }