compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end RealSrcLoc
  11 -- attached to it.
  12 --
  13 -----------------------------------------------------------------------------
  14
  15 --   ToDo / known bugs:
  16 --    - parsing integers is a bit slow
  17 --    - readRational is a bit slow
  18 --
  19 --   Known bugs, that were also in the previous version:
  20 --    - M... should be 3 tokens, not 1.
  21 --    - pragma-end should be only valid in a pragma
  22
  23 --   qualified operator NOTES.
  24 --
  25 --   - If M.(+) is a single lexeme, then..
  26 --     - Probably (+) should be a single lexeme too, for consistency.
  27 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  28 --     - But we have to rule out reserved operators, otherwise (..) becomes
  29 --       a different lexeme.
  30 --     - Should we therefore also rule out reserved operators in the qualified
  31 --       form?  This is quite difficult to achieve.  We don't do it for
  32 --       qualified varids.
  33
  34 {
  35 -- XXX The above flags turn off warnings in the generated code:
  36 {-# LANGUAGE BangPatterns #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  38 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  39 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  40 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  41 -- But alex still generates some code that causes the "lazy unlifted bindings"
  42 -- warning, and old compilers don't know about it so we can't easily turn
  43 -- it off, so for now we use the sledge hammer:
  44 {-# OPTIONS_GHC -w #-}
  45
  46 {-# OPTIONS_GHC -funbox-strict-fields #-}
  47
  48 module Lexer (
  49    Token(..), lexer, pragState, mkPState, PState(..),
  50    P(..), ParseResult(..), getSrcLoc,
  51    getPState, getDynFlags, withThisPackage,
  52    failLocMsgP, failSpanMsgP, srcParseFail,
  53    getMessages,
  54    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  55    activeContext, nextIsEOF,
  56    getLexState, popLexState, pushLexState,
  57    extension, bangPatEnabled, datatypeContextsEnabled,
  58    addWarning,
  59    lexTokenStream
  60   ) where
  61
  62 import Bag
  63 import ErrUtils
  64 import Outputable
  65 import StringBuffer
  66 import FastString
  67 import SrcLoc
  68 import UniqFM
  69 import DynFlags
  70 import Module
  71 import Ctype
  72 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..), FractionalLit(..) )
  73 import Util             ( readRational )
  74
  75 import Control.Monad
  76 import Data.Bits
  77 import Data.Char
  78 import Data.List
  79 import Data.Maybe
  80 import Data.Map (Map)
  81 import qualified Data.Map as Map
  82 import Data.Ratio
  83 }
  84
  85 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  86 $whitechar   = [\ \n\r\f\v $unispace]
  87 $white_no_nl = $whitechar # \n
  88 $tab         = \t
  89
  90 $ascdigit  = 0-9
  91 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  92 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  93 $digit     = [$ascdigit $unidigit]
  94
  95 $special   = [\(\)\,\;\[\]\`\{\}]
  96 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  97 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  98 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  99
 100 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
 101 $asclarge  = [A-Z]
 102 $large     = [$asclarge $unilarge]
 103
 104 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 105 $ascsmall  = [a-z]
 106 $small     = [$ascsmall $unismall \_]
 107
 108 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 109 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 110
 111 $octit     = 0-7
 112 $hexit     = [$decdigit A-F a-f]
 113 $symchar   = [$symbol \:]
 114 $nl        = [\n\r]
 115 $idchar    = [$small $large $digit \']
 116
 117 $pragmachar = [$small $large $digit]
 118
 119 $docsym    = [\| \^ \* \$]
 120
 121 @varid     = $small $idchar*
 122 @conid     = $large $idchar*
 123
 124 @varsym    = $symbol $symchar*
 125 @consym    = \: $symchar*
 126
 127 @decimal     = $decdigit+
 128 @octal       = $octit+
 129 @hexadecimal = $hexit+
 130 @exponent    = [eE] [\-\+]? @decimal
 131
 132 -- we support the hierarchical module name extension:
 133 @qual = (@conid \.)+
 134
 135 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 136
 137 -- normal signed numerical literals can only be explicitly negative,
 138 -- not explicitly positive (contrast @exponent)
 139 @negative = \-
 140 @signed = @negative ?
 141
 142 haskell :-
 143
 144 -- everywhere: skip whitespace and comments
 145 $white_no_nl+                           ;
 146 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 147
 148 -- Everywhere: deal with nested comments.  We explicitly rule out
 149 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 150 -- (this can happen even though pragmas will normally take precedence due to
 151 -- longest-match, because pragmas aren't valid in every state, but comments
 152 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 153 -- set.
 154
 155 "{-" / { isNormalComment } { nested_comment lexToken }
 156
 157 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 158 -- more dashes followed by a symbol should be parsed as a varsym, so we
 159 -- have to exclude those.
 160
 161 -- Since Haddock comments aren't valid in every state, we need to rule them
 162 -- out here.
 163
 164 -- The following two rules match comments that begin with two dashes, but
 165 -- continue with a different character. The rules test that this character
 166 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 167 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 168 -- have a Haddock comment). The rules then munch the rest of the line.
 169
 170 "-- " ~[$docsym \#] .* { lineCommentToken }
 171 "--" [^$symbol : \ ] .* { lineCommentToken }
 172
 173 -- Next, match Haddock comments if no -haddock flag
 174
 175 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 176
 177 -- Now, when we've matched comments that begin with 2 dashes and continue
 178 -- with a different character, we need to match comments that begin with three
 179 -- or more dashes (which clearly can't be Haddock comments). We only need to
 180 -- make sure that the first non-dash character isn't a symbol, and munch the
 181 -- rest of the line.
 182
 183 "---"\-* [^$symbol :] .* { lineCommentToken }
 184
 185 -- Since the previous rules all match dashes followed by at least one
 186 -- character, we also need to match a whole line filled with just dashes.
 187
 188 "--"\-* / { atEOL } { lineCommentToken }
 189
 190 -- We need this rule since none of the other single line comment rules
 191 -- actually match this case.
 192
 193 "-- " / { atEOL } { lineCommentToken }
 194
 195 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 196 -- blank lines) until we find a non-whitespace character, then do layout
 197 -- processing.
 198 --
 199 -- One slight wibble here: what if the line begins with {-#? In
 200 -- theory, we have to lex the pragma to see if it's one we recognise,
 201 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 202 -- as a nested comment.  We don't bother with this: if the line begins
 203 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 204 <bol> {
 205   \n                                    ;
 206   ^\# (line)?                           { begin line_prag1 }
 207   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 208   ^\# \! .* \n                          ; -- #!, for scripts
 209   ()                                    { do_bol }
 210 }
 211
 212 -- after a layout keyword (let, where, do, of), we begin a new layout
 213 -- context if the curly brace is missing.
 214 -- Careful! This stuff is quite delicate.
 215 <layout, layout_do> {
 216   \{ / { notFollowedBy '-' }            { hopefully_open_brace }
 217         -- we might encounter {-# here, but {- has been handled already
 218   \n                                    ;
 219   ^\# (line)?                           { begin line_prag1 }
 220 }
 221
 222 -- do is treated in a subtly different way, see new_layout_context
 223 <layout>    ()                          { new_layout_context True }
 224 <layout_do> ()                          { new_layout_context False }
 225
 226 -- after a new layout context which was found to be to the left of the
 227 -- previous context, we have generated a '{' token, and we now need to
 228 -- generate a matching '}' token.
 229 <layout_left>  ()                       { do_layout_left }
 230
 231 <0,option_prags> \n                             { begin bol }
 232
 233 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 234                                 { dispatch_pragmas linePrags }
 235
 236 -- single-line line pragmas, of the form
 237 --    # <line> "<file>" <extra-stuff> \n
 238 <line_prag1> $decdigit+                 { setLine line_prag1a }
 239 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 240 <line_prag1b> .*                        { pop }
 241
 242 -- Haskell-style line pragmas, of the form
 243 --    {-# LINE <line> "<file>" #-}
 244 <line_prag2> $decdigit+                 { setLine line_prag2a }
 245 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 246 <line_prag2b> "#-}"|"-}"                { pop }
 247    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 248    -- with older versions of GHC which generated these.
 249
 250 <0,option_prags> {
 251   "{-#" $whitechar* $pragmachar+
 252         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 253                                  { dispatch_pragmas twoWordPrags }
 254
 255   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 256                                  { dispatch_pragmas oneWordPrags }
 257
 258   -- We ignore all these pragmas, but don't generate a warning for them
 259   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 260                                  { dispatch_pragmas ignoredPrags }
 261
 262   -- ToDo: should only be valid inside a pragma:
 263   "#-}"                                 { endPrag }
 264 }
 265
 266 <option_prags> {
 267   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 268                                    { dispatch_pragmas fileHeaderPrags }
 269
 270   "-- #"                                 { multiline_doc_comment }
 271 }
 272
 273 <0> {
 274   -- In the "0" mode we ignore these pragmas
 275   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 276                      { nested_comment lexToken }
 277 }
 278
 279 <0> {
 280   "-- #" .* { lineCommentToken }
 281 }
 282
 283 <0,option_prags> {
 284   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 285                     (nested_comment lexToken) }
 286 }
 287
 288 -- '0' state: ordinary lexemes
 289
 290 -- Haddock comments
 291
 292 <0,option_prags> {
 293   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 294   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 295 }
 296
 297 -- "special" symbols
 298
 299 <0> {
 300   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 301   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 302 }
 303
 304 <0> {
 305   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 306   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 307   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 308   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 309   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 310   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 311   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 312   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 313
 314 -- For backward compatibility, accept the old dollar syntax
 315   "[$" @varid "|"  / { ifExtension qqEnabled }
 316                      { lex_quasiquote_tok }
 317
 318   "[" @varid "|"  / { ifExtension qqEnabled }
 319                      { lex_quasiquote_tok }
 320 }
 321
 322 <0> {
 323   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 324                                         { special IToparenbar }
 325   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 326 }
 327
 328 <0> {
 329   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 330 }
 331
 332 <0> {
 333   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 334          { token IToubxparen }
 335   "#)" / { ifExtension unboxedTuplesEnabled }
 336          { token ITcubxparen }
 337 }
 338
 339 <0,option_prags> {
 340   \(                                    { special IToparen }
 341   \)                                    { special ITcparen }
 342   \[                                    { special ITobrack }
 343   \]                                    { special ITcbrack }
 344   \,                                    { special ITcomma }
 345   \;                                    { special ITsemi }
 346   \`                                    { special ITbackquote }
 347
 348   \{                                    { open_brace }
 349   \}                                    { close_brace }
 350 }
 351
 352 <0,option_prags> {
 353   @qual @varid                  { idtoken qvarid }
 354   @qual @conid                  { idtoken qconid }
 355   @varid                        { varid }
 356   @conid                        { idtoken conid }
 357 }
 358
 359 <0> {
 360   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 361   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 362   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 363   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 364 }
 365
 366 -- ToDo: - move `var` and (sym) into lexical syntax?
 367 --       - remove backquote from $special?
 368 <0> {
 369   @qual @varsym                                    { idtoken qvarsym }
 370   @qual @consym                                    { idtoken qconsym }
 371   @varsym                                          { varsym }
 372   @consym                                          { consym }
 373 }
 374
 375 -- For the normal boxed literals we need to be careful
 376 -- when trying to be close to Haskell98
 377 <0> {
 378   -- Normal integral literals (:: Num a => a, from Integer)
 379   @decimal           { tok_num positive 0 0 decimal }
 380   0[oO] @octal       { tok_num positive 2 2 octal }
 381   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 382
 383   -- Normal rational literals (:: Fractional a => a, from Rational)
 384   @floating_point    { strtoken tok_float }
 385 }
 386
 387 <0> {
 388   -- Unboxed ints (:: Int#) and words (:: Word#)
 389   -- It's simpler (and faster?) to give separate cases to the negatives,
 390   -- especially considering octal/hexadecimal prefixes.
 391   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 392   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 393   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 394   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 395   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 396   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 397
 398   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 399   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 400   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 401
 402   -- Unboxed floats and doubles (:: Float#, :: Double#)
 403   -- prim_{float,double} work with signed literals
 404   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 405   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 406 }
 407
 408 -- Strings and chars are lexed by hand-written code.  The reason is
 409 -- that even if we recognise the string or char here in the regex
 410 -- lexer, we would still have to parse the string afterward in order
 411 -- to convert it to a String.
 412 <0> {
 413   \'                            { lex_char_tok }
 414   \"                            { lex_string_tok }
 415 }
 416
 417 {
 418 -- -----------------------------------------------------------------------------
 419 -- The token type
 420
 421 data Token
 422   = ITas                        -- Haskell keywords
 423   | ITcase
 424   | ITclass
 425   | ITdata
 426   | ITdefault
 427   | ITderiving
 428   | ITdo
 429   | ITelse
 430   | IThiding
 431   | ITif
 432   | ITimport
 433   | ITin
 434   | ITinfix
 435   | ITinfixl
 436   | ITinfixr
 437   | ITinstance
 438   | ITlet
 439   | ITmodule
 440   | ITnewtype
 441   | ITof
 442   | ITqualified
 443   | ITthen
 444   | ITtype
 445   | ITwhere
 446   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 447
 448   | ITforall                    -- GHC extension keywords
 449   | ITforeign
 450   | ITexport
 451   | ITlabel
 452   | ITdynamic
 453   | ITsafe
 454   | ITthreadsafe
 455   | ITinterruptible
 456   | ITunsafe
 457   | ITstdcallconv
 458   | ITccallconv
 459   | ITprimcallconv
 460   | ITmdo
 461   | ITfamily
 462   | ITgroup
 463   | ITby
 464   | ITusing
 465
 466         -- Pragmas
 467   | ITinline_prag InlineSpec RuleMatchInfo
 468   | ITspec_prag                 -- SPECIALISE
 469   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 470   | ITsource_prag
 471   | ITrules_prag
 472   | ITwarning_prag
 473   | ITdeprecated_prag
 474   | ITline_prag
 475   | ITscc_prag
 476   | ITgenerated_prag
 477   | ITcore_prag                 -- hdaume: core annotations
 478   | ITunpack_prag
 479   | ITann_prag
 480   | ITclose_prag
 481   | IToptions_prag String
 482   | ITinclude_prag String
 483   | ITlanguage_prag
 484   | ITvect_prag
 485   | ITvect_scalar_prag
 486   | ITnovect_prag
 487
 488   | ITdotdot                    -- reserved symbols
 489   | ITcolon
 490   | ITdcolon
 491   | ITequal
 492   | ITlam
 493   | ITvbar
 494   | ITlarrow
 495   | ITrarrow
 496   | ITat
 497   | ITtilde
 498   | ITdarrow
 499   | ITminus
 500   | ITbang
 501   | ITstar
 502   | ITdot
 503
 504   | ITbiglam                    -- GHC-extension symbols
 505
 506   | ITocurly                    -- special symbols
 507   | ITccurly
 508   | ITocurlybar                 -- {|, for type applications
 509   | ITccurlybar                 -- |}, for type applications
 510   | ITvocurly
 511   | ITvccurly
 512   | ITobrack
 513   | ITopabrack                  -- [:, for parallel arrays with -XParallelArrays
 514   | ITcpabrack                  -- :], for parallel arrays with -XParallelArrays
 515   | ITcbrack
 516   | IToparen
 517   | ITcparen
 518   | IToubxparen
 519   | ITcubxparen
 520   | ITsemi
 521   | ITcomma
 522   | ITunderscore
 523   | ITbackquote
 524
 525   | ITvarid   FastString        -- identifiers
 526   | ITconid   FastString
 527   | ITvarsym  FastString
 528   | ITconsym  FastString
 529   | ITqvarid  (FastString,FastString)
 530   | ITqconid  (FastString,FastString)
 531   | ITqvarsym (FastString,FastString)
 532   | ITqconsym (FastString,FastString)
 533   | ITprefixqvarsym (FastString,FastString)
 534   | ITprefixqconsym (FastString,FastString)
 535
 536   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 537
 538   | ITchar       Char
 539   | ITstring     FastString
 540   | ITinteger    Integer
 541   | ITrational   FractionalLit
 542
 543   | ITprimchar   Char
 544   | ITprimstring FastString
 545   | ITprimint    Integer
 546   | ITprimword   Integer
 547   | ITprimfloat  FractionalLit
 548   | ITprimdouble FractionalLit
 549
 550   -- Template Haskell extension tokens
 551   | ITopenExpQuote              --  [| or [e|
 552   | ITopenPatQuote              --  [p|
 553   | ITopenDecQuote              --  [d|
 554   | ITopenTypQuote              --  [t|
 555   | ITcloseQuote                --  |]
 556   | ITidEscape   FastString     --  $x
 557   | ITparenEscape               --  $(
 558   | ITvarQuote                  --  '
 559   | ITtyQuote                   --  ''
 560   | ITquasiQuote (FastString,FastString,RealSrcSpan) --  [:...|...|]
 561
 562   -- Arrow notation extension
 563   | ITproc
 564   | ITrec
 565   | IToparenbar                 --  (|
 566   | ITcparenbar                 --  |)
 567   | ITlarrowtail                --  -<
 568   | ITrarrowtail                --  >-
 569   | ITLarrowtail                --  -<<
 570   | ITRarrowtail                --  >>-
 571
 572   | ITunknown String            -- Used when the lexer can't make sense of it
 573   | ITeof                       -- end of file token
 574
 575   -- Documentation annotations
 576   | ITdocCommentNext  String     -- something beginning '-- |'
 577   | ITdocCommentPrev  String     -- something beginning '-- ^'
 578   | ITdocCommentNamed String     -- something beginning '-- $'
 579   | ITdocSection      Int String -- a section heading
 580   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 581   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 582   | ITlineComment     String     -- comment starting by "--"
 583   | ITblockComment    String     -- comment in {- -}
 584
 585 #ifdef DEBUG
 586   deriving Show -- debugging
 587 #endif
 588
 589 {-
 590 isSpecial :: Token -> Bool
 591 -- If we see M.x, where x is a keyword, but
 592 -- is special, we treat is as just plain M.x,
 593 -- not as a keyword.
 594 isSpecial ITas          = True
 595 isSpecial IThiding      = True
 596 isSpecial ITqualified   = True
 597 isSpecial ITforall      = True
 598 isSpecial ITexport      = True
 599 isSpecial ITlabel       = True
 600 isSpecial ITdynamic     = True
 601 isSpecial ITsafe        = True
 602 isSpecial ITthreadsafe  = True
 603 isSpecial ITinterruptible = True
 604 isSpecial ITunsafe      = True
 605 isSpecial ITccallconv   = True
 606 isSpecial ITstdcallconv = True
 607 isSpecial ITprimcallconv = True
 608 isSpecial ITmdo         = True
 609 isSpecial ITfamily      = True
 610 isSpecial ITgroup   = True
 611 isSpecial ITby      = True
 612 isSpecial ITusing   = True
 613 isSpecial _             = False
 614 -}
 615
 616 -- the bitmap provided as the third component indicates whether the
 617 -- corresponding extension keyword is valid under the extension options
 618 -- provided to the compiler; if the extension corresponding to *any* of the
 619 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 620 -- facilitates using a keyword in two different extensions that can be
 621 -- activated independently)
 622 --
 623 reservedWordsFM :: UniqFM (Token, Int)
 624 reservedWordsFM = listToUFM $
 625         map (\(x, y, z) -> (mkFastString x, (y, z)))
 626        [( "_",          ITunderscore,   0 ),
 627         ( "as",         ITas,           0 ),
 628         ( "case",       ITcase,         0 ),
 629         ( "class",      ITclass,        0 ),
 630         ( "data",       ITdata,         0 ),
 631         ( "default",    ITdefault,      0 ),
 632         ( "deriving",   ITderiving,     0 ),
 633         ( "do",         ITdo,           0 ),
 634         ( "else",       ITelse,         0 ),
 635         ( "hiding",     IThiding,       0 ),
 636         ( "if",         ITif,           0 ),
 637         ( "import",     ITimport,       0 ),
 638         ( "in",         ITin,           0 ),
 639         ( "infix",      ITinfix,        0 ),
 640         ( "infixl",     ITinfixl,       0 ),
 641         ( "infixr",     ITinfixr,       0 ),
 642         ( "instance",   ITinstance,     0 ),
 643         ( "let",        ITlet,          0 ),
 644         ( "module",     ITmodule,       0 ),
 645         ( "newtype",    ITnewtype,      0 ),
 646         ( "of",         ITof,           0 ),
 647         ( "qualified",  ITqualified,    0 ),
 648         ( "then",       ITthen,         0 ),
 649         ( "type",       ITtype,         0 ),
 650         ( "where",      ITwhere,        0 ),
 651         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 652
 653     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 654         ( "mdo",        ITmdo,           bit recursiveDoBit),
 655         ( "family",     ITfamily,        bit tyFamBit),
 656     ( "group",  ITgroup,     bit transformComprehensionsBit),
 657     ( "by",     ITby,        bit transformComprehensionsBit),
 658     ( "using",  ITusing,     bit transformComprehensionsBit),
 659
 660         ( "foreign",    ITforeign,       bit ffiBit),
 661         ( "export",     ITexport,        bit ffiBit),
 662         ( "label",      ITlabel,         bit ffiBit),
 663         ( "dynamic",    ITdynamic,       bit ffiBit),
 664         ( "safe",       ITsafe,          bit ffiBit),
 665         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 666         ( "interruptible", ITinterruptible, bit ffiBit),
 667         ( "unsafe",     ITunsafe,        bit ffiBit),
 668         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 669         ( "ccall",      ITccallconv,     bit ffiBit),
 670         ( "prim",       ITprimcallconv,  bit ffiBit),
 671
 672         ( "rec",        ITrec,           bit recBit),
 673         ( "proc",       ITproc,          bit arrowsBit)
 674      ]
 675
 676 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 677 reservedSymsFM = listToUFM $
 678     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 679       [ ("..",  ITdotdot,   always)
 680         -- (:) is a reserved op, meaning only list cons
 681        ,(":",   ITcolon,    always)
 682        ,("::",  ITdcolon,   always)
 683        ,("=",   ITequal,    always)
 684        ,("\\",  ITlam,      always)
 685        ,("|",   ITvbar,     always)
 686        ,("<-",  ITlarrow,   always)
 687        ,("->",  ITrarrow,   always)
 688        ,("@",   ITat,       always)
 689        ,("~",   ITtilde,    always)
 690        ,("=>",  ITdarrow,   always)
 691        ,("-",   ITminus,    always)
 692        ,("!",   ITbang,     always)
 693
 694         -- For data T (a::*) = MkT
 695        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 696         -- For 'forall a . t'
 697        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 698
 699        ,("-<",  ITlarrowtail, arrowsEnabled)
 700        ,(">-",  ITrarrowtail, arrowsEnabled)
 701        ,("-<<", ITLarrowtail, arrowsEnabled)
 702        ,(">>-", ITRarrowtail, arrowsEnabled)
 703
 704        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 705        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 706        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 707                                 explicitForallEnabled i)
 708        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 709        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 710
 711        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 712        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 713        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 714        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715
 716        ,("★", ITstar, unicodeSyntaxEnabled)
 717
 718         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 719         -- form part of a large operator.  This would let us have a better
 720         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 721        ]
 722
 723 -- -----------------------------------------------------------------------------
 724 -- Lexer actions
 725
 726 type Action = RealSrcSpan -> StringBuffer -> Int -> P (RealLocated Token)
 727
 728 special :: Token -> Action
 729 special tok span _buf _len = return (L span tok)
 730
 731 token, layout_token :: Token -> Action
 732 token t span _buf _len = return (L span t)
 733 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 734
 735 idtoken :: (StringBuffer -> Int -> Token) -> Action
 736 idtoken f span buf len = return (L span $! (f buf len))
 737
 738 skip_one_varid :: (FastString -> Token) -> Action
 739 skip_one_varid f span buf len
 740   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 741
 742 strtoken :: (String -> Token) -> Action
 743 strtoken f span buf len =
 744   return (L span $! (f $! lexemeToString buf len))
 745
 746 init_strtoken :: Int -> (String -> Token) -> Action
 747 -- like strtoken, but drops the last N character(s)
 748 init_strtoken drop f span buf len =
 749   return (L span $! (f $! lexemeToString buf (len-drop)))
 750
 751 begin :: Int -> Action
 752 begin code _span _str _len = do pushLexState code; lexToken
 753
 754 pop :: Action
 755 pop _span _buf _len = do _ <- popLexState
 756                          lexToken
 757
 758 hopefully_open_brace :: Action
 759 hopefully_open_brace span buf len
 760  = do relaxed <- extension relaxedLayout
 761       ctx <- getContext
 762       (AI l _) <- getInput
 763       let offset = srcLocCol l
 764           isOK = relaxed ||
 765                  case ctx of
 766                  Layout prev_off : _ -> prev_off < offset
 767                  _                   -> True
 768       if isOK then pop_and open_brace span buf len
 769               else failSpanMsgP (RealSrcSpan span) (text "Missing block")
 770
 771 pop_and :: Action -> Action
 772 pop_and act span buf len = do _ <- popLexState
 773                               act span buf len
 774
 775 {-# INLINE nextCharIs #-}
 776 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 777 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 778
 779 notFollowedBy :: Char -> AlexAccPred Int
 780 notFollowedBy char _ _ _ (AI _ buf)
 781   = nextCharIs buf (/=char)
 782
 783 notFollowedBySymbol :: AlexAccPred Int
 784 notFollowedBySymbol _ _ _ (AI _ buf)
 785   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 786
 787 -- We must reject doc comments as being ordinary comments everywhere.
 788 -- In some cases the doc comment will be selected as the lexeme due to
 789 -- maximal munch, but not always, because the nested comment rule is
 790 -- valid in all states, but the doc-comment rules are only valid in
 791 -- the non-layout states.
 792 isNormalComment :: AlexAccPred Int
 793 isNormalComment bits _ _ (AI _ buf)
 794   | haddockEnabled bits = notFollowedByDocOrPragma
 795   | otherwise           = nextCharIs buf (/='#')
 796   where
 797     notFollowedByDocOrPragma
 798        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 799
 800 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 801 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 802
 803 {-
 804 haddockDisabledAnd p bits _ _ (AI _ buf)
 805   = if haddockEnabled bits then False else (p buf)
 806 -}
 807
 808 atEOL :: AlexAccPred Int
 809 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 810
 811 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 812 ifExtension pred bits _ _ _ = pred bits
 813
 814 multiline_doc_comment :: Action
 815 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 816   where
 817     worker commentAcc input docType oneLine = case alexGetChar input of
 818       Just ('\n', input')
 819         | oneLine -> docCommentEnd input commentAcc docType buf span
 820         | otherwise -> case checkIfCommentLine input' of
 821           Just input -> worker ('\n':commentAcc) input docType False
 822           Nothing -> docCommentEnd input commentAcc docType buf span
 823       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 824       Nothing -> docCommentEnd input commentAcc docType buf span
 825
 826     checkIfCommentLine input = check (dropNonNewlineSpace input)
 827       where
 828         check input = case alexGetChar input of
 829           Just ('-', input) -> case alexGetChar input of
 830             Just ('-', input) -> case alexGetChar input of
 831               Just (c, _) | c /= '-' -> Just input
 832               _ -> Nothing
 833             _ -> Nothing
 834           _ -> Nothing
 835
 836         dropNonNewlineSpace input = case alexGetChar input of
 837           Just (c, input')
 838             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 839             | otherwise -> input
 840           Nothing -> input
 841
 842 lineCommentToken :: Action
 843 lineCommentToken span buf len = do
 844   b <- extension rawTokenStreamEnabled
 845   if b then strtoken ITlineComment span buf len else lexToken
 846
 847 {-
 848   nested comments require traversing by hand, they can't be parsed
 849   using regular expressions.
 850 -}
 851 nested_comment :: P (RealLocated Token) -> Action
 852 nested_comment cont span _str _len = do
 853   input <- getInput
 854   go "" (1::Int) input
 855   where
 856     go commentAcc 0 input = do setInput input
 857                                b <- extension rawTokenStreamEnabled
 858                                if b
 859                                  then docCommentEnd input commentAcc ITblockComment _str span
 860                                  else cont
 861     go commentAcc n input = case alexGetChar input of
 862       Nothing -> errBrace input span
 863       Just ('-',input) -> case alexGetChar input of
 864         Nothing  -> errBrace input span
 865         Just ('\125',input) -> go commentAcc (n-1) input
 866         Just (_,_)          -> go ('-':commentAcc) n input
 867       Just ('\123',input) -> case alexGetChar input of
 868         Nothing  -> errBrace input span
 869         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 870         Just (_,_)       -> go ('\123':commentAcc) n input
 871       Just (c,input) -> go (c:commentAcc) n input
 872
 873 nested_doc_comment :: Action
 874 nested_doc_comment span buf _len = withLexedDocType (go "")
 875   where
 876     go commentAcc input docType _ = case alexGetChar input of
 877       Nothing -> errBrace input span
 878       Just ('-',input) -> case alexGetChar input of
 879         Nothing -> errBrace input span
 880         Just ('\125',input) ->
 881           docCommentEnd input commentAcc docType buf span
 882         Just (_,_) -> go ('-':commentAcc) input docType False
 883       Just ('\123', input) -> case alexGetChar input of
 884         Nothing  -> errBrace input span
 885         Just ('-',input) -> do
 886           setInput input
 887           let cont = do input <- getInput; go commentAcc input docType False
 888           nested_comment cont span buf _len
 889         Just (_,_) -> go ('\123':commentAcc) input docType False
 890       Just (c,input) -> go (c:commentAcc) input docType False
 891
 892 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (RealLocated Token))
 893                  -> P (RealLocated Token)
 894 withLexedDocType lexDocComment = do
 895   input@(AI _ buf) <- getInput
 896   case prevChar buf ' ' of
 897     '|' -> lexDocComment input ITdocCommentNext False
 898     '^' -> lexDocComment input ITdocCommentPrev False
 899     '$' -> lexDocComment input ITdocCommentNamed False
 900     '*' -> lexDocSection 1 input
 901     '#' -> lexDocComment input ITdocOptionsOld False
 902     _ -> panic "withLexedDocType: Bad doc type"
 903  where
 904     lexDocSection n input = case alexGetChar input of
 905       Just ('*', input) -> lexDocSection (n+1) input
 906       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 907       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 908
 909 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 910 -- off again at the end of the pragma.
 911 rulePrag :: Action
 912 rulePrag span _buf _len = do
 913   setExts (.|. bit inRulePragBit)
 914   return (L span ITrules_prag)
 915
 916 endPrag :: Action
 917 endPrag span _buf _len = do
 918   setExts (.&. complement (bit inRulePragBit))
 919   return (L span ITclose_prag)
 920
 921 -- docCommentEnd
 922 -------------------------------------------------------------------------------
 923 -- This function is quite tricky. We can't just return a new token, we also
 924 -- need to update the state of the parser. Why? Because the token is longer
 925 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 926 -- it writes the wrong token length to the parser state. This function is
 927 -- called afterwards, so it can just update the state.
 928
 929 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 930                  RealSrcSpan -> P (RealLocated Token)
 931 docCommentEnd input commentAcc docType buf span = do
 932   setInput input
 933   let (AI loc nextBuf) = input
 934       comment = reverse commentAcc
 935       span' = mkRealSrcSpan (realSrcSpanStart span) loc
 936       last_len = byteDiff buf nextBuf
 937
 938   span `seq` setLastToken span' last_len
 939   return (L span' (docType comment))
 940
 941 errBrace :: AlexInput -> RealSrcSpan -> P a
 942 errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) end "unterminated `{-'"
 943
 944 open_brace, close_brace :: Action
 945 open_brace span _str _len = do
 946   ctx <- getContext
 947   setContext (NoLayout:ctx)
 948   return (L span ITocurly)
 949 close_brace span _str _len = do
 950   popContext
 951   return (L span ITccurly)
 952
 953 qvarid, qconid :: StringBuffer -> Int -> Token
 954 qvarid buf len = ITqvarid $! splitQualName buf len False
 955 qconid buf len = ITqconid $! splitQualName buf len False
 956
 957 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 958 -- takes a StringBuffer and a length, and returns the module name
 959 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 960 -- because of hierarchical module names.
 961 splitQualName orig_buf len parens = split orig_buf orig_buf
 962   where
 963     split buf dot_buf
 964         | orig_buf `byteDiff` buf >= len  = done dot_buf
 965         | c == '.'                        = found_dot buf'
 966         | otherwise                       = split buf' dot_buf
 967       where
 968        (c,buf') = nextChar buf
 969
 970     -- careful, we might get names like M....
 971     -- so, if the character after the dot is not upper-case, this is
 972     -- the end of the qualifier part.
 973     found_dot buf -- buf points after the '.'
 974         | isUpper c    = split buf' buf
 975         | otherwise    = done buf
 976       where
 977        (c,buf') = nextChar buf
 978
 979     done dot_buf =
 980         (lexemeToFastString orig_buf (qual_size - 1),
 981          if parens -- Prelude.(+)
 982             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 983             else lexemeToFastString dot_buf (len - qual_size))
 984       where
 985         qual_size = orig_buf `byteDiff` dot_buf
 986
 987 varid :: Action
 988 varid span buf len =
 989   fs `seq`
 990   case lookupUFM reservedWordsFM fs of
 991         Just (keyword,0)    -> do
 992                 maybe_layout keyword
 993                 return (L span keyword)
 994         Just (keyword,exts) -> do
 995                 b <- extension (\i -> exts .&. i /= 0)
 996                 if b then do maybe_layout keyword
 997                              return (L span keyword)
 998                      else return (L span (ITvarid fs))
 999         _other -> return (L span (ITvarid fs))
1000   where
1001         fs = lexemeToFastString buf len
1002
1003 conid :: StringBuffer -> Int -> Token
1004 conid buf len = ITconid fs
1005   where fs = lexemeToFastString buf len
1006
1007 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
1008 qvarsym buf len = ITqvarsym $! splitQualName buf len False
1009 qconsym buf len = ITqconsym $! splitQualName buf len False
1010 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
1011 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1012
1013 varsym, consym :: Action
1014 varsym = sym ITvarsym
1015 consym = sym ITconsym
1016
1017 sym :: (FastString -> Token) -> RealSrcSpan -> StringBuffer -> Int
1018     -> P (RealLocated Token)
1019 sym con span buf len =
1020   case lookupUFM reservedSymsFM fs of
1021         Just (keyword,exts) -> do
1022                 b <- extension exts
1023                 if b then return (L span keyword)
1024                      else return (L span $! con fs)
1025         _other -> return (L span $! con fs)
1026   where
1027         fs = lexemeToFastString buf len
1028
1029 -- Variations on the integral numeric literal.
1030 tok_integral :: (Integer -> Token)
1031      -> (Integer -> Integer)
1032  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1033      -> Int -> Int
1034      -> (Integer, (Char->Int)) -> Action
1035 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1036   return $ L span $ itint $! transint $ parseUnsignedInteger
1037      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1038
1039 -- some conveniences for use with tok_integral
1040 tok_num :: (Integer -> Integer)
1041         -> Int -> Int
1042         -> (Integer, (Char->Int)) -> Action
1043 tok_num = tok_integral ITinteger
1044 tok_primint :: (Integer -> Integer)
1045             -> Int -> Int
1046             -> (Integer, (Char->Int)) -> Action
1047 tok_primint = tok_integral ITprimint
1048 tok_primword :: Int -> Int
1049              -> (Integer, (Char->Int)) -> Action
1050 tok_primword = tok_integral ITprimword positive
1051 positive, negative :: (Integer -> Integer)
1052 positive = id
1053 negative = negate
1054 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1055 decimal = (10,octDecDigit)
1056 octal = (8,octDecDigit)
1057 hexadecimal = (16,hexDigit)
1058
1059 -- readRational can understand negative rationals, exponents, everything.
1060 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1061 tok_float        str = ITrational   $! readFractionalLit str
1062 tok_primfloat    str = ITprimfloat  $! readFractionalLit str
1063 tok_primdouble   str = ITprimdouble $! readFractionalLit str
1064
1065 readFractionalLit :: String -> FractionalLit
1066 readFractionalLit str = (FL $! str) $! readRational str
1067
1068 -- -----------------------------------------------------------------------------
1069 -- Layout processing
1070
1071 -- we're at the first token on a line, insert layout tokens if necessary
1072 do_bol :: Action
1073 do_bol span _str _len = do
1074         pos <- getOffside
1075         case pos of
1076             LT -> do
1077                 --trace "layout: inserting '}'" $ do
1078                 popContext
1079                 -- do NOT pop the lex state, we might have a ';' to insert
1080                 return (L span ITvccurly)
1081             EQ -> do
1082                 --trace "layout: inserting ';'" $ do
1083                 _ <- popLexState
1084                 return (L span ITsemi)
1085             GT -> do
1086                 _ <- popLexState
1087                 lexToken
1088
1089 -- certain keywords put us in the "layout" state, where we might
1090 -- add an opening curly brace.
1091 maybe_layout :: Token -> P ()
1092 maybe_layout t = do -- If the alternative layout rule is enabled then
1093                     -- we never create an implicit layout context here.
1094                     -- Layout is handled XXX instead.
1095                     -- The code for closing implicit contexts, or
1096                     -- inserting implicit semi-colons, is therefore
1097                     -- irrelevant as it only applies in an implicit
1098                     -- context.
1099                     alr <- extension alternativeLayoutRule
1100                     unless alr $ f t
1101     where f ITdo    = pushLexState layout_do
1102           f ITmdo   = pushLexState layout_do
1103           f ITof    = pushLexState layout
1104           f ITlet   = pushLexState layout
1105           f ITwhere = pushLexState layout
1106           f ITrec   = pushLexState layout
1107           f _       = return ()
1108
1109 -- Pushing a new implicit layout context.  If the indentation of the
1110 -- next token is not greater than the previous layout context, then
1111 -- Haskell 98 says that the new layout context should be empty; that is
1112 -- the lexer must generate {}.
1113 --
1114 -- We are slightly more lenient than this: when the new context is started
1115 -- by a 'do', then we allow the new context to be at the same indentation as
1116 -- the previous context.  This is what the 'strict' argument is for.
1117 --
1118 new_layout_context :: Bool -> Action
1119 new_layout_context strict span _buf _len = do
1120     _ <- popLexState
1121     (AI l _) <- getInput
1122     let offset = srcLocCol l
1123     ctx <- getContext
1124     nondecreasing <- extension nondecreasingIndentation
1125     let strict' = strict || not nondecreasing
1126     case ctx of
1127         Layout prev_off : _  |
1128            (strict'     && prev_off >= offset  ||
1129             not strict' && prev_off > offset) -> do
1130                 -- token is indented to the left of the previous context.
1131                 -- we must generate a {} sequence now.
1132                 pushLexState layout_left
1133                 return (L span ITvocurly)
1134         _ -> do
1135                 setContext (Layout offset : ctx)
1136                 return (L span ITvocurly)
1137
1138 do_layout_left :: Action
1139 do_layout_left span _buf _len = do
1140     _ <- popLexState
1141     pushLexState bol  -- we must be at the start of a line
1142     return (L span ITvccurly)
1143
1144 -- -----------------------------------------------------------------------------
1145 -- LINE pragmas
1146
1147 setLine :: Int -> Action
1148 setLine code span buf len = do
1149   let line = parseUnsignedInteger buf len 10 octDecDigit
1150   setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1151         -- subtract one: the line number refers to the *following* line
1152   _ <- popLexState
1153   pushLexState code
1154   lexToken
1155
1156 setFile :: Int -> Action
1157 setFile code span buf len = do
1158   let file = lexemeToFastString (stepOn buf) (len-2)
1159   setAlrLastLoc $ alrInitialLoc file
1160   setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1161   _ <- popLexState
1162   pushLexState code
1163   lexToken
1164
1165 alrInitialLoc :: FastString -> RealSrcSpan
1166 alrInitialLoc file = mkRealSrcSpan loc loc
1167     where -- This is a hack to ensure that the first line in a file
1168           -- looks like it is after the initial location:
1169           loc = mkRealSrcLoc file (-1) (-1)
1170
1171 -- -----------------------------------------------------------------------------
1172 -- Options, includes and language pragmas.
1173
1174 lex_string_prag :: (String -> Token) -> Action
1175 lex_string_prag mkTok span _buf _len
1176     = do input <- getInput
1177          start <- getSrcLoc
1178          tok <- go [] input
1179          end <- getSrcLoc
1180          return (L (mkRealSrcSpan start end) tok)
1181     where go acc input
1182               = if isString input "#-}"
1183                    then do setInput input
1184                            return (mkTok (reverse acc))
1185                    else case alexGetChar input of
1186                           Just (c,i) -> go (c:acc) i
1187                           Nothing -> err input
1188           isString _ [] = True
1189           isString i (x:xs)
1190               = case alexGetChar i of
1191                   Just (c,i') | c == x    -> isString i' xs
1192                   _other -> False
1193           err (AI end _) = failLocMsgP (realSrcSpanStart span) end "unterminated options pragma"
1194
1195
1196 -- -----------------------------------------------------------------------------
1197 -- Strings & Chars
1198
1199 -- This stuff is horrible.  I hates it.
1200
1201 lex_string_tok :: Action
1202 lex_string_tok span _buf _len = do
1203   tok <- lex_string ""
1204   end <- getSrcLoc
1205   return (L (mkRealSrcSpan (realSrcSpanStart span) end) tok)
1206
1207 lex_string :: String -> P Token
1208 lex_string s = do
1209   i <- getInput
1210   case alexGetChar' i of
1211     Nothing -> lit_error i
1212
1213     Just ('"',i)  -> do
1214         setInput i
1215         magicHash <- extension magicHashEnabled
1216         if magicHash
1217           then do
1218             i <- getInput
1219             case alexGetChar' i of
1220               Just ('#',i) -> do
1221                    setInput i
1222                    if any (> '\xFF') s
1223                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1224                     else let s' = mkZFastString (reverse s) in
1225                          return (ITprimstring s')
1226                         -- mkZFastString is a hack to avoid encoding the
1227                         -- string in UTF-8.  We just want the exact bytes.
1228               _other ->
1229                 return (ITstring (mkFastString (reverse s)))
1230           else
1231                 return (ITstring (mkFastString (reverse s)))
1232
1233     Just ('\\',i)
1234         | Just ('&',i) <- next -> do
1235                 setInput i; lex_string s
1236         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1237                            -- is_space only works for <= '\x7f' (#3751)
1238                 setInput i; lex_stringgap s
1239         where next = alexGetChar' i
1240
1241     Just (c, i1) -> do
1242         case c of
1243           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1244           c | isAny c -> do setInput i1; lex_string (c:s)
1245           _other -> lit_error i
1246
1247 lex_stringgap :: String -> P Token
1248 lex_stringgap s = do
1249   i <- getInput
1250   c <- getCharOrFail i
1251   case c of
1252     '\\' -> lex_string s
1253     c | is_space c -> lex_stringgap s
1254     _other -> lit_error i
1255
1256
1257 lex_char_tok :: Action
1258 -- Here we are basically parsing character literals, such as 'x' or '\n'
1259 -- but, when Template Haskell is on, we additionally spot
1260 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1261 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1262 -- So we have to do two characters of lookahead: when we see 'x we need to
1263 -- see if there's a trailing quote
1264 lex_char_tok span _buf _len = do        -- We've seen '
1265    i1 <- getInput       -- Look ahead to first character
1266    let loc = realSrcSpanStart span
1267    case alexGetChar' i1 of
1268         Nothing -> lit_error  i1
1269
1270         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1271                   th_exts <- extension thEnabled
1272                   if th_exts then do
1273                         setInput i2
1274                         return (L (mkRealSrcSpan loc end2)  ITtyQuote)
1275                    else lit_error i1
1276
1277         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1278                   setInput i2
1279                   lit_ch <- lex_escape
1280                   i3 <- getInput
1281                   mc <- getCharOrFail i3 -- Trailing quote
1282                   if mc == '\'' then finish_char_tok loc lit_ch
1283                                 else lit_error i3
1284
1285         Just (c, i2@(AI _end2 _))
1286                 | not (isAny c) -> lit_error i1
1287                 | otherwise ->
1288
1289                 -- We've seen 'x, where x is a valid character
1290                 --  (i.e. not newline etc) but not a quote or backslash
1291            case alexGetChar' i2 of      -- Look ahead one more character
1292                 Just ('\'', i3) -> do   -- We've seen 'x'
1293                         setInput i3
1294                         finish_char_tok loc c
1295                 _other -> do            -- We've seen 'x not followed by quote
1296                                         -- (including the possibility of EOF)
1297                                         -- If TH is on, just parse the quote only
1298                         th_exts <- extension thEnabled
1299                         let (AI end _) = i1
1300                         if th_exts then return (L (mkRealSrcSpan loc end) ITvarQuote)
1301                                    else lit_error i2
1302
1303 finish_char_tok :: RealSrcLoc -> Char -> P (RealLocated Token)
1304 finish_char_tok loc ch  -- We've already seen the closing quote
1305                         -- Just need to check for trailing #
1306   = do  magicHash <- extension magicHashEnabled
1307         i@(AI end _) <- getInput
1308         if magicHash then do
1309                 case alexGetChar' i of
1310                         Just ('#',i@(AI end _)) -> do
1311                                 setInput i
1312                                 return (L (mkRealSrcSpan loc end) (ITprimchar ch))
1313                         _other ->
1314                                 return (L (mkRealSrcSpan loc end) (ITchar ch))
1315             else do
1316                    return (L (mkRealSrcSpan loc end) (ITchar ch))
1317
1318 isAny :: Char -> Bool
1319 isAny c | c > '\x7f' = isPrint c
1320         | otherwise  = is_any c
1321
1322 lex_escape :: P Char
1323 lex_escape = do
1324   i0 <- getInput
1325   c <- getCharOrFail i0
1326   case c of
1327         'a'   -> return '\a'
1328         'b'   -> return '\b'
1329         'f'   -> return '\f'
1330         'n'   -> return '\n'
1331         'r'   -> return '\r'
1332         't'   -> return '\t'
1333         'v'   -> return '\v'
1334         '\\'  -> return '\\'
1335         '"'   -> return '\"'
1336         '\''  -> return '\''
1337         '^'   -> do i1 <- getInput
1338                     c <- getCharOrFail i1
1339                     if c >= '@' && c <= '_'
1340                         then return (chr (ord c - ord '@'))
1341                         else lit_error i1
1342
1343         'x'   -> readNum is_hexdigit 16 hexDigit
1344         'o'   -> readNum is_octdigit  8 octDecDigit
1345         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1346
1347         c1 ->  do
1348            i <- getInput
1349            case alexGetChar' i of
1350             Nothing -> lit_error i0
1351             Just (c2,i2) ->
1352               case alexGetChar' i2 of
1353                 Nothing -> do lit_error i0
1354                 Just (c3,i3) ->
1355                    let str = [c1,c2,c3] in
1356                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1357                                      Just rest <- [stripPrefix p str] ] of
1358                           (escape_char,[]):_ -> do
1359                                 setInput i3
1360                                 return escape_char
1361                           (escape_char,_:_):_ -> do
1362                                 setInput i2
1363                                 return escape_char
1364                           [] -> lit_error i0
1365
1366 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1367 readNum is_digit base conv = do
1368   i <- getInput
1369   c <- getCharOrFail i
1370   if is_digit c
1371         then readNum2 is_digit base conv (conv c)
1372         else lit_error i
1373
1374 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1375 readNum2 is_digit base conv i = do
1376   input <- getInput
1377   read i input
1378   where read i input = do
1379           case alexGetChar' input of
1380             Just (c,input') | is_digit c -> do
1381                let i' = i*base + conv c
1382                if i' > 0x10ffff
1383                   then setInput input >> lexError "numeric escape sequence out of range"
1384                   else read i' input'
1385             _other -> do
1386               setInput input; return (chr i)
1387
1388
1389 silly_escape_chars :: [(String, Char)]
1390 silly_escape_chars = [
1391         ("NUL", '\NUL'),
1392         ("SOH", '\SOH'),
1393         ("STX", '\STX'),
1394         ("ETX", '\ETX'),
1395         ("EOT", '\EOT'),
1396         ("ENQ", '\ENQ'),
1397         ("ACK", '\ACK'),
1398         ("BEL", '\BEL'),
1399         ("BS", '\BS'),
1400         ("HT", '\HT'),
1401         ("LF", '\LF'),
1402         ("VT", '\VT'),
1403         ("FF", '\FF'),
1404         ("CR", '\CR'),
1405         ("SO", '\SO'),
1406         ("SI", '\SI'),
1407         ("DLE", '\DLE'),
1408         ("DC1", '\DC1'),
1409         ("DC2", '\DC2'),
1410         ("DC3", '\DC3'),
1411         ("DC4", '\DC4'),
1412         ("NAK", '\NAK'),
1413         ("SYN", '\SYN'),
1414         ("ETB", '\ETB'),
1415         ("CAN", '\CAN'),
1416         ("EM", '\EM'),
1417         ("SUB", '\SUB'),
1418         ("ESC", '\ESC'),
1419         ("FS", '\FS'),
1420         ("GS", '\GS'),
1421         ("RS", '\RS'),
1422         ("US", '\US'),
1423         ("SP", '\SP'),
1424         ("DEL", '\DEL')
1425         ]
1426
1427 -- before calling lit_error, ensure that the current input is pointing to
1428 -- the position of the error in the buffer.  This is so that we can report
1429 -- a correct location to the user, but also so we can detect UTF-8 decoding
1430 -- errors if they occur.
1431 lit_error :: AlexInput -> P a
1432 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1433
1434 getCharOrFail :: AlexInput -> P Char
1435 getCharOrFail i =  do
1436   case alexGetChar' i of
1437         Nothing -> lexError "unexpected end-of-file in string/character literal"
1438         Just (c,i)  -> do setInput i; return c
1439
1440 -- -----------------------------------------------------------------------------
1441 -- QuasiQuote
1442
1443 lex_quasiquote_tok :: Action
1444 lex_quasiquote_tok span buf len = do
1445   let quoter = tail (lexemeToString buf (len - 1))
1446                 -- 'tail' drops the initial '[',
1447                 -- while the -1 drops the trailing '|'
1448   quoteStart <- getSrcLoc
1449   quote <- lex_quasiquote ""
1450   end <- getSrcLoc
1451   return (L (mkRealSrcSpan (realSrcSpanStart span) end)
1452            (ITquasiQuote (mkFastString quoter,
1453                           mkFastString (reverse quote),
1454                           mkRealSrcSpan quoteStart end)))
1455
1456 lex_quasiquote :: String -> P String
1457 lex_quasiquote s = do
1458   i <- getInput
1459   case alexGetChar' i of
1460     Nothing -> lit_error i
1461
1462     Just ('\\',i)
1463         | Just ('|',i) <- next -> do
1464                 setInput i; lex_quasiquote ('|' : s)
1465         | Just (']',i) <- next -> do
1466                 setInput i; lex_quasiquote (']' : s)
1467         where next = alexGetChar' i
1468
1469     Just ('|',i)
1470         | Just (']',i) <- next -> do
1471                 setInput i; return s
1472         where next = alexGetChar' i
1473
1474     Just (c, i) -> do
1475          setInput i; lex_quasiquote (c : s)
1476
1477 -- -----------------------------------------------------------------------------
1478 -- Warnings
1479
1480 warn :: DynFlag -> SDoc -> Action
1481 warn option warning srcspan _buf _len = do
1482     addWarning option (RealSrcSpan srcspan) warning
1483     lexToken
1484
1485 warnThen :: DynFlag -> SDoc -> Action -> Action
1486 warnThen option warning action srcspan buf len = do
1487     addWarning option (RealSrcSpan srcspan) warning
1488     action srcspan buf len
1489
1490 -- -----------------------------------------------------------------------------
1491 -- The Parse Monad
1492
1493 data LayoutContext
1494   = NoLayout
1495   | Layout !Int
1496   deriving Show
1497
1498 data ParseResult a
1499   = POk PState a
1500   | PFailed
1501         SrcSpan         -- The start and end of the text span related to
1502                         -- the error.  Might be used in environments which can
1503                         -- show this span, e.g. by highlighting it.
1504         Message         -- The error message
1505
1506 data PState = PState {
1507         buffer     :: StringBuffer,
1508         dflags     :: DynFlags,
1509         messages   :: Messages,
1510         last_loc   :: RealSrcSpan,      -- pos of previous token
1511         last_len   :: !Int,     -- len of previous token
1512         loc        :: RealSrcLoc,   -- current loc (end of prev token + 1)
1513         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1514         context    :: [LayoutContext],
1515         lex_state  :: [Int],
1516         -- Used in the alternative layout rule:
1517         -- These tokens are the next ones to be sent out. They are
1518         -- just blindly emitted, without the rule looking at them again:
1519         alr_pending_implicit_tokens :: [RealLocated Token],
1520         -- This is the next token to be considered or, if it is Nothing,
1521         -- we need to get the next token from the input stream:
1522         alr_next_token :: Maybe (RealLocated Token),
1523         -- This is what we consider to be the locatino of the last token
1524         -- emitted:
1525         alr_last_loc :: RealSrcSpan,
1526         -- The stack of layout contexts:
1527         alr_context :: [ALRContext],
1528         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1529         -- us what sort of layout the '{' will open:
1530         alr_expecting_ocurly :: Maybe ALRLayout,
1531         -- Have we just had the '}' for a let block? If so, than an 'in'
1532         -- token doesn't need to close anything:
1533         alr_justClosedExplicitLetBlock :: Bool
1534      }
1535         -- last_loc and last_len are used when generating error messages,
1536         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1537         -- current token to happyError, we could at least get rid of last_len.
1538         -- Getting rid of last_loc would require finding another way to
1539         -- implement pushCurrentContext (which is only called from one place).
1540
1541 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1542                               Bool{- is it a 'let' block? -}
1543                 | ALRLayout ALRLayout Int
1544 data ALRLayout = ALRLayoutLet
1545                | ALRLayoutWhere
1546                | ALRLayoutOf
1547                | ALRLayoutDo
1548
1549 newtype P a = P { unP :: PState -> ParseResult a }
1550
1551 instance Monad P where
1552   return = returnP
1553   (>>=) = thenP
1554   fail = failP
1555
1556 returnP :: a -> P a
1557 returnP a = a `seq` (P $ \s -> POk s a)
1558
1559 thenP :: P a -> (a -> P b) -> P b
1560 (P m) `thenP` k = P $ \ s ->
1561         case m s of
1562                 POk s1 a         -> (unP (k a)) s1
1563                 PFailed span err -> PFailed span err
1564
1565 failP :: String -> P a
1566 failP msg = P $ \s -> PFailed (RealSrcSpan (last_loc s)) (text msg)
1567
1568 failMsgP :: String -> P a
1569 failMsgP msg = P $ \s -> PFailed (RealSrcSpan (last_loc s)) (text msg)
1570
1571 failLocMsgP :: RealSrcLoc -> RealSrcLoc -> String -> P a
1572 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (RealSrcSpan (mkRealSrcSpan loc1 loc2)) (text str)
1573
1574 failSpanMsgP :: SrcSpan -> SDoc -> P a
1575 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1576
1577 getPState :: P PState
1578 getPState = P $ \s -> POk s s
1579
1580 getDynFlags :: P DynFlags
1581 getDynFlags = P $ \s -> POk s (dflags s)
1582
1583 withThisPackage :: (PackageId -> a) -> P a
1584 withThisPackage f
1585  = do   pkg     <- liftM thisPackage getDynFlags
1586         return  $ f pkg
1587
1588 extension :: (Int -> Bool) -> P Bool
1589 extension p = P $ \s -> POk s (p $! extsBitmap s)
1590
1591 getExts :: P Int
1592 getExts = P $ \s -> POk s (extsBitmap s)
1593
1594 setExts :: (Int -> Int) -> P ()
1595 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1596
1597 setSrcLoc :: RealSrcLoc -> P ()
1598 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1599
1600 getSrcLoc :: P RealSrcLoc
1601 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1602
1603 setLastToken :: RealSrcSpan -> Int -> P ()
1604 setLastToken loc len = P $ \s -> POk s {
1605   last_loc=loc,
1606   last_len=len
1607   } ()
1608
1609 data AlexInput = AI RealSrcLoc StringBuffer
1610
1611 alexInputPrevChar :: AlexInput -> Char
1612 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1613
1614 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1615 alexGetChar (AI loc s)
1616   | atEnd s   = Nothing
1617   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1618                 --trace (show (ord c)) $
1619                 Just (adj_c, (AI loc' s'))
1620   where (c,s') = nextChar s
1621         loc'   = advanceSrcLoc loc c
1622
1623         non_graphic     = '\x0'
1624         upper           = '\x1'
1625         lower           = '\x2'
1626         digit           = '\x3'
1627         symbol          = '\x4'
1628         space           = '\x5'
1629         other_graphic   = '\x6'
1630
1631         adj_c
1632           | c <= '\x06' = non_graphic
1633           | c <= '\x7f' = c
1634           -- Alex doesn't handle Unicode, so when Unicode
1635           -- character is encountered we output these values
1636           -- with the actual character value hidden in the state.
1637           | otherwise =
1638                 case generalCategory c of
1639                   UppercaseLetter       -> upper
1640                   LowercaseLetter       -> lower
1641                   TitlecaseLetter       -> upper
1642                   ModifierLetter        -> other_graphic
1643                   OtherLetter           -> lower -- see #1103
1644                   NonSpacingMark        -> other_graphic
1645                   SpacingCombiningMark  -> other_graphic
1646                   EnclosingMark         -> other_graphic
1647                   DecimalNumber         -> digit
1648                   LetterNumber          -> other_graphic
1649                   OtherNumber           -> digit -- see #4373
1650                   ConnectorPunctuation  -> symbol
1651                   DashPunctuation       -> symbol
1652                   OpenPunctuation       -> other_graphic
1653                   ClosePunctuation      -> other_graphic
1654                   InitialQuote          -> other_graphic
1655                   FinalQuote            -> other_graphic
1656                   OtherPunctuation      -> symbol
1657                   MathSymbol            -> symbol
1658                   CurrencySymbol        -> symbol
1659                   ModifierSymbol        -> symbol
1660                   OtherSymbol           -> symbol
1661                   Space                 -> space
1662                   _other                -> non_graphic
1663
1664 -- This version does not squash unicode characters, it is used when
1665 -- lexing strings.
1666 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1667 alexGetChar' (AI loc s)
1668   | atEnd s   = Nothing
1669   | otherwise = c `seq` loc' `seq` s' `seq`
1670                 --trace (show (ord c)) $
1671                 Just (c, (AI loc' s'))
1672   where (c,s') = nextChar s
1673         loc'   = advanceSrcLoc loc c
1674
1675 getInput :: P AlexInput
1676 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1677
1678 setInput :: AlexInput -> P ()
1679 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1680
1681 nextIsEOF :: P Bool
1682 nextIsEOF = do
1683   AI _ s <- getInput
1684   return $ atEnd s
1685
1686 pushLexState :: Int -> P ()
1687 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1688
1689 popLexState :: P Int
1690 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1691
1692 getLexState :: P Int
1693 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1694
1695 popNextToken :: P (Maybe (RealLocated Token))
1696 popNextToken
1697     = P $ \s@PState{ alr_next_token = m } ->
1698               POk (s {alr_next_token = Nothing}) m
1699
1700 activeContext :: P Bool
1701 activeContext = do
1702   ctxt <- getALRContext
1703   expc <- getAlrExpectingOCurly
1704   impt <- implicitTokenPending
1705   case (ctxt,expc) of
1706     ([],Nothing) -> return impt
1707     _other       -> return True
1708
1709 setAlrLastLoc :: RealSrcSpan -> P ()
1710 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1711
1712 getAlrLastLoc :: P RealSrcSpan
1713 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1714
1715 getALRContext :: P [ALRContext]
1716 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1717
1718 setALRContext :: [ALRContext] -> P ()
1719 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1720
1721 getJustClosedExplicitLetBlock :: P Bool
1722 getJustClosedExplicitLetBlock
1723  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1724
1725 setJustClosedExplicitLetBlock :: Bool -> P ()
1726 setJustClosedExplicitLetBlock b
1727  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1728
1729 setNextToken :: RealLocated Token -> P ()
1730 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1731
1732 implicitTokenPending :: P Bool
1733 implicitTokenPending
1734     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1735               case ts of
1736               [] -> POk s False
1737               _  -> POk s True
1738
1739 popPendingImplicitToken :: P (Maybe (RealLocated Token))
1740 popPendingImplicitToken
1741     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1742               case ts of
1743               [] -> POk s Nothing
1744               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1745
1746 setPendingImplicitTokens :: [RealLocated Token] -> P ()
1747 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1748
1749 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1750 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1751
1752 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1753 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1754
1755 -- for reasons of efficiency, flags indicating language extensions (eg,
1756 -- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
1757 -- integer
1758
1759 -- The "genericsBit" is now unused, available for others
1760 -- genericsBit :: Int
1761 -- genericsBit = 0 -- {|, |} and "generic"
1762
1763 ffiBit :: Int
1764 ffiBit     = 1
1765 parrBit :: Int
1766 parrBit    = 2
1767 arrowsBit :: Int
1768 arrowsBit  = 4
1769 thBit :: Int
1770 thBit      = 5
1771 ipBit :: Int
1772 ipBit      = 6
1773 explicitForallBit :: Int
1774 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1775 bangPatBit :: Int
1776 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1777                 -- (doesn't affect the lexer)
1778 tyFamBit :: Int
1779 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1780 haddockBit :: Int
1781 haddockBit = 10 -- Lex and parse Haddock comments
1782 magicHashBit :: Int
1783 magicHashBit = 11 -- "#" in both functions and operators
1784 kindSigsBit :: Int
1785 kindSigsBit = 12 -- Kind signatures on type variables
1786 recursiveDoBit :: Int
1787 recursiveDoBit = 13 -- mdo
1788 unicodeSyntaxBit :: Int
1789 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1790 unboxedTuplesBit :: Int
1791 unboxedTuplesBit = 15 -- (# and #)
1792 datatypeContextsBit :: Int
1793 datatypeContextsBit = 16
1794 transformComprehensionsBit :: Int
1795 transformComprehensionsBit = 17
1796 qqBit :: Int
1797 qqBit      = 18 -- enable quasiquoting
1798 inRulePragBit :: Int
1799 inRulePragBit = 19
1800 rawTokenStreamBit :: Int
1801 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1802 recBit :: Int
1803 recBit = 22 -- rec
1804 alternativeLayoutRuleBit :: Int
1805 alternativeLayoutRuleBit = 23
1806 relaxedLayoutBit :: Int
1807 relaxedLayoutBit = 24
1808 nondecreasingIndentationBit :: Int
1809 nondecreasingIndentationBit = 25
1810
1811 always :: Int -> Bool
1812 always           _     = True
1813 parrEnabled :: Int -> Bool
1814 parrEnabled      flags = testBit flags parrBit
1815 arrowsEnabled :: Int -> Bool
1816 arrowsEnabled    flags = testBit flags arrowsBit
1817 thEnabled :: Int -> Bool
1818 thEnabled        flags = testBit flags thBit
1819 ipEnabled :: Int -> Bool
1820 ipEnabled        flags = testBit flags ipBit
1821 explicitForallEnabled :: Int -> Bool
1822 explicitForallEnabled flags = testBit flags explicitForallBit
1823 bangPatEnabled :: Int -> Bool
1824 bangPatEnabled   flags = testBit flags bangPatBit
1825 -- tyFamEnabled :: Int -> Bool
1826 -- tyFamEnabled     flags = testBit flags tyFamBit
1827 haddockEnabled :: Int -> Bool
1828 haddockEnabled   flags = testBit flags haddockBit
1829 magicHashEnabled :: Int -> Bool
1830 magicHashEnabled flags = testBit flags magicHashBit
1831 -- kindSigsEnabled :: Int -> Bool
1832 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1833 unicodeSyntaxEnabled :: Int -> Bool
1834 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1835 unboxedTuplesEnabled :: Int -> Bool
1836 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1837 datatypeContextsEnabled :: Int -> Bool
1838 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1839 qqEnabled :: Int -> Bool
1840 qqEnabled        flags = testBit flags qqBit
1841 -- inRulePrag :: Int -> Bool
1842 -- inRulePrag       flags = testBit flags inRulePragBit
1843 rawTokenStreamEnabled :: Int -> Bool
1844 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1845 alternativeLayoutRule :: Int -> Bool
1846 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1847 relaxedLayout :: Int -> Bool
1848 relaxedLayout flags = testBit flags relaxedLayoutBit
1849 nondecreasingIndentation :: Int -> Bool
1850 nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
1851
1852 -- PState for parsing options pragmas
1853 --
1854 pragState :: DynFlags -> StringBuffer -> RealSrcLoc -> PState
1855 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1856                                  lex_state = [bol, option_prags, 0]
1857                              }
1858
1859 -- create a parse state
1860 --
1861 mkPState :: DynFlags -> StringBuffer -> RealSrcLoc -> PState
1862 mkPState flags buf loc =
1863   PState {
1864       buffer        = buf,
1865       dflags        = flags,
1866       messages      = emptyMessages,
1867       last_loc      = mkRealSrcSpan loc loc,
1868       last_len      = 0,
1869       loc           = loc,
1870       extsBitmap    = fromIntegral bitmap,
1871       context       = [],
1872       lex_state     = [bol, 0],
1873       alr_pending_implicit_tokens = [],
1874       alr_next_token = Nothing,
1875       alr_last_loc = alrInitialLoc (fsLit "<no file>"),
1876       alr_context = [],
1877       alr_expecting_ocurly = Nothing,
1878       alr_justClosedExplicitLetBlock = False
1879     }
1880     where
1881       bitmap =     ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1882                .|. parrBit           `setBitIf` xopt Opt_ParallelArrays  flags
1883                .|. arrowsBit         `setBitIf` xopt Opt_Arrows          flags
1884                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1885                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes     flags
1886                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams  flags
1887                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll  flags
1888                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns    flags
1889                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies    flags
1890                .|. haddockBit        `setBitIf` dopt Opt_Haddock         flags
1891                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash       flags
1892                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures  flags
1893                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo     flags
1894                .|. recBit            `setBitIf` xopt Opt_DoRec           flags
1895                .|. recBit            `setBitIf` xopt Opt_Arrows          flags
1896                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax   flags
1897                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples   flags
1898                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1899                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1900                .|. transformComprehensionsBit `setBitIf` xopt Opt_MonadComprehensions flags
1901                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1902                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1903                .|. relaxedLayoutBit  `setBitIf` xopt Opt_RelaxedLayout flags
1904                .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
1905       --
1906       setBitIf :: Int -> Bool -> Int
1907       b `setBitIf` cond | cond      = bit b
1908                         | otherwise = 0
1909
1910 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1911 addWarning option srcspan warning
1912  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1913        let warning' = mkWarnMsg srcspan alwaysQualify warning
1914            ws' = if dopt option d then ws `snocBag` warning' else ws
1915        in POk s{messages=(ws', es)} ()
1916
1917 getMessages :: PState -> Messages
1918 getMessages PState{messages=ms} = ms
1919
1920 getContext :: P [LayoutContext]
1921 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1922
1923 setContext :: [LayoutContext] -> P ()
1924 setContext ctx = P $ \s -> POk s{context=ctx} ()
1925
1926 popContext :: P ()
1927 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1928                               last_len = len, last_loc = last_loc }) ->
1929   case ctx of
1930         (_:tl) -> POk s{ context = tl } ()
1931         []     -> PFailed (RealSrcSpan last_loc) (srcParseErr buf len)
1932
1933 -- Push a new layout context at the indentation of the last token read.
1934 -- This is only used at the outer level of a module when the 'module'
1935 -- keyword is missing.
1936 pushCurrentContext :: P ()
1937 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1938     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1939
1940 getOffside :: P Ordering
1941 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1942                 let offs = srcSpanStartCol loc in
1943                 let ord = case stk of
1944                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1945                                         compare offs n
1946                         _            -> GT
1947                 in POk s ord
1948
1949 -- ---------------------------------------------------------------------------
1950 -- Construct a parse error
1951
1952 srcParseErr
1953   :: StringBuffer       -- current buffer (placed just after the last token)
1954   -> Int                -- length of the previous token
1955   -> Message
1956 srcParseErr buf len
1957   = hcat [ if null token
1958              then ptext (sLit "parse error (possibly incorrect indentation)")
1959              else hcat [ptext (sLit "parse error on input "),
1960                         char '`', text token, char '\'']
1961     ]
1962   where token = lexemeToString (offsetBytes (-len) buf) len
1963
1964 -- Report a parse failure, giving the span of the previous token as
1965 -- the location of the error.  This is the entry point for errors
1966 -- detected during parsing.
1967 srcParseFail :: P a
1968 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1969                             last_loc = last_loc } ->
1970     PFailed (RealSrcSpan last_loc) (srcParseErr buf len)
1971
1972 -- A lexical error is reported at a particular position in the source file,
1973 -- not over a token range.
1974 lexError :: String -> P a
1975 lexError str = do
1976   loc <- getSrcLoc
1977   (AI end buf) <- getInput
1978   reportLexError loc end buf str
1979
1980 -- -----------------------------------------------------------------------------
1981 -- This is the top-level function: called from the parser each time a
1982 -- new token is to be read from the input.
1983
1984 lexer :: (Located Token -> P a) -> P a
1985 lexer cont = do
1986   alr <- extension alternativeLayoutRule
1987   let lexTokenFun = if alr then lexTokenAlr else lexToken
1988   (L span tok) <- lexTokenFun
1989   --trace ("token: " ++ show tok) $ do
1990   cont (L (RealSrcSpan span) tok)
1991
1992 lexTokenAlr :: P (RealLocated Token)
1993 lexTokenAlr = do mPending <- popPendingImplicitToken
1994                  t <- case mPending of
1995                       Nothing ->
1996                           do mNext <- popNextToken
1997                              t <- case mNext of
1998                                   Nothing -> lexToken
1999                                   Just next -> return next
2000                              alternativeLayoutRuleToken t
2001                       Just t ->
2002                           return t
2003                  setAlrLastLoc (getLoc t)
2004                  case unLoc t of
2005                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
2006                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
2007                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
2008                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
2009                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
2010                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
2011                      _       -> return ()
2012                  return t
2013
2014 alternativeLayoutRuleToken :: RealLocated Token -> P (RealLocated Token)
2015 alternativeLayoutRuleToken t
2016     = do context <- getALRContext
2017          lastLoc <- getAlrLastLoc
2018          mExpectingOCurly <- getAlrExpectingOCurly
2019          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
2020          setJustClosedExplicitLetBlock False
2021          dflags <- getDynFlags
2022          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
2023              thisLoc = getLoc t
2024              thisCol = srcSpanStartCol thisLoc
2025              newLine = srcSpanStartLine thisLoc > srcSpanEndLine lastLoc
2026          case (unLoc t, context, mExpectingOCurly) of
2027              -- This case handles a GHC extension to the original H98
2028              -- layout rule...
2029              (ITocurly, _, Just alrLayout) ->
2030                  do setAlrExpectingOCurly Nothing
2031                     let isLet = case alrLayout of
2032                                 ALRLayoutLet -> True
2033                                 _ -> False
2034                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
2035                     return t
2036              -- ...and makes this case unnecessary
2037              {-
2038              -- I think our implicit open-curly handling is slightly
2039              -- different to John's, in how it interacts with newlines
2040              -- and "in"
2041              (ITocurly, _, Just _) ->
2042                  do setAlrExpectingOCurly Nothing
2043                     setNextToken t
2044                     lexTokenAlr
2045              -}
2046              (_, ALRLayout _ col : ls, Just expectingOCurly)
2047               | (thisCol > col) ||
2048                 (thisCol == col &&
2049                  isNonDecreasingIntentation expectingOCurly) ->
2050                  do setAlrExpectingOCurly Nothing
2051                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2052                     setNextToken t
2053                     return (L thisLoc ITocurly)
2054               | otherwise ->
2055                  do setAlrExpectingOCurly Nothing
2056                     setPendingImplicitTokens [L lastLoc ITccurly]
2057                     setNextToken t
2058                     return (L lastLoc ITocurly)
2059              (_, _, Just expectingOCurly) ->
2060                  do setAlrExpectingOCurly Nothing
2061                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2062                     setNextToken t
2063                     return (L thisLoc ITocurly)
2064              -- We do the [] cases earlier than in the spec, as we
2065              -- have an actual EOF token
2066              (ITeof, ALRLayout _ _ : ls, _) ->
2067                  do setALRContext ls
2068                     setNextToken t
2069                     return (L thisLoc ITccurly)
2070              (ITeof, _, _) ->
2071                  return t
2072              -- the other ITeof case omitted; general case below covers it
2073              (ITin, _, _)
2074               | justClosedExplicitLetBlock ->
2075                  return t
2076              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2077               | newLine ->
2078                  do setPendingImplicitTokens [t]
2079                     setALRContext ls
2080                     return (L thisLoc ITccurly)
2081              -- This next case is to handle a transitional issue:
2082              (ITwhere, ALRLayout _ col : ls, _)
2083               | newLine && thisCol == col && transitional ->
2084                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2085                                (RealSrcSpan thisLoc)
2086                                (transitionalAlternativeLayoutWarning
2087                                     "`where' clause at the same depth as implicit layout block")
2088                     setALRContext ls
2089                     setNextToken t
2090                     -- Note that we use lastLoc, as we may need to close
2091                     -- more layouts, or give a semicolon
2092                     return (L lastLoc ITccurly)
2093              -- This next case is to handle a transitional issue:
2094              (ITvbar, ALRLayout _ col : ls, _)
2095               | newLine && thisCol == col && transitional ->
2096                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2097                                (RealSrcSpan thisLoc)
2098                                (transitionalAlternativeLayoutWarning
2099                                     "`|' at the same depth as implicit layout block")
2100                     setALRContext ls
2101                     setNextToken t
2102                     -- Note that we use lastLoc, as we may need to close
2103                     -- more layouts, or give a semicolon
2104                     return (L lastLoc ITccurly)
2105              (_, ALRLayout _ col : ls, _)
2106               | newLine && thisCol == col ->
2107                  do setNextToken t
2108                     return (L thisLoc ITsemi)
2109               | newLine && thisCol < col ->
2110                  do setALRContext ls
2111                     setNextToken t
2112                     -- Note that we use lastLoc, as we may need to close
2113                     -- more layouts, or give a semicolon
2114                     return (L lastLoc ITccurly)
2115              -- We need to handle close before open, as 'then' is both
2116              -- an open and a close
2117              (u, _, _)
2118               | isALRclose u ->
2119                  case context of
2120                  ALRLayout _ _ : ls ->
2121                      do setALRContext ls
2122                         setNextToken t
2123                         return (L thisLoc ITccurly)
2124                  ALRNoLayout _ isLet : ls ->
2125                      do let ls' = if isALRopen u
2126                                      then ALRNoLayout (containsCommas u) False : ls
2127                                      else ls
2128                         setALRContext ls'
2129                         when isLet $ setJustClosedExplicitLetBlock True
2130                         return t
2131                  [] ->
2132                      do let ls = if isALRopen u
2133                                     then [ALRNoLayout (containsCommas u) False]
2134                                     else ls
2135                         setALRContext ls
2136                         -- XXX This is an error in John's code, but
2137                         -- it looks reachable to me at first glance
2138                         return t
2139              (u, _, _)
2140               | isALRopen u ->
2141                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2142                     return t
2143              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2144                  do setALRContext ls
2145                     setPendingImplicitTokens [t]
2146                     return (L thisLoc ITccurly)
2147              (ITin, ALRLayout _ _ : ls, _) ->
2148                  do setALRContext ls
2149                     setNextToken t
2150                     return (L thisLoc ITccurly)
2151              -- the other ITin case omitted; general case below covers it
2152              (ITcomma, ALRLayout _ _ : ls, _)
2153               | topNoLayoutContainsCommas ls ->
2154                  do setALRContext ls
2155                     setNextToken t
2156                     return (L thisLoc ITccurly)
2157              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2158                  do setALRContext ls
2159                     setPendingImplicitTokens [t]
2160                     return (L thisLoc ITccurly)
2161              -- the other ITwhere case omitted; general case below covers it
2162              (_, _, _) -> return t
2163
2164 transitionalAlternativeLayoutWarning :: String -> SDoc
2165 transitionalAlternativeLayoutWarning msg
2166     = text "transitional layout will not be accepted in the future:"
2167    $$ text msg
2168
2169 isALRopen :: Token -> Bool
2170 isALRopen ITcase        = True
2171 isALRopen ITif          = True
2172 isALRopen ITthen        = True
2173 isALRopen IToparen      = True
2174 isALRopen ITobrack      = True
2175 isALRopen ITocurly      = True
2176 -- GHC Extensions:
2177 isALRopen IToubxparen   = True
2178 isALRopen ITparenEscape = True
2179 isALRopen _             = False
2180
2181 isALRclose :: Token -> Bool
2182 isALRclose ITof     = True
2183 isALRclose ITthen   = True
2184 isALRclose ITelse   = True
2185 isALRclose ITcparen = True
2186 isALRclose ITcbrack = True
2187 isALRclose ITccurly = True
2188 -- GHC Extensions:
2189 isALRclose ITcubxparen = True
2190 isALRclose _        = False
2191
2192 isNonDecreasingIntentation :: ALRLayout -> Bool
2193 isNonDecreasingIntentation ALRLayoutDo = True
2194 isNonDecreasingIntentation _           = False
2195
2196 containsCommas :: Token -> Bool
2197 containsCommas IToparen = True
2198 containsCommas ITobrack = True
2199 -- John doesn't have {} as containing commas, but records contain them,
2200 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2201 -- (defaultInstallDirs).
2202 containsCommas ITocurly = True
2203 -- GHC Extensions:
2204 containsCommas IToubxparen = True
2205 containsCommas _        = False
2206
2207 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2208 topNoLayoutContainsCommas [] = False
2209 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2210 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2211
2212 lexToken :: P (RealLocated Token)
2213 lexToken = do
2214   inp@(AI loc1 buf) <- getInput
2215   sc <- getLexState
2216   exts <- getExts
2217   case alexScanUser exts inp sc of
2218     AlexEOF -> do
2219         let span = mkRealSrcSpan loc1 loc1
2220         setLastToken span 0
2221         return (L span ITeof)
2222     AlexError (AI loc2 buf) ->
2223         reportLexError loc1 loc2 buf "lexical error"
2224     AlexSkip inp2 _ -> do
2225         setInput inp2
2226         lexToken
2227     AlexToken inp2@(AI end buf2) _ t -> do
2228         setInput inp2
2229         let span = mkRealSrcSpan loc1 end
2230         let bytes = byteDiff buf buf2
2231         span `seq` setLastToken span bytes
2232         t span buf bytes
2233
2234 reportLexError :: RealSrcLoc -> RealSrcLoc -> StringBuffer -> [Char] -> P a
2235 reportLexError loc1 loc2 buf str
2236   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2237   | otherwise =
2238   let
2239         c = fst (nextChar buf)
2240   in
2241   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2242     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2243     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2244
2245 lexTokenStream :: StringBuffer -> RealSrcLoc -> DynFlags -> ParseResult [Located Token]
2246 lexTokenStream buf loc dflags = unP go initState
2247     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2248           initState = mkPState dflags' buf loc
2249           go = do
2250             ltok <- lexer return
2251             case ltok of
2252               L _ ITeof -> return []
2253               _ -> liftM (ltok:) go
2254
2255 linePrags = Map.singleton "line" (begin line_prag2)
2256
2257 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2258                                  ("options_ghc", lex_string_prag IToptions_prag),
2259                                  ("options_haddock", lex_string_prag ITdocOptions),
2260                                  ("language", token ITlanguage_prag),
2261                                  ("include", lex_string_prag ITinclude_prag)])
2262
2263 ignoredPrags = Map.fromList (map ignored pragmas)
2264                where ignored opt = (opt, nested_comment lexToken)
2265                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2266                      options_pragmas = map ("options_" ++) impls
2267                      -- CFILES is a hugs-only thing.
2268                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2269
2270 oneWordPrags = Map.fromList([("rules", rulePrag),
2271                            ("inline", token (ITinline_prag Inline FunLike)),
2272                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2273                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2274                                           -- Spelling variant
2275                            ("notinline", token (ITinline_prag NoInline FunLike)),
2276                            ("specialize", token ITspec_prag),
2277                            ("source", token ITsource_prag),
2278                            ("warning", token ITwarning_prag),
2279                            ("deprecated", token ITdeprecated_prag),
2280                            ("scc", token ITscc_prag),
2281                            ("generated", token ITgenerated_prag),
2282                            ("core", token ITcore_prag),
2283                            ("unpack", token ITunpack_prag),
2284                            ("ann", token ITann_prag),
2285                            ("vectorize", token ITvect_prag),
2286                            ("novectorize", token ITnovect_prag)])
2287
2288 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2289                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2290                              ("specialize inline", token (ITspec_inline_prag True)),
2291                              ("specialize notinline", token (ITspec_inline_prag False)),
2292                              ("vectorize scalar", token ITvect_scalar_prag)])
2293
2294 dispatch_pragmas :: Map String Action -> Action
2295 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2296                                        Just found -> found span buf len
2297                                        Nothing -> lexError "unknown pragma"
2298
2299 known_pragma :: Map String Action -> AlexAccPred Int
2300 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2301                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2302
2303 clean_pragma :: String -> String
2304 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2305                     where unprefix prag' = case stripPrefix "{-#" prag' of
2306                                              Just rest -> rest
2307                                              Nothing -> prag'
2308                           canonical prag' = case prag' of
2309                                               "noinline" -> "notinline"
2310                                               "specialise" -> "specialize"
2311                                               "vectorise" -> "vectorize"
2312                                               "novectorise" -> "novectorize"
2313                                               "constructorlike" -> "conlike"
2314                                               _ -> prag'
2315                           canon_ws s = unwords (map canonical (words s))
2316 }