compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# LANGUAGE BangPatterns #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  38 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  39 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  40 -- But alex still generates some code that causes the "lazy unlifted bindings"
  41 -- warning, and old compilers don't know about it so we can't easily turn
  42 -- it off, so for now we use the sledge hammer:
  43 {-# OPTIONS_GHC -w #-}
  44
  45 {-# OPTIONS_GHC -funbox-strict-fields #-}
  46
  47 module Lexer (
  48    Token(..), lexer, pragState, mkPState, PState(..),
  49    P(..), ParseResult(..), getSrcLoc,
  50    getPState, getDynFlags, withThisPackage,
  51    failLocMsgP, failSpanMsgP, srcParseFail,
  52    getMessages,
  53    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  54    getLexState, popLexState, pushLexState,
  55    extension, bangPatEnabled, datatypeContextsEnabled,
  56    addWarning,
  57    lexTokenStream
  58   ) where
  59
  60 import Bag
  61 import ErrUtils
  62 import Outputable
  63 import StringBuffer
  64 import FastString
  65 import SrcLoc
  66 import UniqFM
  67 import DynFlags
  68 import Module
  69 import Ctype
  70 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..) )
  71 import Util             ( readRational )
  72
  73 import Control.Monad
  74 import Data.Bits
  75 import Data.Char
  76 import Data.List
  77 import Data.Maybe
  78 import Data.Map (Map)
  79 import qualified Data.Map as Map
  80 import Data.Ratio
  81 }
  82
  83 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  84 $whitechar   = [\ \n\r\f\v $unispace]
  85 $white_no_nl = $whitechar # \n
  86 $tab         = \t
  87
  88 $ascdigit  = 0-9
  89 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  90 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  91 $digit     = [$ascdigit $unidigit]
  92
  93 $special   = [\(\)\,\;\[\]\`\{\}]
  94 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  95 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  96 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  97
  98 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  99 $asclarge  = [A-Z]
 100 $large     = [$asclarge $unilarge]
 101
 102 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 103 $ascsmall  = [a-z]
 104 $small     = [$ascsmall $unismall \_]
 105
 106 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 107 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 108
 109 $octit     = 0-7
 110 $hexit     = [$decdigit A-F a-f]
 111 $symchar   = [$symbol \:]
 112 $nl        = [\n\r]
 113 $idchar    = [$small $large $digit \']
 114
 115 $pragmachar = [$small $large $digit]
 116
 117 $docsym    = [\| \^ \* \$]
 118
 119 @varid     = $small $idchar*
 120 @conid     = $large $idchar*
 121
 122 @varsym    = $symbol $symchar*
 123 @consym    = \: $symchar*
 124
 125 @decimal     = $decdigit+
 126 @octal       = $octit+
 127 @hexadecimal = $hexit+
 128 @exponent    = [eE] [\-\+]? @decimal
 129
 130 -- we support the hierarchical module name extension:
 131 @qual = (@conid \.)+
 132
 133 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 134
 135 -- normal signed numerical literals can only be explicitly negative,
 136 -- not explicitly positive (contrast @exponent)
 137 @negative = \-
 138 @signed = @negative ?
 139
 140 haskell :-
 141
 142 -- everywhere: skip whitespace and comments
 143 $white_no_nl+                           ;
 144 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 145
 146 -- Everywhere: deal with nested comments.  We explicitly rule out
 147 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 148 -- (this can happen even though pragmas will normally take precedence due to
 149 -- longest-match, because pragmas aren't valid in every state, but comments
 150 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 151 -- set.
 152
 153 "{-" / { isNormalComment } { nested_comment lexToken }
 154
 155 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 156 -- more dashes followed by a symbol should be parsed as a varsym, so we
 157 -- have to exclude those.
 158
 159 -- Since Haddock comments aren't valid in every state, we need to rule them
 160 -- out here.
 161
 162 -- The following two rules match comments that begin with two dashes, but
 163 -- continue with a different character. The rules test that this character
 164 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 165 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 166 -- have a Haddock comment). The rules then munch the rest of the line.
 167
 168 "-- " ~[$docsym \#] .* { lineCommentToken }
 169 "--" [^$symbol : \ ] .* { lineCommentToken }
 170
 171 -- Next, match Haddock comments if no -haddock flag
 172
 173 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 174
 175 -- Now, when we've matched comments that begin with 2 dashes and continue
 176 -- with a different character, we need to match comments that begin with three
 177 -- or more dashes (which clearly can't be Haddock comments). We only need to
 178 -- make sure that the first non-dash character isn't a symbol, and munch the
 179 -- rest of the line.
 180
 181 "---"\-* [^$symbol :] .* { lineCommentToken }
 182
 183 -- Since the previous rules all match dashes followed by at least one
 184 -- character, we also need to match a whole line filled with just dashes.
 185
 186 "--"\-* / { atEOL } { lineCommentToken }
 187
 188 -- We need this rule since none of the other single line comment rules
 189 -- actually match this case.
 190
 191 "-- " / { atEOL } { lineCommentToken }
 192
 193 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 194 -- blank lines) until we find a non-whitespace character, then do layout
 195 -- processing.
 196 --
 197 -- One slight wibble here: what if the line begins with {-#? In
 198 -- theory, we have to lex the pragma to see if it's one we recognise,
 199 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 200 -- as a nested comment.  We don't bother with this: if the line begins
 201 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 202 <bol> {
 203   \n                                    ;
 204   ^\# (line)?                           { begin line_prag1 }
 205   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 206   ^\# \! .* \n                          ; -- #!, for scripts
 207   ()                                    { do_bol }
 208 }
 209
 210 -- after a layout keyword (let, where, do, of), we begin a new layout
 211 -- context if the curly brace is missing.
 212 -- Careful! This stuff is quite delicate.
 213 <layout, layout_do> {
 214   \{ / { notFollowedBy '-' }            { hopefully_open_brace }
 215         -- we might encounter {-# here, but {- has been handled already
 216   \n                                    ;
 217   ^\# (line)?                           { begin line_prag1 }
 218 }
 219
 220 -- do is treated in a subtly different way, see new_layout_context
 221 <layout>    ()                          { new_layout_context True }
 222 <layout_do> ()                          { new_layout_context False }
 223
 224 -- after a new layout context which was found to be to the left of the
 225 -- previous context, we have generated a '{' token, and we now need to
 226 -- generate a matching '}' token.
 227 <layout_left>  ()                       { do_layout_left }
 228
 229 <0,option_prags> \n                             { begin bol }
 230
 231 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 232                                 { dispatch_pragmas linePrags }
 233
 234 -- single-line line pragmas, of the form
 235 --    # <line> "<file>" <extra-stuff> \n
 236 <line_prag1> $decdigit+                 { setLine line_prag1a }
 237 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 238 <line_prag1b> .*                        { pop }
 239
 240 -- Haskell-style line pragmas, of the form
 241 --    {-# LINE <line> "<file>" #-}
 242 <line_prag2> $decdigit+                 { setLine line_prag2a }
 243 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 244 <line_prag2b> "#-}"|"-}"                { pop }
 245    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 246    -- with older versions of GHC which generated these.
 247
 248 <0,option_prags> {
 249   "{-#" $whitechar* $pragmachar+
 250         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 251                                  { dispatch_pragmas twoWordPrags }
 252
 253   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 254                                  { dispatch_pragmas oneWordPrags }
 255
 256   -- We ignore all these pragmas, but don't generate a warning for them
 257   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 258                                  { dispatch_pragmas ignoredPrags }
 259
 260   -- ToDo: should only be valid inside a pragma:
 261   "#-}"                                 { endPrag }
 262 }
 263
 264 <option_prags> {
 265   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 266                                    { dispatch_pragmas fileHeaderPrags }
 267
 268   "-- #"                                 { multiline_doc_comment }
 269 }
 270
 271 <0> {
 272   -- In the "0" mode we ignore these pragmas
 273   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 274                      { nested_comment lexToken }
 275 }
 276
 277 <0> {
 278   "-- #" .* { lineCommentToken }
 279 }
 280
 281 <0,option_prags> {
 282   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 283                     (nested_comment lexToken) }
 284 }
 285
 286 -- '0' state: ordinary lexemes
 287
 288 -- Haddock comments
 289
 290 <0,option_prags> {
 291   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 292   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 293 }
 294
 295 -- "special" symbols
 296
 297 <0> {
 298   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 299   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 300 }
 301
 302 <0> {
 303   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 304   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 305   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 306   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 307   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 308   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 309   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 310   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 311
 312 -- For backward compatibility, accept the old dollar syntax
 313   "[$" @varid "|"  / { ifExtension qqEnabled }
 314                      { lex_quasiquote_tok }
 315
 316   "[" @varid "|"  / { ifExtension qqEnabled }
 317                      { lex_quasiquote_tok }
 318 }
 319
 320 <0> {
 321   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 322                                         { special IToparenbar }
 323   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 324 }
 325
 326 <0> {
 327   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 328 }
 329
 330 <0> {
 331   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 332          { token IToubxparen }
 333   "#)" / { ifExtension unboxedTuplesEnabled }
 334          { token ITcubxparen }
 335 }
 336
 337 <0> {
 338   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 339   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 340 }
 341
 342 <0,option_prags> {
 343   \(                                    { special IToparen }
 344   \)                                    { special ITcparen }
 345   \[                                    { special ITobrack }
 346   \]                                    { special ITcbrack }
 347   \,                                    { special ITcomma }
 348   \;                                    { special ITsemi }
 349   \`                                    { special ITbackquote }
 350
 351   \{                                    { open_brace }
 352   \}                                    { close_brace }
 353 }
 354
 355 <0,option_prags> {
 356   @qual @varid                  { idtoken qvarid }
 357   @qual @conid                  { idtoken qconid }
 358   @varid                        { varid }
 359   @conid                        { idtoken conid }
 360 }
 361
 362 <0> {
 363   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 364   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 365   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 366   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 367 }
 368
 369 -- ToDo: - move `var` and (sym) into lexical syntax?
 370 --       - remove backquote from $special?
 371 <0> {
 372   @qual @varsym       / { ifExtension oldQualOps } { idtoken qvarsym }
 373   @qual @consym       / { ifExtension oldQualOps } { idtoken qconsym }
 374   @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
 375   @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
 376   @varsym                                          { varsym }
 377   @consym                                          { consym }
 378 }
 379
 380 -- For the normal boxed literals we need to be careful
 381 -- when trying to be close to Haskell98
 382 <0> {
 383   -- Normal integral literals (:: Num a => a, from Integer)
 384   @decimal           { tok_num positive 0 0 decimal }
 385   0[oO] @octal       { tok_num positive 2 2 octal }
 386   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 387
 388   -- Normal rational literals (:: Fractional a => a, from Rational)
 389   @floating_point    { strtoken tok_float }
 390 }
 391
 392 <0> {
 393   -- Unboxed ints (:: Int#) and words (:: Word#)
 394   -- It's simpler (and faster?) to give separate cases to the negatives,
 395   -- especially considering octal/hexadecimal prefixes.
 396   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 397   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 398   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 399   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 400   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 401   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 402
 403   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 404   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 405   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 406
 407   -- Unboxed floats and doubles (:: Float#, :: Double#)
 408   -- prim_{float,double} work with signed literals
 409   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 410   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 411 }
 412
 413 -- Strings and chars are lexed by hand-written code.  The reason is
 414 -- that even if we recognise the string or char here in the regex
 415 -- lexer, we would still have to parse the string afterward in order
 416 -- to convert it to a String.
 417 <0> {
 418   \'                            { lex_char_tok }
 419   \"                            { lex_string_tok }
 420 }
 421
 422 {
 423 -- -----------------------------------------------------------------------------
 424 -- The token type
 425
 426 data Token
 427   = ITas                        -- Haskell keywords
 428   | ITcase
 429   | ITclass
 430   | ITdata
 431   | ITdefault
 432   | ITderiving
 433   | ITdo
 434   | ITelse
 435   | IThiding
 436   | ITif
 437   | ITimport
 438   | ITin
 439   | ITinfix
 440   | ITinfixl
 441   | ITinfixr
 442   | ITinstance
 443   | ITlet
 444   | ITmodule
 445   | ITnewtype
 446   | ITof
 447   | ITqualified
 448   | ITthen
 449   | ITtype
 450   | ITwhere
 451   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 452
 453   | ITforall                    -- GHC extension keywords
 454   | ITforeign
 455   | ITexport
 456   | ITlabel
 457   | ITdynamic
 458   | ITsafe
 459   | ITthreadsafe
 460   | ITinterruptible
 461   | ITunsafe
 462   | ITstdcallconv
 463   | ITccallconv
 464   | ITprimcallconv
 465   | ITmdo
 466   | ITfamily
 467   | ITgroup
 468   | ITby
 469   | ITusing
 470
 471         -- Pragmas
 472   | ITinline_prag InlineSpec RuleMatchInfo
 473   | ITspec_prag                 -- SPECIALISE
 474   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 475   | ITsource_prag
 476   | ITrules_prag
 477   | ITwarning_prag
 478   | ITdeprecated_prag
 479   | ITline_prag
 480   | ITscc_prag
 481   | ITgenerated_prag
 482   | ITcore_prag                 -- hdaume: core annotations
 483   | ITunpack_prag
 484   | ITann_prag
 485   | ITclose_prag
 486   | IToptions_prag String
 487   | ITinclude_prag String
 488   | ITlanguage_prag
 489
 490   | ITdotdot                    -- reserved symbols
 491   | ITcolon
 492   | ITdcolon
 493   | ITequal
 494   | ITlam
 495   | ITvbar
 496   | ITlarrow
 497   | ITrarrow
 498   | ITat
 499   | ITtilde
 500   | ITdarrow
 501   | ITminus
 502   | ITbang
 503   | ITstar
 504   | ITdot
 505
 506   | ITbiglam                    -- GHC-extension symbols
 507
 508   | ITocurly                    -- special symbols
 509   | ITccurly
 510   | ITocurlybar                 -- {|, for type applications
 511   | ITccurlybar                 -- |}, for type applications
 512   | ITvocurly
 513   | ITvccurly
 514   | ITobrack
 515   | ITopabrack                  -- [:, for parallel arrays with -XParr
 516   | ITcpabrack                  -- :], for parallel arrays with -XParr
 517   | ITcbrack
 518   | IToparen
 519   | ITcparen
 520   | IToubxparen
 521   | ITcubxparen
 522   | ITsemi
 523   | ITcomma
 524   | ITunderscore
 525   | ITbackquote
 526
 527   | ITvarid   FastString        -- identifiers
 528   | ITconid   FastString
 529   | ITvarsym  FastString
 530   | ITconsym  FastString
 531   | ITqvarid  (FastString,FastString)
 532   | ITqconid  (FastString,FastString)
 533   | ITqvarsym (FastString,FastString)
 534   | ITqconsym (FastString,FastString)
 535   | ITprefixqvarsym (FastString,FastString)
 536   | ITprefixqconsym (FastString,FastString)
 537
 538   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 539
 540   | ITchar       Char
 541   | ITstring     FastString
 542   | ITinteger    Integer
 543   | ITrational   Rational
 544
 545   | ITprimchar   Char
 546   | ITprimstring FastString
 547   | ITprimint    Integer
 548   | ITprimword   Integer
 549   | ITprimfloat  Rational
 550   | ITprimdouble Rational
 551
 552   -- Template Haskell extension tokens
 553   | ITopenExpQuote              --  [| or [e|
 554   | ITopenPatQuote              --  [p|
 555   | ITopenDecQuote              --  [d|
 556   | ITopenTypQuote              --  [t|
 557   | ITcloseQuote                --  |]
 558   | ITidEscape   FastString     --  $x
 559   | ITparenEscape               --  $(
 560   | ITvarQuote                  --  '
 561   | ITtyQuote                   --  ''
 562   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 563
 564   -- Arrow notation extension
 565   | ITproc
 566   | ITrec
 567   | IToparenbar                 --  (|
 568   | ITcparenbar                 --  |)
 569   | ITlarrowtail                --  -<
 570   | ITrarrowtail                --  >-
 571   | ITLarrowtail                --  -<<
 572   | ITRarrowtail                --  >>-
 573
 574   | ITunknown String            -- Used when the lexer can't make sense of it
 575   | ITeof                       -- end of file token
 576
 577   -- Documentation annotations
 578   | ITdocCommentNext  String     -- something beginning '-- |'
 579   | ITdocCommentPrev  String     -- something beginning '-- ^'
 580   | ITdocCommentNamed String     -- something beginning '-- $'
 581   | ITdocSection      Int String -- a section heading
 582   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 583   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 584   | ITlineComment     String     -- comment starting by "--"
 585   | ITblockComment    String     -- comment in {- -}
 586
 587 #ifdef DEBUG
 588   deriving Show -- debugging
 589 #endif
 590
 591 {-
 592 isSpecial :: Token -> Bool
 593 -- If we see M.x, where x is a keyword, but
 594 -- is special, we treat is as just plain M.x,
 595 -- not as a keyword.
 596 isSpecial ITas          = True
 597 isSpecial IThiding      = True
 598 isSpecial ITqualified   = True
 599 isSpecial ITforall      = True
 600 isSpecial ITexport      = True
 601 isSpecial ITlabel       = True
 602 isSpecial ITdynamic     = True
 603 isSpecial ITsafe        = True
 604 isSpecial ITthreadsafe  = True
 605 isSpecial ITinterruptible = True
 606 isSpecial ITunsafe      = True
 607 isSpecial ITccallconv   = True
 608 isSpecial ITstdcallconv = True
 609 isSpecial ITprimcallconv = True
 610 isSpecial ITmdo         = True
 611 isSpecial ITfamily      = True
 612 isSpecial ITgroup   = True
 613 isSpecial ITby      = True
 614 isSpecial ITusing   = True
 615 isSpecial _             = False
 616 -}
 617
 618 -- the bitmap provided as the third component indicates whether the
 619 -- corresponding extension keyword is valid under the extension options
 620 -- provided to the compiler; if the extension corresponding to *any* of the
 621 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 622 -- facilitates using a keyword in two different extensions that can be
 623 -- activated independently)
 624 --
 625 reservedWordsFM :: UniqFM (Token, Int)
 626 reservedWordsFM = listToUFM $
 627         map (\(x, y, z) -> (mkFastString x, (y, z)))
 628        [( "_",          ITunderscore,   0 ),
 629         ( "as",         ITas,           0 ),
 630         ( "case",       ITcase,         0 ),
 631         ( "class",      ITclass,        0 ),
 632         ( "data",       ITdata,         0 ),
 633         ( "default",    ITdefault,      0 ),
 634         ( "deriving",   ITderiving,     0 ),
 635         ( "do",         ITdo,           0 ),
 636         ( "else",       ITelse,         0 ),
 637         ( "hiding",     IThiding,       0 ),
 638         ( "if",         ITif,           0 ),
 639         ( "import",     ITimport,       0 ),
 640         ( "in",         ITin,           0 ),
 641         ( "infix",      ITinfix,        0 ),
 642         ( "infixl",     ITinfixl,       0 ),
 643         ( "infixr",     ITinfixr,       0 ),
 644         ( "instance",   ITinstance,     0 ),
 645         ( "let",        ITlet,          0 ),
 646         ( "module",     ITmodule,       0 ),
 647         ( "newtype",    ITnewtype,      0 ),
 648         ( "of",         ITof,           0 ),
 649         ( "qualified",  ITqualified,    0 ),
 650         ( "then",       ITthen,         0 ),
 651         ( "type",       ITtype,         0 ),
 652         ( "where",      ITwhere,        0 ),
 653         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 654
 655     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 656         ( "mdo",        ITmdo,           bit recursiveDoBit),
 657         ( "family",     ITfamily,        bit tyFamBit),
 658     ( "group",  ITgroup,     bit transformComprehensionsBit),
 659     ( "by",     ITby,        bit transformComprehensionsBit),
 660     ( "using",  ITusing,     bit transformComprehensionsBit),
 661
 662         ( "foreign",    ITforeign,       bit ffiBit),
 663         ( "export",     ITexport,        bit ffiBit),
 664         ( "label",      ITlabel,         bit ffiBit),
 665         ( "dynamic",    ITdynamic,       bit ffiBit),
 666         ( "safe",       ITsafe,          bit ffiBit),
 667         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 668         ( "interruptible", ITinterruptible, bit ffiBit),
 669         ( "unsafe",     ITunsafe,        bit ffiBit),
 670         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 671         ( "ccall",      ITccallconv,     bit ffiBit),
 672         ( "prim",       ITprimcallconv,  bit ffiBit),
 673
 674         ( "rec",        ITrec,           bit recBit),
 675         ( "proc",       ITproc,          bit arrowsBit)
 676      ]
 677
 678 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 679 reservedSymsFM = listToUFM $
 680     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 681       [ ("..",  ITdotdot,   always)
 682         -- (:) is a reserved op, meaning only list cons
 683        ,(":",   ITcolon,    always)
 684        ,("::",  ITdcolon,   always)
 685        ,("=",   ITequal,    always)
 686        ,("\\",  ITlam,      always)
 687        ,("|",   ITvbar,     always)
 688        ,("<-",  ITlarrow,   always)
 689        ,("->",  ITrarrow,   always)
 690        ,("@",   ITat,       always)
 691        ,("~",   ITtilde,    always)
 692        ,("=>",  ITdarrow,   always)
 693        ,("-",   ITminus,    always)
 694        ,("!",   ITbang,     always)
 695
 696         -- For data T (a::*) = MkT
 697        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 698         -- For 'forall a . t'
 699        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 700
 701        ,("-<",  ITlarrowtail, arrowsEnabled)
 702        ,(">-",  ITrarrowtail, arrowsEnabled)
 703        ,("-<<", ITLarrowtail, arrowsEnabled)
 704        ,(">>-", ITRarrowtail, arrowsEnabled)
 705
 706        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 707        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 708        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 709                                 explicitForallEnabled i)
 710        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 711        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 712
 713        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 714        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 716        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 717
 718        ,("★", ITstar, unicodeSyntaxEnabled)
 719
 720         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 721         -- form part of a large operator.  This would let us have a better
 722         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 723        ]
 724
 725 -- -----------------------------------------------------------------------------
 726 -- Lexer actions
 727
 728 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 729
 730 special :: Token -> Action
 731 special tok span _buf _len = return (L span tok)
 732
 733 token, layout_token :: Token -> Action
 734 token t span _buf _len = return (L span t)
 735 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 736
 737 idtoken :: (StringBuffer -> Int -> Token) -> Action
 738 idtoken f span buf len = return (L span $! (f buf len))
 739
 740 skip_one_varid :: (FastString -> Token) -> Action
 741 skip_one_varid f span buf len
 742   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 743
 744 strtoken :: (String -> Token) -> Action
 745 strtoken f span buf len =
 746   return (L span $! (f $! lexemeToString buf len))
 747
 748 init_strtoken :: Int -> (String -> Token) -> Action
 749 -- like strtoken, but drops the last N character(s)
 750 init_strtoken drop f span buf len =
 751   return (L span $! (f $! lexemeToString buf (len-drop)))
 752
 753 begin :: Int -> Action
 754 begin code _span _str _len = do pushLexState code; lexToken
 755
 756 pop :: Action
 757 pop _span _buf _len = do _ <- popLexState
 758                          lexToken
 759
 760 hopefully_open_brace :: Action
 761 hopefully_open_brace span buf len
 762  = do relaxed <- extension relaxedLayout
 763       ctx <- getContext
 764       (AI l _) <- getInput
 765       let offset = srcLocCol l
 766           isOK = relaxed ||
 767                  case ctx of
 768                  Layout prev_off : _ -> prev_off < offset
 769                  _                   -> True
 770       if isOK then pop_and open_brace span buf len
 771               else failSpanMsgP span (text "Missing block")
 772
 773 pop_and :: Action -> Action
 774 pop_and act span buf len = do _ <- popLexState
 775                               act span buf len
 776
 777 {-# INLINE nextCharIs #-}
 778 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 779 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 780
 781 notFollowedBy :: Char -> AlexAccPred Int
 782 notFollowedBy char _ _ _ (AI _ buf)
 783   = nextCharIs buf (/=char)
 784
 785 notFollowedBySymbol :: AlexAccPred Int
 786 notFollowedBySymbol _ _ _ (AI _ buf)
 787   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 788
 789 -- We must reject doc comments as being ordinary comments everywhere.
 790 -- In some cases the doc comment will be selected as the lexeme due to
 791 -- maximal munch, but not always, because the nested comment rule is
 792 -- valid in all states, but the doc-comment rules are only valid in
 793 -- the non-layout states.
 794 isNormalComment :: AlexAccPred Int
 795 isNormalComment bits _ _ (AI _ buf)
 796   | haddockEnabled bits = notFollowedByDocOrPragma
 797   | otherwise           = nextCharIs buf (/='#')
 798   where
 799     notFollowedByDocOrPragma
 800        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 801
 802 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 803 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 804
 805 {-
 806 haddockDisabledAnd p bits _ _ (AI _ buf)
 807   = if haddockEnabled bits then False else (p buf)
 808 -}
 809
 810 atEOL :: AlexAccPred Int
 811 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 812
 813 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 814 ifExtension pred bits _ _ _ = pred bits
 815
 816 multiline_doc_comment :: Action
 817 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 818   where
 819     worker commentAcc input docType oneLine = case alexGetChar input of
 820       Just ('\n', input')
 821         | oneLine -> docCommentEnd input commentAcc docType buf span
 822         | otherwise -> case checkIfCommentLine input' of
 823           Just input -> worker ('\n':commentAcc) input docType False
 824           Nothing -> docCommentEnd input commentAcc docType buf span
 825       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 826       Nothing -> docCommentEnd input commentAcc docType buf span
 827
 828     checkIfCommentLine input = check (dropNonNewlineSpace input)
 829       where
 830         check input = case alexGetChar input of
 831           Just ('-', input) -> case alexGetChar input of
 832             Just ('-', input) -> case alexGetChar input of
 833               Just (c, _) | c /= '-' -> Just input
 834               _ -> Nothing
 835             _ -> Nothing
 836           _ -> Nothing
 837
 838         dropNonNewlineSpace input = case alexGetChar input of
 839           Just (c, input')
 840             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 841             | otherwise -> input
 842           Nothing -> input
 843
 844 lineCommentToken :: Action
 845 lineCommentToken span buf len = do
 846   b <- extension rawTokenStreamEnabled
 847   if b then strtoken ITlineComment span buf len else lexToken
 848
 849 {-
 850   nested comments require traversing by hand, they can't be parsed
 851   using regular expressions.
 852 -}
 853 nested_comment :: P (Located Token) -> Action
 854 nested_comment cont span _str _len = do
 855   input <- getInput
 856   go "" (1::Int) input
 857   where
 858     go commentAcc 0 input = do setInput input
 859                                b <- extension rawTokenStreamEnabled
 860                                if b
 861                                  then docCommentEnd input commentAcc ITblockComment _str span
 862                                  else cont
 863     go commentAcc n input = case alexGetChar input of
 864       Nothing -> errBrace input span
 865       Just ('-',input) -> case alexGetChar input of
 866         Nothing  -> errBrace input span
 867         Just ('\125',input) -> go commentAcc (n-1) input
 868         Just (_,_)          -> go ('-':commentAcc) n input
 869       Just ('\123',input) -> case alexGetChar input of
 870         Nothing  -> errBrace input span
 871         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 872         Just (_,_)       -> go ('\123':commentAcc) n input
 873       Just (c,input) -> go (c:commentAcc) n input
 874
 875 nested_doc_comment :: Action
 876 nested_doc_comment span buf _len = withLexedDocType (go "")
 877   where
 878     go commentAcc input docType _ = case alexGetChar input of
 879       Nothing -> errBrace input span
 880       Just ('-',input) -> case alexGetChar input of
 881         Nothing -> errBrace input span
 882         Just ('\125',input) ->
 883           docCommentEnd input commentAcc docType buf span
 884         Just (_,_) -> go ('-':commentAcc) input docType False
 885       Just ('\123', input) -> case alexGetChar input of
 886         Nothing  -> errBrace input span
 887         Just ('-',input) -> do
 888           setInput input
 889           let cont = do input <- getInput; go commentAcc input docType False
 890           nested_comment cont span buf _len
 891         Just (_,_) -> go ('\123':commentAcc) input docType False
 892       Just (c,input) -> go (c:commentAcc) input docType False
 893
 894 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 895                  -> P (Located Token)
 896 withLexedDocType lexDocComment = do
 897   input@(AI _ buf) <- getInput
 898   case prevChar buf ' ' of
 899     '|' -> lexDocComment input ITdocCommentNext False
 900     '^' -> lexDocComment input ITdocCommentPrev False
 901     '$' -> lexDocComment input ITdocCommentNamed False
 902     '*' -> lexDocSection 1 input
 903     '#' -> lexDocComment input ITdocOptionsOld False
 904     _ -> panic "withLexedDocType: Bad doc type"
 905  where
 906     lexDocSection n input = case alexGetChar input of
 907       Just ('*', input) -> lexDocSection (n+1) input
 908       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 909       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 910
 911 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 912 -- off again at the end of the pragma.
 913 rulePrag :: Action
 914 rulePrag span _buf _len = do
 915   setExts (.|. bit inRulePragBit)
 916   return (L span ITrules_prag)
 917
 918 endPrag :: Action
 919 endPrag span _buf _len = do
 920   setExts (.&. complement (bit inRulePragBit))
 921   return (L span ITclose_prag)
 922
 923 -- docCommentEnd
 924 -------------------------------------------------------------------------------
 925 -- This function is quite tricky. We can't just return a new token, we also
 926 -- need to update the state of the parser. Why? Because the token is longer
 927 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 928 -- it writes the wrong token length to the parser state. This function is
 929 -- called afterwards, so it can just update the state.
 930
 931 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 932                  SrcSpan -> P (Located Token)
 933 docCommentEnd input commentAcc docType buf span = do
 934   setInput input
 935   let (AI loc nextBuf) = input
 936       comment = reverse commentAcc
 937       span' = mkSrcSpan (srcSpanStart span) loc
 938       last_len = byteDiff buf nextBuf
 939
 940   span `seq` setLastToken span' last_len
 941   return (L span' (docType comment))
 942
 943 errBrace :: AlexInput -> SrcSpan -> P a
 944 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 945
 946 open_brace, close_brace :: Action
 947 open_brace span _str _len = do
 948   ctx <- getContext
 949   setContext (NoLayout:ctx)
 950   return (L span ITocurly)
 951 close_brace span _str _len = do
 952   popContext
 953   return (L span ITccurly)
 954
 955 qvarid, qconid :: StringBuffer -> Int -> Token
 956 qvarid buf len = ITqvarid $! splitQualName buf len False
 957 qconid buf len = ITqconid $! splitQualName buf len False
 958
 959 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 960 -- takes a StringBuffer and a length, and returns the module name
 961 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 962 -- because of hierarchical module names.
 963 splitQualName orig_buf len parens = split orig_buf orig_buf
 964   where
 965     split buf dot_buf
 966         | orig_buf `byteDiff` buf >= len  = done dot_buf
 967         | c == '.'                        = found_dot buf'
 968         | otherwise                       = split buf' dot_buf
 969       where
 970        (c,buf') = nextChar buf
 971
 972     -- careful, we might get names like M....
 973     -- so, if the character after the dot is not upper-case, this is
 974     -- the end of the qualifier part.
 975     found_dot buf -- buf points after the '.'
 976         | isUpper c    = split buf' buf
 977         | otherwise    = done buf
 978       where
 979        (c,buf') = nextChar buf
 980
 981     done dot_buf =
 982         (lexemeToFastString orig_buf (qual_size - 1),
 983          if parens -- Prelude.(+)
 984             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 985             else lexemeToFastString dot_buf (len - qual_size))
 986       where
 987         qual_size = orig_buf `byteDiff` dot_buf
 988
 989 varid :: Action
 990 varid span buf len =
 991   fs `seq`
 992   case lookupUFM reservedWordsFM fs of
 993         Just (keyword,0)    -> do
 994                 maybe_layout keyword
 995                 return (L span keyword)
 996         Just (keyword,exts) -> do
 997                 b <- extension (\i -> exts .&. i /= 0)
 998                 if b then do maybe_layout keyword
 999                              return (L span keyword)
1000                      else return (L span (ITvarid fs))
1001         _other -> return (L span (ITvarid fs))
1002   where
1003         fs = lexemeToFastString buf len
1004
1005 conid :: StringBuffer -> Int -> Token
1006 conid buf len = ITconid fs
1007   where fs = lexemeToFastString buf len
1008
1009 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
1010 qvarsym buf len = ITqvarsym $! splitQualName buf len False
1011 qconsym buf len = ITqconsym $! splitQualName buf len False
1012 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
1013 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1014
1015 varsym, consym :: Action
1016 varsym = sym ITvarsym
1017 consym = sym ITconsym
1018
1019 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1020     -> P (Located Token)
1021 sym con span buf len =
1022   case lookupUFM reservedSymsFM fs of
1023         Just (keyword,exts) -> do
1024                 b <- extension exts
1025                 if b then return (L span keyword)
1026                      else return (L span $! con fs)
1027         _other -> return (L span $! con fs)
1028   where
1029         fs = lexemeToFastString buf len
1030
1031 -- Variations on the integral numeric literal.
1032 tok_integral :: (Integer -> Token)
1033      -> (Integer -> Integer)
1034  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1035      -> Int -> Int
1036      -> (Integer, (Char->Int)) -> Action
1037 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1038   return $ L span $ itint $! transint $ parseUnsignedInteger
1039      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1040
1041 -- some conveniences for use with tok_integral
1042 tok_num :: (Integer -> Integer)
1043         -> Int -> Int
1044         -> (Integer, (Char->Int)) -> Action
1045 tok_num = tok_integral ITinteger
1046 tok_primint :: (Integer -> Integer)
1047             -> Int -> Int
1048             -> (Integer, (Char->Int)) -> Action
1049 tok_primint = tok_integral ITprimint
1050 tok_primword :: Int -> Int
1051              -> (Integer, (Char->Int)) -> Action
1052 tok_primword = tok_integral ITprimword positive
1053 positive, negative :: (Integer -> Integer)
1054 positive = id
1055 negative = negate
1056 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1057 decimal = (10,octDecDigit)
1058 octal = (8,octDecDigit)
1059 hexadecimal = (16,hexDigit)
1060
1061 -- readRational can understand negative rationals, exponents, everything.
1062 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1063 tok_float        str = ITrational   $! readRational str
1064 tok_primfloat    str = ITprimfloat  $! readRational str
1065 tok_primdouble   str = ITprimdouble $! readRational str
1066
1067 -- -----------------------------------------------------------------------------
1068 -- Layout processing
1069
1070 -- we're at the first token on a line, insert layout tokens if necessary
1071 do_bol :: Action
1072 do_bol span _str _len = do
1073         pos <- getOffside
1074         case pos of
1075             LT -> do
1076                 --trace "layout: inserting '}'" $ do
1077                 popContext
1078                 -- do NOT pop the lex state, we might have a ';' to insert
1079                 return (L span ITvccurly)
1080             EQ -> do
1081                 --trace "layout: inserting ';'" $ do
1082                 _ <- popLexState
1083                 return (L span ITsemi)
1084             GT -> do
1085                 _ <- popLexState
1086                 lexToken
1087
1088 -- certain keywords put us in the "layout" state, where we might
1089 -- add an opening curly brace.
1090 maybe_layout :: Token -> P ()
1091 maybe_layout t = do -- If the alternative layout rule is enabled then
1092                     -- we never create an implicit layout context here.
1093                     -- Layout is handled XXX instead.
1094                     -- The code for closing implicit contexts, or
1095                     -- inserting implicit semi-colons, is therefore
1096                     -- irrelevant as it only applies in an implicit
1097                     -- context.
1098                     alr <- extension alternativeLayoutRule
1099                     unless alr $ f t
1100     where f ITdo    = pushLexState layout_do
1101           f ITmdo   = pushLexState layout_do
1102           f ITof    = pushLexState layout
1103           f ITlet   = pushLexState layout
1104           f ITwhere = pushLexState layout
1105           f ITrec   = pushLexState layout
1106           f _       = return ()
1107
1108 -- Pushing a new implicit layout context.  If the indentation of the
1109 -- next token is not greater than the previous layout context, then
1110 -- Haskell 98 says that the new layout context should be empty; that is
1111 -- the lexer must generate {}.
1112 --
1113 -- We are slightly more lenient than this: when the new context is started
1114 -- by a 'do', then we allow the new context to be at the same indentation as
1115 -- the previous context.  This is what the 'strict' argument is for.
1116 --
1117 new_layout_context :: Bool -> Action
1118 new_layout_context strict span _buf _len = do
1119     _ <- popLexState
1120     (AI l _) <- getInput
1121     let offset = srcLocCol l
1122     ctx <- getContext
1123     relaxed <- extension relaxedLayout
1124     let strict' = strict || not relaxed
1125     case ctx of
1126         Layout prev_off : _  |
1127            (strict'     && prev_off >= offset  ||
1128             not strict' && prev_off > offset) -> do
1129                 -- token is indented to the left of the previous context.
1130                 -- we must generate a {} sequence now.
1131                 pushLexState layout_left
1132                 return (L span ITvocurly)
1133         _ -> do
1134                 setContext (Layout offset : ctx)
1135                 return (L span ITvocurly)
1136
1137 do_layout_left :: Action
1138 do_layout_left span _buf _len = do
1139     _ <- popLexState
1140     pushLexState bol  -- we must be at the start of a line
1141     return (L span ITvccurly)
1142
1143 -- -----------------------------------------------------------------------------
1144 -- LINE pragmas
1145
1146 setLine :: Int -> Action
1147 setLine code span buf len = do
1148   let line = parseUnsignedInteger buf len 10 octDecDigit
1149   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1150         -- subtract one: the line number refers to the *following* line
1151   _ <- popLexState
1152   pushLexState code
1153   lexToken
1154
1155 setFile :: Int -> Action
1156 setFile code span buf len = do
1157   let file = lexemeToFastString (stepOn buf) (len-2)
1158   setAlrLastLoc noSrcSpan
1159   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1160   _ <- popLexState
1161   pushLexState code
1162   lexToken
1163
1164
1165 -- -----------------------------------------------------------------------------
1166 -- Options, includes and language pragmas.
1167
1168 lex_string_prag :: (String -> Token) -> Action
1169 lex_string_prag mkTok span _buf _len
1170     = do input <- getInput
1171          start <- getSrcLoc
1172          tok <- go [] input
1173          end <- getSrcLoc
1174          return (L (mkSrcSpan start end) tok)
1175     where go acc input
1176               = if isString input "#-}"
1177                    then do setInput input
1178                            return (mkTok (reverse acc))
1179                    else case alexGetChar input of
1180                           Just (c,i) -> go (c:acc) i
1181                           Nothing -> err input
1182           isString _ [] = True
1183           isString i (x:xs)
1184               = case alexGetChar i of
1185                   Just (c,i') | c == x    -> isString i' xs
1186                   _other -> False
1187           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1188
1189
1190 -- -----------------------------------------------------------------------------
1191 -- Strings & Chars
1192
1193 -- This stuff is horrible.  I hates it.
1194
1195 lex_string_tok :: Action
1196 lex_string_tok span _buf _len = do
1197   tok <- lex_string ""
1198   end <- getSrcLoc
1199   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1200
1201 lex_string :: String -> P Token
1202 lex_string s = do
1203   i <- getInput
1204   case alexGetChar' i of
1205     Nothing -> lit_error i
1206
1207     Just ('"',i)  -> do
1208         setInput i
1209         magicHash <- extension magicHashEnabled
1210         if magicHash
1211           then do
1212             i <- getInput
1213             case alexGetChar' i of
1214               Just ('#',i) -> do
1215                    setInput i
1216                    if any (> '\xFF') s
1217                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1218                     else let s' = mkZFastString (reverse s) in
1219                          return (ITprimstring s')
1220                         -- mkZFastString is a hack to avoid encoding the
1221                         -- string in UTF-8.  We just want the exact bytes.
1222               _other ->
1223                 return (ITstring (mkFastString (reverse s)))
1224           else
1225                 return (ITstring (mkFastString (reverse s)))
1226
1227     Just ('\\',i)
1228         | Just ('&',i) <- next -> do
1229                 setInput i; lex_string s
1230         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1231                            -- is_space only works for <= '\x7f' (#3751)
1232                 setInput i; lex_stringgap s
1233         where next = alexGetChar' i
1234
1235     Just (c, i1) -> do
1236         case c of
1237           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1238           c | isAny c -> do setInput i1; lex_string (c:s)
1239           _other -> lit_error i
1240
1241 lex_stringgap :: String -> P Token
1242 lex_stringgap s = do
1243   i <- getInput
1244   c <- getCharOrFail i
1245   case c of
1246     '\\' -> lex_string s
1247     c | is_space c -> lex_stringgap s
1248     _other -> lit_error i
1249
1250
1251 lex_char_tok :: Action
1252 -- Here we are basically parsing character literals, such as 'x' or '\n'
1253 -- but, when Template Haskell is on, we additionally spot
1254 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1255 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1256 -- So we have to do two characters of lookahead: when we see 'x we need to
1257 -- see if there's a trailing quote
1258 lex_char_tok span _buf _len = do        -- We've seen '
1259    i1 <- getInput       -- Look ahead to first character
1260    let loc = srcSpanStart span
1261    case alexGetChar' i1 of
1262         Nothing -> lit_error  i1
1263
1264         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1265                   th_exts <- extension thEnabled
1266                   if th_exts then do
1267                         setInput i2
1268                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1269                    else lit_error i1
1270
1271         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1272                   setInput i2
1273                   lit_ch <- lex_escape
1274                   i3 <- getInput
1275                   mc <- getCharOrFail i3 -- Trailing quote
1276                   if mc == '\'' then finish_char_tok loc lit_ch
1277                                 else lit_error i3
1278
1279         Just (c, i2@(AI _end2 _))
1280                 | not (isAny c) -> lit_error i1
1281                 | otherwise ->
1282
1283                 -- We've seen 'x, where x is a valid character
1284                 --  (i.e. not newline etc) but not a quote or backslash
1285            case alexGetChar' i2 of      -- Look ahead one more character
1286                 Just ('\'', i3) -> do   -- We've seen 'x'
1287                         setInput i3
1288                         finish_char_tok loc c
1289                 _other -> do            -- We've seen 'x not followed by quote
1290                                         -- (including the possibility of EOF)
1291                                         -- If TH is on, just parse the quote only
1292                         th_exts <- extension thEnabled
1293                         let (AI end _) = i1
1294                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1295                                    else lit_error i2
1296
1297 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1298 finish_char_tok loc ch  -- We've already seen the closing quote
1299                         -- Just need to check for trailing #
1300   = do  magicHash <- extension magicHashEnabled
1301         i@(AI end _) <- getInput
1302         if magicHash then do
1303                 case alexGetChar' i of
1304                         Just ('#',i@(AI end _)) -> do
1305                                 setInput i
1306                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1307                         _other ->
1308                                 return (L (mkSrcSpan loc end) (ITchar ch))
1309             else do
1310                    return (L (mkSrcSpan loc end) (ITchar ch))
1311
1312 isAny :: Char -> Bool
1313 isAny c | c > '\x7f' = isPrint c
1314         | otherwise  = is_any c
1315
1316 lex_escape :: P Char
1317 lex_escape = do
1318   i0 <- getInput
1319   c <- getCharOrFail i0
1320   case c of
1321         'a'   -> return '\a'
1322         'b'   -> return '\b'
1323         'f'   -> return '\f'
1324         'n'   -> return '\n'
1325         'r'   -> return '\r'
1326         't'   -> return '\t'
1327         'v'   -> return '\v'
1328         '\\'  -> return '\\'
1329         '"'   -> return '\"'
1330         '\''  -> return '\''
1331         '^'   -> do i1 <- getInput
1332                     c <- getCharOrFail i1
1333                     if c >= '@' && c <= '_'
1334                         then return (chr (ord c - ord '@'))
1335                         else lit_error i1
1336
1337         'x'   -> readNum is_hexdigit 16 hexDigit
1338         'o'   -> readNum is_octdigit  8 octDecDigit
1339         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1340
1341         c1 ->  do
1342            i <- getInput
1343            case alexGetChar' i of
1344             Nothing -> lit_error i0
1345             Just (c2,i2) ->
1346               case alexGetChar' i2 of
1347                 Nothing -> do lit_error i0
1348                 Just (c3,i3) ->
1349                    let str = [c1,c2,c3] in
1350                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1351                                      Just rest <- [stripPrefix p str] ] of
1352                           (escape_char,[]):_ -> do
1353                                 setInput i3
1354                                 return escape_char
1355                           (escape_char,_:_):_ -> do
1356                                 setInput i2
1357                                 return escape_char
1358                           [] -> lit_error i0
1359
1360 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1361 readNum is_digit base conv = do
1362   i <- getInput
1363   c <- getCharOrFail i
1364   if is_digit c
1365         then readNum2 is_digit base conv (conv c)
1366         else lit_error i
1367
1368 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1369 readNum2 is_digit base conv i = do
1370   input <- getInput
1371   read i input
1372   where read i input = do
1373           case alexGetChar' input of
1374             Just (c,input') | is_digit c -> do
1375                let i' = i*base + conv c
1376                if i' > 0x10ffff
1377                   then setInput input >> lexError "numeric escape sequence out of range"
1378                   else read i' input'
1379             _other -> do
1380               setInput input; return (chr i)
1381
1382
1383 silly_escape_chars :: [(String, Char)]
1384 silly_escape_chars = [
1385         ("NUL", '\NUL'),
1386         ("SOH", '\SOH'),
1387         ("STX", '\STX'),
1388         ("ETX", '\ETX'),
1389         ("EOT", '\EOT'),
1390         ("ENQ", '\ENQ'),
1391         ("ACK", '\ACK'),
1392         ("BEL", '\BEL'),
1393         ("BS", '\BS'),
1394         ("HT", '\HT'),
1395         ("LF", '\LF'),
1396         ("VT", '\VT'),
1397         ("FF", '\FF'),
1398         ("CR", '\CR'),
1399         ("SO", '\SO'),
1400         ("SI", '\SI'),
1401         ("DLE", '\DLE'),
1402         ("DC1", '\DC1'),
1403         ("DC2", '\DC2'),
1404         ("DC3", '\DC3'),
1405         ("DC4", '\DC4'),
1406         ("NAK", '\NAK'),
1407         ("SYN", '\SYN'),
1408         ("ETB", '\ETB'),
1409         ("CAN", '\CAN'),
1410         ("EM", '\EM'),
1411         ("SUB", '\SUB'),
1412         ("ESC", '\ESC'),
1413         ("FS", '\FS'),
1414         ("GS", '\GS'),
1415         ("RS", '\RS'),
1416         ("US", '\US'),
1417         ("SP", '\SP'),
1418         ("DEL", '\DEL')
1419         ]
1420
1421 -- before calling lit_error, ensure that the current input is pointing to
1422 -- the position of the error in the buffer.  This is so that we can report
1423 -- a correct location to the user, but also so we can detect UTF-8 decoding
1424 -- errors if they occur.
1425 lit_error :: AlexInput -> P a
1426 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1427
1428 getCharOrFail :: AlexInput -> P Char
1429 getCharOrFail i =  do
1430   case alexGetChar' i of
1431         Nothing -> lexError "unexpected end-of-file in string/character literal"
1432         Just (c,i)  -> do setInput i; return c
1433
1434 -- -----------------------------------------------------------------------------
1435 -- QuasiQuote
1436
1437 lex_quasiquote_tok :: Action
1438 lex_quasiquote_tok span buf len = do
1439   let quoter = tail (lexemeToString buf (len - 1))
1440                 -- 'tail' drops the initial '[',
1441                 -- while the -1 drops the trailing '|'
1442   quoteStart <- getSrcLoc
1443   quote <- lex_quasiquote ""
1444   end <- getSrcLoc
1445   return (L (mkSrcSpan (srcSpanStart span) end)
1446            (ITquasiQuote (mkFastString quoter,
1447                           mkFastString (reverse quote),
1448                           mkSrcSpan quoteStart end)))
1449
1450 lex_quasiquote :: String -> P String
1451 lex_quasiquote s = do
1452   i <- getInput
1453   case alexGetChar' i of
1454     Nothing -> lit_error i
1455
1456     Just ('\\',i)
1457         | Just ('|',i) <- next -> do
1458                 setInput i; lex_quasiquote ('|' : s)
1459         | Just (']',i) <- next -> do
1460                 setInput i; lex_quasiquote (']' : s)
1461         where next = alexGetChar' i
1462
1463     Just ('|',i)
1464         | Just (']',i) <- next -> do
1465                 setInput i; return s
1466         where next = alexGetChar' i
1467
1468     Just (c, i) -> do
1469          setInput i; lex_quasiquote (c : s)
1470
1471 -- -----------------------------------------------------------------------------
1472 -- Warnings
1473
1474 warn :: DynFlag -> SDoc -> Action
1475 warn option warning srcspan _buf _len = do
1476     addWarning option srcspan warning
1477     lexToken
1478
1479 warnThen :: DynFlag -> SDoc -> Action -> Action
1480 warnThen option warning action srcspan buf len = do
1481     addWarning option srcspan warning
1482     action srcspan buf len
1483
1484 -- -----------------------------------------------------------------------------
1485 -- The Parse Monad
1486
1487 data LayoutContext
1488   = NoLayout
1489   | Layout !Int
1490   deriving Show
1491
1492 data ParseResult a
1493   = POk PState a
1494   | PFailed
1495         SrcSpan         -- The start and end of the text span related to
1496                         -- the error.  Might be used in environments which can
1497                         -- show this span, e.g. by highlighting it.
1498         Message         -- The error message
1499
1500 data PState = PState {
1501         buffer     :: StringBuffer,
1502         dflags     :: DynFlags,
1503         messages   :: Messages,
1504         last_loc   :: SrcSpan,  -- pos of previous token
1505         last_len   :: !Int,     -- len of previous token
1506         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1507         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1508         context    :: [LayoutContext],
1509         lex_state  :: [Int],
1510         -- Used in the alternative layout rule:
1511         -- These tokens are the next ones to be sent out. They are
1512         -- just blindly emitted, without the rule looking at them again:
1513         alr_pending_implicit_tokens :: [Located Token],
1514         -- This is the next token to be considered or, if it is Nothing,
1515         -- we need to get the next token from the input stream:
1516         alr_next_token :: Maybe (Located Token),
1517         -- This is what we consider to be the locatino of the last token
1518         -- emitted:
1519         alr_last_loc :: SrcSpan,
1520         -- The stack of layout contexts:
1521         alr_context :: [ALRContext],
1522         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1523         -- us what sort of layout the '{' will open:
1524         alr_expecting_ocurly :: Maybe ALRLayout,
1525         -- Have we just had the '}' for a let block? If so, than an 'in'
1526         -- token doesn't need to close anything:
1527         alr_justClosedExplicitLetBlock :: Bool
1528      }
1529         -- last_loc and last_len are used when generating error messages,
1530         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1531         -- current token to happyError, we could at least get rid of last_len.
1532         -- Getting rid of last_loc would require finding another way to
1533         -- implement pushCurrentContext (which is only called from one place).
1534
1535 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1536                               Bool{- is it a 'let' block? -}
1537                 | ALRLayout ALRLayout Int
1538 data ALRLayout = ALRLayoutLet
1539                | ALRLayoutWhere
1540                | ALRLayoutOf
1541                | ALRLayoutDo
1542
1543 newtype P a = P { unP :: PState -> ParseResult a }
1544
1545 instance Monad P where
1546   return = returnP
1547   (>>=) = thenP
1548   fail = failP
1549
1550 returnP :: a -> P a
1551 returnP a = a `seq` (P $ \s -> POk s a)
1552
1553 thenP :: P a -> (a -> P b) -> P b
1554 (P m) `thenP` k = P $ \ s ->
1555         case m s of
1556                 POk s1 a         -> (unP (k a)) s1
1557                 PFailed span err -> PFailed span err
1558
1559 failP :: String -> P a
1560 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1561
1562 failMsgP :: String -> P a
1563 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1564
1565 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1566 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1567
1568 failSpanMsgP :: SrcSpan -> SDoc -> P a
1569 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1570
1571 getPState :: P PState
1572 getPState = P $ \s -> POk s s
1573
1574 getDynFlags :: P DynFlags
1575 getDynFlags = P $ \s -> POk s (dflags s)
1576
1577 withThisPackage :: (PackageId -> a) -> P a
1578 withThisPackage f
1579  = do   pkg     <- liftM thisPackage getDynFlags
1580         return  $ f pkg
1581
1582 extension :: (Int -> Bool) -> P Bool
1583 extension p = P $ \s -> POk s (p $! extsBitmap s)
1584
1585 getExts :: P Int
1586 getExts = P $ \s -> POk s (extsBitmap s)
1587
1588 setExts :: (Int -> Int) -> P ()
1589 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1590
1591 setSrcLoc :: SrcLoc -> P ()
1592 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1593
1594 getSrcLoc :: P SrcLoc
1595 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1596
1597 setLastToken :: SrcSpan -> Int -> P ()
1598 setLastToken loc len = P $ \s -> POk s {
1599   last_loc=loc,
1600   last_len=len
1601   } ()
1602
1603 data AlexInput = AI SrcLoc StringBuffer
1604
1605 alexInputPrevChar :: AlexInput -> Char
1606 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1607
1608 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1609 alexGetChar (AI loc s)
1610   | atEnd s   = Nothing
1611   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1612                 --trace (show (ord c)) $
1613                 Just (adj_c, (AI loc' s'))
1614   where (c,s') = nextChar s
1615         loc'   = advanceSrcLoc loc c
1616
1617         non_graphic     = '\x0'
1618         upper           = '\x1'
1619         lower           = '\x2'
1620         digit           = '\x3'
1621         symbol          = '\x4'
1622         space           = '\x5'
1623         other_graphic   = '\x6'
1624
1625         adj_c
1626           | c <= '\x06' = non_graphic
1627           | c <= '\x7f' = c
1628           -- Alex doesn't handle Unicode, so when Unicode
1629           -- character is encountered we output these values
1630           -- with the actual character value hidden in the state.
1631           | otherwise =
1632                 case generalCategory c of
1633                   UppercaseLetter       -> upper
1634                   LowercaseLetter       -> lower
1635                   TitlecaseLetter       -> upper
1636                   ModifierLetter        -> other_graphic
1637                   OtherLetter           -> lower -- see #1103
1638                   NonSpacingMark        -> other_graphic
1639                   SpacingCombiningMark  -> other_graphic
1640                   EnclosingMark         -> other_graphic
1641                   DecimalNumber         -> digit
1642                   LetterNumber          -> other_graphic
1643                   OtherNumber           -> other_graphic
1644                   ConnectorPunctuation  -> symbol
1645                   DashPunctuation       -> symbol
1646                   OpenPunctuation       -> other_graphic
1647                   ClosePunctuation      -> other_graphic
1648                   InitialQuote          -> other_graphic
1649                   FinalQuote            -> other_graphic
1650                   OtherPunctuation      -> symbol
1651                   MathSymbol            -> symbol
1652                   CurrencySymbol        -> symbol
1653                   ModifierSymbol        -> symbol
1654                   OtherSymbol           -> symbol
1655                   Space                 -> space
1656                   _other                -> non_graphic
1657
1658 -- This version does not squash unicode characters, it is used when
1659 -- lexing strings.
1660 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1661 alexGetChar' (AI loc s)
1662   | atEnd s   = Nothing
1663   | otherwise = c `seq` loc' `seq` s' `seq`
1664                 --trace (show (ord c)) $
1665                 Just (c, (AI loc' s'))
1666   where (c,s') = nextChar s
1667         loc'   = advanceSrcLoc loc c
1668
1669 getInput :: P AlexInput
1670 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1671
1672 setInput :: AlexInput -> P ()
1673 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1674
1675 pushLexState :: Int -> P ()
1676 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1677
1678 popLexState :: P Int
1679 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1680
1681 getLexState :: P Int
1682 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1683
1684 popNextToken :: P (Maybe (Located Token))
1685 popNextToken
1686     = P $ \s@PState{ alr_next_token = m } ->
1687               POk (s {alr_next_token = Nothing}) m
1688
1689 setAlrLastLoc :: SrcSpan -> P ()
1690 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1691
1692 getAlrLastLoc :: P SrcSpan
1693 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1694
1695 getALRContext :: P [ALRContext]
1696 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1697
1698 setALRContext :: [ALRContext] -> P ()
1699 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1700
1701 getJustClosedExplicitLetBlock :: P Bool
1702 getJustClosedExplicitLetBlock
1703  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1704
1705 setJustClosedExplicitLetBlock :: Bool -> P ()
1706 setJustClosedExplicitLetBlock b
1707  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1708
1709 setNextToken :: Located Token -> P ()
1710 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1711
1712 popPendingImplicitToken :: P (Maybe (Located Token))
1713 popPendingImplicitToken
1714     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1715               case ts of
1716               [] -> POk s Nothing
1717               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1718
1719 setPendingImplicitTokens :: [Located Token] -> P ()
1720 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1721
1722 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1723 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1724
1725 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1726 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1727
1728 -- for reasons of efficiency, flags indicating language extensions (eg,
1729 -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
1730 -- integer
1731
1732 genericsBit :: Int
1733 genericsBit = 0 -- {| and |}
1734 ffiBit :: Int
1735 ffiBit     = 1
1736 parrBit :: Int
1737 parrBit    = 2
1738 arrowsBit :: Int
1739 arrowsBit  = 4
1740 thBit :: Int
1741 thBit      = 5
1742 ipBit :: Int
1743 ipBit      = 6
1744 explicitForallBit :: Int
1745 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1746 bangPatBit :: Int
1747 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1748                 -- (doesn't affect the lexer)
1749 tyFamBit :: Int
1750 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1751 haddockBit :: Int
1752 haddockBit = 10 -- Lex and parse Haddock comments
1753 magicHashBit :: Int
1754 magicHashBit = 11 -- "#" in both functions and operators
1755 kindSigsBit :: Int
1756 kindSigsBit = 12 -- Kind signatures on type variables
1757 recursiveDoBit :: Int
1758 recursiveDoBit = 13 -- mdo
1759 unicodeSyntaxBit :: Int
1760 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1761 unboxedTuplesBit :: Int
1762 unboxedTuplesBit = 15 -- (# and #)
1763 datatypeContextsBit :: Int
1764 datatypeContextsBit = 16
1765 transformComprehensionsBit :: Int
1766 transformComprehensionsBit = 17
1767 qqBit :: Int
1768 qqBit      = 18 -- enable quasiquoting
1769 inRulePragBit :: Int
1770 inRulePragBit = 19
1771 rawTokenStreamBit :: Int
1772 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1773 newQualOpsBit :: Int
1774 newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
1775 recBit :: Int
1776 recBit = 22 -- rec
1777 alternativeLayoutRuleBit :: Int
1778 alternativeLayoutRuleBit = 23
1779 relaxedLayoutBit :: Int
1780 relaxedLayoutBit = 24
1781
1782 always :: Int -> Bool
1783 always           _     = True
1784 genericsEnabled :: Int -> Bool
1785 genericsEnabled  flags = testBit flags genericsBit
1786 parrEnabled :: Int -> Bool
1787 parrEnabled      flags = testBit flags parrBit
1788 arrowsEnabled :: Int -> Bool
1789 arrowsEnabled    flags = testBit flags arrowsBit
1790 thEnabled :: Int -> Bool
1791 thEnabled        flags = testBit flags thBit
1792 ipEnabled :: Int -> Bool
1793 ipEnabled        flags = testBit flags ipBit
1794 explicitForallEnabled :: Int -> Bool
1795 explicitForallEnabled flags = testBit flags explicitForallBit
1796 bangPatEnabled :: Int -> Bool
1797 bangPatEnabled   flags = testBit flags bangPatBit
1798 -- tyFamEnabled :: Int -> Bool
1799 -- tyFamEnabled     flags = testBit flags tyFamBit
1800 haddockEnabled :: Int -> Bool
1801 haddockEnabled   flags = testBit flags haddockBit
1802 magicHashEnabled :: Int -> Bool
1803 magicHashEnabled flags = testBit flags magicHashBit
1804 -- kindSigsEnabled :: Int -> Bool
1805 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1806 unicodeSyntaxEnabled :: Int -> Bool
1807 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1808 unboxedTuplesEnabled :: Int -> Bool
1809 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1810 datatypeContextsEnabled :: Int -> Bool
1811 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1812 qqEnabled :: Int -> Bool
1813 qqEnabled        flags = testBit flags qqBit
1814 -- inRulePrag :: Int -> Bool
1815 -- inRulePrag       flags = testBit flags inRulePragBit
1816 rawTokenStreamEnabled :: Int -> Bool
1817 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1818 newQualOps :: Int -> Bool
1819 newQualOps       flags = testBit flags newQualOpsBit
1820 oldQualOps :: Int -> Bool
1821 oldQualOps flags = not (newQualOps flags)
1822 alternativeLayoutRule :: Int -> Bool
1823 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1824 relaxedLayout :: Int -> Bool
1825 relaxedLayout flags = testBit flags relaxedLayoutBit
1826
1827 -- PState for parsing options pragmas
1828 --
1829 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1830 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1831                                  lex_state = [bol, option_prags, 0]
1832                              }
1833
1834 -- create a parse state
1835 --
1836 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1837 mkPState flags buf loc =
1838   PState {
1839       buffer          = buf,
1840       dflags        = flags,
1841       messages      = emptyMessages,
1842       last_loc      = mkSrcSpan loc loc,
1843       last_len      = 0,
1844       loc           = loc,
1845       extsBitmap    = fromIntegral bitmap,
1846       context       = [],
1847       lex_state     = [bol, 0],
1848       alr_pending_implicit_tokens = [],
1849       alr_next_token = Nothing,
1850       alr_last_loc = noSrcSpan,
1851       alr_context = [],
1852       alr_expecting_ocurly = Nothing,
1853       alr_justClosedExplicitLetBlock = False
1854     }
1855     where
1856       bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
1857                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1858                .|. parrBit           `setBitIf` xopt Opt_PArr         flags
1859                .|. arrowsBit         `setBitIf` xopt Opt_Arrows       flags
1860                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1861                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes flags
1862                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams flags
1863                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll flags
1864                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
1865                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
1866                .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
1867                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
1868                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
1869                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
1870                .|. recBit            `setBitIf` xopt Opt_DoRec  flags
1871                .|. recBit            `setBitIf` xopt Opt_Arrows flags
1872                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
1873                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
1874                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1875                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1876                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1877                .|. newQualOpsBit `setBitIf` xopt Opt_NewQualifiedOperators flags
1878                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1879                .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
1880       --
1881       setBitIf :: Int -> Bool -> Int
1882       b `setBitIf` cond | cond      = bit b
1883                         | otherwise = 0
1884
1885 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1886 addWarning option srcspan warning
1887  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1888        let warning' = mkWarnMsg srcspan alwaysQualify warning
1889            ws' = if dopt option d then ws `snocBag` warning' else ws
1890        in POk s{messages=(ws', es)} ()
1891
1892 getMessages :: PState -> Messages
1893 getMessages PState{messages=ms} = ms
1894
1895 getContext :: P [LayoutContext]
1896 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1897
1898 setContext :: [LayoutContext] -> P ()
1899 setContext ctx = P $ \s -> POk s{context=ctx} ()
1900
1901 popContext :: P ()
1902 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1903                               last_len = len, last_loc = last_loc }) ->
1904   case ctx of
1905         (_:tl) -> POk s{ context = tl } ()
1906         []     -> PFailed last_loc (srcParseErr buf len)
1907
1908 -- Push a new layout context at the indentation of the last token read.
1909 -- This is only used at the outer level of a module when the 'module'
1910 -- keyword is missing.
1911 pushCurrentContext :: P ()
1912 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1913     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1914
1915 getOffside :: P Ordering
1916 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1917                 let offs = srcSpanStartCol loc in
1918                 let ord = case stk of
1919                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1920                                         compare offs n
1921                         _            -> GT
1922                 in POk s ord
1923
1924 -- ---------------------------------------------------------------------------
1925 -- Construct a parse error
1926
1927 srcParseErr
1928   :: StringBuffer       -- current buffer (placed just after the last token)
1929   -> Int                -- length of the previous token
1930   -> Message
1931 srcParseErr buf len
1932   = hcat [ if null token
1933              then ptext (sLit "parse error (possibly incorrect indentation)")
1934              else hcat [ptext (sLit "parse error on input "),
1935                         char '`', text token, char '\'']
1936     ]
1937   where token = lexemeToString (offsetBytes (-len) buf) len
1938
1939 -- Report a parse failure, giving the span of the previous token as
1940 -- the location of the error.  This is the entry point for errors
1941 -- detected during parsing.
1942 srcParseFail :: P a
1943 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1944                             last_loc = last_loc } ->
1945     PFailed last_loc (srcParseErr buf len)
1946
1947 -- A lexical error is reported at a particular position in the source file,
1948 -- not over a token range.
1949 lexError :: String -> P a
1950 lexError str = do
1951   loc <- getSrcLoc
1952   (AI end buf) <- getInput
1953   reportLexError loc end buf str
1954
1955 -- -----------------------------------------------------------------------------
1956 -- This is the top-level function: called from the parser each time a
1957 -- new token is to be read from the input.
1958
1959 lexer :: (Located Token -> P a) -> P a
1960 lexer cont = do
1961   alr <- extension alternativeLayoutRule
1962   let lexTokenFun = if alr then lexTokenAlr else lexToken
1963   tok@(L _span _tok__) <- lexTokenFun
1964   --trace ("token: " ++ show _tok__) $ do
1965   cont tok
1966
1967 lexTokenAlr :: P (Located Token)
1968 lexTokenAlr = do mPending <- popPendingImplicitToken
1969                  t <- case mPending of
1970                       Nothing ->
1971                           do mNext <- popNextToken
1972                              t <- case mNext of
1973                                   Nothing -> lexToken
1974                                   Just next -> return next
1975                              alternativeLayoutRuleToken t
1976                       Just t ->
1977                           return t
1978                  setAlrLastLoc (getLoc t)
1979                  case unLoc t of
1980                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
1981                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
1982                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
1983                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
1984                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1985                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1986                      _       -> return ()
1987                  return t
1988
1989 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
1990 alternativeLayoutRuleToken t
1991     = do context <- getALRContext
1992          lastLoc <- getAlrLastLoc
1993          mExpectingOCurly <- getAlrExpectingOCurly
1994          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
1995          setJustClosedExplicitLetBlock False
1996          dflags <- getDynFlags
1997          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
1998              thisLoc = getLoc t
1999              thisCol = srcSpanStartCol thisLoc
2000              newLine = (lastLoc == noSrcSpan)
2001                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
2002          case (unLoc t, context, mExpectingOCurly) of
2003              -- This case handles a GHC extension to the original H98
2004              -- layout rule...
2005              (ITocurly, _, Just alrLayout) ->
2006                  do setAlrExpectingOCurly Nothing
2007                     let isLet = case alrLayout of
2008                                 ALRLayoutLet -> True
2009                                 _ -> False
2010                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
2011                     return t
2012              -- ...and makes this case unnecessary
2013              {-
2014              -- I think our implicit open-curly handling is slightly
2015              -- different to John's, in how it interacts with newlines
2016              -- and "in"
2017              (ITocurly, _, Just _) ->
2018                  do setAlrExpectingOCurly Nothing
2019                     setNextToken t
2020                     lexTokenAlr
2021              -}
2022              (_, ALRLayout _ col : ls, Just expectingOCurly)
2023               | (thisCol > col) ||
2024                 (thisCol == col &&
2025                  isNonDecreasingIntentation expectingOCurly) ->
2026                  do setAlrExpectingOCurly Nothing
2027                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2028                     setNextToken t
2029                     return (L thisLoc ITocurly)
2030               | otherwise ->
2031                  do setAlrExpectingOCurly Nothing
2032                     setPendingImplicitTokens [L lastLoc ITccurly]
2033                     setNextToken t
2034                     return (L lastLoc ITocurly)
2035              (_, _, Just expectingOCurly) ->
2036                  do setAlrExpectingOCurly Nothing
2037                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2038                     setNextToken t
2039                     return (L thisLoc ITocurly)
2040              -- We do the [] cases earlier than in the spec, as we
2041              -- have an actual EOF token
2042              (ITeof, ALRLayout _ _ : ls, _) ->
2043                  do setALRContext ls
2044                     setNextToken t
2045                     return (L thisLoc ITccurly)
2046              (ITeof, _, _) ->
2047                  return t
2048              -- the other ITeof case omitted; general case below covers it
2049              (ITin, _, _)
2050               | justClosedExplicitLetBlock ->
2051                  return t
2052              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2053               | newLine ->
2054                  do setPendingImplicitTokens [t]
2055                     setALRContext ls
2056                     return (L thisLoc ITccurly)
2057              -- This next case is to handle a transitional issue:
2058              (ITwhere, ALRLayout _ col : ls, _)
2059               | newLine && thisCol == col && transitional ->
2060                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2061                                thisLoc
2062                                (transitionalAlternativeLayoutWarning
2063                                     "`where' clause at the same depth as implicit layout block")
2064                     setALRContext ls
2065                     setNextToken t
2066                     -- Note that we use lastLoc, as we may need to close
2067                     -- more layouts, or give a semicolon
2068                     return (L lastLoc ITccurly)
2069              -- This next case is to handle a transitional issue:
2070              (ITvbar, ALRLayout _ col : ls, _)
2071               | newLine && thisCol == col && transitional ->
2072                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2073                                thisLoc
2074                                (transitionalAlternativeLayoutWarning
2075                                     "`|' at the same depth as implicit layout block")
2076                     setALRContext ls
2077                     setNextToken t
2078                     -- Note that we use lastLoc, as we may need to close
2079                     -- more layouts, or give a semicolon
2080                     return (L lastLoc ITccurly)
2081              (_, ALRLayout _ col : ls, _)
2082               | newLine && thisCol == col ->
2083                  do setNextToken t
2084                     return (L thisLoc ITsemi)
2085               | newLine && thisCol < col ->
2086                  do setALRContext ls
2087                     setNextToken t
2088                     -- Note that we use lastLoc, as we may need to close
2089                     -- more layouts, or give a semicolon
2090                     return (L lastLoc ITccurly)
2091              -- We need to handle close before open, as 'then' is both
2092              -- an open and a close
2093              (u, _, _)
2094               | isALRclose u ->
2095                  case context of
2096                  ALRLayout _ _ : ls ->
2097                      do setALRContext ls
2098                         setNextToken t
2099                         return (L thisLoc ITccurly)
2100                  ALRNoLayout _ isLet : ls ->
2101                      do let ls' = if isALRopen u
2102                                      then ALRNoLayout (containsCommas u) False : ls
2103                                      else ls
2104                         setALRContext ls'
2105                         when isLet $ setJustClosedExplicitLetBlock True
2106                         return t
2107                  [] ->
2108                      do let ls = if isALRopen u
2109                                     then [ALRNoLayout (containsCommas u) False]
2110                                     else ls
2111                         setALRContext ls
2112                         -- XXX This is an error in John's code, but
2113                         -- it looks reachable to me at first glance
2114                         return t
2115              (u, _, _)
2116               | isALRopen u ->
2117                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2118                     return t
2119              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2120                  do setALRContext ls
2121                     setPendingImplicitTokens [t]
2122                     return (L thisLoc ITccurly)
2123              (ITin, ALRLayout _ _ : ls, _) ->
2124                  do setALRContext ls
2125                     setNextToken t
2126                     return (L thisLoc ITccurly)
2127              -- the other ITin case omitted; general case below covers it
2128              (ITcomma, ALRLayout _ _ : ls, _)
2129               | topNoLayoutContainsCommas ls ->
2130                  do setALRContext ls
2131                     setNextToken t
2132                     return (L thisLoc ITccurly)
2133              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2134                  do setALRContext ls
2135                     setPendingImplicitTokens [t]
2136                     return (L thisLoc ITccurly)
2137              -- the other ITwhere case omitted; general case below covers it
2138              (_, _, _) -> return t
2139
2140 transitionalAlternativeLayoutWarning :: String -> SDoc
2141 transitionalAlternativeLayoutWarning msg
2142     = text "transitional layout will not be accepted in the future:"
2143    $$ text msg
2144
2145 isALRopen :: Token -> Bool
2146 isALRopen ITcase        = True
2147 isALRopen ITif          = True
2148 isALRopen ITthen        = True
2149 isALRopen IToparen      = True
2150 isALRopen ITobrack      = True
2151 isALRopen ITocurly      = True
2152 -- GHC Extensions:
2153 isALRopen IToubxparen   = True
2154 isALRopen ITparenEscape = True
2155 isALRopen _             = False
2156
2157 isALRclose :: Token -> Bool
2158 isALRclose ITof     = True
2159 isALRclose ITthen   = True
2160 isALRclose ITelse   = True
2161 isALRclose ITcparen = True
2162 isALRclose ITcbrack = True
2163 isALRclose ITccurly = True
2164 -- GHC Extensions:
2165 isALRclose ITcubxparen = True
2166 isALRclose _        = False
2167
2168 isNonDecreasingIntentation :: ALRLayout -> Bool
2169 isNonDecreasingIntentation ALRLayoutDo = True
2170 isNonDecreasingIntentation _           = False
2171
2172 containsCommas :: Token -> Bool
2173 containsCommas IToparen = True
2174 containsCommas ITobrack = True
2175 -- John doesn't have {} as containing commas, but records contain them,
2176 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2177 -- (defaultInstallDirs).
2178 containsCommas ITocurly = True
2179 -- GHC Extensions:
2180 containsCommas IToubxparen = True
2181 containsCommas _        = False
2182
2183 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2184 topNoLayoutContainsCommas [] = False
2185 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2186 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2187
2188 lexToken :: P (Located Token)
2189 lexToken = do
2190   inp@(AI loc1 buf) <- getInput
2191   sc <- getLexState
2192   exts <- getExts
2193   case alexScanUser exts inp sc of
2194     AlexEOF -> do
2195         let span = mkSrcSpan loc1 loc1
2196         setLastToken span 0
2197         return (L span ITeof)
2198     AlexError (AI loc2 buf) ->
2199         reportLexError loc1 loc2 buf "lexical error"
2200     AlexSkip inp2 _ -> do
2201         setInput inp2
2202         lexToken
2203     AlexToken inp2@(AI end buf2) _ t -> do
2204         setInput inp2
2205         let span = mkSrcSpan loc1 end
2206         let bytes = byteDiff buf buf2
2207         span `seq` setLastToken span bytes
2208         t span buf bytes
2209
2210 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2211 reportLexError loc1 loc2 buf str
2212   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2213   | otherwise =
2214   let
2215         c = fst (nextChar buf)
2216   in
2217   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2218     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2219     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2220
2221 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2222 lexTokenStream buf loc dflags = unP go initState
2223     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2224           initState = mkPState dflags' buf loc
2225           go = do
2226             ltok <- lexer return
2227             case ltok of
2228               L _ ITeof -> return []
2229               _ -> liftM (ltok:) go
2230
2231 linePrags = Map.singleton "line" (begin line_prag2)
2232
2233 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2234                                  ("options_ghc", lex_string_prag IToptions_prag),
2235                                  ("options_haddock", lex_string_prag ITdocOptions),
2236                                  ("language", token ITlanguage_prag),
2237                                  ("include", lex_string_prag ITinclude_prag)])
2238
2239 ignoredPrags = Map.fromList (map ignored pragmas)
2240                where ignored opt = (opt, nested_comment lexToken)
2241                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2242                      options_pragmas = map ("options_" ++) impls
2243                      -- CFILES is a hugs-only thing.
2244                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2245
2246 oneWordPrags = Map.fromList([("rules", rulePrag),
2247                            ("inline", token (ITinline_prag Inline FunLike)),
2248                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2249                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2250                                           -- Spelling variant
2251                            ("notinline", token (ITinline_prag NoInline FunLike)),
2252                            ("specialize", token ITspec_prag),
2253                            ("source", token ITsource_prag),
2254                            ("warning", token ITwarning_prag),
2255                            ("deprecated", token ITdeprecated_prag),
2256                            ("scc", token ITscc_prag),
2257                            ("generated", token ITgenerated_prag),
2258                            ("core", token ITcore_prag),
2259                            ("unpack", token ITunpack_prag),
2260                            ("ann", token ITann_prag)])
2261
2262 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2263                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2264                              ("specialize inline", token (ITspec_inline_prag True)),
2265                              ("specialize notinline", token (ITspec_inline_prag False))])
2266
2267
2268 dispatch_pragmas :: Map String Action -> Action
2269 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2270                                        Just found -> found span buf len
2271                                        Nothing -> lexError "unknown pragma"
2272
2273 known_pragma :: Map String Action -> AlexAccPred Int
2274 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2275                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2276
2277 clean_pragma :: String -> String
2278 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2279                     where unprefix prag' = case stripPrefix "{-#" prag' of
2280                                              Just rest -> rest
2281                                              Nothing -> prag'
2282                           canonical prag' = case prag' of
2283                                               "noinline" -> "notinline"
2284                                               "specialise" -> "specialize"
2285                                               "constructorlike" -> "conlike"
2286                                               _ -> prag'
2287                           canon_ws s = unwords (map canonical (words s))
2288 }