compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# LANGUAGE BangPatterns #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  38 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  39 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  40 -- But alex still generates some code that causes the "lazy unlifted bindings"
  41 -- warning, and old compilers don't know about it so we can't easily turn
  42 -- it off, so for now we use the sledge hammer:
  43 {-# OPTIONS_GHC -w #-}
  44
  45 {-# OPTIONS_GHC -funbox-strict-fields #-}
  46
  47 module Lexer (
  48    Token(..), lexer, pragState, mkPState, PState(..),
  49    P(..), ParseResult(..), getSrcLoc,
  50    getPState, getDynFlags, withThisPackage,
  51    failLocMsgP, failSpanMsgP, srcParseFail,
  52    getMessages,
  53    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  54    activeContext, nextIsEOF,
  55    getLexState, popLexState, pushLexState,
  56    extension, bangPatEnabled, datatypeContextsEnabled,
  57    addWarning,
  58    lexTokenStream
  59   ) where
  60
  61 import Bag
  62 import ErrUtils
  63 import Outputable
  64 import StringBuffer
  65 import FastString
  66 import SrcLoc
  67 import UniqFM
  68 import DynFlags
  69 import Module
  70 import Ctype
  71 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..), FractionalLit(..) )
  72 import Util             ( readRational )
  73
  74 import Control.Monad
  75 import Data.Bits
  76 import Data.Char
  77 import Data.List
  78 import Data.Maybe
  79 import Data.Map (Map)
  80 import qualified Data.Map as Map
  81 import Data.Ratio
  82 }
  83
  84 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  85 $whitechar   = [\ \n\r\f\v $unispace]
  86 $white_no_nl = $whitechar # \n
  87 $tab         = \t
  88
  89 $ascdigit  = 0-9
  90 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  91 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  92 $digit     = [$ascdigit $unidigit]
  93
  94 $special   = [\(\)\,\;\[\]\`\{\}]
  95 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  96 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  97 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  98
  99 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
 100 $asclarge  = [A-Z]
 101 $large     = [$asclarge $unilarge]
 102
 103 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 104 $ascsmall  = [a-z]
 105 $small     = [$ascsmall $unismall \_]
 106
 107 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 108 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 109
 110 $octit     = 0-7
 111 $hexit     = [$decdigit A-F a-f]
 112 $symchar   = [$symbol \:]
 113 $nl        = [\n\r]
 114 $idchar    = [$small $large $digit \']
 115
 116 $pragmachar = [$small $large $digit]
 117
 118 $docsym    = [\| \^ \* \$]
 119
 120 @varid     = $small $idchar*
 121 @conid     = $large $idchar*
 122
 123 @varsym    = $symbol $symchar*
 124 @consym    = \: $symchar*
 125
 126 @decimal     = $decdigit+
 127 @octal       = $octit+
 128 @hexadecimal = $hexit+
 129 @exponent    = [eE] [\-\+]? @decimal
 130
 131 -- we support the hierarchical module name extension:
 132 @qual = (@conid \.)+
 133
 134 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 135
 136 -- normal signed numerical literals can only be explicitly negative,
 137 -- not explicitly positive (contrast @exponent)
 138 @negative = \-
 139 @signed = @negative ?
 140
 141 haskell :-
 142
 143 -- everywhere: skip whitespace and comments
 144 $white_no_nl+                           ;
 145 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 146
 147 -- Everywhere: deal with nested comments.  We explicitly rule out
 148 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 149 -- (this can happen even though pragmas will normally take precedence due to
 150 -- longest-match, because pragmas aren't valid in every state, but comments
 151 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 152 -- set.
 153
 154 "{-" / { isNormalComment } { nested_comment lexToken }
 155
 156 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 157 -- more dashes followed by a symbol should be parsed as a varsym, so we
 158 -- have to exclude those.
 159
 160 -- Since Haddock comments aren't valid in every state, we need to rule them
 161 -- out here.
 162
 163 -- The following two rules match comments that begin with two dashes, but
 164 -- continue with a different character. The rules test that this character
 165 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 166 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 167 -- have a Haddock comment). The rules then munch the rest of the line.
 168
 169 "-- " ~[$docsym \#] .* { lineCommentToken }
 170 "--" [^$symbol : \ ] .* { lineCommentToken }
 171
 172 -- Next, match Haddock comments if no -haddock flag
 173
 174 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 175
 176 -- Now, when we've matched comments that begin with 2 dashes and continue
 177 -- with a different character, we need to match comments that begin with three
 178 -- or more dashes (which clearly can't be Haddock comments). We only need to
 179 -- make sure that the first non-dash character isn't a symbol, and munch the
 180 -- rest of the line.
 181
 182 "---"\-* [^$symbol :] .* { lineCommentToken }
 183
 184 -- Since the previous rules all match dashes followed by at least one
 185 -- character, we also need to match a whole line filled with just dashes.
 186
 187 "--"\-* / { atEOL } { lineCommentToken }
 188
 189 -- We need this rule since none of the other single line comment rules
 190 -- actually match this case.
 191
 192 "-- " / { atEOL } { lineCommentToken }
 193
 194 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 195 -- blank lines) until we find a non-whitespace character, then do layout
 196 -- processing.
 197 --
 198 -- One slight wibble here: what if the line begins with {-#? In
 199 -- theory, we have to lex the pragma to see if it's one we recognise,
 200 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 201 -- as a nested comment.  We don't bother with this: if the line begins
 202 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 203 <bol> {
 204   \n                                    ;
 205   ^\# (line)?                           { begin line_prag1 }
 206   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 207   ^\# \! .* \n                          ; -- #!, for scripts
 208   ()                                    { do_bol }
 209 }
 210
 211 -- after a layout keyword (let, where, do, of), we begin a new layout
 212 -- context if the curly brace is missing.
 213 -- Careful! This stuff is quite delicate.
 214 <layout, layout_do> {
 215   \{ / { notFollowedBy '-' }            { hopefully_open_brace }
 216         -- we might encounter {-# here, but {- has been handled already
 217   \n                                    ;
 218   ^\# (line)?                           { begin line_prag1 }
 219 }
 220
 221 -- do is treated in a subtly different way, see new_layout_context
 222 <layout>    ()                          { new_layout_context True }
 223 <layout_do> ()                          { new_layout_context False }
 224
 225 -- after a new layout context which was found to be to the left of the
 226 -- previous context, we have generated a '{' token, and we now need to
 227 -- generate a matching '}' token.
 228 <layout_left>  ()                       { do_layout_left }
 229
 230 <0,option_prags> \n                             { begin bol }
 231
 232 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 233                                 { dispatch_pragmas linePrags }
 234
 235 -- single-line line pragmas, of the form
 236 --    # <line> "<file>" <extra-stuff> \n
 237 <line_prag1> $decdigit+                 { setLine line_prag1a }
 238 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 239 <line_prag1b> .*                        { pop }
 240
 241 -- Haskell-style line pragmas, of the form
 242 --    {-# LINE <line> "<file>" #-}
 243 <line_prag2> $decdigit+                 { setLine line_prag2a }
 244 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 245 <line_prag2b> "#-}"|"-}"                { pop }
 246    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 247    -- with older versions of GHC which generated these.
 248
 249 <0,option_prags> {
 250   "{-#" $whitechar* $pragmachar+
 251         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 252                                  { dispatch_pragmas twoWordPrags }
 253
 254   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 255                                  { dispatch_pragmas oneWordPrags }
 256
 257   -- We ignore all these pragmas, but don't generate a warning for them
 258   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 259                                  { dispatch_pragmas ignoredPrags }
 260
 261   -- ToDo: should only be valid inside a pragma:
 262   "#-}"                                 { endPrag }
 263 }
 264
 265 <option_prags> {
 266   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 267                                    { dispatch_pragmas fileHeaderPrags }
 268
 269   "-- #"                                 { multiline_doc_comment }
 270 }
 271
 272 <0> {
 273   -- In the "0" mode we ignore these pragmas
 274   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 275                      { nested_comment lexToken }
 276 }
 277
 278 <0> {
 279   "-- #" .* { lineCommentToken }
 280 }
 281
 282 <0,option_prags> {
 283   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 284                     (nested_comment lexToken) }
 285 }
 286
 287 -- '0' state: ordinary lexemes
 288
 289 -- Haddock comments
 290
 291 <0,option_prags> {
 292   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 293   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 294 }
 295
 296 -- "special" symbols
 297
 298 <0> {
 299   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 300   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 301 }
 302
 303 <0> {
 304   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 305   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 306   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 307   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 308   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 309   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 310   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 311   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 312
 313 -- For backward compatibility, accept the old dollar syntax
 314   "[$" @varid "|"  / { ifExtension qqEnabled }
 315                      { lex_quasiquote_tok }
 316
 317   "[" @varid "|"  / { ifExtension qqEnabled }
 318                      { lex_quasiquote_tok }
 319 }
 320
 321 <0> {
 322   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 323                                         { special IToparenbar }
 324   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 325 }
 326
 327 <0> {
 328   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 329 }
 330
 331 <0> {
 332   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 333          { token IToubxparen }
 334   "#)" / { ifExtension unboxedTuplesEnabled }
 335          { token ITcubxparen }
 336 }
 337
 338 <0> {
 339   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 340   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 341 }
 342
 343 <0,option_prags> {
 344   \(                                    { special IToparen }
 345   \)                                    { special ITcparen }
 346   \[                                    { special ITobrack }
 347   \]                                    { special ITcbrack }
 348   \,                                    { special ITcomma }
 349   \;                                    { special ITsemi }
 350   \`                                    { special ITbackquote }
 351
 352   \{                                    { open_brace }
 353   \}                                    { close_brace }
 354 }
 355
 356 <0,option_prags> {
 357   @qual @varid                  { idtoken qvarid }
 358   @qual @conid                  { idtoken qconid }
 359   @varid                        { varid }
 360   @conid                        { idtoken conid }
 361 }
 362
 363 <0> {
 364   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 365   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 366   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 367   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 368 }
 369
 370 -- ToDo: - move `var` and (sym) into lexical syntax?
 371 --       - remove backquote from $special?
 372 <0> {
 373   @qual @varsym                                    { idtoken qvarsym }
 374   @qual @consym                                    { idtoken qconsym }
 375   @varsym                                          { varsym }
 376   @consym                                          { consym }
 377 }
 378
 379 -- For the normal boxed literals we need to be careful
 380 -- when trying to be close to Haskell98
 381 <0> {
 382   -- Normal integral literals (:: Num a => a, from Integer)
 383   @decimal           { tok_num positive 0 0 decimal }
 384   0[oO] @octal       { tok_num positive 2 2 octal }
 385   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 386
 387   -- Normal rational literals (:: Fractional a => a, from Rational)
 388   @floating_point    { strtoken tok_float }
 389 }
 390
 391 <0> {
 392   -- Unboxed ints (:: Int#) and words (:: Word#)
 393   -- It's simpler (and faster?) to give separate cases to the negatives,
 394   -- especially considering octal/hexadecimal prefixes.
 395   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 396   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 397   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 398   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 399   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 400   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 401
 402   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 403   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 404   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 405
 406   -- Unboxed floats and doubles (:: Float#, :: Double#)
 407   -- prim_{float,double} work with signed literals
 408   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 409   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 410 }
 411
 412 -- Strings and chars are lexed by hand-written code.  The reason is
 413 -- that even if we recognise the string or char here in the regex
 414 -- lexer, we would still have to parse the string afterward in order
 415 -- to convert it to a String.
 416 <0> {
 417   \'                            { lex_char_tok }
 418   \"                            { lex_string_tok }
 419 }
 420
 421 {
 422 -- -----------------------------------------------------------------------------
 423 -- The token type
 424
 425 data Token
 426   = ITas                        -- Haskell keywords
 427   | ITcase
 428   | ITclass
 429   | ITdata
 430   | ITdefault
 431   | ITderiving
 432   | ITdo
 433   | ITelse
 434   | IThiding
 435   | ITif
 436   | ITimport
 437   | ITin
 438   | ITinfix
 439   | ITinfixl
 440   | ITinfixr
 441   | ITinstance
 442   | ITlet
 443   | ITmodule
 444   | ITnewtype
 445   | ITof
 446   | ITqualified
 447   | ITthen
 448   | ITtype
 449   | ITwhere
 450   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 451
 452   | ITforall                    -- GHC extension keywords
 453   | ITforeign
 454   | ITexport
 455   | ITlabel
 456   | ITdynamic
 457   | ITsafe
 458   | ITthreadsafe
 459   | ITinterruptible
 460   | ITunsafe
 461   | ITstdcallconv
 462   | ITccallconv
 463   | ITprimcallconv
 464   | ITmdo
 465   | ITfamily
 466   | ITgroup
 467   | ITby
 468   | ITusing
 469
 470         -- Pragmas
 471   | ITinline_prag InlineSpec RuleMatchInfo
 472   | ITspec_prag                 -- SPECIALISE
 473   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 474   | ITsource_prag
 475   | ITrules_prag
 476   | ITwarning_prag
 477   | ITdeprecated_prag
 478   | ITline_prag
 479   | ITscc_prag
 480   | ITgenerated_prag
 481   | ITcore_prag                 -- hdaume: core annotations
 482   | ITunpack_prag
 483   | ITann_prag
 484   | ITclose_prag
 485   | IToptions_prag String
 486   | ITinclude_prag String
 487   | ITlanguage_prag
 488   | ITvect_prag
 489   | ITvect_scalar_prag
 490
 491   | ITdotdot                    -- reserved symbols
 492   | ITcolon
 493   | ITdcolon
 494   | ITequal
 495   | ITlam
 496   | ITvbar
 497   | ITlarrow
 498   | ITrarrow
 499   | ITat
 500   | ITtilde
 501   | ITdarrow
 502   | ITminus
 503   | ITbang
 504   | ITstar
 505   | ITdot
 506
 507   | ITbiglam                    -- GHC-extension symbols
 508
 509   | ITocurly                    -- special symbols
 510   | ITccurly
 511   | ITocurlybar                 -- {|, for type applications
 512   | ITccurlybar                 -- |}, for type applications
 513   | ITvocurly
 514   | ITvccurly
 515   | ITobrack
 516   | ITopabrack                  -- [:, for parallel arrays with -XParallelArrays
 517   | ITcpabrack                  -- :], for parallel arrays with -XParallelArrays
 518   | ITcbrack
 519   | IToparen
 520   | ITcparen
 521   | IToubxparen
 522   | ITcubxparen
 523   | ITsemi
 524   | ITcomma
 525   | ITunderscore
 526   | ITbackquote
 527
 528   | ITvarid   FastString        -- identifiers
 529   | ITconid   FastString
 530   | ITvarsym  FastString
 531   | ITconsym  FastString
 532   | ITqvarid  (FastString,FastString)
 533   | ITqconid  (FastString,FastString)
 534   | ITqvarsym (FastString,FastString)
 535   | ITqconsym (FastString,FastString)
 536   | ITprefixqvarsym (FastString,FastString)
 537   | ITprefixqconsym (FastString,FastString)
 538
 539   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 540
 541   | ITchar       Char
 542   | ITstring     FastString
 543   | ITinteger    Integer
 544   | ITrational   FractionalLit
 545
 546   | ITprimchar   Char
 547   | ITprimstring FastString
 548   | ITprimint    Integer
 549   | ITprimword   Integer
 550   | ITprimfloat  FractionalLit
 551   | ITprimdouble FractionalLit
 552
 553   -- Template Haskell extension tokens
 554   | ITopenExpQuote              --  [| or [e|
 555   | ITopenPatQuote              --  [p|
 556   | ITopenDecQuote              --  [d|
 557   | ITopenTypQuote              --  [t|
 558   | ITcloseQuote                --  |]
 559   | ITidEscape   FastString     --  $x
 560   | ITparenEscape               --  $(
 561   | ITvarQuote                  --  '
 562   | ITtyQuote                   --  ''
 563   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 564
 565   -- Arrow notation extension
 566   | ITproc
 567   | ITrec
 568   | IToparenbar                 --  (|
 569   | ITcparenbar                 --  |)
 570   | ITlarrowtail                --  -<
 571   | ITrarrowtail                --  >-
 572   | ITLarrowtail                --  -<<
 573   | ITRarrowtail                --  >>-
 574
 575   | ITunknown String            -- Used when the lexer can't make sense of it
 576   | ITeof                       -- end of file token
 577
 578   -- Documentation annotations
 579   | ITdocCommentNext  String     -- something beginning '-- |'
 580   | ITdocCommentPrev  String     -- something beginning '-- ^'
 581   | ITdocCommentNamed String     -- something beginning '-- $'
 582   | ITdocSection      Int String -- a section heading
 583   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 584   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 585   | ITlineComment     String     -- comment starting by "--"
 586   | ITblockComment    String     -- comment in {- -}
 587
 588 #ifdef DEBUG
 589   deriving Show -- debugging
 590 #endif
 591
 592 {-
 593 isSpecial :: Token -> Bool
 594 -- If we see M.x, where x is a keyword, but
 595 -- is special, we treat is as just plain M.x,
 596 -- not as a keyword.
 597 isSpecial ITas          = True
 598 isSpecial IThiding      = True
 599 isSpecial ITqualified   = True
 600 isSpecial ITforall      = True
 601 isSpecial ITexport      = True
 602 isSpecial ITlabel       = True
 603 isSpecial ITdynamic     = True
 604 isSpecial ITsafe        = True
 605 isSpecial ITthreadsafe  = True
 606 isSpecial ITinterruptible = True
 607 isSpecial ITunsafe      = True
 608 isSpecial ITccallconv   = True
 609 isSpecial ITstdcallconv = True
 610 isSpecial ITprimcallconv = True
 611 isSpecial ITmdo         = True
 612 isSpecial ITfamily      = True
 613 isSpecial ITgroup   = True
 614 isSpecial ITby      = True
 615 isSpecial ITusing   = True
 616 isSpecial _             = False
 617 -}
 618
 619 -- the bitmap provided as the third component indicates whether the
 620 -- corresponding extension keyword is valid under the extension options
 621 -- provided to the compiler; if the extension corresponding to *any* of the
 622 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 623 -- facilitates using a keyword in two different extensions that can be
 624 -- activated independently)
 625 --
 626 reservedWordsFM :: UniqFM (Token, Int)
 627 reservedWordsFM = listToUFM $
 628         map (\(x, y, z) -> (mkFastString x, (y, z)))
 629        [( "_",          ITunderscore,   0 ),
 630         ( "as",         ITas,           0 ),
 631         ( "case",       ITcase,         0 ),
 632         ( "class",      ITclass,        0 ),
 633         ( "data",       ITdata,         0 ),
 634         ( "default",    ITdefault,      0 ),
 635         ( "deriving",   ITderiving,     0 ),
 636         ( "do",         ITdo,           0 ),
 637         ( "else",       ITelse,         0 ),
 638         ( "hiding",     IThiding,       0 ),
 639         ( "if",         ITif,           0 ),
 640         ( "import",     ITimport,       0 ),
 641         ( "in",         ITin,           0 ),
 642         ( "infix",      ITinfix,        0 ),
 643         ( "infixl",     ITinfixl,       0 ),
 644         ( "infixr",     ITinfixr,       0 ),
 645         ( "instance",   ITinstance,     0 ),
 646         ( "let",        ITlet,          0 ),
 647         ( "module",     ITmodule,       0 ),
 648         ( "newtype",    ITnewtype,      0 ),
 649         ( "of",         ITof,           0 ),
 650         ( "qualified",  ITqualified,    0 ),
 651         ( "then",       ITthen,         0 ),
 652         ( "type",       ITtype,         0 ),
 653         ( "where",      ITwhere,        0 ),
 654         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 655
 656     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 657         ( "mdo",        ITmdo,           bit recursiveDoBit),
 658         ( "family",     ITfamily,        bit tyFamBit),
 659     ( "group",  ITgroup,     bit transformComprehensionsBit),
 660     ( "by",     ITby,        bit transformComprehensionsBit),
 661     ( "using",  ITusing,     bit transformComprehensionsBit),
 662
 663         ( "foreign",    ITforeign,       bit ffiBit),
 664         ( "export",     ITexport,        bit ffiBit),
 665         ( "label",      ITlabel,         bit ffiBit),
 666         ( "dynamic",    ITdynamic,       bit ffiBit),
 667         ( "safe",       ITsafe,          bit ffiBit),
 668         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 669         ( "interruptible", ITinterruptible, bit ffiBit),
 670         ( "unsafe",     ITunsafe,        bit ffiBit),
 671         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 672         ( "ccall",      ITccallconv,     bit ffiBit),
 673         ( "prim",       ITprimcallconv,  bit ffiBit),
 674
 675         ( "rec",        ITrec,           bit recBit),
 676         ( "proc",       ITproc,          bit arrowsBit)
 677      ]
 678
 679 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 680 reservedSymsFM = listToUFM $
 681     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 682       [ ("..",  ITdotdot,   always)
 683         -- (:) is a reserved op, meaning only list cons
 684        ,(":",   ITcolon,    always)
 685        ,("::",  ITdcolon,   always)
 686        ,("=",   ITequal,    always)
 687        ,("\\",  ITlam,      always)
 688        ,("|",   ITvbar,     always)
 689        ,("<-",  ITlarrow,   always)
 690        ,("->",  ITrarrow,   always)
 691        ,("@",   ITat,       always)
 692        ,("~",   ITtilde,    always)
 693        ,("=>",  ITdarrow,   always)
 694        ,("-",   ITminus,    always)
 695        ,("!",   ITbang,     always)
 696
 697         -- For data T (a::*) = MkT
 698        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 699         -- For 'forall a . t'
 700        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 701
 702        ,("-<",  ITlarrowtail, arrowsEnabled)
 703        ,(">-",  ITrarrowtail, arrowsEnabled)
 704        ,("-<<", ITLarrowtail, arrowsEnabled)
 705        ,(">>-", ITRarrowtail, arrowsEnabled)
 706
 707        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 708        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 709        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 710                                 explicitForallEnabled i)
 711        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 712        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 713
 714        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 716        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 717        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 718
 719        ,("★", ITstar, unicodeSyntaxEnabled)
 720
 721         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 722         -- form part of a large operator.  This would let us have a better
 723         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 724        ]
 725
 726 -- -----------------------------------------------------------------------------
 727 -- Lexer actions
 728
 729 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 730
 731 special :: Token -> Action
 732 special tok span _buf _len = return (L span tok)
 733
 734 token, layout_token :: Token -> Action
 735 token t span _buf _len = return (L span t)
 736 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 737
 738 idtoken :: (StringBuffer -> Int -> Token) -> Action
 739 idtoken f span buf len = return (L span $! (f buf len))
 740
 741 skip_one_varid :: (FastString -> Token) -> Action
 742 skip_one_varid f span buf len
 743   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 744
 745 strtoken :: (String -> Token) -> Action
 746 strtoken f span buf len =
 747   return (L span $! (f $! lexemeToString buf len))
 748
 749 init_strtoken :: Int -> (String -> Token) -> Action
 750 -- like strtoken, but drops the last N character(s)
 751 init_strtoken drop f span buf len =
 752   return (L span $! (f $! lexemeToString buf (len-drop)))
 753
 754 begin :: Int -> Action
 755 begin code _span _str _len = do pushLexState code; lexToken
 756
 757 pop :: Action
 758 pop _span _buf _len = do _ <- popLexState
 759                          lexToken
 760
 761 hopefully_open_brace :: Action
 762 hopefully_open_brace span buf len
 763  = do relaxed <- extension relaxedLayout
 764       ctx <- getContext
 765       (AI l _) <- getInput
 766       let offset = srcLocCol l
 767           isOK = relaxed ||
 768                  case ctx of
 769                  Layout prev_off : _ -> prev_off < offset
 770                  _                   -> True
 771       if isOK then pop_and open_brace span buf len
 772               else failSpanMsgP span (text "Missing block")
 773
 774 pop_and :: Action -> Action
 775 pop_and act span buf len = do _ <- popLexState
 776                               act span buf len
 777
 778 {-# INLINE nextCharIs #-}
 779 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 780 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 781
 782 notFollowedBy :: Char -> AlexAccPred Int
 783 notFollowedBy char _ _ _ (AI _ buf)
 784   = nextCharIs buf (/=char)
 785
 786 notFollowedBySymbol :: AlexAccPred Int
 787 notFollowedBySymbol _ _ _ (AI _ buf)
 788   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 789
 790 -- We must reject doc comments as being ordinary comments everywhere.
 791 -- In some cases the doc comment will be selected as the lexeme due to
 792 -- maximal munch, but not always, because the nested comment rule is
 793 -- valid in all states, but the doc-comment rules are only valid in
 794 -- the non-layout states.
 795 isNormalComment :: AlexAccPred Int
 796 isNormalComment bits _ _ (AI _ buf)
 797   | haddockEnabled bits = notFollowedByDocOrPragma
 798   | otherwise           = nextCharIs buf (/='#')
 799   where
 800     notFollowedByDocOrPragma
 801        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 802
 803 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 804 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 805
 806 {-
 807 haddockDisabledAnd p bits _ _ (AI _ buf)
 808   = if haddockEnabled bits then False else (p buf)
 809 -}
 810
 811 atEOL :: AlexAccPred Int
 812 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 813
 814 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 815 ifExtension pred bits _ _ _ = pred bits
 816
 817 multiline_doc_comment :: Action
 818 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 819   where
 820     worker commentAcc input docType oneLine = case alexGetChar input of
 821       Just ('\n', input')
 822         | oneLine -> docCommentEnd input commentAcc docType buf span
 823         | otherwise -> case checkIfCommentLine input' of
 824           Just input -> worker ('\n':commentAcc) input docType False
 825           Nothing -> docCommentEnd input commentAcc docType buf span
 826       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 827       Nothing -> docCommentEnd input commentAcc docType buf span
 828
 829     checkIfCommentLine input = check (dropNonNewlineSpace input)
 830       where
 831         check input = case alexGetChar input of
 832           Just ('-', input) -> case alexGetChar input of
 833             Just ('-', input) -> case alexGetChar input of
 834               Just (c, _) | c /= '-' -> Just input
 835               _ -> Nothing
 836             _ -> Nothing
 837           _ -> Nothing
 838
 839         dropNonNewlineSpace input = case alexGetChar input of
 840           Just (c, input')
 841             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 842             | otherwise -> input
 843           Nothing -> input
 844
 845 lineCommentToken :: Action
 846 lineCommentToken span buf len = do
 847   b <- extension rawTokenStreamEnabled
 848   if b then strtoken ITlineComment span buf len else lexToken
 849
 850 {-
 851   nested comments require traversing by hand, they can't be parsed
 852   using regular expressions.
 853 -}
 854 nested_comment :: P (Located Token) -> Action
 855 nested_comment cont span _str _len = do
 856   input <- getInput
 857   go "" (1::Int) input
 858   where
 859     go commentAcc 0 input = do setInput input
 860                                b <- extension rawTokenStreamEnabled
 861                                if b
 862                                  then docCommentEnd input commentAcc ITblockComment _str span
 863                                  else cont
 864     go commentAcc n input = case alexGetChar input of
 865       Nothing -> errBrace input span
 866       Just ('-',input) -> case alexGetChar input of
 867         Nothing  -> errBrace input span
 868         Just ('\125',input) -> go commentAcc (n-1) input
 869         Just (_,_)          -> go ('-':commentAcc) n input
 870       Just ('\123',input) -> case alexGetChar input of
 871         Nothing  -> errBrace input span
 872         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 873         Just (_,_)       -> go ('\123':commentAcc) n input
 874       Just (c,input) -> go (c:commentAcc) n input
 875
 876 nested_doc_comment :: Action
 877 nested_doc_comment span buf _len = withLexedDocType (go "")
 878   where
 879     go commentAcc input docType _ = case alexGetChar input of
 880       Nothing -> errBrace input span
 881       Just ('-',input) -> case alexGetChar input of
 882         Nothing -> errBrace input span
 883         Just ('\125',input) ->
 884           docCommentEnd input commentAcc docType buf span
 885         Just (_,_) -> go ('-':commentAcc) input docType False
 886       Just ('\123', input) -> case alexGetChar input of
 887         Nothing  -> errBrace input span
 888         Just ('-',input) -> do
 889           setInput input
 890           let cont = do input <- getInput; go commentAcc input docType False
 891           nested_comment cont span buf _len
 892         Just (_,_) -> go ('\123':commentAcc) input docType False
 893       Just (c,input) -> go (c:commentAcc) input docType False
 894
 895 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 896                  -> P (Located Token)
 897 withLexedDocType lexDocComment = do
 898   input@(AI _ buf) <- getInput
 899   case prevChar buf ' ' of
 900     '|' -> lexDocComment input ITdocCommentNext False
 901     '^' -> lexDocComment input ITdocCommentPrev False
 902     '$' -> lexDocComment input ITdocCommentNamed False
 903     '*' -> lexDocSection 1 input
 904     '#' -> lexDocComment input ITdocOptionsOld False
 905     _ -> panic "withLexedDocType: Bad doc type"
 906  where
 907     lexDocSection n input = case alexGetChar input of
 908       Just ('*', input) -> lexDocSection (n+1) input
 909       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 910       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 911
 912 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 913 -- off again at the end of the pragma.
 914 rulePrag :: Action
 915 rulePrag span _buf _len = do
 916   setExts (.|. bit inRulePragBit)
 917   return (L span ITrules_prag)
 918
 919 endPrag :: Action
 920 endPrag span _buf _len = do
 921   setExts (.&. complement (bit inRulePragBit))
 922   return (L span ITclose_prag)
 923
 924 -- docCommentEnd
 925 -------------------------------------------------------------------------------
 926 -- This function is quite tricky. We can't just return a new token, we also
 927 -- need to update the state of the parser. Why? Because the token is longer
 928 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 929 -- it writes the wrong token length to the parser state. This function is
 930 -- called afterwards, so it can just update the state.
 931
 932 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 933                  SrcSpan -> P (Located Token)
 934 docCommentEnd input commentAcc docType buf span = do
 935   setInput input
 936   let (AI loc nextBuf) = input
 937       comment = reverse commentAcc
 938       span' = mkSrcSpan (srcSpanStart span) loc
 939       last_len = byteDiff buf nextBuf
 940
 941   span `seq` setLastToken span' last_len
 942   return (L span' (docType comment))
 943
 944 errBrace :: AlexInput -> SrcSpan -> P a
 945 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 946
 947 open_brace, close_brace :: Action
 948 open_brace span _str _len = do
 949   ctx <- getContext
 950   setContext (NoLayout:ctx)
 951   return (L span ITocurly)
 952 close_brace span _str _len = do
 953   popContext
 954   return (L span ITccurly)
 955
 956 qvarid, qconid :: StringBuffer -> Int -> Token
 957 qvarid buf len = ITqvarid $! splitQualName buf len False
 958 qconid buf len = ITqconid $! splitQualName buf len False
 959
 960 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 961 -- takes a StringBuffer and a length, and returns the module name
 962 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 963 -- because of hierarchical module names.
 964 splitQualName orig_buf len parens = split orig_buf orig_buf
 965   where
 966     split buf dot_buf
 967         | orig_buf `byteDiff` buf >= len  = done dot_buf
 968         | c == '.'                        = found_dot buf'
 969         | otherwise                       = split buf' dot_buf
 970       where
 971        (c,buf') = nextChar buf
 972
 973     -- careful, we might get names like M....
 974     -- so, if the character after the dot is not upper-case, this is
 975     -- the end of the qualifier part.
 976     found_dot buf -- buf points after the '.'
 977         | isUpper c    = split buf' buf
 978         | otherwise    = done buf
 979       where
 980        (c,buf') = nextChar buf
 981
 982     done dot_buf =
 983         (lexemeToFastString orig_buf (qual_size - 1),
 984          if parens -- Prelude.(+)
 985             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 986             else lexemeToFastString dot_buf (len - qual_size))
 987       where
 988         qual_size = orig_buf `byteDiff` dot_buf
 989
 990 varid :: Action
 991 varid span buf len =
 992   fs `seq`
 993   case lookupUFM reservedWordsFM fs of
 994         Just (keyword,0)    -> do
 995                 maybe_layout keyword
 996                 return (L span keyword)
 997         Just (keyword,exts) -> do
 998                 b <- extension (\i -> exts .&. i /= 0)
 999                 if b then do maybe_layout keyword
1000                              return (L span keyword)
1001                      else return (L span (ITvarid fs))
1002         _other -> return (L span (ITvarid fs))
1003   where
1004         fs = lexemeToFastString buf len
1005
1006 conid :: StringBuffer -> Int -> Token
1007 conid buf len = ITconid fs
1008   where fs = lexemeToFastString buf len
1009
1010 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
1011 qvarsym buf len = ITqvarsym $! splitQualName buf len False
1012 qconsym buf len = ITqconsym $! splitQualName buf len False
1013 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
1014 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1015
1016 varsym, consym :: Action
1017 varsym = sym ITvarsym
1018 consym = sym ITconsym
1019
1020 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1021     -> P (Located Token)
1022 sym con span buf len =
1023   case lookupUFM reservedSymsFM fs of
1024         Just (keyword,exts) -> do
1025                 b <- extension exts
1026                 if b then return (L span keyword)
1027                      else return (L span $! con fs)
1028         _other -> return (L span $! con fs)
1029   where
1030         fs = lexemeToFastString buf len
1031
1032 -- Variations on the integral numeric literal.
1033 tok_integral :: (Integer -> Token)
1034      -> (Integer -> Integer)
1035  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1036      -> Int -> Int
1037      -> (Integer, (Char->Int)) -> Action
1038 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1039   return $ L span $ itint $! transint $ parseUnsignedInteger
1040      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1041
1042 -- some conveniences for use with tok_integral
1043 tok_num :: (Integer -> Integer)
1044         -> Int -> Int
1045         -> (Integer, (Char->Int)) -> Action
1046 tok_num = tok_integral ITinteger
1047 tok_primint :: (Integer -> Integer)
1048             -> Int -> Int
1049             -> (Integer, (Char->Int)) -> Action
1050 tok_primint = tok_integral ITprimint
1051 tok_primword :: Int -> Int
1052              -> (Integer, (Char->Int)) -> Action
1053 tok_primword = tok_integral ITprimword positive
1054 positive, negative :: (Integer -> Integer)
1055 positive = id
1056 negative = negate
1057 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1058 decimal = (10,octDecDigit)
1059 octal = (8,octDecDigit)
1060 hexadecimal = (16,hexDigit)
1061
1062 -- readRational can understand negative rationals, exponents, everything.
1063 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1064 tok_float        str = ITrational   $! readFractionalLit str
1065 tok_primfloat    str = ITprimfloat  $! readFractionalLit str
1066 tok_primdouble   str = ITprimdouble $! readFractionalLit str
1067
1068 readFractionalLit :: String -> FractionalLit
1069 readFractionalLit str = (FL $! str) $! readRational str
1070
1071 -- -----------------------------------------------------------------------------
1072 -- Layout processing
1073
1074 -- we're at the first token on a line, insert layout tokens if necessary
1075 do_bol :: Action
1076 do_bol span _str _len = do
1077         pos <- getOffside
1078         case pos of
1079             LT -> do
1080                 --trace "layout: inserting '}'" $ do
1081                 popContext
1082                 -- do NOT pop the lex state, we might have a ';' to insert
1083                 return (L span ITvccurly)
1084             EQ -> do
1085                 --trace "layout: inserting ';'" $ do
1086                 _ <- popLexState
1087                 return (L span ITsemi)
1088             GT -> do
1089                 _ <- popLexState
1090                 lexToken
1091
1092 -- certain keywords put us in the "layout" state, where we might
1093 -- add an opening curly brace.
1094 maybe_layout :: Token -> P ()
1095 maybe_layout t = do -- If the alternative layout rule is enabled then
1096                     -- we never create an implicit layout context here.
1097                     -- Layout is handled XXX instead.
1098                     -- The code for closing implicit contexts, or
1099                     -- inserting implicit semi-colons, is therefore
1100                     -- irrelevant as it only applies in an implicit
1101                     -- context.
1102                     alr <- extension alternativeLayoutRule
1103                     unless alr $ f t
1104     where f ITdo    = pushLexState layout_do
1105           f ITmdo   = pushLexState layout_do
1106           f ITof    = pushLexState layout
1107           f ITlet   = pushLexState layout
1108           f ITwhere = pushLexState layout
1109           f ITrec   = pushLexState layout
1110           f _       = return ()
1111
1112 -- Pushing a new implicit layout context.  If the indentation of the
1113 -- next token is not greater than the previous layout context, then
1114 -- Haskell 98 says that the new layout context should be empty; that is
1115 -- the lexer must generate {}.
1116 --
1117 -- We are slightly more lenient than this: when the new context is started
1118 -- by a 'do', then we allow the new context to be at the same indentation as
1119 -- the previous context.  This is what the 'strict' argument is for.
1120 --
1121 new_layout_context :: Bool -> Action
1122 new_layout_context strict span _buf _len = do
1123     _ <- popLexState
1124     (AI l _) <- getInput
1125     let offset = srcLocCol l
1126     ctx <- getContext
1127     nondecreasing <- extension nondecreasingIndentation
1128     let strict' = strict || not nondecreasing
1129     case ctx of
1130         Layout prev_off : _  |
1131            (strict'     && prev_off >= offset  ||
1132             not strict' && prev_off > offset) -> do
1133                 -- token is indented to the left of the previous context.
1134                 -- we must generate a {} sequence now.
1135                 pushLexState layout_left
1136                 return (L span ITvocurly)
1137         _ -> do
1138                 setContext (Layout offset : ctx)
1139                 return (L span ITvocurly)
1140
1141 do_layout_left :: Action
1142 do_layout_left span _buf _len = do
1143     _ <- popLexState
1144     pushLexState bol  -- we must be at the start of a line
1145     return (L span ITvccurly)
1146
1147 -- -----------------------------------------------------------------------------
1148 -- LINE pragmas
1149
1150 setLine :: Int -> Action
1151 setLine code span buf len = do
1152   let line = parseUnsignedInteger buf len 10 octDecDigit
1153   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1154         -- subtract one: the line number refers to the *following* line
1155   _ <- popLexState
1156   pushLexState code
1157   lexToken
1158
1159 setFile :: Int -> Action
1160 setFile code span buf len = do
1161   let file = lexemeToFastString (stepOn buf) (len-2)
1162   setAlrLastLoc noSrcSpan
1163   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1164   _ <- popLexState
1165   pushLexState code
1166   lexToken
1167
1168
1169 -- -----------------------------------------------------------------------------
1170 -- Options, includes and language pragmas.
1171
1172 lex_string_prag :: (String -> Token) -> Action
1173 lex_string_prag mkTok span _buf _len
1174     = do input <- getInput
1175          start <- getSrcLoc
1176          tok <- go [] input
1177          end <- getSrcLoc
1178          return (L (mkSrcSpan start end) tok)
1179     where go acc input
1180               = if isString input "#-}"
1181                    then do setInput input
1182                            return (mkTok (reverse acc))
1183                    else case alexGetChar input of
1184                           Just (c,i) -> go (c:acc) i
1185                           Nothing -> err input
1186           isString _ [] = True
1187           isString i (x:xs)
1188               = case alexGetChar i of
1189                   Just (c,i') | c == x    -> isString i' xs
1190                   _other -> False
1191           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1192
1193
1194 -- -----------------------------------------------------------------------------
1195 -- Strings & Chars
1196
1197 -- This stuff is horrible.  I hates it.
1198
1199 lex_string_tok :: Action
1200 lex_string_tok span _buf _len = do
1201   tok <- lex_string ""
1202   end <- getSrcLoc
1203   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1204
1205 lex_string :: String -> P Token
1206 lex_string s = do
1207   i <- getInput
1208   case alexGetChar' i of
1209     Nothing -> lit_error i
1210
1211     Just ('"',i)  -> do
1212         setInput i
1213         magicHash <- extension magicHashEnabled
1214         if magicHash
1215           then do
1216             i <- getInput
1217             case alexGetChar' i of
1218               Just ('#',i) -> do
1219                    setInput i
1220                    if any (> '\xFF') s
1221                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1222                     else let s' = mkZFastString (reverse s) in
1223                          return (ITprimstring s')
1224                         -- mkZFastString is a hack to avoid encoding the
1225                         -- string in UTF-8.  We just want the exact bytes.
1226               _other ->
1227                 return (ITstring (mkFastString (reverse s)))
1228           else
1229                 return (ITstring (mkFastString (reverse s)))
1230
1231     Just ('\\',i)
1232         | Just ('&',i) <- next -> do
1233                 setInput i; lex_string s
1234         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1235                            -- is_space only works for <= '\x7f' (#3751)
1236                 setInput i; lex_stringgap s
1237         where next = alexGetChar' i
1238
1239     Just (c, i1) -> do
1240         case c of
1241           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1242           c | isAny c -> do setInput i1; lex_string (c:s)
1243           _other -> lit_error i
1244
1245 lex_stringgap :: String -> P Token
1246 lex_stringgap s = do
1247   i <- getInput
1248   c <- getCharOrFail i
1249   case c of
1250     '\\' -> lex_string s
1251     c | is_space c -> lex_stringgap s
1252     _other -> lit_error i
1253
1254
1255 lex_char_tok :: Action
1256 -- Here we are basically parsing character literals, such as 'x' or '\n'
1257 -- but, when Template Haskell is on, we additionally spot
1258 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1259 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1260 -- So we have to do two characters of lookahead: when we see 'x we need to
1261 -- see if there's a trailing quote
1262 lex_char_tok span _buf _len = do        -- We've seen '
1263    i1 <- getInput       -- Look ahead to first character
1264    let loc = srcSpanStart span
1265    case alexGetChar' i1 of
1266         Nothing -> lit_error  i1
1267
1268         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1269                   th_exts <- extension thEnabled
1270                   if th_exts then do
1271                         setInput i2
1272                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1273                    else lit_error i1
1274
1275         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1276                   setInput i2
1277                   lit_ch <- lex_escape
1278                   i3 <- getInput
1279                   mc <- getCharOrFail i3 -- Trailing quote
1280                   if mc == '\'' then finish_char_tok loc lit_ch
1281                                 else lit_error i3
1282
1283         Just (c, i2@(AI _end2 _))
1284                 | not (isAny c) -> lit_error i1
1285                 | otherwise ->
1286
1287                 -- We've seen 'x, where x is a valid character
1288                 --  (i.e. not newline etc) but not a quote or backslash
1289            case alexGetChar' i2 of      -- Look ahead one more character
1290                 Just ('\'', i3) -> do   -- We've seen 'x'
1291                         setInput i3
1292                         finish_char_tok loc c
1293                 _other -> do            -- We've seen 'x not followed by quote
1294                                         -- (including the possibility of EOF)
1295                                         -- If TH is on, just parse the quote only
1296                         th_exts <- extension thEnabled
1297                         let (AI end _) = i1
1298                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1299                                    else lit_error i2
1300
1301 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1302 finish_char_tok loc ch  -- We've already seen the closing quote
1303                         -- Just need to check for trailing #
1304   = do  magicHash <- extension magicHashEnabled
1305         i@(AI end _) <- getInput
1306         if magicHash then do
1307                 case alexGetChar' i of
1308                         Just ('#',i@(AI end _)) -> do
1309                                 setInput i
1310                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1311                         _other ->
1312                                 return (L (mkSrcSpan loc end) (ITchar ch))
1313             else do
1314                    return (L (mkSrcSpan loc end) (ITchar ch))
1315
1316 isAny :: Char -> Bool
1317 isAny c | c > '\x7f' = isPrint c
1318         | otherwise  = is_any c
1319
1320 lex_escape :: P Char
1321 lex_escape = do
1322   i0 <- getInput
1323   c <- getCharOrFail i0
1324   case c of
1325         'a'   -> return '\a'
1326         'b'   -> return '\b'
1327         'f'   -> return '\f'
1328         'n'   -> return '\n'
1329         'r'   -> return '\r'
1330         't'   -> return '\t'
1331         'v'   -> return '\v'
1332         '\\'  -> return '\\'
1333         '"'   -> return '\"'
1334         '\''  -> return '\''
1335         '^'   -> do i1 <- getInput
1336                     c <- getCharOrFail i1
1337                     if c >= '@' && c <= '_'
1338                         then return (chr (ord c - ord '@'))
1339                         else lit_error i1
1340
1341         'x'   -> readNum is_hexdigit 16 hexDigit
1342         'o'   -> readNum is_octdigit  8 octDecDigit
1343         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1344
1345         c1 ->  do
1346            i <- getInput
1347            case alexGetChar' i of
1348             Nothing -> lit_error i0
1349             Just (c2,i2) ->
1350               case alexGetChar' i2 of
1351                 Nothing -> do lit_error i0
1352                 Just (c3,i3) ->
1353                    let str = [c1,c2,c3] in
1354                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1355                                      Just rest <- [stripPrefix p str] ] of
1356                           (escape_char,[]):_ -> do
1357                                 setInput i3
1358                                 return escape_char
1359                           (escape_char,_:_):_ -> do
1360                                 setInput i2
1361                                 return escape_char
1362                           [] -> lit_error i0
1363
1364 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1365 readNum is_digit base conv = do
1366   i <- getInput
1367   c <- getCharOrFail i
1368   if is_digit c
1369         then readNum2 is_digit base conv (conv c)
1370         else lit_error i
1371
1372 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1373 readNum2 is_digit base conv i = do
1374   input <- getInput
1375   read i input
1376   where read i input = do
1377           case alexGetChar' input of
1378             Just (c,input') | is_digit c -> do
1379                let i' = i*base + conv c
1380                if i' > 0x10ffff
1381                   then setInput input >> lexError "numeric escape sequence out of range"
1382                   else read i' input'
1383             _other -> do
1384               setInput input; return (chr i)
1385
1386
1387 silly_escape_chars :: [(String, Char)]
1388 silly_escape_chars = [
1389         ("NUL", '\NUL'),
1390         ("SOH", '\SOH'),
1391         ("STX", '\STX'),
1392         ("ETX", '\ETX'),
1393         ("EOT", '\EOT'),
1394         ("ENQ", '\ENQ'),
1395         ("ACK", '\ACK'),
1396         ("BEL", '\BEL'),
1397         ("BS", '\BS'),
1398         ("HT", '\HT'),
1399         ("LF", '\LF'),
1400         ("VT", '\VT'),
1401         ("FF", '\FF'),
1402         ("CR", '\CR'),
1403         ("SO", '\SO'),
1404         ("SI", '\SI'),
1405         ("DLE", '\DLE'),
1406         ("DC1", '\DC1'),
1407         ("DC2", '\DC2'),
1408         ("DC3", '\DC3'),
1409         ("DC4", '\DC4'),
1410         ("NAK", '\NAK'),
1411         ("SYN", '\SYN'),
1412         ("ETB", '\ETB'),
1413         ("CAN", '\CAN'),
1414         ("EM", '\EM'),
1415         ("SUB", '\SUB'),
1416         ("ESC", '\ESC'),
1417         ("FS", '\FS'),
1418         ("GS", '\GS'),
1419         ("RS", '\RS'),
1420         ("US", '\US'),
1421         ("SP", '\SP'),
1422         ("DEL", '\DEL')
1423         ]
1424
1425 -- before calling lit_error, ensure that the current input is pointing to
1426 -- the position of the error in the buffer.  This is so that we can report
1427 -- a correct location to the user, but also so we can detect UTF-8 decoding
1428 -- errors if they occur.
1429 lit_error :: AlexInput -> P a
1430 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1431
1432 getCharOrFail :: AlexInput -> P Char
1433 getCharOrFail i =  do
1434   case alexGetChar' i of
1435         Nothing -> lexError "unexpected end-of-file in string/character literal"
1436         Just (c,i)  -> do setInput i; return c
1437
1438 -- -----------------------------------------------------------------------------
1439 -- QuasiQuote
1440
1441 lex_quasiquote_tok :: Action
1442 lex_quasiquote_tok span buf len = do
1443   let quoter = tail (lexemeToString buf (len - 1))
1444                 -- 'tail' drops the initial '[',
1445                 -- while the -1 drops the trailing '|'
1446   quoteStart <- getSrcLoc
1447   quote <- lex_quasiquote ""
1448   end <- getSrcLoc
1449   return (L (mkSrcSpan (srcSpanStart span) end)
1450            (ITquasiQuote (mkFastString quoter,
1451                           mkFastString (reverse quote),
1452                           mkSrcSpan quoteStart end)))
1453
1454 lex_quasiquote :: String -> P String
1455 lex_quasiquote s = do
1456   i <- getInput
1457   case alexGetChar' i of
1458     Nothing -> lit_error i
1459
1460     Just ('\\',i)
1461         | Just ('|',i) <- next -> do
1462                 setInput i; lex_quasiquote ('|' : s)
1463         | Just (']',i) <- next -> do
1464                 setInput i; lex_quasiquote (']' : s)
1465         where next = alexGetChar' i
1466
1467     Just ('|',i)
1468         | Just (']',i) <- next -> do
1469                 setInput i; return s
1470         where next = alexGetChar' i
1471
1472     Just (c, i) -> do
1473          setInput i; lex_quasiquote (c : s)
1474
1475 -- -----------------------------------------------------------------------------
1476 -- Warnings
1477
1478 warn :: DynFlag -> SDoc -> Action
1479 warn option warning srcspan _buf _len = do
1480     addWarning option srcspan warning
1481     lexToken
1482
1483 warnThen :: DynFlag -> SDoc -> Action -> Action
1484 warnThen option warning action srcspan buf len = do
1485     addWarning option srcspan warning
1486     action srcspan buf len
1487
1488 -- -----------------------------------------------------------------------------
1489 -- The Parse Monad
1490
1491 data LayoutContext
1492   = NoLayout
1493   | Layout !Int
1494   deriving Show
1495
1496 data ParseResult a
1497   = POk PState a
1498   | PFailed
1499         SrcSpan         -- The start and end of the text span related to
1500                         -- the error.  Might be used in environments which can
1501                         -- show this span, e.g. by highlighting it.
1502         Message         -- The error message
1503
1504 data PState = PState {
1505         buffer     :: StringBuffer,
1506         dflags     :: DynFlags,
1507         messages   :: Messages,
1508         last_loc   :: SrcSpan,  -- pos of previous token
1509         last_len   :: !Int,     -- len of previous token
1510         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1511         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1512         context    :: [LayoutContext],
1513         lex_state  :: [Int],
1514         -- Used in the alternative layout rule:
1515         -- These tokens are the next ones to be sent out. They are
1516         -- just blindly emitted, without the rule looking at them again:
1517         alr_pending_implicit_tokens :: [Located Token],
1518         -- This is the next token to be considered or, if it is Nothing,
1519         -- we need to get the next token from the input stream:
1520         alr_next_token :: Maybe (Located Token),
1521         -- This is what we consider to be the locatino of the last token
1522         -- emitted:
1523         alr_last_loc :: SrcSpan,
1524         -- The stack of layout contexts:
1525         alr_context :: [ALRContext],
1526         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1527         -- us what sort of layout the '{' will open:
1528         alr_expecting_ocurly :: Maybe ALRLayout,
1529         -- Have we just had the '}' for a let block? If so, than an 'in'
1530         -- token doesn't need to close anything:
1531         alr_justClosedExplicitLetBlock :: Bool
1532      }
1533         -- last_loc and last_len are used when generating error messages,
1534         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1535         -- current token to happyError, we could at least get rid of last_len.
1536         -- Getting rid of last_loc would require finding another way to
1537         -- implement pushCurrentContext (which is only called from one place).
1538
1539 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1540                               Bool{- is it a 'let' block? -}
1541                 | ALRLayout ALRLayout Int
1542 data ALRLayout = ALRLayoutLet
1543                | ALRLayoutWhere
1544                | ALRLayoutOf
1545                | ALRLayoutDo
1546
1547 newtype P a = P { unP :: PState -> ParseResult a }
1548
1549 instance Monad P where
1550   return = returnP
1551   (>>=) = thenP
1552   fail = failP
1553
1554 returnP :: a -> P a
1555 returnP a = a `seq` (P $ \s -> POk s a)
1556
1557 thenP :: P a -> (a -> P b) -> P b
1558 (P m) `thenP` k = P $ \ s ->
1559         case m s of
1560                 POk s1 a         -> (unP (k a)) s1
1561                 PFailed span err -> PFailed span err
1562
1563 failP :: String -> P a
1564 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1565
1566 failMsgP :: String -> P a
1567 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1568
1569 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1570 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1571
1572 failSpanMsgP :: SrcSpan -> SDoc -> P a
1573 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1574
1575 getPState :: P PState
1576 getPState = P $ \s -> POk s s
1577
1578 getDynFlags :: P DynFlags
1579 getDynFlags = P $ \s -> POk s (dflags s)
1580
1581 withThisPackage :: (PackageId -> a) -> P a
1582 withThisPackage f
1583  = do   pkg     <- liftM thisPackage getDynFlags
1584         return  $ f pkg
1585
1586 extension :: (Int -> Bool) -> P Bool
1587 extension p = P $ \s -> POk s (p $! extsBitmap s)
1588
1589 getExts :: P Int
1590 getExts = P $ \s -> POk s (extsBitmap s)
1591
1592 setExts :: (Int -> Int) -> P ()
1593 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1594
1595 setSrcLoc :: SrcLoc -> P ()
1596 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1597
1598 getSrcLoc :: P SrcLoc
1599 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1600
1601 setLastToken :: SrcSpan -> Int -> P ()
1602 setLastToken loc len = P $ \s -> POk s {
1603   last_loc=loc,
1604   last_len=len
1605   } ()
1606
1607 data AlexInput = AI SrcLoc StringBuffer
1608
1609 alexInputPrevChar :: AlexInput -> Char
1610 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1611
1612 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1613 alexGetChar (AI loc s)
1614   | atEnd s   = Nothing
1615   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1616                 --trace (show (ord c)) $
1617                 Just (adj_c, (AI loc' s'))
1618   where (c,s') = nextChar s
1619         loc'   = advanceSrcLoc loc c
1620
1621         non_graphic     = '\x0'
1622         upper           = '\x1'
1623         lower           = '\x2'
1624         digit           = '\x3'
1625         symbol          = '\x4'
1626         space           = '\x5'
1627         other_graphic   = '\x6'
1628
1629         adj_c
1630           | c <= '\x06' = non_graphic
1631           | c <= '\x7f' = c
1632           -- Alex doesn't handle Unicode, so when Unicode
1633           -- character is encountered we output these values
1634           -- with the actual character value hidden in the state.
1635           | otherwise =
1636                 case generalCategory c of
1637                   UppercaseLetter       -> upper
1638                   LowercaseLetter       -> lower
1639                   TitlecaseLetter       -> upper
1640                   ModifierLetter        -> other_graphic
1641                   OtherLetter           -> lower -- see #1103
1642                   NonSpacingMark        -> other_graphic
1643                   SpacingCombiningMark  -> other_graphic
1644                   EnclosingMark         -> other_graphic
1645                   DecimalNumber         -> digit
1646                   LetterNumber          -> other_graphic
1647                   OtherNumber           -> digit -- see #4373
1648                   ConnectorPunctuation  -> symbol
1649                   DashPunctuation       -> symbol
1650                   OpenPunctuation       -> other_graphic
1651                   ClosePunctuation      -> other_graphic
1652                   InitialQuote          -> other_graphic
1653                   FinalQuote            -> other_graphic
1654                   OtherPunctuation      -> symbol
1655                   MathSymbol            -> symbol
1656                   CurrencySymbol        -> symbol
1657                   ModifierSymbol        -> symbol
1658                   OtherSymbol           -> symbol
1659                   Space                 -> space
1660                   _other                -> non_graphic
1661
1662 -- This version does not squash unicode characters, it is used when
1663 -- lexing strings.
1664 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1665 alexGetChar' (AI loc s)
1666   | atEnd s   = Nothing
1667   | otherwise = c `seq` loc' `seq` s' `seq`
1668                 --trace (show (ord c)) $
1669                 Just (c, (AI loc' s'))
1670   where (c,s') = nextChar s
1671         loc'   = advanceSrcLoc loc c
1672
1673 getInput :: P AlexInput
1674 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1675
1676 setInput :: AlexInput -> P ()
1677 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1678
1679 nextIsEOF :: P Bool
1680 nextIsEOF = do
1681   AI _ s <- getInput
1682   return $ atEnd s
1683
1684 pushLexState :: Int -> P ()
1685 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1686
1687 popLexState :: P Int
1688 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1689
1690 getLexState :: P Int
1691 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1692
1693 popNextToken :: P (Maybe (Located Token))
1694 popNextToken
1695     = P $ \s@PState{ alr_next_token = m } ->
1696               POk (s {alr_next_token = Nothing}) m
1697
1698 activeContext :: P Bool
1699 activeContext = do
1700   ctxt <- getALRContext
1701   expc <- getAlrExpectingOCurly
1702   impt <- implicitTokenPending
1703   case (ctxt,expc) of
1704     ([],Nothing) -> return impt
1705     _other       -> return True
1706
1707 setAlrLastLoc :: SrcSpan -> P ()
1708 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1709
1710 getAlrLastLoc :: P SrcSpan
1711 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1712
1713 getALRContext :: P [ALRContext]
1714 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1715
1716 setALRContext :: [ALRContext] -> P ()
1717 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1718
1719 getJustClosedExplicitLetBlock :: P Bool
1720 getJustClosedExplicitLetBlock
1721  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1722
1723 setJustClosedExplicitLetBlock :: Bool -> P ()
1724 setJustClosedExplicitLetBlock b
1725  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1726
1727 setNextToken :: Located Token -> P ()
1728 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1729
1730 implicitTokenPending :: P Bool
1731 implicitTokenPending
1732     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1733               case ts of
1734               [] -> POk s False
1735               _  -> POk s True
1736
1737 popPendingImplicitToken :: P (Maybe (Located Token))
1738 popPendingImplicitToken
1739     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1740               case ts of
1741               [] -> POk s Nothing
1742               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1743
1744 setPendingImplicitTokens :: [Located Token] -> P ()
1745 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1746
1747 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1748 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1749
1750 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1751 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1752
1753 -- for reasons of efficiency, flags indicating language extensions (eg,
1754 -- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
1755 -- integer
1756
1757 genericsBit :: Int
1758 genericsBit = 0 -- {| and |}
1759 ffiBit :: Int
1760 ffiBit     = 1
1761 parrBit :: Int
1762 parrBit    = 2
1763 arrowsBit :: Int
1764 arrowsBit  = 4
1765 thBit :: Int
1766 thBit      = 5
1767 ipBit :: Int
1768 ipBit      = 6
1769 explicitForallBit :: Int
1770 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1771 bangPatBit :: Int
1772 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1773                 -- (doesn't affect the lexer)
1774 tyFamBit :: Int
1775 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1776 haddockBit :: Int
1777 haddockBit = 10 -- Lex and parse Haddock comments
1778 magicHashBit :: Int
1779 magicHashBit = 11 -- "#" in both functions and operators
1780 kindSigsBit :: Int
1781 kindSigsBit = 12 -- Kind signatures on type variables
1782 recursiveDoBit :: Int
1783 recursiveDoBit = 13 -- mdo
1784 unicodeSyntaxBit :: Int
1785 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1786 unboxedTuplesBit :: Int
1787 unboxedTuplesBit = 15 -- (# and #)
1788 datatypeContextsBit :: Int
1789 datatypeContextsBit = 16
1790 transformComprehensionsBit :: Int
1791 transformComprehensionsBit = 17
1792 qqBit :: Int
1793 qqBit      = 18 -- enable quasiquoting
1794 inRulePragBit :: Int
1795 inRulePragBit = 19
1796 rawTokenStreamBit :: Int
1797 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1798 recBit :: Int
1799 recBit = 22 -- rec
1800 alternativeLayoutRuleBit :: Int
1801 alternativeLayoutRuleBit = 23
1802 relaxedLayoutBit :: Int
1803 relaxedLayoutBit = 24
1804 nondecreasingIndentationBit :: Int
1805 nondecreasingIndentationBit = 25
1806
1807 always :: Int -> Bool
1808 always           _     = True
1809 genericsEnabled :: Int -> Bool
1810 genericsEnabled  flags = testBit flags genericsBit
1811 parrEnabled :: Int -> Bool
1812 parrEnabled      flags = testBit flags parrBit
1813 arrowsEnabled :: Int -> Bool
1814 arrowsEnabled    flags = testBit flags arrowsBit
1815 thEnabled :: Int -> Bool
1816 thEnabled        flags = testBit flags thBit
1817 ipEnabled :: Int -> Bool
1818 ipEnabled        flags = testBit flags ipBit
1819 explicitForallEnabled :: Int -> Bool
1820 explicitForallEnabled flags = testBit flags explicitForallBit
1821 bangPatEnabled :: Int -> Bool
1822 bangPatEnabled   flags = testBit flags bangPatBit
1823 -- tyFamEnabled :: Int -> Bool
1824 -- tyFamEnabled     flags = testBit flags tyFamBit
1825 haddockEnabled :: Int -> Bool
1826 haddockEnabled   flags = testBit flags haddockBit
1827 magicHashEnabled :: Int -> Bool
1828 magicHashEnabled flags = testBit flags magicHashBit
1829 -- kindSigsEnabled :: Int -> Bool
1830 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1831 unicodeSyntaxEnabled :: Int -> Bool
1832 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1833 unboxedTuplesEnabled :: Int -> Bool
1834 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1835 datatypeContextsEnabled :: Int -> Bool
1836 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1837 qqEnabled :: Int -> Bool
1838 qqEnabled        flags = testBit flags qqBit
1839 -- inRulePrag :: Int -> Bool
1840 -- inRulePrag       flags = testBit flags inRulePragBit
1841 rawTokenStreamEnabled :: Int -> Bool
1842 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1843 alternativeLayoutRule :: Int -> Bool
1844 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1845 relaxedLayout :: Int -> Bool
1846 relaxedLayout flags = testBit flags relaxedLayoutBit
1847 nondecreasingIndentation :: Int -> Bool
1848 nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
1849
1850 -- PState for parsing options pragmas
1851 --
1852 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1853 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1854                                  lex_state = [bol, option_prags, 0]
1855                              }
1856
1857 -- create a parse state
1858 --
1859 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1860 mkPState flags buf loc =
1861   PState {
1862       buffer        = buf,
1863       dflags        = flags,
1864       messages      = emptyMessages,
1865       last_loc      = mkSrcSpan loc loc,
1866       last_len      = 0,
1867       loc           = loc,
1868       extsBitmap    = fromIntegral bitmap,
1869       context       = [],
1870       lex_state     = [bol, 0],
1871       alr_pending_implicit_tokens = [],
1872       alr_next_token = Nothing,
1873       alr_last_loc = noSrcSpan,
1874       alr_context = [],
1875       alr_expecting_ocurly = Nothing,
1876       alr_justClosedExplicitLetBlock = False
1877     }
1878     where
1879       bitmap =     genericsBit       `setBitIf` xopt Opt_Generics flags
1880                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1881                .|. parrBit           `setBitIf` xopt Opt_ParallelArrays  flags
1882                .|. arrowsBit         `setBitIf` xopt Opt_Arrows          flags
1883                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1884                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes     flags
1885                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams  flags
1886                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll  flags
1887                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns    flags
1888                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies    flags
1889                .|. haddockBit        `setBitIf` dopt Opt_Haddock         flags
1890                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash       flags
1891                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures  flags
1892                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo     flags
1893                .|. recBit            `setBitIf` xopt Opt_DoRec           flags
1894                .|. recBit            `setBitIf` xopt Opt_Arrows          flags
1895                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax   flags
1896                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples   flags
1897                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1898                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1899                .|. transformComprehensionsBit `setBitIf` xopt Opt_MonadComprehensions flags
1900                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1901                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1902                .|. relaxedLayoutBit  `setBitIf` xopt Opt_RelaxedLayout flags
1903                .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
1904       --
1905       setBitIf :: Int -> Bool -> Int
1906       b `setBitIf` cond | cond      = bit b
1907                         | otherwise = 0
1908
1909 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1910 addWarning option srcspan warning
1911  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1912        let warning' = mkWarnMsg srcspan alwaysQualify warning
1913            ws' = if dopt option d then ws `snocBag` warning' else ws
1914        in POk s{messages=(ws', es)} ()
1915
1916 getMessages :: PState -> Messages
1917 getMessages PState{messages=ms} = ms
1918
1919 getContext :: P [LayoutContext]
1920 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1921
1922 setContext :: [LayoutContext] -> P ()
1923 setContext ctx = P $ \s -> POk s{context=ctx} ()
1924
1925 popContext :: P ()
1926 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1927                               last_len = len, last_loc = last_loc }) ->
1928   case ctx of
1929         (_:tl) -> POk s{ context = tl } ()
1930         []     -> PFailed last_loc (srcParseErr buf len)
1931
1932 -- Push a new layout context at the indentation of the last token read.
1933 -- This is only used at the outer level of a module when the 'module'
1934 -- keyword is missing.
1935 pushCurrentContext :: P ()
1936 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1937     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1938
1939 getOffside :: P Ordering
1940 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1941                 let offs = srcSpanStartCol loc in
1942                 let ord = case stk of
1943                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1944                                         compare offs n
1945                         _            -> GT
1946                 in POk s ord
1947
1948 -- ---------------------------------------------------------------------------
1949 -- Construct a parse error
1950
1951 srcParseErr
1952   :: StringBuffer       -- current buffer (placed just after the last token)
1953   -> Int                -- length of the previous token
1954   -> Message
1955 srcParseErr buf len
1956   = hcat [ if null token
1957              then ptext (sLit "parse error (possibly incorrect indentation)")
1958              else hcat [ptext (sLit "parse error on input "),
1959                         char '`', text token, char '\'']
1960     ]
1961   where token = lexemeToString (offsetBytes (-len) buf) len
1962
1963 -- Report a parse failure, giving the span of the previous token as
1964 -- the location of the error.  This is the entry point for errors
1965 -- detected during parsing.
1966 srcParseFail :: P a
1967 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1968                             last_loc = last_loc } ->
1969     PFailed last_loc (srcParseErr buf len)
1970
1971 -- A lexical error is reported at a particular position in the source file,
1972 -- not over a token range.
1973 lexError :: String -> P a
1974 lexError str = do
1975   loc <- getSrcLoc
1976   (AI end buf) <- getInput
1977   reportLexError loc end buf str
1978
1979 -- -----------------------------------------------------------------------------
1980 -- This is the top-level function: called from the parser each time a
1981 -- new token is to be read from the input.
1982
1983 lexer :: (Located Token -> P a) -> P a
1984 lexer cont = do
1985   alr <- extension alternativeLayoutRule
1986   let lexTokenFun = if alr then lexTokenAlr else lexToken
1987   tok@(L _span _tok__) <- lexTokenFun
1988   --trace ("token: " ++ show _tok__) $ do
1989   cont tok
1990
1991 lexTokenAlr :: P (Located Token)
1992 lexTokenAlr = do mPending <- popPendingImplicitToken
1993                  t <- case mPending of
1994                       Nothing ->
1995                           do mNext <- popNextToken
1996                              t <- case mNext of
1997                                   Nothing -> lexToken
1998                                   Just next -> return next
1999                              alternativeLayoutRuleToken t
2000                       Just t ->
2001                           return t
2002                  setAlrLastLoc (getLoc t)
2003                  case unLoc t of
2004                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
2005                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
2006                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
2007                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
2008                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
2009                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
2010                      _       -> return ()
2011                  return t
2012
2013 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
2014 alternativeLayoutRuleToken t
2015     = do context <- getALRContext
2016          lastLoc <- getAlrLastLoc
2017          mExpectingOCurly <- getAlrExpectingOCurly
2018          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
2019          setJustClosedExplicitLetBlock False
2020          dflags <- getDynFlags
2021          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
2022              thisLoc = getLoc t
2023              thisCol = srcSpanStartCol thisLoc
2024              newLine = (lastLoc == noSrcSpan)
2025                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
2026          case (unLoc t, context, mExpectingOCurly) of
2027              -- This case handles a GHC extension to the original H98
2028              -- layout rule...
2029              (ITocurly, _, Just alrLayout) ->
2030                  do setAlrExpectingOCurly Nothing
2031                     let isLet = case alrLayout of
2032                                 ALRLayoutLet -> True
2033                                 _ -> False
2034                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
2035                     return t
2036              -- ...and makes this case unnecessary
2037              {-
2038              -- I think our implicit open-curly handling is slightly
2039              -- different to John's, in how it interacts with newlines
2040              -- and "in"
2041              (ITocurly, _, Just _) ->
2042                  do setAlrExpectingOCurly Nothing
2043                     setNextToken t
2044                     lexTokenAlr
2045              -}
2046              (_, ALRLayout _ col : ls, Just expectingOCurly)
2047               | (thisCol > col) ||
2048                 (thisCol == col &&
2049                  isNonDecreasingIntentation expectingOCurly) ->
2050                  do setAlrExpectingOCurly Nothing
2051                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2052                     setNextToken t
2053                     return (L thisLoc ITocurly)
2054               | otherwise ->
2055                  do setAlrExpectingOCurly Nothing
2056                     setPendingImplicitTokens [L lastLoc ITccurly]
2057                     setNextToken t
2058                     return (L lastLoc ITocurly)
2059              (_, _, Just expectingOCurly) ->
2060                  do setAlrExpectingOCurly Nothing
2061                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2062                     setNextToken t
2063                     return (L thisLoc ITocurly)
2064              -- We do the [] cases earlier than in the spec, as we
2065              -- have an actual EOF token
2066              (ITeof, ALRLayout _ _ : ls, _) ->
2067                  do setALRContext ls
2068                     setNextToken t
2069                     return (L thisLoc ITccurly)
2070              (ITeof, _, _) ->
2071                  return t
2072              -- the other ITeof case omitted; general case below covers it
2073              (ITin, _, _)
2074               | justClosedExplicitLetBlock ->
2075                  return t
2076              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2077               | newLine ->
2078                  do setPendingImplicitTokens [t]
2079                     setALRContext ls
2080                     return (L thisLoc ITccurly)
2081              -- This next case is to handle a transitional issue:
2082              (ITwhere, ALRLayout _ col : ls, _)
2083               | newLine && thisCol == col && transitional ->
2084                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2085                                thisLoc
2086                                (transitionalAlternativeLayoutWarning
2087                                     "`where' clause at the same depth as implicit layout block")
2088                     setALRContext ls
2089                     setNextToken t
2090                     -- Note that we use lastLoc, as we may need to close
2091                     -- more layouts, or give a semicolon
2092                     return (L lastLoc ITccurly)
2093              -- This next case is to handle a transitional issue:
2094              (ITvbar, ALRLayout _ col : ls, _)
2095               | newLine && thisCol == col && transitional ->
2096                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2097                                thisLoc
2098                                (transitionalAlternativeLayoutWarning
2099                                     "`|' at the same depth as implicit layout block")
2100                     setALRContext ls
2101                     setNextToken t
2102                     -- Note that we use lastLoc, as we may need to close
2103                     -- more layouts, or give a semicolon
2104                     return (L lastLoc ITccurly)
2105              (_, ALRLayout _ col : ls, _)
2106               | newLine && thisCol == col ->
2107                  do setNextToken t
2108                     return (L thisLoc ITsemi)
2109               | newLine && thisCol < col ->
2110                  do setALRContext ls
2111                     setNextToken t
2112                     -- Note that we use lastLoc, as we may need to close
2113                     -- more layouts, or give a semicolon
2114                     return (L lastLoc ITccurly)
2115              -- We need to handle close before open, as 'then' is both
2116              -- an open and a close
2117              (u, _, _)
2118               | isALRclose u ->
2119                  case context of
2120                  ALRLayout _ _ : ls ->
2121                      do setALRContext ls
2122                         setNextToken t
2123                         return (L thisLoc ITccurly)
2124                  ALRNoLayout _ isLet : ls ->
2125                      do let ls' = if isALRopen u
2126                                      then ALRNoLayout (containsCommas u) False : ls
2127                                      else ls
2128                         setALRContext ls'
2129                         when isLet $ setJustClosedExplicitLetBlock True
2130                         return t
2131                  [] ->
2132                      do let ls = if isALRopen u
2133                                     then [ALRNoLayout (containsCommas u) False]
2134                                     else ls
2135                         setALRContext ls
2136                         -- XXX This is an error in John's code, but
2137                         -- it looks reachable to me at first glance
2138                         return t
2139              (u, _, _)
2140               | isALRopen u ->
2141                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2142                     return t
2143              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2144                  do setALRContext ls
2145                     setPendingImplicitTokens [t]
2146                     return (L thisLoc ITccurly)
2147              (ITin, ALRLayout _ _ : ls, _) ->
2148                  do setALRContext ls
2149                     setNextToken t
2150                     return (L thisLoc ITccurly)
2151              -- the other ITin case omitted; general case below covers it
2152              (ITcomma, ALRLayout _ _ : ls, _)
2153               | topNoLayoutContainsCommas ls ->
2154                  do setALRContext ls
2155                     setNextToken t
2156                     return (L thisLoc ITccurly)
2157              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2158                  do setALRContext ls
2159                     setPendingImplicitTokens [t]
2160                     return (L thisLoc ITccurly)
2161              -- the other ITwhere case omitted; general case below covers it
2162              (_, _, _) -> return t
2163
2164 transitionalAlternativeLayoutWarning :: String -> SDoc
2165 transitionalAlternativeLayoutWarning msg
2166     = text "transitional layout will not be accepted in the future:"
2167    $$ text msg
2168
2169 isALRopen :: Token -> Bool
2170 isALRopen ITcase        = True
2171 isALRopen ITif          = True
2172 isALRopen ITthen        = True
2173 isALRopen IToparen      = True
2174 isALRopen ITobrack      = True
2175 isALRopen ITocurly      = True
2176 -- GHC Extensions:
2177 isALRopen IToubxparen   = True
2178 isALRopen ITparenEscape = True
2179 isALRopen _             = False
2180
2181 isALRclose :: Token -> Bool
2182 isALRclose ITof     = True
2183 isALRclose ITthen   = True
2184 isALRclose ITelse   = True
2185 isALRclose ITcparen = True
2186 isALRclose ITcbrack = True
2187 isALRclose ITccurly = True
2188 -- GHC Extensions:
2189 isALRclose ITcubxparen = True
2190 isALRclose _        = False
2191
2192 isNonDecreasingIntentation :: ALRLayout -> Bool
2193 isNonDecreasingIntentation ALRLayoutDo = True
2194 isNonDecreasingIntentation _           = False
2195
2196 containsCommas :: Token -> Bool
2197 containsCommas IToparen = True
2198 containsCommas ITobrack = True
2199 -- John doesn't have {} as containing commas, but records contain them,
2200 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2201 -- (defaultInstallDirs).
2202 containsCommas ITocurly = True
2203 -- GHC Extensions:
2204 containsCommas IToubxparen = True
2205 containsCommas _        = False
2206
2207 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2208 topNoLayoutContainsCommas [] = False
2209 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2210 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2211
2212 lexToken :: P (Located Token)
2213 lexToken = do
2214   inp@(AI loc1 buf) <- getInput
2215   sc <- getLexState
2216   exts <- getExts
2217   case alexScanUser exts inp sc of
2218     AlexEOF -> do
2219         let span = mkSrcSpan loc1 loc1
2220         setLastToken span 0
2221         return (L span ITeof)
2222     AlexError (AI loc2 buf) ->
2223         reportLexError loc1 loc2 buf "lexical error"
2224     AlexSkip inp2 _ -> do
2225         setInput inp2
2226         lexToken
2227     AlexToken inp2@(AI end buf2) _ t -> do
2228         setInput inp2
2229         let span = mkSrcSpan loc1 end
2230         let bytes = byteDiff buf buf2
2231         span `seq` setLastToken span bytes
2232         t span buf bytes
2233
2234 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2235 reportLexError loc1 loc2 buf str
2236   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2237   | otherwise =
2238   let
2239         c = fst (nextChar buf)
2240   in
2241   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2242     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2243     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2244
2245 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2246 lexTokenStream buf loc dflags = unP go initState
2247     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2248           initState = mkPState dflags' buf loc
2249           go = do
2250             ltok <- lexer return
2251             case ltok of
2252               L _ ITeof -> return []
2253               _ -> liftM (ltok:) go
2254
2255 linePrags = Map.singleton "line" (begin line_prag2)
2256
2257 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2258                                  ("options_ghc", lex_string_prag IToptions_prag),
2259                                  ("options_haddock", lex_string_prag ITdocOptions),
2260                                  ("language", token ITlanguage_prag),
2261                                  ("include", lex_string_prag ITinclude_prag)])
2262
2263 ignoredPrags = Map.fromList (map ignored pragmas)
2264                where ignored opt = (opt, nested_comment lexToken)
2265                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2266                      options_pragmas = map ("options_" ++) impls
2267                      -- CFILES is a hugs-only thing.
2268                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2269
2270 oneWordPrags = Map.fromList([("rules", rulePrag),
2271                            ("inline", token (ITinline_prag Inline FunLike)),
2272                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2273                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2274                                           -- Spelling variant
2275                            ("notinline", token (ITinline_prag NoInline FunLike)),
2276                            ("specialize", token ITspec_prag),
2277                            ("source", token ITsource_prag),
2278                            ("warning", token ITwarning_prag),
2279                            ("deprecated", token ITdeprecated_prag),
2280                            ("scc", token ITscc_prag),
2281                            ("generated", token ITgenerated_prag),
2282                            ("core", token ITcore_prag),
2283                            ("unpack", token ITunpack_prag),
2284                            ("ann", token ITann_prag),
2285                            ("vectorize", token ITvect_prag)])
2286
2287 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2288                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2289                              ("specialize inline", token (ITspec_inline_prag True)),
2290                              ("specialize notinline", token (ITspec_inline_prag False)),
2291                              ("vectorize scalar", token ITvect_scalar_prag)])
2292
2293 dispatch_pragmas :: Map String Action -> Action
2294 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2295                                        Just found -> found span buf len
2296                                        Nothing -> lexError "unknown pragma"
2297
2298 known_pragma :: Map String Action -> AlexAccPred Int
2299 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2300                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2301
2302 clean_pragma :: String -> String
2303 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2304                     where unprefix prag' = case stripPrefix "{-#" prag' of
2305                                              Just rest -> rest
2306                                              Nothing -> prag'
2307                           canonical prag' = case prag' of
2308                                               "noinline" -> "notinline"
2309                                               "specialise" -> "specialize"
2310                                               "vectorise" -> "vectorize"
2311                                               "constructorlike" -> "conlike"
2312                                               _ -> prag'
2313                           canon_ws s = unwords (map canonical (words s))
2314 }