compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  38 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  39 -- But alex still generates some code that causes the "lazy unlifted bindings"
  40 -- warning, and old compilers don't know about it so we can't easily turn
  41 -- it off, so for now we use the sledge hammer:
  42 {-# OPTIONS_GHC -w #-}
  43
  44 {-# OPTIONS_GHC -funbox-strict-fields #-}
  45
  46 module Lexer (
  47    Token(..), lexer, pragState, mkPState, PState(..),
  48    P(..), ParseResult(..), getSrcLoc,
  49    getPState, getDynFlags, withThisPackage,
  50    failLocMsgP, failSpanMsgP, srcParseFail,
  51    getMessages,
  52    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  53    getLexState, popLexState, pushLexState,
  54    extension, bangPatEnabled, datatypeContextsEnabled,
  55    addWarning,
  56    lexTokenStream
  57   ) where
  58
  59 import Bag
  60 import ErrUtils
  61 import Outputable
  62 import StringBuffer
  63 import FastString
  64 import SrcLoc
  65 import UniqFM
  66 import DynFlags
  67 import Module
  68 import Ctype
  69 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..) )
  70 import Util             ( readRational )
  71
  72 import Control.Monad
  73 import Data.Bits
  74 import Data.Char
  75 import Data.List
  76 import Data.Maybe
  77 import Data.Map (Map)
  78 import qualified Data.Map as Map
  79 import Data.Ratio
  80 }
  81
  82 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  83 $whitechar   = [\ \n\r\f\v $unispace]
  84 $white_no_nl = $whitechar # \n
  85 $tab         = \t
  86
  87 $ascdigit  = 0-9
  88 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  89 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  90 $digit     = [$ascdigit $unidigit]
  91
  92 $special   = [\(\)\,\;\[\]\`\{\}]
  93 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  94 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  95 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  96
  97 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  98 $asclarge  = [A-Z]
  99 $large     = [$asclarge $unilarge]
 100
 101 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 102 $ascsmall  = [a-z]
 103 $small     = [$ascsmall $unismall \_]
 104
 105 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 106 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 107
 108 $octit     = 0-7
 109 $hexit     = [$decdigit A-F a-f]
 110 $symchar   = [$symbol \:]
 111 $nl        = [\n\r]
 112 $idchar    = [$small $large $digit \']
 113
 114 $pragmachar = [$small $large $digit]
 115
 116 $docsym    = [\| \^ \* \$]
 117
 118 @varid     = $small $idchar*
 119 @conid     = $large $idchar*
 120
 121 @varsym    = $symbol $symchar*
 122 @consym    = \: $symchar*
 123
 124 @decimal     = $decdigit+
 125 @octal       = $octit+
 126 @hexadecimal = $hexit+
 127 @exponent    = [eE] [\-\+]? @decimal
 128
 129 -- we support the hierarchical module name extension:
 130 @qual = (@conid \.)+
 131
 132 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 133
 134 -- normal signed numerical literals can only be explicitly negative,
 135 -- not explicitly positive (contrast @exponent)
 136 @negative = \-
 137 @signed = @negative ?
 138
 139 haskell :-
 140
 141 -- everywhere: skip whitespace and comments
 142 $white_no_nl+                           ;
 143 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 144
 145 -- Everywhere: deal with nested comments.  We explicitly rule out
 146 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 147 -- (this can happen even though pragmas will normally take precedence due to
 148 -- longest-match, because pragmas aren't valid in every state, but comments
 149 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 150 -- set.
 151
 152 "{-" / { isNormalComment } { nested_comment lexToken }
 153
 154 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 155 -- more dashes followed by a symbol should be parsed as a varsym, so we
 156 -- have to exclude those.
 157
 158 -- Since Haddock comments aren't valid in every state, we need to rule them
 159 -- out here.
 160
 161 -- The following two rules match comments that begin with two dashes, but
 162 -- continue with a different character. The rules test that this character
 163 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 164 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 165 -- have a Haddock comment). The rules then munch the rest of the line.
 166
 167 "-- " ~[$docsym \#] .* { lineCommentToken }
 168 "--" [^$symbol : \ ] .* { lineCommentToken }
 169
 170 -- Next, match Haddock comments if no -haddock flag
 171
 172 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 173
 174 -- Now, when we've matched comments that begin with 2 dashes and continue
 175 -- with a different character, we need to match comments that begin with three
 176 -- or more dashes (which clearly can't be Haddock comments). We only need to
 177 -- make sure that the first non-dash character isn't a symbol, and munch the
 178 -- rest of the line.
 179
 180 "---"\-* [^$symbol :] .* { lineCommentToken }
 181
 182 -- Since the previous rules all match dashes followed by at least one
 183 -- character, we also need to match a whole line filled with just dashes.
 184
 185 "--"\-* / { atEOL } { lineCommentToken }
 186
 187 -- We need this rule since none of the other single line comment rules
 188 -- actually match this case.
 189
 190 "-- " / { atEOL } { lineCommentToken }
 191
 192 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 193 -- blank lines) until we find a non-whitespace character, then do layout
 194 -- processing.
 195 --
 196 -- One slight wibble here: what if the line begins with {-#? In
 197 -- theory, we have to lex the pragma to see if it's one we recognise,
 198 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 199 -- as a nested comment.  We don't bother with this: if the line begins
 200 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 201 <bol> {
 202   \n                                    ;
 203   ^\# (line)?                           { begin line_prag1 }
 204   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 205   ^\# \! .* \n                          ; -- #!, for scripts
 206   ()                                    { do_bol }
 207 }
 208
 209 -- after a layout keyword (let, where, do, of), we begin a new layout
 210 -- context if the curly brace is missing.
 211 -- Careful! This stuff is quite delicate.
 212 <layout, layout_do> {
 213   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 214         -- we might encounter {-# here, but {- has been handled already
 215   \n                                    ;
 216   ^\# (line)?                           { begin line_prag1 }
 217 }
 218
 219 -- do is treated in a subtly different way, see new_layout_context
 220 <layout>    ()                          { new_layout_context True }
 221 <layout_do> ()                          { new_layout_context False }
 222
 223 -- after a new layout context which was found to be to the left of the
 224 -- previous context, we have generated a '{' token, and we now need to
 225 -- generate a matching '}' token.
 226 <layout_left>  ()                       { do_layout_left }
 227
 228 <0,option_prags> \n                             { begin bol }
 229
 230 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 231                                 { dispatch_pragmas linePrags }
 232
 233 -- single-line line pragmas, of the form
 234 --    # <line> "<file>" <extra-stuff> \n
 235 <line_prag1> $decdigit+                 { setLine line_prag1a }
 236 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 237 <line_prag1b> .*                        { pop }
 238
 239 -- Haskell-style line pragmas, of the form
 240 --    {-# LINE <line> "<file>" #-}
 241 <line_prag2> $decdigit+                 { setLine line_prag2a }
 242 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 243 <line_prag2b> "#-}"|"-}"                { pop }
 244    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 245    -- with older versions of GHC which generated these.
 246
 247 <0,option_prags> {
 248   "{-#" $whitechar* $pragmachar+
 249         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 250                                  { dispatch_pragmas twoWordPrags }
 251
 252   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 253                                  { dispatch_pragmas oneWordPrags }
 254
 255   -- We ignore all these pragmas, but don't generate a warning for them
 256   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 257                                  { dispatch_pragmas ignoredPrags }
 258
 259   -- ToDo: should only be valid inside a pragma:
 260   "#-}"                                 { endPrag }
 261 }
 262
 263 <option_prags> {
 264   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 265                                    { dispatch_pragmas fileHeaderPrags }
 266
 267   "-- #"                                 { multiline_doc_comment }
 268 }
 269
 270 <0> {
 271   -- In the "0" mode we ignore these pragmas
 272   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 273                      { nested_comment lexToken }
 274 }
 275
 276 <0> {
 277   "-- #" .* { lineCommentToken }
 278 }
 279
 280 <0,option_prags> {
 281   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 282                     (nested_comment lexToken) }
 283 }
 284
 285 -- '0' state: ordinary lexemes
 286
 287 -- Haddock comments
 288
 289 <0,option_prags> {
 290   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 291   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 292 }
 293
 294 -- "special" symbols
 295
 296 <0> {
 297   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 298   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 299 }
 300
 301 <0> {
 302   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 303   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 304   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 305   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 306   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 307   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 308   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 309   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 310
 311   "[" @varid "|"  / { ifExtension qqEnabled }
 312                      { lex_quasiquote_tok }
 313 }
 314
 315 <0> {
 316   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 317                                         { special IToparenbar }
 318   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 319 }
 320
 321 <0> {
 322   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 323 }
 324
 325 <0> {
 326   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 327          { token IToubxparen }
 328   "#)" / { ifExtension unboxedTuplesEnabled }
 329          { token ITcubxparen }
 330 }
 331
 332 <0> {
 333   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 334   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 335 }
 336
 337 <0,option_prags> {
 338   \(                                    { special IToparen }
 339   \)                                    { special ITcparen }
 340   \[                                    { special ITobrack }
 341   \]                                    { special ITcbrack }
 342   \,                                    { special ITcomma }
 343   \;                                    { special ITsemi }
 344   \`                                    { special ITbackquote }
 345
 346   \{                                    { open_brace }
 347   \}                                    { close_brace }
 348 }
 349
 350 <0,option_prags> {
 351   @qual @varid                  { idtoken qvarid }
 352   @qual @conid                  { idtoken qconid }
 353   @varid                        { varid }
 354   @conid                        { idtoken conid }
 355 }
 356
 357 <0> {
 358   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 359   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 360   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 361   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 362 }
 363
 364 -- ToDo: - move `var` and (sym) into lexical syntax?
 365 --       - remove backquote from $special?
 366 <0> {
 367   @qual @varsym       / { ifExtension oldQualOps } { idtoken qvarsym }
 368   @qual @consym       / { ifExtension oldQualOps } { idtoken qconsym }
 369   @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
 370   @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
 371   @varsym                                          { varsym }
 372   @consym                                          { consym }
 373 }
 374
 375 -- For the normal boxed literals we need to be careful
 376 -- when trying to be close to Haskell98
 377 <0> {
 378   -- Normal integral literals (:: Num a => a, from Integer)
 379   @decimal           { tok_num positive 0 0 decimal }
 380   0[oO] @octal       { tok_num positive 2 2 octal }
 381   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 382
 383   -- Normal rational literals (:: Fractional a => a, from Rational)
 384   @floating_point    { strtoken tok_float }
 385 }
 386
 387 <0> {
 388   -- Unboxed ints (:: Int#) and words (:: Word#)
 389   -- It's simpler (and faster?) to give separate cases to the negatives,
 390   -- especially considering octal/hexadecimal prefixes.
 391   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 392   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 393   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 394   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 395   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 396   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 397
 398   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 399   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 400   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 401
 402   -- Unboxed floats and doubles (:: Float#, :: Double#)
 403   -- prim_{float,double} work with signed literals
 404   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 405   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 406 }
 407
 408 -- Strings and chars are lexed by hand-written code.  The reason is
 409 -- that even if we recognise the string or char here in the regex
 410 -- lexer, we would still have to parse the string afterward in order
 411 -- to convert it to a String.
 412 <0> {
 413   \'                            { lex_char_tok }
 414   \"                            { lex_string_tok }
 415 }
 416
 417 {
 418 -- -----------------------------------------------------------------------------
 419 -- The token type
 420
 421 data Token
 422   = ITas                        -- Haskell keywords
 423   | ITcase
 424   | ITclass
 425   | ITdata
 426   | ITdefault
 427   | ITderiving
 428   | ITdo
 429   | ITelse
 430   | IThiding
 431   | ITif
 432   | ITimport
 433   | ITin
 434   | ITinfix
 435   | ITinfixl
 436   | ITinfixr
 437   | ITinstance
 438   | ITlet
 439   | ITmodule
 440   | ITnewtype
 441   | ITof
 442   | ITqualified
 443   | ITthen
 444   | ITtype
 445   | ITwhere
 446   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 447
 448   | ITforall                    -- GHC extension keywords
 449   | ITforeign
 450   | ITexport
 451   | ITlabel
 452   | ITdynamic
 453   | ITsafe
 454   | ITthreadsafe
 455   | ITinterruptible
 456   | ITunsafe
 457   | ITstdcallconv
 458   | ITccallconv
 459   | ITprimcallconv
 460   | ITmdo
 461   | ITfamily
 462   | ITgroup
 463   | ITby
 464   | ITusing
 465
 466         -- Pragmas
 467   | ITinline_prag InlineSpec RuleMatchInfo
 468   | ITspec_prag                 -- SPECIALISE
 469   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 470   | ITsource_prag
 471   | ITrules_prag
 472   | ITwarning_prag
 473   | ITdeprecated_prag
 474   | ITline_prag
 475   | ITscc_prag
 476   | ITgenerated_prag
 477   | ITcore_prag                 -- hdaume: core annotations
 478   | ITunpack_prag
 479   | ITann_prag
 480   | ITclose_prag
 481   | IToptions_prag String
 482   | ITinclude_prag String
 483   | ITlanguage_prag
 484
 485   | ITdotdot                    -- reserved symbols
 486   | ITcolon
 487   | ITdcolon
 488   | ITequal
 489   | ITlam
 490   | ITvbar
 491   | ITlarrow
 492   | ITrarrow
 493   | ITat
 494   | ITtilde
 495   | ITdarrow
 496   | ITminus
 497   | ITbang
 498   | ITstar
 499   | ITdot
 500
 501   | ITbiglam                    -- GHC-extension symbols
 502
 503   | ITocurly                    -- special symbols
 504   | ITccurly
 505   | ITocurlybar                 -- {|, for type applications
 506   | ITccurlybar                 -- |}, for type applications
 507   | ITvocurly
 508   | ITvccurly
 509   | ITobrack
 510   | ITopabrack                  -- [:, for parallel arrays with -XParr
 511   | ITcpabrack                  -- :], for parallel arrays with -XParr
 512   | ITcbrack
 513   | IToparen
 514   | ITcparen
 515   | IToubxparen
 516   | ITcubxparen
 517   | ITsemi
 518   | ITcomma
 519   | ITunderscore
 520   | ITbackquote
 521
 522   | ITvarid   FastString        -- identifiers
 523   | ITconid   FastString
 524   | ITvarsym  FastString
 525   | ITconsym  FastString
 526   | ITqvarid  (FastString,FastString)
 527   | ITqconid  (FastString,FastString)
 528   | ITqvarsym (FastString,FastString)
 529   | ITqconsym (FastString,FastString)
 530   | ITprefixqvarsym (FastString,FastString)
 531   | ITprefixqconsym (FastString,FastString)
 532
 533   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 534
 535   | ITchar       Char
 536   | ITstring     FastString
 537   | ITinteger    Integer
 538   | ITrational   Rational
 539
 540   | ITprimchar   Char
 541   | ITprimstring FastString
 542   | ITprimint    Integer
 543   | ITprimword   Integer
 544   | ITprimfloat  Rational
 545   | ITprimdouble Rational
 546
 547   -- Template Haskell extension tokens
 548   | ITopenExpQuote              --  [| or [e|
 549   | ITopenPatQuote              --  [p|
 550   | ITopenDecQuote              --  [d|
 551   | ITopenTypQuote              --  [t|
 552   | ITcloseQuote                --  |]
 553   | ITidEscape   FastString     --  $x
 554   | ITparenEscape               --  $(
 555   | ITvarQuote                  --  '
 556   | ITtyQuote                   --  ''
 557   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 558
 559   -- Arrow notation extension
 560   | ITproc
 561   | ITrec
 562   | IToparenbar                 --  (|
 563   | ITcparenbar                 --  |)
 564   | ITlarrowtail                --  -<
 565   | ITrarrowtail                --  >-
 566   | ITLarrowtail                --  -<<
 567   | ITRarrowtail                --  >>-
 568
 569   | ITunknown String            -- Used when the lexer can't make sense of it
 570   | ITeof                       -- end of file token
 571
 572   -- Documentation annotations
 573   | ITdocCommentNext  String     -- something beginning '-- |'
 574   | ITdocCommentPrev  String     -- something beginning '-- ^'
 575   | ITdocCommentNamed String     -- something beginning '-- $'
 576   | ITdocSection      Int String -- a section heading
 577   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 578   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 579   | ITlineComment     String     -- comment starting by "--"
 580   | ITblockComment    String     -- comment in {- -}
 581
 582 #ifdef DEBUG
 583   deriving Show -- debugging
 584 #endif
 585
 586 {-
 587 isSpecial :: Token -> Bool
 588 -- If we see M.x, where x is a keyword, but
 589 -- is special, we treat is as just plain M.x,
 590 -- not as a keyword.
 591 isSpecial ITas          = True
 592 isSpecial IThiding      = True
 593 isSpecial ITqualified   = True
 594 isSpecial ITforall      = True
 595 isSpecial ITexport      = True
 596 isSpecial ITlabel       = True
 597 isSpecial ITdynamic     = True
 598 isSpecial ITsafe        = True
 599 isSpecial ITthreadsafe  = True
 600 isSpecial ITinterruptible = True
 601 isSpecial ITunsafe      = True
 602 isSpecial ITccallconv   = True
 603 isSpecial ITstdcallconv = True
 604 isSpecial ITprimcallconv = True
 605 isSpecial ITmdo         = True
 606 isSpecial ITfamily      = True
 607 isSpecial ITgroup   = True
 608 isSpecial ITby      = True
 609 isSpecial ITusing   = True
 610 isSpecial _             = False
 611 -}
 612
 613 -- the bitmap provided as the third component indicates whether the
 614 -- corresponding extension keyword is valid under the extension options
 615 -- provided to the compiler; if the extension corresponding to *any* of the
 616 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 617 -- facilitates using a keyword in two different extensions that can be
 618 -- activated independently)
 619 --
 620 reservedWordsFM :: UniqFM (Token, Int)
 621 reservedWordsFM = listToUFM $
 622         map (\(x, y, z) -> (mkFastString x, (y, z)))
 623        [( "_",          ITunderscore,   0 ),
 624         ( "as",         ITas,           0 ),
 625         ( "case",       ITcase,         0 ),
 626         ( "class",      ITclass,        0 ),
 627         ( "data",       ITdata,         0 ),
 628         ( "default",    ITdefault,      0 ),
 629         ( "deriving",   ITderiving,     0 ),
 630         ( "do",         ITdo,           0 ),
 631         ( "else",       ITelse,         0 ),
 632         ( "hiding",     IThiding,       0 ),
 633         ( "if",         ITif,           0 ),
 634         ( "import",     ITimport,       0 ),
 635         ( "in",         ITin,           0 ),
 636         ( "infix",      ITinfix,        0 ),
 637         ( "infixl",     ITinfixl,       0 ),
 638         ( "infixr",     ITinfixr,       0 ),
 639         ( "instance",   ITinstance,     0 ),
 640         ( "let",        ITlet,          0 ),
 641         ( "module",     ITmodule,       0 ),
 642         ( "newtype",    ITnewtype,      0 ),
 643         ( "of",         ITof,           0 ),
 644         ( "qualified",  ITqualified,    0 ),
 645         ( "then",       ITthen,         0 ),
 646         ( "type",       ITtype,         0 ),
 647         ( "where",      ITwhere,        0 ),
 648         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 649
 650     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 651         ( "mdo",        ITmdo,           bit recursiveDoBit),
 652         ( "family",     ITfamily,        bit tyFamBit),
 653     ( "group",  ITgroup,     bit transformComprehensionsBit),
 654     ( "by",     ITby,        bit transformComprehensionsBit),
 655     ( "using",  ITusing,     bit transformComprehensionsBit),
 656
 657         ( "foreign",    ITforeign,       bit ffiBit),
 658         ( "export",     ITexport,        bit ffiBit),
 659         ( "label",      ITlabel,         bit ffiBit),
 660         ( "dynamic",    ITdynamic,       bit ffiBit),
 661         ( "safe",       ITsafe,          bit ffiBit),
 662         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 663         ( "interruptible", ITinterruptible, bit ffiBit),
 664         ( "unsafe",     ITunsafe,        bit ffiBit),
 665         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 666         ( "ccall",      ITccallconv,     bit ffiBit),
 667         ( "prim",       ITprimcallconv,  bit ffiBit),
 668
 669         ( "rec",        ITrec,           bit recBit),
 670         ( "proc",       ITproc,          bit arrowsBit)
 671      ]
 672
 673 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 674 reservedSymsFM = listToUFM $
 675     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 676       [ ("..",  ITdotdot,   always)
 677         -- (:) is a reserved op, meaning only list cons
 678        ,(":",   ITcolon,    always)
 679        ,("::",  ITdcolon,   always)
 680        ,("=",   ITequal,    always)
 681        ,("\\",  ITlam,      always)
 682        ,("|",   ITvbar,     always)
 683        ,("<-",  ITlarrow,   always)
 684        ,("->",  ITrarrow,   always)
 685        ,("@",   ITat,       always)
 686        ,("~",   ITtilde,    always)
 687        ,("=>",  ITdarrow,   always)
 688        ,("-",   ITminus,    always)
 689        ,("!",   ITbang,     always)
 690
 691         -- For data T (a::*) = MkT
 692        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 693         -- For 'forall a . t'
 694        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 695
 696        ,("-<",  ITlarrowtail, arrowsEnabled)
 697        ,(">-",  ITrarrowtail, arrowsEnabled)
 698        ,("-<<", ITLarrowtail, arrowsEnabled)
 699        ,(">>-", ITRarrowtail, arrowsEnabled)
 700
 701        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 702        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 703        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 704                                 explicitForallEnabled i)
 705        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 706        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 707
 708        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 709        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 710        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 711        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 712
 713        ,("★", ITstar, unicodeSyntaxEnabled)
 714
 715         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 716         -- form part of a large operator.  This would let us have a better
 717         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 718        ]
 719
 720 -- -----------------------------------------------------------------------------
 721 -- Lexer actions
 722
 723 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 724
 725 special :: Token -> Action
 726 special tok span _buf _len = return (L span tok)
 727
 728 token, layout_token :: Token -> Action
 729 token t span _buf _len = return (L span t)
 730 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 731
 732 idtoken :: (StringBuffer -> Int -> Token) -> Action
 733 idtoken f span buf len = return (L span $! (f buf len))
 734
 735 skip_one_varid :: (FastString -> Token) -> Action
 736 skip_one_varid f span buf len
 737   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 738
 739 strtoken :: (String -> Token) -> Action
 740 strtoken f span buf len =
 741   return (L span $! (f $! lexemeToString buf len))
 742
 743 init_strtoken :: Int -> (String -> Token) -> Action
 744 -- like strtoken, but drops the last N character(s)
 745 init_strtoken drop f span buf len =
 746   return (L span $! (f $! lexemeToString buf (len-drop)))
 747
 748 begin :: Int -> Action
 749 begin code _span _str _len = do pushLexState code; lexToken
 750
 751 pop :: Action
 752 pop _span _buf _len = do _ <- popLexState
 753                          lexToken
 754
 755 pop_and :: Action -> Action
 756 pop_and act span buf len = do _ <- popLexState
 757                               act span buf len
 758
 759 {-# INLINE nextCharIs #-}
 760 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 761 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 762
 763 notFollowedBy :: Char -> AlexAccPred Int
 764 notFollowedBy char _ _ _ (AI _ buf)
 765   = nextCharIs buf (/=char)
 766
 767 notFollowedBySymbol :: AlexAccPred Int
 768 notFollowedBySymbol _ _ _ (AI _ buf)
 769   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 770
 771 -- We must reject doc comments as being ordinary comments everywhere.
 772 -- In some cases the doc comment will be selected as the lexeme due to
 773 -- maximal munch, but not always, because the nested comment rule is
 774 -- valid in all states, but the doc-comment rules are only valid in
 775 -- the non-layout states.
 776 isNormalComment :: AlexAccPred Int
 777 isNormalComment bits _ _ (AI _ buf)
 778   | haddockEnabled bits = notFollowedByDocOrPragma
 779   | otherwise           = nextCharIs buf (/='#')
 780   where
 781     notFollowedByDocOrPragma
 782        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 783
 784 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 785 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 786
 787 {-
 788 haddockDisabledAnd p bits _ _ (AI _ buf)
 789   = if haddockEnabled bits then False else (p buf)
 790 -}
 791
 792 atEOL :: AlexAccPred Int
 793 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 794
 795 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 796 ifExtension pred bits _ _ _ = pred bits
 797
 798 multiline_doc_comment :: Action
 799 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 800   where
 801     worker commentAcc input docType oneLine = case alexGetChar input of
 802       Just ('\n', input')
 803         | oneLine -> docCommentEnd input commentAcc docType buf span
 804         | otherwise -> case checkIfCommentLine input' of
 805           Just input -> worker ('\n':commentAcc) input docType False
 806           Nothing -> docCommentEnd input commentAcc docType buf span
 807       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 808       Nothing -> docCommentEnd input commentAcc docType buf span
 809
 810     checkIfCommentLine input = check (dropNonNewlineSpace input)
 811       where
 812         check input = case alexGetChar input of
 813           Just ('-', input) -> case alexGetChar input of
 814             Just ('-', input) -> case alexGetChar input of
 815               Just (c, _) | c /= '-' -> Just input
 816               _ -> Nothing
 817             _ -> Nothing
 818           _ -> Nothing
 819
 820         dropNonNewlineSpace input = case alexGetChar input of
 821           Just (c, input')
 822             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 823             | otherwise -> input
 824           Nothing -> input
 825
 826 lineCommentToken :: Action
 827 lineCommentToken span buf len = do
 828   b <- extension rawTokenStreamEnabled
 829   if b then strtoken ITlineComment span buf len else lexToken
 830
 831 {-
 832   nested comments require traversing by hand, they can't be parsed
 833   using regular expressions.
 834 -}
 835 nested_comment :: P (Located Token) -> Action
 836 nested_comment cont span _str _len = do
 837   input <- getInput
 838   go "" (1::Int) input
 839   where
 840     go commentAcc 0 input = do setInput input
 841                                b <- extension rawTokenStreamEnabled
 842                                if b
 843                                  then docCommentEnd input commentAcc ITblockComment _str span
 844                                  else cont
 845     go commentAcc n input = case alexGetChar input of
 846       Nothing -> errBrace input span
 847       Just ('-',input) -> case alexGetChar input of
 848         Nothing  -> errBrace input span
 849         Just ('\125',input) -> go commentAcc (n-1) input
 850         Just (_,_)          -> go ('-':commentAcc) n input
 851       Just ('\123',input) -> case alexGetChar input of
 852         Nothing  -> errBrace input span
 853         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 854         Just (_,_)       -> go ('\123':commentAcc) n input
 855       Just (c,input) -> go (c:commentAcc) n input
 856
 857 nested_doc_comment :: Action
 858 nested_doc_comment span buf _len = withLexedDocType (go "")
 859   where
 860     go commentAcc input docType _ = case alexGetChar input of
 861       Nothing -> errBrace input span
 862       Just ('-',input) -> case alexGetChar input of
 863         Nothing -> errBrace input span
 864         Just ('\125',input) ->
 865           docCommentEnd input commentAcc docType buf span
 866         Just (_,_) -> go ('-':commentAcc) input docType False
 867       Just ('\123', input) -> case alexGetChar input of
 868         Nothing  -> errBrace input span
 869         Just ('-',input) -> do
 870           setInput input
 871           let cont = do input <- getInput; go commentAcc input docType False
 872           nested_comment cont span buf _len
 873         Just (_,_) -> go ('\123':commentAcc) input docType False
 874       Just (c,input) -> go (c:commentAcc) input docType False
 875
 876 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 877                  -> P (Located Token)
 878 withLexedDocType lexDocComment = do
 879   input@(AI _ buf) <- getInput
 880   case prevChar buf ' ' of
 881     '|' -> lexDocComment input ITdocCommentNext False
 882     '^' -> lexDocComment input ITdocCommentPrev False
 883     '$' -> lexDocComment input ITdocCommentNamed False
 884     '*' -> lexDocSection 1 input
 885     '#' -> lexDocComment input ITdocOptionsOld False
 886     _ -> panic "withLexedDocType: Bad doc type"
 887  where
 888     lexDocSection n input = case alexGetChar input of
 889       Just ('*', input) -> lexDocSection (n+1) input
 890       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 891       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 892
 893 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 894 -- off again at the end of the pragma.
 895 rulePrag :: Action
 896 rulePrag span _buf _len = do
 897   setExts (.|. bit inRulePragBit)
 898   return (L span ITrules_prag)
 899
 900 endPrag :: Action
 901 endPrag span _buf _len = do
 902   setExts (.&. complement (bit inRulePragBit))
 903   return (L span ITclose_prag)
 904
 905 -- docCommentEnd
 906 -------------------------------------------------------------------------------
 907 -- This function is quite tricky. We can't just return a new token, we also
 908 -- need to update the state of the parser. Why? Because the token is longer
 909 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 910 -- it writes the wrong token length to the parser state. This function is
 911 -- called afterwards, so it can just update the state.
 912
 913 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 914                  SrcSpan -> P (Located Token)
 915 docCommentEnd input commentAcc docType buf span = do
 916   setInput input
 917   let (AI loc nextBuf) = input
 918       comment = reverse commentAcc
 919       span' = mkSrcSpan (srcSpanStart span) loc
 920       last_len = byteDiff buf nextBuf
 921
 922   span `seq` setLastToken span' last_len
 923   return (L span' (docType comment))
 924
 925 errBrace :: AlexInput -> SrcSpan -> P a
 926 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 927
 928 open_brace, close_brace :: Action
 929 open_brace span _str _len = do
 930   ctx <- getContext
 931   setContext (NoLayout:ctx)
 932   return (L span ITocurly)
 933 close_brace span _str _len = do
 934   popContext
 935   return (L span ITccurly)
 936
 937 qvarid, qconid :: StringBuffer -> Int -> Token
 938 qvarid buf len = ITqvarid $! splitQualName buf len False
 939 qconid buf len = ITqconid $! splitQualName buf len False
 940
 941 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 942 -- takes a StringBuffer and a length, and returns the module name
 943 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 944 -- because of hierarchical module names.
 945 splitQualName orig_buf len parens = split orig_buf orig_buf
 946   where
 947     split buf dot_buf
 948         | orig_buf `byteDiff` buf >= len  = done dot_buf
 949         | c == '.'                        = found_dot buf'
 950         | otherwise                       = split buf' dot_buf
 951       where
 952        (c,buf') = nextChar buf
 953
 954     -- careful, we might get names like M....
 955     -- so, if the character after the dot is not upper-case, this is
 956     -- the end of the qualifier part.
 957     found_dot buf -- buf points after the '.'
 958         | isUpper c    = split buf' buf
 959         | otherwise    = done buf
 960       where
 961        (c,buf') = nextChar buf
 962
 963     done dot_buf =
 964         (lexemeToFastString orig_buf (qual_size - 1),
 965          if parens -- Prelude.(+)
 966             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 967             else lexemeToFastString dot_buf (len - qual_size))
 968       where
 969         qual_size = orig_buf `byteDiff` dot_buf
 970
 971 varid :: Action
 972 varid span buf len =
 973   fs `seq`
 974   case lookupUFM reservedWordsFM fs of
 975         Just (keyword,0)    -> do
 976                 maybe_layout keyword
 977                 return (L span keyword)
 978         Just (keyword,exts) -> do
 979                 b <- extension (\i -> exts .&. i /= 0)
 980                 if b then do maybe_layout keyword
 981                              return (L span keyword)
 982                      else return (L span (ITvarid fs))
 983         _other -> return (L span (ITvarid fs))
 984   where
 985         fs = lexemeToFastString buf len
 986
 987 conid :: StringBuffer -> Int -> Token
 988 conid buf len = ITconid fs
 989   where fs = lexemeToFastString buf len
 990
 991 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
 992 qvarsym buf len = ITqvarsym $! splitQualName buf len False
 993 qconsym buf len = ITqconsym $! splitQualName buf len False
 994 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
 995 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
 996
 997 varsym, consym :: Action
 998 varsym = sym ITvarsym
 999 consym = sym ITconsym
1000
1001 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1002     -> P (Located Token)
1003 sym con span buf len =
1004   case lookupUFM reservedSymsFM fs of
1005         Just (keyword,exts) -> do
1006                 b <- extension exts
1007                 if b then return (L span keyword)
1008                      else return (L span $! con fs)
1009         _other -> return (L span $! con fs)
1010   where
1011         fs = lexemeToFastString buf len
1012
1013 -- Variations on the integral numeric literal.
1014 tok_integral :: (Integer -> Token)
1015      -> (Integer -> Integer)
1016  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1017      -> Int -> Int
1018      -> (Integer, (Char->Int)) -> Action
1019 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1020   return $ L span $ itint $! transint $ parseUnsignedInteger
1021      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1022
1023 -- some conveniences for use with tok_integral
1024 tok_num :: (Integer -> Integer)
1025         -> Int -> Int
1026         -> (Integer, (Char->Int)) -> Action
1027 tok_num = tok_integral ITinteger
1028 tok_primint :: (Integer -> Integer)
1029             -> Int -> Int
1030             -> (Integer, (Char->Int)) -> Action
1031 tok_primint = tok_integral ITprimint
1032 tok_primword :: Int -> Int
1033              -> (Integer, (Char->Int)) -> Action
1034 tok_primword = tok_integral ITprimword positive
1035 positive, negative :: (Integer -> Integer)
1036 positive = id
1037 negative = negate
1038 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1039 decimal = (10,octDecDigit)
1040 octal = (8,octDecDigit)
1041 hexadecimal = (16,hexDigit)
1042
1043 -- readRational can understand negative rationals, exponents, everything.
1044 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1045 tok_float        str = ITrational   $! readRational str
1046 tok_primfloat    str = ITprimfloat  $! readRational str
1047 tok_primdouble   str = ITprimdouble $! readRational str
1048
1049 -- -----------------------------------------------------------------------------
1050 -- Layout processing
1051
1052 -- we're at the first token on a line, insert layout tokens if necessary
1053 do_bol :: Action
1054 do_bol span _str _len = do
1055         pos <- getOffside
1056         case pos of
1057             LT -> do
1058                 --trace "layout: inserting '}'" $ do
1059                 popContext
1060                 -- do NOT pop the lex state, we might have a ';' to insert
1061                 return (L span ITvccurly)
1062             EQ -> do
1063                 --trace "layout: inserting ';'" $ do
1064                 _ <- popLexState
1065                 return (L span ITsemi)
1066             GT -> do
1067                 _ <- popLexState
1068                 lexToken
1069
1070 -- certain keywords put us in the "layout" state, where we might
1071 -- add an opening curly brace.
1072 maybe_layout :: Token -> P ()
1073 maybe_layout t = do -- If the alternative layout rule is enabled then
1074                     -- we never create an implicit layout context here.
1075                     -- Layout is handled XXX instead.
1076                     -- The code for closing implicit contexts, or
1077                     -- inserting implicit semi-colons, is therefore
1078                     -- irrelevant as it only applies in an implicit
1079                     -- context.
1080                     alr <- extension alternativeLayoutRule
1081                     unless alr $ f t
1082     where f ITdo    = pushLexState layout_do
1083           f ITmdo   = pushLexState layout_do
1084           f ITof    = pushLexState layout
1085           f ITlet   = pushLexState layout
1086           f ITwhere = pushLexState layout
1087           f ITrec   = pushLexState layout
1088           f _       = return ()
1089
1090 -- Pushing a new implicit layout context.  If the indentation of the
1091 -- next token is not greater than the previous layout context, then
1092 -- Haskell 98 says that the new layout context should be empty; that is
1093 -- the lexer must generate {}.
1094 --
1095 -- We are slightly more lenient than this: when the new context is started
1096 -- by a 'do', then we allow the new context to be at the same indentation as
1097 -- the previous context.  This is what the 'strict' argument is for.
1098 --
1099 new_layout_context :: Bool -> Action
1100 new_layout_context strict span _buf _len = do
1101     _ <- popLexState
1102     (AI l _) <- getInput
1103     let offset = srcLocCol l
1104     ctx <- getContext
1105     case ctx of
1106         Layout prev_off : _  |
1107            (strict     && prev_off >= offset  ||
1108             not strict && prev_off > offset) -> do
1109                 -- token is indented to the left of the previous context.
1110                 -- we must generate a {} sequence now.
1111                 pushLexState layout_left
1112                 return (L span ITvocurly)
1113         _ -> do
1114                 setContext (Layout offset : ctx)
1115                 return (L span ITvocurly)
1116
1117 do_layout_left :: Action
1118 do_layout_left span _buf _len = do
1119     _ <- popLexState
1120     pushLexState bol  -- we must be at the start of a line
1121     return (L span ITvccurly)
1122
1123 -- -----------------------------------------------------------------------------
1124 -- LINE pragmas
1125
1126 setLine :: Int -> Action
1127 setLine code span buf len = do
1128   let line = parseUnsignedInteger buf len 10 octDecDigit
1129   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1130         -- subtract one: the line number refers to the *following* line
1131   _ <- popLexState
1132   pushLexState code
1133   lexToken
1134
1135 setFile :: Int -> Action
1136 setFile code span buf len = do
1137   let file = lexemeToFastString (stepOn buf) (len-2)
1138   setAlrLastLoc noSrcSpan
1139   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1140   _ <- popLexState
1141   pushLexState code
1142   lexToken
1143
1144
1145 -- -----------------------------------------------------------------------------
1146 -- Options, includes and language pragmas.
1147
1148 lex_string_prag :: (String -> Token) -> Action
1149 lex_string_prag mkTok span _buf _len
1150     = do input <- getInput
1151          start <- getSrcLoc
1152          tok <- go [] input
1153          end <- getSrcLoc
1154          return (L (mkSrcSpan start end) tok)
1155     where go acc input
1156               = if isString input "#-}"
1157                    then do setInput input
1158                            return (mkTok (reverse acc))
1159                    else case alexGetChar input of
1160                           Just (c,i) -> go (c:acc) i
1161                           Nothing -> err input
1162           isString _ [] = True
1163           isString i (x:xs)
1164               = case alexGetChar i of
1165                   Just (c,i') | c == x    -> isString i' xs
1166                   _other -> False
1167           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1168
1169
1170 -- -----------------------------------------------------------------------------
1171 -- Strings & Chars
1172
1173 -- This stuff is horrible.  I hates it.
1174
1175 lex_string_tok :: Action
1176 lex_string_tok span _buf _len = do
1177   tok <- lex_string ""
1178   end <- getSrcLoc
1179   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1180
1181 lex_string :: String -> P Token
1182 lex_string s = do
1183   i <- getInput
1184   case alexGetChar' i of
1185     Nothing -> lit_error i
1186
1187     Just ('"',i)  -> do
1188         setInput i
1189         magicHash <- extension magicHashEnabled
1190         if magicHash
1191           then do
1192             i <- getInput
1193             case alexGetChar' i of
1194               Just ('#',i) -> do
1195                    setInput i
1196                    if any (> '\xFF') s
1197                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1198                     else let s' = mkZFastString (reverse s) in
1199                          return (ITprimstring s')
1200                         -- mkZFastString is a hack to avoid encoding the
1201                         -- string in UTF-8.  We just want the exact bytes.
1202               _other ->
1203                 return (ITstring (mkFastString (reverse s)))
1204           else
1205                 return (ITstring (mkFastString (reverse s)))
1206
1207     Just ('\\',i)
1208         | Just ('&',i) <- next -> do
1209                 setInput i; lex_string s
1210         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1211                            -- is_space only works for <= '\x7f' (#3751)
1212                 setInput i; lex_stringgap s
1213         where next = alexGetChar' i
1214
1215     Just (c, i1) -> do
1216         case c of
1217           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1218           c | isAny c -> do setInput i1; lex_string (c:s)
1219           _other -> lit_error i
1220
1221 lex_stringgap :: String -> P Token
1222 lex_stringgap s = do
1223   i <- getInput
1224   c <- getCharOrFail i
1225   case c of
1226     '\\' -> lex_string s
1227     c | is_space c -> lex_stringgap s
1228     _other -> lit_error i
1229
1230
1231 lex_char_tok :: Action
1232 -- Here we are basically parsing character literals, such as 'x' or '\n'
1233 -- but, when Template Haskell is on, we additionally spot
1234 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1235 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1236 -- So we have to do two characters of lookahead: when we see 'x we need to
1237 -- see if there's a trailing quote
1238 lex_char_tok span _buf _len = do        -- We've seen '
1239    i1 <- getInput       -- Look ahead to first character
1240    let loc = srcSpanStart span
1241    case alexGetChar' i1 of
1242         Nothing -> lit_error  i1
1243
1244         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1245                   th_exts <- extension thEnabled
1246                   if th_exts then do
1247                         setInput i2
1248                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1249                    else lit_error i1
1250
1251         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1252                   setInput i2
1253                   lit_ch <- lex_escape
1254                   i3 <- getInput
1255                   mc <- getCharOrFail i3 -- Trailing quote
1256                   if mc == '\'' then finish_char_tok loc lit_ch
1257                                 else lit_error i3
1258
1259         Just (c, i2@(AI _end2 _))
1260                 | not (isAny c) -> lit_error i1
1261                 | otherwise ->
1262
1263                 -- We've seen 'x, where x is a valid character
1264                 --  (i.e. not newline etc) but not a quote or backslash
1265            case alexGetChar' i2 of      -- Look ahead one more character
1266                 Just ('\'', i3) -> do   -- We've seen 'x'
1267                         setInput i3
1268                         finish_char_tok loc c
1269                 _other -> do            -- We've seen 'x not followed by quote
1270                                         -- (including the possibility of EOF)
1271                                         -- If TH is on, just parse the quote only
1272                         th_exts <- extension thEnabled
1273                         let (AI end _) = i1
1274                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1275                                    else lit_error i2
1276
1277 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1278 finish_char_tok loc ch  -- We've already seen the closing quote
1279                         -- Just need to check for trailing #
1280   = do  magicHash <- extension magicHashEnabled
1281         i@(AI end _) <- getInput
1282         if magicHash then do
1283                 case alexGetChar' i of
1284                         Just ('#',i@(AI end _)) -> do
1285                                 setInput i
1286                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1287                         _other ->
1288                                 return (L (mkSrcSpan loc end) (ITchar ch))
1289             else do
1290                    return (L (mkSrcSpan loc end) (ITchar ch))
1291
1292 isAny :: Char -> Bool
1293 isAny c | c > '\x7f' = isPrint c
1294         | otherwise  = is_any c
1295
1296 lex_escape :: P Char
1297 lex_escape = do
1298   i0 <- getInput
1299   c <- getCharOrFail i0
1300   case c of
1301         'a'   -> return '\a'
1302         'b'   -> return '\b'
1303         'f'   -> return '\f'
1304         'n'   -> return '\n'
1305         'r'   -> return '\r'
1306         't'   -> return '\t'
1307         'v'   -> return '\v'
1308         '\\'  -> return '\\'
1309         '"'   -> return '\"'
1310         '\''  -> return '\''
1311         '^'   -> do i1 <- getInput
1312                     c <- getCharOrFail i1
1313                     if c >= '@' && c <= '_'
1314                         then return (chr (ord c - ord '@'))
1315                         else lit_error i1
1316
1317         'x'   -> readNum is_hexdigit 16 hexDigit
1318         'o'   -> readNum is_octdigit  8 octDecDigit
1319         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1320
1321         c1 ->  do
1322            i <- getInput
1323            case alexGetChar' i of
1324             Nothing -> lit_error i0
1325             Just (c2,i2) ->
1326               case alexGetChar' i2 of
1327                 Nothing -> do lit_error i0
1328                 Just (c3,i3) ->
1329                    let str = [c1,c2,c3] in
1330                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1331                                      Just rest <- [stripPrefix p str] ] of
1332                           (escape_char,[]):_ -> do
1333                                 setInput i3
1334                                 return escape_char
1335                           (escape_char,_:_):_ -> do
1336                                 setInput i2
1337                                 return escape_char
1338                           [] -> lit_error i0
1339
1340 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1341 readNum is_digit base conv = do
1342   i <- getInput
1343   c <- getCharOrFail i
1344   if is_digit c
1345         then readNum2 is_digit base conv (conv c)
1346         else lit_error i
1347
1348 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1349 readNum2 is_digit base conv i = do
1350   input <- getInput
1351   read i input
1352   where read i input = do
1353           case alexGetChar' input of
1354             Just (c,input') | is_digit c -> do
1355                let i' = i*base + conv c
1356                if i' > 0x10ffff
1357                   then setInput input >> lexError "numeric escape sequence out of range"
1358                   else read i' input'
1359             _other -> do
1360               setInput input; return (chr i)
1361
1362
1363 silly_escape_chars :: [(String, Char)]
1364 silly_escape_chars = [
1365         ("NUL", '\NUL'),
1366         ("SOH", '\SOH'),
1367         ("STX", '\STX'),
1368         ("ETX", '\ETX'),
1369         ("EOT", '\EOT'),
1370         ("ENQ", '\ENQ'),
1371         ("ACK", '\ACK'),
1372         ("BEL", '\BEL'),
1373         ("BS", '\BS'),
1374         ("HT", '\HT'),
1375         ("LF", '\LF'),
1376         ("VT", '\VT'),
1377         ("FF", '\FF'),
1378         ("CR", '\CR'),
1379         ("SO", '\SO'),
1380         ("SI", '\SI'),
1381         ("DLE", '\DLE'),
1382         ("DC1", '\DC1'),
1383         ("DC2", '\DC2'),
1384         ("DC3", '\DC3'),
1385         ("DC4", '\DC4'),
1386         ("NAK", '\NAK'),
1387         ("SYN", '\SYN'),
1388         ("ETB", '\ETB'),
1389         ("CAN", '\CAN'),
1390         ("EM", '\EM'),
1391         ("SUB", '\SUB'),
1392         ("ESC", '\ESC'),
1393         ("FS", '\FS'),
1394         ("GS", '\GS'),
1395         ("RS", '\RS'),
1396         ("US", '\US'),
1397         ("SP", '\SP'),
1398         ("DEL", '\DEL')
1399         ]
1400
1401 -- before calling lit_error, ensure that the current input is pointing to
1402 -- the position of the error in the buffer.  This is so that we can report
1403 -- a correct location to the user, but also so we can detect UTF-8 decoding
1404 -- errors if they occur.
1405 lit_error :: AlexInput -> P a
1406 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1407
1408 getCharOrFail :: AlexInput -> P Char
1409 getCharOrFail i =  do
1410   case alexGetChar' i of
1411         Nothing -> lexError "unexpected end-of-file in string/character literal"
1412         Just (c,i)  -> do setInput i; return c
1413
1414 -- -----------------------------------------------------------------------------
1415 -- QuasiQuote
1416
1417 lex_quasiquote_tok :: Action
1418 lex_quasiquote_tok span buf len = do
1419   let quoter = tail (lexemeToString buf (len - 1))
1420                 -- 'tail' drops the initial '[',
1421                 -- while the -1 drops the trailing '|'
1422   quoteStart <- getSrcLoc
1423   quote <- lex_quasiquote ""
1424   end <- getSrcLoc
1425   return (L (mkSrcSpan (srcSpanStart span) end)
1426            (ITquasiQuote (mkFastString quoter,
1427                           mkFastString (reverse quote),
1428                           mkSrcSpan quoteStart end)))
1429
1430 lex_quasiquote :: String -> P String
1431 lex_quasiquote s = do
1432   i <- getInput
1433   case alexGetChar' i of
1434     Nothing -> lit_error i
1435
1436     Just ('\\',i)
1437         | Just ('|',i) <- next -> do
1438                 setInput i; lex_quasiquote ('|' : s)
1439         | Just (']',i) <- next -> do
1440                 setInput i; lex_quasiquote (']' : s)
1441         where next = alexGetChar' i
1442
1443     Just ('|',i)
1444         | Just (']',i) <- next -> do
1445                 setInput i; return s
1446         where next = alexGetChar' i
1447
1448     Just (c, i) -> do
1449          setInput i; lex_quasiquote (c : s)
1450
1451 -- -----------------------------------------------------------------------------
1452 -- Warnings
1453
1454 warn :: DynFlag -> SDoc -> Action
1455 warn option warning srcspan _buf _len = do
1456     addWarning option srcspan warning
1457     lexToken
1458
1459 warnThen :: DynFlag -> SDoc -> Action -> Action
1460 warnThen option warning action srcspan buf len = do
1461     addWarning option srcspan warning
1462     action srcspan buf len
1463
1464 -- -----------------------------------------------------------------------------
1465 -- The Parse Monad
1466
1467 data LayoutContext
1468   = NoLayout
1469   | Layout !Int
1470   deriving Show
1471
1472 data ParseResult a
1473   = POk PState a
1474   | PFailed
1475         SrcSpan         -- The start and end of the text span related to
1476                         -- the error.  Might be used in environments which can
1477                         -- show this span, e.g. by highlighting it.
1478         Message         -- The error message
1479
1480 data PState = PState {
1481         buffer     :: StringBuffer,
1482         dflags     :: DynFlags,
1483         messages   :: Messages,
1484         last_loc   :: SrcSpan,  -- pos of previous token
1485         last_len   :: !Int,     -- len of previous token
1486         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1487         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1488         context    :: [LayoutContext],
1489         lex_state  :: [Int],
1490         -- Used in the alternative layout rule:
1491         -- These tokens are the next ones to be sent out. They are
1492         -- just blindly emitted, without the rule looking at them again:
1493         alr_pending_implicit_tokens :: [Located Token],
1494         -- This is the next token to be considered or, if it is Nothing,
1495         -- we need to get the next token from the input stream:
1496         alr_next_token :: Maybe (Located Token),
1497         -- This is what we consider to be the locatino of the last token
1498         -- emitted:
1499         alr_last_loc :: SrcSpan,
1500         -- The stack of layout contexts:
1501         alr_context :: [ALRContext],
1502         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1503         -- us what sort of layout the '{' will open:
1504         alr_expecting_ocurly :: Maybe ALRLayout,
1505         -- Have we just had the '}' for a let block? If so, than an 'in'
1506         -- token doesn't need to close anything:
1507         alr_justClosedExplicitLetBlock :: Bool
1508      }
1509         -- last_loc and last_len are used when generating error messages,
1510         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1511         -- current token to happyError, we could at least get rid of last_len.
1512         -- Getting rid of last_loc would require finding another way to
1513         -- implement pushCurrentContext (which is only called from one place).
1514
1515 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1516                               Bool{- is it a 'let' block? -}
1517                 | ALRLayout ALRLayout Int
1518 data ALRLayout = ALRLayoutLet
1519                | ALRLayoutWhere
1520                | ALRLayoutOf
1521                | ALRLayoutDo
1522
1523 newtype P a = P { unP :: PState -> ParseResult a }
1524
1525 instance Monad P where
1526   return = returnP
1527   (>>=) = thenP
1528   fail = failP
1529
1530 returnP :: a -> P a
1531 returnP a = a `seq` (P $ \s -> POk s a)
1532
1533 thenP :: P a -> (a -> P b) -> P b
1534 (P m) `thenP` k = P $ \ s ->
1535         case m s of
1536                 POk s1 a         -> (unP (k a)) s1
1537                 PFailed span err -> PFailed span err
1538
1539 failP :: String -> P a
1540 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1541
1542 failMsgP :: String -> P a
1543 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1544
1545 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1546 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1547
1548 failSpanMsgP :: SrcSpan -> SDoc -> P a
1549 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1550
1551 getPState :: P PState
1552 getPState = P $ \s -> POk s s
1553
1554 getDynFlags :: P DynFlags
1555 getDynFlags = P $ \s -> POk s (dflags s)
1556
1557 withThisPackage :: (PackageId -> a) -> P a
1558 withThisPackage f
1559  = do   pkg     <- liftM thisPackage getDynFlags
1560         return  $ f pkg
1561
1562 extension :: (Int -> Bool) -> P Bool
1563 extension p = P $ \s -> POk s (p $! extsBitmap s)
1564
1565 getExts :: P Int
1566 getExts = P $ \s -> POk s (extsBitmap s)
1567
1568 setExts :: (Int -> Int) -> P ()
1569 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1570
1571 setSrcLoc :: SrcLoc -> P ()
1572 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1573
1574 getSrcLoc :: P SrcLoc
1575 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1576
1577 setLastToken :: SrcSpan -> Int -> P ()
1578 setLastToken loc len = P $ \s -> POk s {
1579   last_loc=loc,
1580   last_len=len
1581   } ()
1582
1583 data AlexInput = AI SrcLoc StringBuffer
1584
1585 alexInputPrevChar :: AlexInput -> Char
1586 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1587
1588 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1589 alexGetChar (AI loc s)
1590   | atEnd s   = Nothing
1591   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1592                 --trace (show (ord c)) $
1593                 Just (adj_c, (AI loc' s'))
1594   where (c,s') = nextChar s
1595         loc'   = advanceSrcLoc loc c
1596
1597         non_graphic     = '\x0'
1598         upper           = '\x1'
1599         lower           = '\x2'
1600         digit           = '\x3'
1601         symbol          = '\x4'
1602         space           = '\x5'
1603         other_graphic   = '\x6'
1604
1605         adj_c
1606           | c <= '\x06' = non_graphic
1607           | c <= '\x7f' = c
1608           -- Alex doesn't handle Unicode, so when Unicode
1609           -- character is encountered we output these values
1610           -- with the actual character value hidden in the state.
1611           | otherwise =
1612                 case generalCategory c of
1613                   UppercaseLetter       -> upper
1614                   LowercaseLetter       -> lower
1615                   TitlecaseLetter       -> upper
1616                   ModifierLetter        -> other_graphic
1617                   OtherLetter           -> lower -- see #1103
1618                   NonSpacingMark        -> other_graphic
1619                   SpacingCombiningMark  -> other_graphic
1620                   EnclosingMark         -> other_graphic
1621                   DecimalNumber         -> digit
1622                   LetterNumber          -> other_graphic
1623                   OtherNumber           -> other_graphic
1624                   ConnectorPunctuation  -> symbol
1625                   DashPunctuation       -> symbol
1626                   OpenPunctuation       -> other_graphic
1627                   ClosePunctuation      -> other_graphic
1628                   InitialQuote          -> other_graphic
1629                   FinalQuote            -> other_graphic
1630                   OtherPunctuation      -> symbol
1631                   MathSymbol            -> symbol
1632                   CurrencySymbol        -> symbol
1633                   ModifierSymbol        -> symbol
1634                   OtherSymbol           -> symbol
1635                   Space                 -> space
1636                   _other                -> non_graphic
1637
1638 -- This version does not squash unicode characters, it is used when
1639 -- lexing strings.
1640 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1641 alexGetChar' (AI loc s)
1642   | atEnd s   = Nothing
1643   | otherwise = c `seq` loc' `seq` s' `seq`
1644                 --trace (show (ord c)) $
1645                 Just (c, (AI loc' s'))
1646   where (c,s') = nextChar s
1647         loc'   = advanceSrcLoc loc c
1648
1649 getInput :: P AlexInput
1650 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1651
1652 setInput :: AlexInput -> P ()
1653 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1654
1655 pushLexState :: Int -> P ()
1656 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1657
1658 popLexState :: P Int
1659 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1660
1661 getLexState :: P Int
1662 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1663
1664 popNextToken :: P (Maybe (Located Token))
1665 popNextToken
1666     = P $ \s@PState{ alr_next_token = m } ->
1667               POk (s {alr_next_token = Nothing}) m
1668
1669 setAlrLastLoc :: SrcSpan -> P ()
1670 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1671
1672 getAlrLastLoc :: P SrcSpan
1673 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1674
1675 getALRContext :: P [ALRContext]
1676 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1677
1678 setALRContext :: [ALRContext] -> P ()
1679 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1680
1681 getJustClosedExplicitLetBlock :: P Bool
1682 getJustClosedExplicitLetBlock
1683  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1684
1685 setJustClosedExplicitLetBlock :: Bool -> P ()
1686 setJustClosedExplicitLetBlock b
1687  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1688
1689 setNextToken :: Located Token -> P ()
1690 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1691
1692 popPendingImplicitToken :: P (Maybe (Located Token))
1693 popPendingImplicitToken
1694     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1695               case ts of
1696               [] -> POk s Nothing
1697               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1698
1699 setPendingImplicitTokens :: [Located Token] -> P ()
1700 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1701
1702 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1703 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1704
1705 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1706 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1707
1708 -- for reasons of efficiency, flags indicating language extensions (eg,
1709 -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
1710 -- integer
1711
1712 genericsBit :: Int
1713 genericsBit = 0 -- {| and |}
1714 ffiBit :: Int
1715 ffiBit     = 1
1716 parrBit :: Int
1717 parrBit    = 2
1718 arrowsBit :: Int
1719 arrowsBit  = 4
1720 thBit :: Int
1721 thBit      = 5
1722 ipBit :: Int
1723 ipBit      = 6
1724 explicitForallBit :: Int
1725 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1726 bangPatBit :: Int
1727 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1728                 -- (doesn't affect the lexer)
1729 tyFamBit :: Int
1730 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1731 haddockBit :: Int
1732 haddockBit = 10 -- Lex and parse Haddock comments
1733 magicHashBit :: Int
1734 magicHashBit = 11 -- "#" in both functions and operators
1735 kindSigsBit :: Int
1736 kindSigsBit = 12 -- Kind signatures on type variables
1737 recursiveDoBit :: Int
1738 recursiveDoBit = 13 -- mdo
1739 unicodeSyntaxBit :: Int
1740 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1741 unboxedTuplesBit :: Int
1742 unboxedTuplesBit = 15 -- (# and #)
1743 datatypeContextsBit :: Int
1744 datatypeContextsBit = 16
1745 transformComprehensionsBit :: Int
1746 transformComprehensionsBit = 17
1747 qqBit :: Int
1748 qqBit      = 18 -- enable quasiquoting
1749 inRulePragBit :: Int
1750 inRulePragBit = 19
1751 rawTokenStreamBit :: Int
1752 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1753 newQualOpsBit :: Int
1754 newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
1755 recBit :: Int
1756 recBit = 22 -- rec
1757 alternativeLayoutRuleBit :: Int
1758 alternativeLayoutRuleBit = 23
1759
1760 always :: Int -> Bool
1761 always           _     = True
1762 genericsEnabled :: Int -> Bool
1763 genericsEnabled  flags = testBit flags genericsBit
1764 parrEnabled :: Int -> Bool
1765 parrEnabled      flags = testBit flags parrBit
1766 arrowsEnabled :: Int -> Bool
1767 arrowsEnabled    flags = testBit flags arrowsBit
1768 thEnabled :: Int -> Bool
1769 thEnabled        flags = testBit flags thBit
1770 ipEnabled :: Int -> Bool
1771 ipEnabled        flags = testBit flags ipBit
1772 explicitForallEnabled :: Int -> Bool
1773 explicitForallEnabled flags = testBit flags explicitForallBit
1774 bangPatEnabled :: Int -> Bool
1775 bangPatEnabled   flags = testBit flags bangPatBit
1776 -- tyFamEnabled :: Int -> Bool
1777 -- tyFamEnabled     flags = testBit flags tyFamBit
1778 haddockEnabled :: Int -> Bool
1779 haddockEnabled   flags = testBit flags haddockBit
1780 magicHashEnabled :: Int -> Bool
1781 magicHashEnabled flags = testBit flags magicHashBit
1782 -- kindSigsEnabled :: Int -> Bool
1783 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1784 unicodeSyntaxEnabled :: Int -> Bool
1785 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1786 unboxedTuplesEnabled :: Int -> Bool
1787 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1788 datatypeContextsEnabled :: Int -> Bool
1789 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1790 qqEnabled :: Int -> Bool
1791 qqEnabled        flags = testBit flags qqBit
1792 -- inRulePrag :: Int -> Bool
1793 -- inRulePrag       flags = testBit flags inRulePragBit
1794 rawTokenStreamEnabled :: Int -> Bool
1795 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1796 newQualOps :: Int -> Bool
1797 newQualOps       flags = testBit flags newQualOpsBit
1798 oldQualOps :: Int -> Bool
1799 oldQualOps flags = not (newQualOps flags)
1800 alternativeLayoutRule :: Int -> Bool
1801 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1802
1803 -- PState for parsing options pragmas
1804 --
1805 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1806 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1807                                  lex_state = [bol, option_prags, 0]
1808                              }
1809
1810 -- create a parse state
1811 --
1812 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1813 mkPState flags buf loc =
1814   PState {
1815       buffer          = buf,
1816       dflags        = flags,
1817       messages      = emptyMessages,
1818       last_loc      = mkSrcSpan loc loc,
1819       last_len      = 0,
1820       loc           = loc,
1821       extsBitmap    = fromIntegral bitmap,
1822       context       = [],
1823       lex_state     = [bol, 0],
1824       alr_pending_implicit_tokens = [],
1825       alr_next_token = Nothing,
1826       alr_last_loc = noSrcSpan,
1827       alr_context = [],
1828       alr_expecting_ocurly = Nothing,
1829       alr_justClosedExplicitLetBlock = False
1830     }
1831     where
1832       bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
1833                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1834                .|. parrBit           `setBitIf` xopt Opt_PArr         flags
1835                .|. arrowsBit         `setBitIf` xopt Opt_Arrows       flags
1836                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1837                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes flags
1838                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams flags
1839                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll flags
1840                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
1841                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
1842                .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
1843                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
1844                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
1845                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
1846                .|. recBit            `setBitIf` xopt Opt_DoRec  flags
1847                .|. recBit            `setBitIf` xopt Opt_Arrows flags
1848                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
1849                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
1850                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1851                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1852                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1853                .|. newQualOpsBit `setBitIf` xopt Opt_NewQualifiedOperators flags
1854                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1855       --
1856       setBitIf :: Int -> Bool -> Int
1857       b `setBitIf` cond | cond      = bit b
1858                         | otherwise = 0
1859
1860 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1861 addWarning option srcspan warning
1862  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1863        let warning' = mkWarnMsg srcspan alwaysQualify warning
1864            ws' = if dopt option d then ws `snocBag` warning' else ws
1865        in POk s{messages=(ws', es)} ()
1866
1867 getMessages :: PState -> Messages
1868 getMessages PState{messages=ms} = ms
1869
1870 getContext :: P [LayoutContext]
1871 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1872
1873 setContext :: [LayoutContext] -> P ()
1874 setContext ctx = P $ \s -> POk s{context=ctx} ()
1875
1876 popContext :: P ()
1877 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1878                               last_len = len, last_loc = last_loc }) ->
1879   case ctx of
1880         (_:tl) -> POk s{ context = tl } ()
1881         []     -> PFailed last_loc (srcParseErr buf len)
1882
1883 -- Push a new layout context at the indentation of the last token read.
1884 -- This is only used at the outer level of a module when the 'module'
1885 -- keyword is missing.
1886 pushCurrentContext :: P ()
1887 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1888     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1889
1890 getOffside :: P Ordering
1891 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1892                 let offs = srcSpanStartCol loc in
1893                 let ord = case stk of
1894                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1895                                         compare offs n
1896                         _            -> GT
1897                 in POk s ord
1898
1899 -- ---------------------------------------------------------------------------
1900 -- Construct a parse error
1901
1902 srcParseErr
1903   :: StringBuffer       -- current buffer (placed just after the last token)
1904   -> Int                -- length of the previous token
1905   -> Message
1906 srcParseErr buf len
1907   = hcat [ if null token
1908              then ptext (sLit "parse error (possibly incorrect indentation)")
1909              else hcat [ptext (sLit "parse error on input "),
1910                         char '`', text token, char '\'']
1911     ]
1912   where token = lexemeToString (offsetBytes (-len) buf) len
1913
1914 -- Report a parse failure, giving the span of the previous token as
1915 -- the location of the error.  This is the entry point for errors
1916 -- detected during parsing.
1917 srcParseFail :: P a
1918 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1919                             last_loc = last_loc } ->
1920     PFailed last_loc (srcParseErr buf len)
1921
1922 -- A lexical error is reported at a particular position in the source file,
1923 -- not over a token range.
1924 lexError :: String -> P a
1925 lexError str = do
1926   loc <- getSrcLoc
1927   (AI end buf) <- getInput
1928   reportLexError loc end buf str
1929
1930 -- -----------------------------------------------------------------------------
1931 -- This is the top-level function: called from the parser each time a
1932 -- new token is to be read from the input.
1933
1934 lexer :: (Located Token -> P a) -> P a
1935 lexer cont = do
1936   alr <- extension alternativeLayoutRule
1937   let lexTokenFun = if alr then lexTokenAlr else lexToken
1938   tok@(L _span _tok__) <- lexTokenFun
1939   --trace ("token: " ++ show _tok__) $ do
1940   cont tok
1941
1942 lexTokenAlr :: P (Located Token)
1943 lexTokenAlr = do mPending <- popPendingImplicitToken
1944                  t <- case mPending of
1945                       Nothing ->
1946                           do mNext <- popNextToken
1947                              t <- case mNext of
1948                                   Nothing -> lexToken
1949                                   Just next -> return next
1950                              alternativeLayoutRuleToken t
1951                       Just t ->
1952                           return t
1953                  setAlrLastLoc (getLoc t)
1954                  case unLoc t of
1955                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
1956                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
1957                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
1958                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
1959                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1960                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1961                      _       -> return ()
1962                  return t
1963
1964 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
1965 alternativeLayoutRuleToken t
1966     = do context <- getALRContext
1967          lastLoc <- getAlrLastLoc
1968          mExpectingOCurly <- getAlrExpectingOCurly
1969          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
1970          setJustClosedExplicitLetBlock False
1971          dflags <- getDynFlags
1972          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
1973              thisLoc = getLoc t
1974              thisCol = srcSpanStartCol thisLoc
1975              newLine = (lastLoc == noSrcSpan)
1976                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
1977          case (unLoc t, context, mExpectingOCurly) of
1978              -- This case handles a GHC extension to the original H98
1979              -- layout rule...
1980              (ITocurly, _, Just alrLayout) ->
1981                  do setAlrExpectingOCurly Nothing
1982                     let isLet = case alrLayout of
1983                                 ALRLayoutLet -> True
1984                                 _ -> False
1985                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
1986                     return t
1987              -- ...and makes this case unnecessary
1988              {-
1989              -- I think our implicit open-curly handling is slightly
1990              -- different to John's, in how it interacts with newlines
1991              -- and "in"
1992              (ITocurly, _, Just _) ->
1993                  do setAlrExpectingOCurly Nothing
1994                     setNextToken t
1995                     lexTokenAlr
1996              -}
1997              (_, ALRLayout _ col : ls, Just expectingOCurly)
1998               | (thisCol > col) ||
1999                 (thisCol == col &&
2000                  isNonDecreasingIntentation expectingOCurly) ->
2001                  do setAlrExpectingOCurly Nothing
2002                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2003                     setNextToken t
2004                     return (L thisLoc ITocurly)
2005               | otherwise ->
2006                  do setAlrExpectingOCurly Nothing
2007                     setPendingImplicitTokens [L lastLoc ITccurly]
2008                     setNextToken t
2009                     return (L lastLoc ITocurly)
2010              (_, _, Just expectingOCurly) ->
2011                  do setAlrExpectingOCurly Nothing
2012                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2013                     setNextToken t
2014                     return (L thisLoc ITocurly)
2015              -- We do the [] cases earlier than in the spec, as we
2016              -- have an actual EOF token
2017              (ITeof, ALRLayout _ _ : ls, _) ->
2018                  do setALRContext ls
2019                     setNextToken t
2020                     return (L thisLoc ITccurly)
2021              (ITeof, _, _) ->
2022                  return t
2023              -- the other ITeof case omitted; general case below covers it
2024              (ITin, _, _)
2025               | justClosedExplicitLetBlock ->
2026                  return t
2027              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2028               | newLine ->
2029                  do setPendingImplicitTokens [t]
2030                     setALRContext ls
2031                     return (L thisLoc ITccurly)
2032              -- This next case is to handle a transitional issue:
2033              (ITwhere, ALRLayout _ col : ls, _)
2034               | newLine && thisCol == col && transitional ->
2035                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2036                                thisLoc
2037                                (transitionalAlternativeLayoutWarning
2038                                     "`where' clause at the same depth as implicit layout block")
2039                     setALRContext ls
2040                     setNextToken t
2041                     -- Note that we use lastLoc, as we may need to close
2042                     -- more layouts, or give a semicolon
2043                     return (L lastLoc ITccurly)
2044              -- This next case is to handle a transitional issue:
2045              (ITvbar, ALRLayout _ col : ls, _)
2046               | newLine && thisCol == col && transitional ->
2047                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2048                                thisLoc
2049                                (transitionalAlternativeLayoutWarning
2050                                     "`|' at the same depth as implicit layout block")
2051                     setALRContext ls
2052                     setNextToken t
2053                     -- Note that we use lastLoc, as we may need to close
2054                     -- more layouts, or give a semicolon
2055                     return (L lastLoc ITccurly)
2056              (_, ALRLayout _ col : ls, _)
2057               | newLine && thisCol == col ->
2058                  do setNextToken t
2059                     return (L thisLoc ITsemi)
2060               | newLine && thisCol < col ->
2061                  do setALRContext ls
2062                     setNextToken t
2063                     -- Note that we use lastLoc, as we may need to close
2064                     -- more layouts, or give a semicolon
2065                     return (L lastLoc ITccurly)
2066              -- We need to handle close before open, as 'then' is both
2067              -- an open and a close
2068              (u, _, _)
2069               | isALRclose u ->
2070                  case context of
2071                  ALRLayout _ _ : ls ->
2072                      do setALRContext ls
2073                         setNextToken t
2074                         return (L thisLoc ITccurly)
2075                  ALRNoLayout _ isLet : ls ->
2076                      do let ls' = if isALRopen u
2077                                      then ALRNoLayout (containsCommas u) False : ls
2078                                      else ls
2079                         setALRContext ls'
2080                         when isLet $ setJustClosedExplicitLetBlock True
2081                         return t
2082                  [] ->
2083                      do let ls = if isALRopen u
2084                                     then [ALRNoLayout (containsCommas u) False]
2085                                     else ls
2086                         setALRContext ls
2087                         -- XXX This is an error in John's code, but
2088                         -- it looks reachable to me at first glance
2089                         return t
2090              (u, _, _)
2091               | isALRopen u ->
2092                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2093                     return t
2094              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2095                  do setALRContext ls
2096                     setPendingImplicitTokens [t]
2097                     return (L thisLoc ITccurly)
2098              (ITin, ALRLayout _ _ : ls, _) ->
2099                  do setALRContext ls
2100                     setNextToken t
2101                     return (L thisLoc ITccurly)
2102              -- the other ITin case omitted; general case below covers it
2103              (ITcomma, ALRLayout _ _ : ls, _)
2104               | topNoLayoutContainsCommas ls ->
2105                  do setALRContext ls
2106                     setNextToken t
2107                     return (L thisLoc ITccurly)
2108              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2109                  do setALRContext ls
2110                     setPendingImplicitTokens [t]
2111                     return (L thisLoc ITccurly)
2112              -- the other ITwhere case omitted; general case below covers it
2113              (_, _, _) -> return t
2114
2115 transitionalAlternativeLayoutWarning :: String -> SDoc
2116 transitionalAlternativeLayoutWarning msg
2117     = text "transitional layout will not be accepted in the future:"
2118    $$ text msg
2119
2120 isALRopen :: Token -> Bool
2121 isALRopen ITcase        = True
2122 isALRopen ITif          = True
2123 isALRopen ITthen        = True
2124 isALRopen IToparen      = True
2125 isALRopen ITobrack      = True
2126 isALRopen ITocurly      = True
2127 -- GHC Extensions:
2128 isALRopen IToubxparen   = True
2129 isALRopen ITparenEscape = True
2130 isALRopen _             = False
2131
2132 isALRclose :: Token -> Bool
2133 isALRclose ITof     = True
2134 isALRclose ITthen   = True
2135 isALRclose ITelse   = True
2136 isALRclose ITcparen = True
2137 isALRclose ITcbrack = True
2138 isALRclose ITccurly = True
2139 -- GHC Extensions:
2140 isALRclose ITcubxparen = True
2141 isALRclose _        = False
2142
2143 isNonDecreasingIntentation :: ALRLayout -> Bool
2144 isNonDecreasingIntentation ALRLayoutDo = True
2145 isNonDecreasingIntentation _           = False
2146
2147 containsCommas :: Token -> Bool
2148 containsCommas IToparen = True
2149 containsCommas ITobrack = True
2150 -- John doesn't have {} as containing commas, but records contain them,
2151 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2152 -- (defaultInstallDirs).
2153 containsCommas ITocurly = True
2154 -- GHC Extensions:
2155 containsCommas IToubxparen = True
2156 containsCommas _        = False
2157
2158 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2159 topNoLayoutContainsCommas [] = False
2160 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2161 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2162
2163 lexToken :: P (Located Token)
2164 lexToken = do
2165   inp@(AI loc1 buf) <- getInput
2166   sc <- getLexState
2167   exts <- getExts
2168   case alexScanUser exts inp sc of
2169     AlexEOF -> do
2170         let span = mkSrcSpan loc1 loc1
2171         setLastToken span 0
2172         return (L span ITeof)
2173     AlexError (AI loc2 buf) ->
2174         reportLexError loc1 loc2 buf "lexical error"
2175     AlexSkip inp2 _ -> do
2176         setInput inp2
2177         lexToken
2178     AlexToken inp2@(AI end buf2) _ t -> do
2179         setInput inp2
2180         let span = mkSrcSpan loc1 end
2181         let bytes = byteDiff buf buf2
2182         span `seq` setLastToken span bytes
2183         t span buf bytes
2184
2185 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2186 reportLexError loc1 loc2 buf str
2187   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2188   | otherwise =
2189   let
2190         c = fst (nextChar buf)
2191   in
2192   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2193     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2194     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2195
2196 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2197 lexTokenStream buf loc dflags = unP go initState
2198     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2199           initState = mkPState dflags' buf loc
2200           go = do
2201             ltok <- lexer return
2202             case ltok of
2203               L _ ITeof -> return []
2204               _ -> liftM (ltok:) go
2205
2206 linePrags = Map.singleton "line" (begin line_prag2)
2207
2208 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2209                                  ("options_ghc", lex_string_prag IToptions_prag),
2210                                  ("options_haddock", lex_string_prag ITdocOptions),
2211                                  ("language", token ITlanguage_prag),
2212                                  ("include", lex_string_prag ITinclude_prag)])
2213
2214 ignoredPrags = Map.fromList (map ignored pragmas)
2215                where ignored opt = (opt, nested_comment lexToken)
2216                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2217                      options_pragmas = map ("options_" ++) impls
2218                      -- CFILES is a hugs-only thing.
2219                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2220
2221 oneWordPrags = Map.fromList([("rules", rulePrag),
2222                            ("inline", token (ITinline_prag Inline FunLike)),
2223                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2224                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2225                                           -- Spelling variant
2226                            ("notinline", token (ITinline_prag NoInline FunLike)),
2227                            ("specialize", token ITspec_prag),
2228                            ("source", token ITsource_prag),
2229                            ("warning", token ITwarning_prag),
2230                            ("deprecated", token ITdeprecated_prag),
2231                            ("scc", token ITscc_prag),
2232                            ("generated", token ITgenerated_prag),
2233                            ("core", token ITcore_prag),
2234                            ("unpack", token ITunpack_prag),
2235                            ("ann", token ITann_prag)])
2236
2237 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2238                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2239                              ("specialize inline", token (ITspec_inline_prag True)),
2240                              ("specialize notinline", token (ITspec_inline_prag False))])
2241
2242
2243 dispatch_pragmas :: Map String Action -> Action
2244 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2245                                        Just found -> found span buf len
2246                                        Nothing -> lexError "unknown pragma"
2247
2248 known_pragma :: Map String Action -> AlexAccPred Int
2249 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2250                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2251
2252 clean_pragma :: String -> String
2253 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2254                     where unprefix prag' = case stripPrefix "{-#" prag' of
2255                                              Just rest -> rest
2256                                              Nothing -> prag'
2257                           canonical prag' = case prag' of
2258                                               "noinline" -> "notinline"
2259                                               "specialise" -> "specialize"
2260                                               "constructorlike" -> "conlike"
2261                                               _ -> prag'
2262                           canon_ws s = unwords (map canonical (words s))
2263 }