compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# LANGUAGE BangPatterns #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  38 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  39 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  40 -- But alex still generates some code that causes the "lazy unlifted bindings"
  41 -- warning, and old compilers don't know about it so we can't easily turn
  42 -- it off, so for now we use the sledge hammer:
  43 {-# OPTIONS_GHC -w #-}
  44
  45 {-# OPTIONS_GHC -funbox-strict-fields #-}
  46
  47 module Lexer (
  48    Token(..), lexer, pragState, mkPState, PState(..),
  49    P(..), ParseResult(..), getSrcLoc,
  50    getPState, getDynFlags, withThisPackage,
  51    failLocMsgP, failSpanMsgP, srcParseFail,
  52    getMessages,
  53    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  54    getLexState, popLexState, pushLexState,
  55    extension, bangPatEnabled, datatypeContextsEnabled,
  56    addWarning,
  57    lexTokenStream
  58   ) where
  59
  60 import Bag
  61 import ErrUtils
  62 import Outputable
  63 import StringBuffer
  64 import FastString
  65 import SrcLoc
  66 import UniqFM
  67 import DynFlags
  68 import Module
  69 import Ctype
  70 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..) )
  71 import Util             ( readRational )
  72
  73 import Control.Monad
  74 import Data.Bits
  75 import Data.Char
  76 import Data.List
  77 import Data.Maybe
  78 import Data.Map (Map)
  79 import qualified Data.Map as Map
  80 import Data.Ratio
  81 }
  82
  83 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  84 $whitechar   = [\ \n\r\f\v $unispace]
  85 $white_no_nl = $whitechar # \n
  86 $tab         = \t
  87
  88 $ascdigit  = 0-9
  89 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  90 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  91 $digit     = [$ascdigit $unidigit]
  92
  93 $special   = [\(\)\,\;\[\]\`\{\}]
  94 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  95 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  96 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  97
  98 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  99 $asclarge  = [A-Z]
 100 $large     = [$asclarge $unilarge]
 101
 102 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 103 $ascsmall  = [a-z]
 104 $small     = [$ascsmall $unismall \_]
 105
 106 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 107 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 108
 109 $octit     = 0-7
 110 $hexit     = [$decdigit A-F a-f]
 111 $symchar   = [$symbol \:]
 112 $nl        = [\n\r]
 113 $idchar    = [$small $large $digit \']
 114
 115 $pragmachar = [$small $large $digit]
 116
 117 $docsym    = [\| \^ \* \$]
 118
 119 @varid     = $small $idchar*
 120 @conid     = $large $idchar*
 121
 122 @varsym    = $symbol $symchar*
 123 @consym    = \: $symchar*
 124
 125 @decimal     = $decdigit+
 126 @octal       = $octit+
 127 @hexadecimal = $hexit+
 128 @exponent    = [eE] [\-\+]? @decimal
 129
 130 -- we support the hierarchical module name extension:
 131 @qual = (@conid \.)+
 132
 133 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 134
 135 -- normal signed numerical literals can only be explicitly negative,
 136 -- not explicitly positive (contrast @exponent)
 137 @negative = \-
 138 @signed = @negative ?
 139
 140 haskell :-
 141
 142 -- everywhere: skip whitespace and comments
 143 $white_no_nl+                           ;
 144 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 145
 146 -- Everywhere: deal with nested comments.  We explicitly rule out
 147 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 148 -- (this can happen even though pragmas will normally take precedence due to
 149 -- longest-match, because pragmas aren't valid in every state, but comments
 150 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 151 -- set.
 152
 153 "{-" / { isNormalComment } { nested_comment lexToken }
 154
 155 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 156 -- more dashes followed by a symbol should be parsed as a varsym, so we
 157 -- have to exclude those.
 158
 159 -- Since Haddock comments aren't valid in every state, we need to rule them
 160 -- out here.
 161
 162 -- The following two rules match comments that begin with two dashes, but
 163 -- continue with a different character. The rules test that this character
 164 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 165 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 166 -- have a Haddock comment). The rules then munch the rest of the line.
 167
 168 "-- " ~[$docsym \#] .* { lineCommentToken }
 169 "--" [^$symbol : \ ] .* { lineCommentToken }
 170
 171 -- Next, match Haddock comments if no -haddock flag
 172
 173 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 174
 175 -- Now, when we've matched comments that begin with 2 dashes and continue
 176 -- with a different character, we need to match comments that begin with three
 177 -- or more dashes (which clearly can't be Haddock comments). We only need to
 178 -- make sure that the first non-dash character isn't a symbol, and munch the
 179 -- rest of the line.
 180
 181 "---"\-* [^$symbol :] .* { lineCommentToken }
 182
 183 -- Since the previous rules all match dashes followed by at least one
 184 -- character, we also need to match a whole line filled with just dashes.
 185
 186 "--"\-* / { atEOL } { lineCommentToken }
 187
 188 -- We need this rule since none of the other single line comment rules
 189 -- actually match this case.
 190
 191 "-- " / { atEOL } { lineCommentToken }
 192
 193 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 194 -- blank lines) until we find a non-whitespace character, then do layout
 195 -- processing.
 196 --
 197 -- One slight wibble here: what if the line begins with {-#? In
 198 -- theory, we have to lex the pragma to see if it's one we recognise,
 199 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 200 -- as a nested comment.  We don't bother with this: if the line begins
 201 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 202 <bol> {
 203   \n                                    ;
 204   ^\# (line)?                           { begin line_prag1 }
 205   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 206   ^\# \! .* \n                          ; -- #!, for scripts
 207   ()                                    { do_bol }
 208 }
 209
 210 -- after a layout keyword (let, where, do, of), we begin a new layout
 211 -- context if the curly brace is missing.
 212 -- Careful! This stuff is quite delicate.
 213 <layout, layout_do> {
 214   \{ / { notFollowedBy '-' }            { hopefully_open_brace }
 215         -- we might encounter {-# here, but {- has been handled already
 216   \n                                    ;
 217   ^\# (line)?                           { begin line_prag1 }
 218 }
 219
 220 -- do is treated in a subtly different way, see new_layout_context
 221 <layout>    ()                          { new_layout_context True }
 222 <layout_do> ()                          { new_layout_context False }
 223
 224 -- after a new layout context which was found to be to the left of the
 225 -- previous context, we have generated a '{' token, and we now need to
 226 -- generate a matching '}' token.
 227 <layout_left>  ()                       { do_layout_left }
 228
 229 <0,option_prags> \n                             { begin bol }
 230
 231 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 232                                 { dispatch_pragmas linePrags }
 233
 234 -- single-line line pragmas, of the form
 235 --    # <line> "<file>" <extra-stuff> \n
 236 <line_prag1> $decdigit+                 { setLine line_prag1a }
 237 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 238 <line_prag1b> .*                        { pop }
 239
 240 -- Haskell-style line pragmas, of the form
 241 --    {-# LINE <line> "<file>" #-}
 242 <line_prag2> $decdigit+                 { setLine line_prag2a }
 243 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 244 <line_prag2b> "#-}"|"-}"                { pop }
 245    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 246    -- with older versions of GHC which generated these.
 247
 248 <0,option_prags> {
 249   "{-#" $whitechar* $pragmachar+
 250         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 251                                  { dispatch_pragmas twoWordPrags }
 252
 253   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 254                                  { dispatch_pragmas oneWordPrags }
 255
 256   -- We ignore all these pragmas, but don't generate a warning for them
 257   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 258                                  { dispatch_pragmas ignoredPrags }
 259
 260   -- ToDo: should only be valid inside a pragma:
 261   "#-}"                                 { endPrag }
 262 }
 263
 264 <option_prags> {
 265   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 266                                    { dispatch_pragmas fileHeaderPrags }
 267
 268   "-- #"                                 { multiline_doc_comment }
 269 }
 270
 271 <0> {
 272   -- In the "0" mode we ignore these pragmas
 273   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 274                      { nested_comment lexToken }
 275 }
 276
 277 <0> {
 278   "-- #" .* { lineCommentToken }
 279 }
 280
 281 <0,option_prags> {
 282   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 283                     (nested_comment lexToken) }
 284 }
 285
 286 -- '0' state: ordinary lexemes
 287
 288 -- Haddock comments
 289
 290 <0,option_prags> {
 291   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 292   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 293 }
 294
 295 -- "special" symbols
 296
 297 <0> {
 298   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 299   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 300 }
 301
 302 <0> {
 303   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 304   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 305   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 306   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 307   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 308   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 309   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 310   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 311
 312 -- For backward compatibility, accept the old dollar syntax
 313   "[$" @varid "|"  / { ifExtension qqEnabled }
 314                      { lex_quasiquote_tok }
 315
 316   "[" @varid "|"  / { ifExtension qqEnabled }
 317                      { lex_quasiquote_tok }
 318 }
 319
 320 <0> {
 321   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 322                                         { special IToparenbar }
 323   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 324 }
 325
 326 <0> {
 327   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 328 }
 329
 330 <0> {
 331   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 332          { token IToubxparen }
 333   "#)" / { ifExtension unboxedTuplesEnabled }
 334          { token ITcubxparen }
 335 }
 336
 337 <0> {
 338   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 339   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 340 }
 341
 342 <0,option_prags> {
 343   \(                                    { special IToparen }
 344   \)                                    { special ITcparen }
 345   \[                                    { special ITobrack }
 346   \]                                    { special ITcbrack }
 347   \,                                    { special ITcomma }
 348   \;                                    { special ITsemi }
 349   \`                                    { special ITbackquote }
 350
 351   \{                                    { open_brace }
 352   \}                                    { close_brace }
 353 }
 354
 355 <0,option_prags> {
 356   @qual @varid                  { idtoken qvarid }
 357   @qual @conid                  { idtoken qconid }
 358   @varid                        { varid }
 359   @conid                        { idtoken conid }
 360 }
 361
 362 <0> {
 363   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 364   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 365   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 366   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 367 }
 368
 369 -- ToDo: - move `var` and (sym) into lexical syntax?
 370 --       - remove backquote from $special?
 371 <0> {
 372   @qual @varsym                                    { idtoken qvarsym }
 373   @qual @consym                                    { idtoken qconsym }
 374   @varsym                                          { varsym }
 375   @consym                                          { consym }
 376 }
 377
 378 -- For the normal boxed literals we need to be careful
 379 -- when trying to be close to Haskell98
 380 <0> {
 381   -- Normal integral literals (:: Num a => a, from Integer)
 382   @decimal           { tok_num positive 0 0 decimal }
 383   0[oO] @octal       { tok_num positive 2 2 octal }
 384   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 385
 386   -- Normal rational literals (:: Fractional a => a, from Rational)
 387   @floating_point    { strtoken tok_float }
 388 }
 389
 390 <0> {
 391   -- Unboxed ints (:: Int#) and words (:: Word#)
 392   -- It's simpler (and faster?) to give separate cases to the negatives,
 393   -- especially considering octal/hexadecimal prefixes.
 394   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 395   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 396   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 397   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 398   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 399   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 400
 401   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 402   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 403   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 404
 405   -- Unboxed floats and doubles (:: Float#, :: Double#)
 406   -- prim_{float,double} work with signed literals
 407   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 408   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 409 }
 410
 411 -- Strings and chars are lexed by hand-written code.  The reason is
 412 -- that even if we recognise the string or char here in the regex
 413 -- lexer, we would still have to parse the string afterward in order
 414 -- to convert it to a String.
 415 <0> {
 416   \'                            { lex_char_tok }
 417   \"                            { lex_string_tok }
 418 }
 419
 420 {
 421 -- -----------------------------------------------------------------------------
 422 -- The token type
 423
 424 data Token
 425   = ITas                        -- Haskell keywords
 426   | ITcase
 427   | ITclass
 428   | ITdata
 429   | ITdefault
 430   | ITderiving
 431   | ITdo
 432   | ITelse
 433   | IThiding
 434   | ITif
 435   | ITimport
 436   | ITin
 437   | ITinfix
 438   | ITinfixl
 439   | ITinfixr
 440   | ITinstance
 441   | ITlet
 442   | ITmodule
 443   | ITnewtype
 444   | ITof
 445   | ITqualified
 446   | ITthen
 447   | ITtype
 448   | ITwhere
 449   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 450
 451   | ITforall                    -- GHC extension keywords
 452   | ITforeign
 453   | ITexport
 454   | ITlabel
 455   | ITdynamic
 456   | ITsafe
 457   | ITthreadsafe
 458   | ITinterruptible
 459   | ITunsafe
 460   | ITstdcallconv
 461   | ITccallconv
 462   | ITprimcallconv
 463   | ITmdo
 464   | ITfamily
 465   | ITgroup
 466   | ITby
 467   | ITusing
 468
 469         -- Pragmas
 470   | ITinline_prag InlineSpec RuleMatchInfo
 471   | ITspec_prag                 -- SPECIALISE
 472   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 473   | ITsource_prag
 474   | ITrules_prag
 475   | ITwarning_prag
 476   | ITdeprecated_prag
 477   | ITline_prag
 478   | ITscc_prag
 479   | ITgenerated_prag
 480   | ITcore_prag                 -- hdaume: core annotations
 481   | ITunpack_prag
 482   | ITann_prag
 483   | ITclose_prag
 484   | IToptions_prag String
 485   | ITinclude_prag String
 486   | ITlanguage_prag
 487
 488   | ITdotdot                    -- reserved symbols
 489   | ITcolon
 490   | ITdcolon
 491   | ITequal
 492   | ITlam
 493   | ITvbar
 494   | ITlarrow
 495   | ITrarrow
 496   | ITat
 497   | ITtilde
 498   | ITdarrow
 499   | ITminus
 500   | ITbang
 501   | ITstar
 502   | ITdot
 503
 504   | ITbiglam                    -- GHC-extension symbols
 505
 506   | ITocurly                    -- special symbols
 507   | ITccurly
 508   | ITocurlybar                 -- {|, for type applications
 509   | ITccurlybar                 -- |}, for type applications
 510   | ITvocurly
 511   | ITvccurly
 512   | ITobrack
 513   | ITopabrack                  -- [:, for parallel arrays with -XParallelArrays
 514   | ITcpabrack                  -- :], for parallel arrays with -XParallelArrays
 515   | ITcbrack
 516   | IToparen
 517   | ITcparen
 518   | IToubxparen
 519   | ITcubxparen
 520   | ITsemi
 521   | ITcomma
 522   | ITunderscore
 523   | ITbackquote
 524
 525   | ITvarid   FastString        -- identifiers
 526   | ITconid   FastString
 527   | ITvarsym  FastString
 528   | ITconsym  FastString
 529   | ITqvarid  (FastString,FastString)
 530   | ITqconid  (FastString,FastString)
 531   | ITqvarsym (FastString,FastString)
 532   | ITqconsym (FastString,FastString)
 533   | ITprefixqvarsym (FastString,FastString)
 534   | ITprefixqconsym (FastString,FastString)
 535
 536   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 537
 538   | ITchar       Char
 539   | ITstring     FastString
 540   | ITinteger    Integer
 541   | ITrational   Rational
 542
 543   | ITprimchar   Char
 544   | ITprimstring FastString
 545   | ITprimint    Integer
 546   | ITprimword   Integer
 547   | ITprimfloat  Rational
 548   | ITprimdouble Rational
 549
 550   -- Template Haskell extension tokens
 551   | ITopenExpQuote              --  [| or [e|
 552   | ITopenPatQuote              --  [p|
 553   | ITopenDecQuote              --  [d|
 554   | ITopenTypQuote              --  [t|
 555   | ITcloseQuote                --  |]
 556   | ITidEscape   FastString     --  $x
 557   | ITparenEscape               --  $(
 558   | ITvarQuote                  --  '
 559   | ITtyQuote                   --  ''
 560   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 561
 562   -- Arrow notation extension
 563   | ITproc
 564   | ITrec
 565   | IToparenbar                 --  (|
 566   | ITcparenbar                 --  |)
 567   | ITlarrowtail                --  -<
 568   | ITrarrowtail                --  >-
 569   | ITLarrowtail                --  -<<
 570   | ITRarrowtail                --  >>-
 571
 572   | ITunknown String            -- Used when the lexer can't make sense of it
 573   | ITeof                       -- end of file token
 574
 575   -- Documentation annotations
 576   | ITdocCommentNext  String     -- something beginning '-- |'
 577   | ITdocCommentPrev  String     -- something beginning '-- ^'
 578   | ITdocCommentNamed String     -- something beginning '-- $'
 579   | ITdocSection      Int String -- a section heading
 580   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 581   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 582   | ITlineComment     String     -- comment starting by "--"
 583   | ITblockComment    String     -- comment in {- -}
 584
 585 #ifdef DEBUG
 586   deriving Show -- debugging
 587 #endif
 588
 589 {-
 590 isSpecial :: Token -> Bool
 591 -- If we see M.x, where x is a keyword, but
 592 -- is special, we treat is as just plain M.x,
 593 -- not as a keyword.
 594 isSpecial ITas          = True
 595 isSpecial IThiding      = True
 596 isSpecial ITqualified   = True
 597 isSpecial ITforall      = True
 598 isSpecial ITexport      = True
 599 isSpecial ITlabel       = True
 600 isSpecial ITdynamic     = True
 601 isSpecial ITsafe        = True
 602 isSpecial ITthreadsafe  = True
 603 isSpecial ITinterruptible = True
 604 isSpecial ITunsafe      = True
 605 isSpecial ITccallconv   = True
 606 isSpecial ITstdcallconv = True
 607 isSpecial ITprimcallconv = True
 608 isSpecial ITmdo         = True
 609 isSpecial ITfamily      = True
 610 isSpecial ITgroup   = True
 611 isSpecial ITby      = True
 612 isSpecial ITusing   = True
 613 isSpecial _             = False
 614 -}
 615
 616 -- the bitmap provided as the third component indicates whether the
 617 -- corresponding extension keyword is valid under the extension options
 618 -- provided to the compiler; if the extension corresponding to *any* of the
 619 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 620 -- facilitates using a keyword in two different extensions that can be
 621 -- activated independently)
 622 --
 623 reservedWordsFM :: UniqFM (Token, Int)
 624 reservedWordsFM = listToUFM $
 625         map (\(x, y, z) -> (mkFastString x, (y, z)))
 626        [( "_",          ITunderscore,   0 ),
 627         ( "as",         ITas,           0 ),
 628         ( "case",       ITcase,         0 ),
 629         ( "class",      ITclass,        0 ),
 630         ( "data",       ITdata,         0 ),
 631         ( "default",    ITdefault,      0 ),
 632         ( "deriving",   ITderiving,     0 ),
 633         ( "do",         ITdo,           0 ),
 634         ( "else",       ITelse,         0 ),
 635         ( "hiding",     IThiding,       0 ),
 636         ( "if",         ITif,           0 ),
 637         ( "import",     ITimport,       0 ),
 638         ( "in",         ITin,           0 ),
 639         ( "infix",      ITinfix,        0 ),
 640         ( "infixl",     ITinfixl,       0 ),
 641         ( "infixr",     ITinfixr,       0 ),
 642         ( "instance",   ITinstance,     0 ),
 643         ( "let",        ITlet,          0 ),
 644         ( "module",     ITmodule,       0 ),
 645         ( "newtype",    ITnewtype,      0 ),
 646         ( "of",         ITof,           0 ),
 647         ( "qualified",  ITqualified,    0 ),
 648         ( "then",       ITthen,         0 ),
 649         ( "type",       ITtype,         0 ),
 650         ( "where",      ITwhere,        0 ),
 651         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 652
 653     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 654         ( "mdo",        ITmdo,           bit recursiveDoBit),
 655         ( "family",     ITfamily,        bit tyFamBit),
 656     ( "group",  ITgroup,     bit transformComprehensionsBit),
 657     ( "by",     ITby,        bit transformComprehensionsBit),
 658     ( "using",  ITusing,     bit transformComprehensionsBit),
 659
 660         ( "foreign",    ITforeign,       bit ffiBit),
 661         ( "export",     ITexport,        bit ffiBit),
 662         ( "label",      ITlabel,         bit ffiBit),
 663         ( "dynamic",    ITdynamic,       bit ffiBit),
 664         ( "safe",       ITsafe,          bit ffiBit),
 665         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 666         ( "interruptible", ITinterruptible, bit ffiBit),
 667         ( "unsafe",     ITunsafe,        bit ffiBit),
 668         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 669         ( "ccall",      ITccallconv,     bit ffiBit),
 670         ( "prim",       ITprimcallconv,  bit ffiBit),
 671
 672         ( "rec",        ITrec,           bit recBit),
 673         ( "proc",       ITproc,          bit arrowsBit)
 674      ]
 675
 676 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 677 reservedSymsFM = listToUFM $
 678     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 679       [ ("..",  ITdotdot,   always)
 680         -- (:) is a reserved op, meaning only list cons
 681        ,(":",   ITcolon,    always)
 682        ,("::",  ITdcolon,   always)
 683        ,("=",   ITequal,    always)
 684        ,("\\",  ITlam,      always)
 685        ,("|",   ITvbar,     always)
 686        ,("<-",  ITlarrow,   always)
 687        ,("->",  ITrarrow,   always)
 688        ,("@",   ITat,       always)
 689        ,("~",   ITtilde,    always)
 690        ,("=>",  ITdarrow,   always)
 691        ,("-",   ITminus,    always)
 692        ,("!",   ITbang,     always)
 693
 694         -- For data T (a::*) = MkT
 695        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 696         -- For 'forall a . t'
 697        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 698
 699        ,("-<",  ITlarrowtail, arrowsEnabled)
 700        ,(">-",  ITrarrowtail, arrowsEnabled)
 701        ,("-<<", ITLarrowtail, arrowsEnabled)
 702        ,(">>-", ITRarrowtail, arrowsEnabled)
 703
 704        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 705        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 706        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 707                                 explicitForallEnabled i)
 708        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 709        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 710
 711        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 712        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 713        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 714        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715
 716        ,("★", ITstar, unicodeSyntaxEnabled)
 717
 718         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 719         -- form part of a large operator.  This would let us have a better
 720         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 721        ]
 722
 723 -- -----------------------------------------------------------------------------
 724 -- Lexer actions
 725
 726 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 727
 728 special :: Token -> Action
 729 special tok span _buf _len = return (L span tok)
 730
 731 token, layout_token :: Token -> Action
 732 token t span _buf _len = return (L span t)
 733 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 734
 735 idtoken :: (StringBuffer -> Int -> Token) -> Action
 736 idtoken f span buf len = return (L span $! (f buf len))
 737
 738 skip_one_varid :: (FastString -> Token) -> Action
 739 skip_one_varid f span buf len
 740   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 741
 742 strtoken :: (String -> Token) -> Action
 743 strtoken f span buf len =
 744   return (L span $! (f $! lexemeToString buf len))
 745
 746 init_strtoken :: Int -> (String -> Token) -> Action
 747 -- like strtoken, but drops the last N character(s)
 748 init_strtoken drop f span buf len =
 749   return (L span $! (f $! lexemeToString buf (len-drop)))
 750
 751 begin :: Int -> Action
 752 begin code _span _str _len = do pushLexState code; lexToken
 753
 754 pop :: Action
 755 pop _span _buf _len = do _ <- popLexState
 756                          lexToken
 757
 758 hopefully_open_brace :: Action
 759 hopefully_open_brace span buf len
 760  = do relaxed <- extension relaxedLayout
 761       ctx <- getContext
 762       (AI l _) <- getInput
 763       let offset = srcLocCol l
 764           isOK = relaxed ||
 765                  case ctx of
 766                  Layout prev_off : _ -> prev_off < offset
 767                  _                   -> True
 768       if isOK then pop_and open_brace span buf len
 769               else failSpanMsgP span (text "Missing block")
 770
 771 pop_and :: Action -> Action
 772 pop_and act span buf len = do _ <- popLexState
 773                               act span buf len
 774
 775 {-# INLINE nextCharIs #-}
 776 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 777 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 778
 779 notFollowedBy :: Char -> AlexAccPred Int
 780 notFollowedBy char _ _ _ (AI _ buf)
 781   = nextCharIs buf (/=char)
 782
 783 notFollowedBySymbol :: AlexAccPred Int
 784 notFollowedBySymbol _ _ _ (AI _ buf)
 785   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 786
 787 -- We must reject doc comments as being ordinary comments everywhere.
 788 -- In some cases the doc comment will be selected as the lexeme due to
 789 -- maximal munch, but not always, because the nested comment rule is
 790 -- valid in all states, but the doc-comment rules are only valid in
 791 -- the non-layout states.
 792 isNormalComment :: AlexAccPred Int
 793 isNormalComment bits _ _ (AI _ buf)
 794   | haddockEnabled bits = notFollowedByDocOrPragma
 795   | otherwise           = nextCharIs buf (/='#')
 796   where
 797     notFollowedByDocOrPragma
 798        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 799
 800 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 801 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 802
 803 {-
 804 haddockDisabledAnd p bits _ _ (AI _ buf)
 805   = if haddockEnabled bits then False else (p buf)
 806 -}
 807
 808 atEOL :: AlexAccPred Int
 809 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 810
 811 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 812 ifExtension pred bits _ _ _ = pred bits
 813
 814 multiline_doc_comment :: Action
 815 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 816   where
 817     worker commentAcc input docType oneLine = case alexGetChar input of
 818       Just ('\n', input')
 819         | oneLine -> docCommentEnd input commentAcc docType buf span
 820         | otherwise -> case checkIfCommentLine input' of
 821           Just input -> worker ('\n':commentAcc) input docType False
 822           Nothing -> docCommentEnd input commentAcc docType buf span
 823       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 824       Nothing -> docCommentEnd input commentAcc docType buf span
 825
 826     checkIfCommentLine input = check (dropNonNewlineSpace input)
 827       where
 828         check input = case alexGetChar input of
 829           Just ('-', input) -> case alexGetChar input of
 830             Just ('-', input) -> case alexGetChar input of
 831               Just (c, _) | c /= '-' -> Just input
 832               _ -> Nothing
 833             _ -> Nothing
 834           _ -> Nothing
 835
 836         dropNonNewlineSpace input = case alexGetChar input of
 837           Just (c, input')
 838             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 839             | otherwise -> input
 840           Nothing -> input
 841
 842 lineCommentToken :: Action
 843 lineCommentToken span buf len = do
 844   b <- extension rawTokenStreamEnabled
 845   if b then strtoken ITlineComment span buf len else lexToken
 846
 847 {-
 848   nested comments require traversing by hand, they can't be parsed
 849   using regular expressions.
 850 -}
 851 nested_comment :: P (Located Token) -> Action
 852 nested_comment cont span _str _len = do
 853   input <- getInput
 854   go "" (1::Int) input
 855   where
 856     go commentAcc 0 input = do setInput input
 857                                b <- extension rawTokenStreamEnabled
 858                                if b
 859                                  then docCommentEnd input commentAcc ITblockComment _str span
 860                                  else cont
 861     go commentAcc n input = case alexGetChar input of
 862       Nothing -> errBrace input span
 863       Just ('-',input) -> case alexGetChar input of
 864         Nothing  -> errBrace input span
 865         Just ('\125',input) -> go commentAcc (n-1) input
 866         Just (_,_)          -> go ('-':commentAcc) n input
 867       Just ('\123',input) -> case alexGetChar input of
 868         Nothing  -> errBrace input span
 869         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 870         Just (_,_)       -> go ('\123':commentAcc) n input
 871       Just (c,input) -> go (c:commentAcc) n input
 872
 873 nested_doc_comment :: Action
 874 nested_doc_comment span buf _len = withLexedDocType (go "")
 875   where
 876     go commentAcc input docType _ = case alexGetChar input of
 877       Nothing -> errBrace input span
 878       Just ('-',input) -> case alexGetChar input of
 879         Nothing -> errBrace input span
 880         Just ('\125',input) ->
 881           docCommentEnd input commentAcc docType buf span
 882         Just (_,_) -> go ('-':commentAcc) input docType False
 883       Just ('\123', input) -> case alexGetChar input of
 884         Nothing  -> errBrace input span
 885         Just ('-',input) -> do
 886           setInput input
 887           let cont = do input <- getInput; go commentAcc input docType False
 888           nested_comment cont span buf _len
 889         Just (_,_) -> go ('\123':commentAcc) input docType False
 890       Just (c,input) -> go (c:commentAcc) input docType False
 891
 892 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 893                  -> P (Located Token)
 894 withLexedDocType lexDocComment = do
 895   input@(AI _ buf) <- getInput
 896   case prevChar buf ' ' of
 897     '|' -> lexDocComment input ITdocCommentNext False
 898     '^' -> lexDocComment input ITdocCommentPrev False
 899     '$' -> lexDocComment input ITdocCommentNamed False
 900     '*' -> lexDocSection 1 input
 901     '#' -> lexDocComment input ITdocOptionsOld False
 902     _ -> panic "withLexedDocType: Bad doc type"
 903  where
 904     lexDocSection n input = case alexGetChar input of
 905       Just ('*', input) -> lexDocSection (n+1) input
 906       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 907       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 908
 909 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 910 -- off again at the end of the pragma.
 911 rulePrag :: Action
 912 rulePrag span _buf _len = do
 913   setExts (.|. bit inRulePragBit)
 914   return (L span ITrules_prag)
 915
 916 endPrag :: Action
 917 endPrag span _buf _len = do
 918   setExts (.&. complement (bit inRulePragBit))
 919   return (L span ITclose_prag)
 920
 921 -- docCommentEnd
 922 -------------------------------------------------------------------------------
 923 -- This function is quite tricky. We can't just return a new token, we also
 924 -- need to update the state of the parser. Why? Because the token is longer
 925 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 926 -- it writes the wrong token length to the parser state. This function is
 927 -- called afterwards, so it can just update the state.
 928
 929 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 930                  SrcSpan -> P (Located Token)
 931 docCommentEnd input commentAcc docType buf span = do
 932   setInput input
 933   let (AI loc nextBuf) = input
 934       comment = reverse commentAcc
 935       span' = mkSrcSpan (srcSpanStart span) loc
 936       last_len = byteDiff buf nextBuf
 937
 938   span `seq` setLastToken span' last_len
 939   return (L span' (docType comment))
 940
 941 errBrace :: AlexInput -> SrcSpan -> P a
 942 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 943
 944 open_brace, close_brace :: Action
 945 open_brace span _str _len = do
 946   ctx <- getContext
 947   setContext (NoLayout:ctx)
 948   return (L span ITocurly)
 949 close_brace span _str _len = do
 950   popContext
 951   return (L span ITccurly)
 952
 953 qvarid, qconid :: StringBuffer -> Int -> Token
 954 qvarid buf len = ITqvarid $! splitQualName buf len False
 955 qconid buf len = ITqconid $! splitQualName buf len False
 956
 957 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 958 -- takes a StringBuffer and a length, and returns the module name
 959 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 960 -- because of hierarchical module names.
 961 splitQualName orig_buf len parens = split orig_buf orig_buf
 962   where
 963     split buf dot_buf
 964         | orig_buf `byteDiff` buf >= len  = done dot_buf
 965         | c == '.'                        = found_dot buf'
 966         | otherwise                       = split buf' dot_buf
 967       where
 968        (c,buf') = nextChar buf
 969
 970     -- careful, we might get names like M....
 971     -- so, if the character after the dot is not upper-case, this is
 972     -- the end of the qualifier part.
 973     found_dot buf -- buf points after the '.'
 974         | isUpper c    = split buf' buf
 975         | otherwise    = done buf
 976       where
 977        (c,buf') = nextChar buf
 978
 979     done dot_buf =
 980         (lexemeToFastString orig_buf (qual_size - 1),
 981          if parens -- Prelude.(+)
 982             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 983             else lexemeToFastString dot_buf (len - qual_size))
 984       where
 985         qual_size = orig_buf `byteDiff` dot_buf
 986
 987 varid :: Action
 988 varid span buf len =
 989   fs `seq`
 990   case lookupUFM reservedWordsFM fs of
 991         Just (keyword,0)    -> do
 992                 maybe_layout keyword
 993                 return (L span keyword)
 994         Just (keyword,exts) -> do
 995                 b <- extension (\i -> exts .&. i /= 0)
 996                 if b then do maybe_layout keyword
 997                              return (L span keyword)
 998                      else return (L span (ITvarid fs))
 999         _other -> return (L span (ITvarid fs))
1000   where
1001         fs = lexemeToFastString buf len
1002
1003 conid :: StringBuffer -> Int -> Token
1004 conid buf len = ITconid fs
1005   where fs = lexemeToFastString buf len
1006
1007 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
1008 qvarsym buf len = ITqvarsym $! splitQualName buf len False
1009 qconsym buf len = ITqconsym $! splitQualName buf len False
1010 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
1011 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1012
1013 varsym, consym :: Action
1014 varsym = sym ITvarsym
1015 consym = sym ITconsym
1016
1017 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1018     -> P (Located Token)
1019 sym con span buf len =
1020   case lookupUFM reservedSymsFM fs of
1021         Just (keyword,exts) -> do
1022                 b <- extension exts
1023                 if b then return (L span keyword)
1024                      else return (L span $! con fs)
1025         _other -> return (L span $! con fs)
1026   where
1027         fs = lexemeToFastString buf len
1028
1029 -- Variations on the integral numeric literal.
1030 tok_integral :: (Integer -> Token)
1031      -> (Integer -> Integer)
1032  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1033      -> Int -> Int
1034      -> (Integer, (Char->Int)) -> Action
1035 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1036   return $ L span $ itint $! transint $ parseUnsignedInteger
1037      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1038
1039 -- some conveniences for use with tok_integral
1040 tok_num :: (Integer -> Integer)
1041         -> Int -> Int
1042         -> (Integer, (Char->Int)) -> Action
1043 tok_num = tok_integral ITinteger
1044 tok_primint :: (Integer -> Integer)
1045             -> Int -> Int
1046             -> (Integer, (Char->Int)) -> Action
1047 tok_primint = tok_integral ITprimint
1048 tok_primword :: Int -> Int
1049              -> (Integer, (Char->Int)) -> Action
1050 tok_primword = tok_integral ITprimword positive
1051 positive, negative :: (Integer -> Integer)
1052 positive = id
1053 negative = negate
1054 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1055 decimal = (10,octDecDigit)
1056 octal = (8,octDecDigit)
1057 hexadecimal = (16,hexDigit)
1058
1059 -- readRational can understand negative rationals, exponents, everything.
1060 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1061 tok_float        str = ITrational   $! readRational str
1062 tok_primfloat    str = ITprimfloat  $! readRational str
1063 tok_primdouble   str = ITprimdouble $! readRational str
1064
1065 -- -----------------------------------------------------------------------------
1066 -- Layout processing
1067
1068 -- we're at the first token on a line, insert layout tokens if necessary
1069 do_bol :: Action
1070 do_bol span _str _len = do
1071         pos <- getOffside
1072         case pos of
1073             LT -> do
1074                 --trace "layout: inserting '}'" $ do
1075                 popContext
1076                 -- do NOT pop the lex state, we might have a ';' to insert
1077                 return (L span ITvccurly)
1078             EQ -> do
1079                 --trace "layout: inserting ';'" $ do
1080                 _ <- popLexState
1081                 return (L span ITsemi)
1082             GT -> do
1083                 _ <- popLexState
1084                 lexToken
1085
1086 -- certain keywords put us in the "layout" state, where we might
1087 -- add an opening curly brace.
1088 maybe_layout :: Token -> P ()
1089 maybe_layout t = do -- If the alternative layout rule is enabled then
1090                     -- we never create an implicit layout context here.
1091                     -- Layout is handled XXX instead.
1092                     -- The code for closing implicit contexts, or
1093                     -- inserting implicit semi-colons, is therefore
1094                     -- irrelevant as it only applies in an implicit
1095                     -- context.
1096                     alr <- extension alternativeLayoutRule
1097                     unless alr $ f t
1098     where f ITdo    = pushLexState layout_do
1099           f ITmdo   = pushLexState layout_do
1100           f ITof    = pushLexState layout
1101           f ITlet   = pushLexState layout
1102           f ITwhere = pushLexState layout
1103           f ITrec   = pushLexState layout
1104           f _       = return ()
1105
1106 -- Pushing a new implicit layout context.  If the indentation of the
1107 -- next token is not greater than the previous layout context, then
1108 -- Haskell 98 says that the new layout context should be empty; that is
1109 -- the lexer must generate {}.
1110 --
1111 -- We are slightly more lenient than this: when the new context is started
1112 -- by a 'do', then we allow the new context to be at the same indentation as
1113 -- the previous context.  This is what the 'strict' argument is for.
1114 --
1115 new_layout_context :: Bool -> Action
1116 new_layout_context strict span _buf _len = do
1117     _ <- popLexState
1118     (AI l _) <- getInput
1119     let offset = srcLocCol l
1120     ctx <- getContext
1121     nondecreasing <- extension nondecreasingIndentation
1122     let strict' = strict || not nondecreasing
1123     case ctx of
1124         Layout prev_off : _  |
1125            (strict'     && prev_off >= offset  ||
1126             not strict' && prev_off > offset) -> do
1127                 -- token is indented to the left of the previous context.
1128                 -- we must generate a {} sequence now.
1129                 pushLexState layout_left
1130                 return (L span ITvocurly)
1131         _ -> do
1132                 setContext (Layout offset : ctx)
1133                 return (L span ITvocurly)
1134
1135 do_layout_left :: Action
1136 do_layout_left span _buf _len = do
1137     _ <- popLexState
1138     pushLexState bol  -- we must be at the start of a line
1139     return (L span ITvccurly)
1140
1141 -- -----------------------------------------------------------------------------
1142 -- LINE pragmas
1143
1144 setLine :: Int -> Action
1145 setLine code span buf len = do
1146   let line = parseUnsignedInteger buf len 10 octDecDigit
1147   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1148         -- subtract one: the line number refers to the *following* line
1149   _ <- popLexState
1150   pushLexState code
1151   lexToken
1152
1153 setFile :: Int -> Action
1154 setFile code span buf len = do
1155   let file = lexemeToFastString (stepOn buf) (len-2)
1156   setAlrLastLoc noSrcSpan
1157   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1158   _ <- popLexState
1159   pushLexState code
1160   lexToken
1161
1162
1163 -- -----------------------------------------------------------------------------
1164 -- Options, includes and language pragmas.
1165
1166 lex_string_prag :: (String -> Token) -> Action
1167 lex_string_prag mkTok span _buf _len
1168     = do input <- getInput
1169          start <- getSrcLoc
1170          tok <- go [] input
1171          end <- getSrcLoc
1172          return (L (mkSrcSpan start end) tok)
1173     where go acc input
1174               = if isString input "#-}"
1175                    then do setInput input
1176                            return (mkTok (reverse acc))
1177                    else case alexGetChar input of
1178                           Just (c,i) -> go (c:acc) i
1179                           Nothing -> err input
1180           isString _ [] = True
1181           isString i (x:xs)
1182               = case alexGetChar i of
1183                   Just (c,i') | c == x    -> isString i' xs
1184                   _other -> False
1185           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1186
1187
1188 -- -----------------------------------------------------------------------------
1189 -- Strings & Chars
1190
1191 -- This stuff is horrible.  I hates it.
1192
1193 lex_string_tok :: Action
1194 lex_string_tok span _buf _len = do
1195   tok <- lex_string ""
1196   end <- getSrcLoc
1197   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1198
1199 lex_string :: String -> P Token
1200 lex_string s = do
1201   i <- getInput
1202   case alexGetChar' i of
1203     Nothing -> lit_error i
1204
1205     Just ('"',i)  -> do
1206         setInput i
1207         magicHash <- extension magicHashEnabled
1208         if magicHash
1209           then do
1210             i <- getInput
1211             case alexGetChar' i of
1212               Just ('#',i) -> do
1213                    setInput i
1214                    if any (> '\xFF') s
1215                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1216                     else let s' = mkZFastString (reverse s) in
1217                          return (ITprimstring s')
1218                         -- mkZFastString is a hack to avoid encoding the
1219                         -- string in UTF-8.  We just want the exact bytes.
1220               _other ->
1221                 return (ITstring (mkFastString (reverse s)))
1222           else
1223                 return (ITstring (mkFastString (reverse s)))
1224
1225     Just ('\\',i)
1226         | Just ('&',i) <- next -> do
1227                 setInput i; lex_string s
1228         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1229                            -- is_space only works for <= '\x7f' (#3751)
1230                 setInput i; lex_stringgap s
1231         where next = alexGetChar' i
1232
1233     Just (c, i1) -> do
1234         case c of
1235           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1236           c | isAny c -> do setInput i1; lex_string (c:s)
1237           _other -> lit_error i
1238
1239 lex_stringgap :: String -> P Token
1240 lex_stringgap s = do
1241   i <- getInput
1242   c <- getCharOrFail i
1243   case c of
1244     '\\' -> lex_string s
1245     c | is_space c -> lex_stringgap s
1246     _other -> lit_error i
1247
1248
1249 lex_char_tok :: Action
1250 -- Here we are basically parsing character literals, such as 'x' or '\n'
1251 -- but, when Template Haskell is on, we additionally spot
1252 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1253 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1254 -- So we have to do two characters of lookahead: when we see 'x we need to
1255 -- see if there's a trailing quote
1256 lex_char_tok span _buf _len = do        -- We've seen '
1257    i1 <- getInput       -- Look ahead to first character
1258    let loc = srcSpanStart span
1259    case alexGetChar' i1 of
1260         Nothing -> lit_error  i1
1261
1262         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1263                   th_exts <- extension thEnabled
1264                   if th_exts then do
1265                         setInput i2
1266                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1267                    else lit_error i1
1268
1269         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1270                   setInput i2
1271                   lit_ch <- lex_escape
1272                   i3 <- getInput
1273                   mc <- getCharOrFail i3 -- Trailing quote
1274                   if mc == '\'' then finish_char_tok loc lit_ch
1275                                 else lit_error i3
1276
1277         Just (c, i2@(AI _end2 _))
1278                 | not (isAny c) -> lit_error i1
1279                 | otherwise ->
1280
1281                 -- We've seen 'x, where x is a valid character
1282                 --  (i.e. not newline etc) but not a quote or backslash
1283            case alexGetChar' i2 of      -- Look ahead one more character
1284                 Just ('\'', i3) -> do   -- We've seen 'x'
1285                         setInput i3
1286                         finish_char_tok loc c
1287                 _other -> do            -- We've seen 'x not followed by quote
1288                                         -- (including the possibility of EOF)
1289                                         -- If TH is on, just parse the quote only
1290                         th_exts <- extension thEnabled
1291                         let (AI end _) = i1
1292                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1293                                    else lit_error i2
1294
1295 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1296 finish_char_tok loc ch  -- We've already seen the closing quote
1297                         -- Just need to check for trailing #
1298   = do  magicHash <- extension magicHashEnabled
1299         i@(AI end _) <- getInput
1300         if magicHash then do
1301                 case alexGetChar' i of
1302                         Just ('#',i@(AI end _)) -> do
1303                                 setInput i
1304                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1305                         _other ->
1306                                 return (L (mkSrcSpan loc end) (ITchar ch))
1307             else do
1308                    return (L (mkSrcSpan loc end) (ITchar ch))
1309
1310 isAny :: Char -> Bool
1311 isAny c | c > '\x7f' = isPrint c
1312         | otherwise  = is_any c
1313
1314 lex_escape :: P Char
1315 lex_escape = do
1316   i0 <- getInput
1317   c <- getCharOrFail i0
1318   case c of
1319         'a'   -> return '\a'
1320         'b'   -> return '\b'
1321         'f'   -> return '\f'
1322         'n'   -> return '\n'
1323         'r'   -> return '\r'
1324         't'   -> return '\t'
1325         'v'   -> return '\v'
1326         '\\'  -> return '\\'
1327         '"'   -> return '\"'
1328         '\''  -> return '\''
1329         '^'   -> do i1 <- getInput
1330                     c <- getCharOrFail i1
1331                     if c >= '@' && c <= '_'
1332                         then return (chr (ord c - ord '@'))
1333                         else lit_error i1
1334
1335         'x'   -> readNum is_hexdigit 16 hexDigit
1336         'o'   -> readNum is_octdigit  8 octDecDigit
1337         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1338
1339         c1 ->  do
1340            i <- getInput
1341            case alexGetChar' i of
1342             Nothing -> lit_error i0
1343             Just (c2,i2) ->
1344               case alexGetChar' i2 of
1345                 Nothing -> do lit_error i0
1346                 Just (c3,i3) ->
1347                    let str = [c1,c2,c3] in
1348                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1349                                      Just rest <- [stripPrefix p str] ] of
1350                           (escape_char,[]):_ -> do
1351                                 setInput i3
1352                                 return escape_char
1353                           (escape_char,_:_):_ -> do
1354                                 setInput i2
1355                                 return escape_char
1356                           [] -> lit_error i0
1357
1358 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1359 readNum is_digit base conv = do
1360   i <- getInput
1361   c <- getCharOrFail i
1362   if is_digit c
1363         then readNum2 is_digit base conv (conv c)
1364         else lit_error i
1365
1366 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1367 readNum2 is_digit base conv i = do
1368   input <- getInput
1369   read i input
1370   where read i input = do
1371           case alexGetChar' input of
1372             Just (c,input') | is_digit c -> do
1373                let i' = i*base + conv c
1374                if i' > 0x10ffff
1375                   then setInput input >> lexError "numeric escape sequence out of range"
1376                   else read i' input'
1377             _other -> do
1378               setInput input; return (chr i)
1379
1380
1381 silly_escape_chars :: [(String, Char)]
1382 silly_escape_chars = [
1383         ("NUL", '\NUL'),
1384         ("SOH", '\SOH'),
1385         ("STX", '\STX'),
1386         ("ETX", '\ETX'),
1387         ("EOT", '\EOT'),
1388         ("ENQ", '\ENQ'),
1389         ("ACK", '\ACK'),
1390         ("BEL", '\BEL'),
1391         ("BS", '\BS'),
1392         ("HT", '\HT'),
1393         ("LF", '\LF'),
1394         ("VT", '\VT'),
1395         ("FF", '\FF'),
1396         ("CR", '\CR'),
1397         ("SO", '\SO'),
1398         ("SI", '\SI'),
1399         ("DLE", '\DLE'),
1400         ("DC1", '\DC1'),
1401         ("DC2", '\DC2'),
1402         ("DC3", '\DC3'),
1403         ("DC4", '\DC4'),
1404         ("NAK", '\NAK'),
1405         ("SYN", '\SYN'),
1406         ("ETB", '\ETB'),
1407         ("CAN", '\CAN'),
1408         ("EM", '\EM'),
1409         ("SUB", '\SUB'),
1410         ("ESC", '\ESC'),
1411         ("FS", '\FS'),
1412         ("GS", '\GS'),
1413         ("RS", '\RS'),
1414         ("US", '\US'),
1415         ("SP", '\SP'),
1416         ("DEL", '\DEL')
1417         ]
1418
1419 -- before calling lit_error, ensure that the current input is pointing to
1420 -- the position of the error in the buffer.  This is so that we can report
1421 -- a correct location to the user, but also so we can detect UTF-8 decoding
1422 -- errors if they occur.
1423 lit_error :: AlexInput -> P a
1424 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1425
1426 getCharOrFail :: AlexInput -> P Char
1427 getCharOrFail i =  do
1428   case alexGetChar' i of
1429         Nothing -> lexError "unexpected end-of-file in string/character literal"
1430         Just (c,i)  -> do setInput i; return c
1431
1432 -- -----------------------------------------------------------------------------
1433 -- QuasiQuote
1434
1435 lex_quasiquote_tok :: Action
1436 lex_quasiquote_tok span buf len = do
1437   let quoter = tail (lexemeToString buf (len - 1))
1438                 -- 'tail' drops the initial '[',
1439                 -- while the -1 drops the trailing '|'
1440   quoteStart <- getSrcLoc
1441   quote <- lex_quasiquote ""
1442   end <- getSrcLoc
1443   return (L (mkSrcSpan (srcSpanStart span) end)
1444            (ITquasiQuote (mkFastString quoter,
1445                           mkFastString (reverse quote),
1446                           mkSrcSpan quoteStart end)))
1447
1448 lex_quasiquote :: String -> P String
1449 lex_quasiquote s = do
1450   i <- getInput
1451   case alexGetChar' i of
1452     Nothing -> lit_error i
1453
1454     Just ('\\',i)
1455         | Just ('|',i) <- next -> do
1456                 setInput i; lex_quasiquote ('|' : s)
1457         | Just (']',i) <- next -> do
1458                 setInput i; lex_quasiquote (']' : s)
1459         where next = alexGetChar' i
1460
1461     Just ('|',i)
1462         | Just (']',i) <- next -> do
1463                 setInput i; return s
1464         where next = alexGetChar' i
1465
1466     Just (c, i) -> do
1467          setInput i; lex_quasiquote (c : s)
1468
1469 -- -----------------------------------------------------------------------------
1470 -- Warnings
1471
1472 warn :: DynFlag -> SDoc -> Action
1473 warn option warning srcspan _buf _len = do
1474     addWarning option srcspan warning
1475     lexToken
1476
1477 warnThen :: DynFlag -> SDoc -> Action -> Action
1478 warnThen option warning action srcspan buf len = do
1479     addWarning option srcspan warning
1480     action srcspan buf len
1481
1482 -- -----------------------------------------------------------------------------
1483 -- The Parse Monad
1484
1485 data LayoutContext
1486   = NoLayout
1487   | Layout !Int
1488   deriving Show
1489
1490 data ParseResult a
1491   = POk PState a
1492   | PFailed
1493         SrcSpan         -- The start and end of the text span related to
1494                         -- the error.  Might be used in environments which can
1495                         -- show this span, e.g. by highlighting it.
1496         Message         -- The error message
1497
1498 data PState = PState {
1499         buffer     :: StringBuffer,
1500         dflags     :: DynFlags,
1501         messages   :: Messages,
1502         last_loc   :: SrcSpan,  -- pos of previous token
1503         last_len   :: !Int,     -- len of previous token
1504         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1505         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1506         context    :: [LayoutContext],
1507         lex_state  :: [Int],
1508         -- Used in the alternative layout rule:
1509         -- These tokens are the next ones to be sent out. They are
1510         -- just blindly emitted, without the rule looking at them again:
1511         alr_pending_implicit_tokens :: [Located Token],
1512         -- This is the next token to be considered or, if it is Nothing,
1513         -- we need to get the next token from the input stream:
1514         alr_next_token :: Maybe (Located Token),
1515         -- This is what we consider to be the locatino of the last token
1516         -- emitted:
1517         alr_last_loc :: SrcSpan,
1518         -- The stack of layout contexts:
1519         alr_context :: [ALRContext],
1520         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1521         -- us what sort of layout the '{' will open:
1522         alr_expecting_ocurly :: Maybe ALRLayout,
1523         -- Have we just had the '}' for a let block? If so, than an 'in'
1524         -- token doesn't need to close anything:
1525         alr_justClosedExplicitLetBlock :: Bool
1526      }
1527         -- last_loc and last_len are used when generating error messages,
1528         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1529         -- current token to happyError, we could at least get rid of last_len.
1530         -- Getting rid of last_loc would require finding another way to
1531         -- implement pushCurrentContext (which is only called from one place).
1532
1533 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1534                               Bool{- is it a 'let' block? -}
1535                 | ALRLayout ALRLayout Int
1536 data ALRLayout = ALRLayoutLet
1537                | ALRLayoutWhere
1538                | ALRLayoutOf
1539                | ALRLayoutDo
1540
1541 newtype P a = P { unP :: PState -> ParseResult a }
1542
1543 instance Monad P where
1544   return = returnP
1545   (>>=) = thenP
1546   fail = failP
1547
1548 returnP :: a -> P a
1549 returnP a = a `seq` (P $ \s -> POk s a)
1550
1551 thenP :: P a -> (a -> P b) -> P b
1552 (P m) `thenP` k = P $ \ s ->
1553         case m s of
1554                 POk s1 a         -> (unP (k a)) s1
1555                 PFailed span err -> PFailed span err
1556
1557 failP :: String -> P a
1558 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1559
1560 failMsgP :: String -> P a
1561 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1562
1563 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1564 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1565
1566 failSpanMsgP :: SrcSpan -> SDoc -> P a
1567 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1568
1569 getPState :: P PState
1570 getPState = P $ \s -> POk s s
1571
1572 getDynFlags :: P DynFlags
1573 getDynFlags = P $ \s -> POk s (dflags s)
1574
1575 withThisPackage :: (PackageId -> a) -> P a
1576 withThisPackage f
1577  = do   pkg     <- liftM thisPackage getDynFlags
1578         return  $ f pkg
1579
1580 extension :: (Int -> Bool) -> P Bool
1581 extension p = P $ \s -> POk s (p $! extsBitmap s)
1582
1583 getExts :: P Int
1584 getExts = P $ \s -> POk s (extsBitmap s)
1585
1586 setExts :: (Int -> Int) -> P ()
1587 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1588
1589 setSrcLoc :: SrcLoc -> P ()
1590 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1591
1592 getSrcLoc :: P SrcLoc
1593 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1594
1595 setLastToken :: SrcSpan -> Int -> P ()
1596 setLastToken loc len = P $ \s -> POk s {
1597   last_loc=loc,
1598   last_len=len
1599   } ()
1600
1601 data AlexInput = AI SrcLoc StringBuffer
1602
1603 alexInputPrevChar :: AlexInput -> Char
1604 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1605
1606 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1607 alexGetChar (AI loc s)
1608   | atEnd s   = Nothing
1609   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1610                 --trace (show (ord c)) $
1611                 Just (adj_c, (AI loc' s'))
1612   where (c,s') = nextChar s
1613         loc'   = advanceSrcLoc loc c
1614
1615         non_graphic     = '\x0'
1616         upper           = '\x1'
1617         lower           = '\x2'
1618         digit           = '\x3'
1619         symbol          = '\x4'
1620         space           = '\x5'
1621         other_graphic   = '\x6'
1622
1623         adj_c
1624           | c <= '\x06' = non_graphic
1625           | c <= '\x7f' = c
1626           -- Alex doesn't handle Unicode, so when Unicode
1627           -- character is encountered we output these values
1628           -- with the actual character value hidden in the state.
1629           | otherwise =
1630                 case generalCategory c of
1631                   UppercaseLetter       -> upper
1632                   LowercaseLetter       -> lower
1633                   TitlecaseLetter       -> upper
1634                   ModifierLetter        -> other_graphic
1635                   OtherLetter           -> lower -- see #1103
1636                   NonSpacingMark        -> other_graphic
1637                   SpacingCombiningMark  -> other_graphic
1638                   EnclosingMark         -> other_graphic
1639                   DecimalNumber         -> digit
1640                   LetterNumber          -> other_graphic
1641                   OtherNumber           -> digit -- see #4373
1642                   ConnectorPunctuation  -> symbol
1643                   DashPunctuation       -> symbol
1644                   OpenPunctuation       -> other_graphic
1645                   ClosePunctuation      -> other_graphic
1646                   InitialQuote          -> other_graphic
1647                   FinalQuote            -> other_graphic
1648                   OtherPunctuation      -> symbol
1649                   MathSymbol            -> symbol
1650                   CurrencySymbol        -> symbol
1651                   ModifierSymbol        -> symbol
1652                   OtherSymbol           -> symbol
1653                   Space                 -> space
1654                   _other                -> non_graphic
1655
1656 -- This version does not squash unicode characters, it is used when
1657 -- lexing strings.
1658 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1659 alexGetChar' (AI loc s)
1660   | atEnd s   = Nothing
1661   | otherwise = c `seq` loc' `seq` s' `seq`
1662                 --trace (show (ord c)) $
1663                 Just (c, (AI loc' s'))
1664   where (c,s') = nextChar s
1665         loc'   = advanceSrcLoc loc c
1666
1667 getInput :: P AlexInput
1668 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1669
1670 setInput :: AlexInput -> P ()
1671 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1672
1673 pushLexState :: Int -> P ()
1674 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1675
1676 popLexState :: P Int
1677 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1678
1679 getLexState :: P Int
1680 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1681
1682 popNextToken :: P (Maybe (Located Token))
1683 popNextToken
1684     = P $ \s@PState{ alr_next_token = m } ->
1685               POk (s {alr_next_token = Nothing}) m
1686
1687 setAlrLastLoc :: SrcSpan -> P ()
1688 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1689
1690 getAlrLastLoc :: P SrcSpan
1691 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1692
1693 getALRContext :: P [ALRContext]
1694 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1695
1696 setALRContext :: [ALRContext] -> P ()
1697 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1698
1699 getJustClosedExplicitLetBlock :: P Bool
1700 getJustClosedExplicitLetBlock
1701  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1702
1703 setJustClosedExplicitLetBlock :: Bool -> P ()
1704 setJustClosedExplicitLetBlock b
1705  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1706
1707 setNextToken :: Located Token -> P ()
1708 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1709
1710 popPendingImplicitToken :: P (Maybe (Located Token))
1711 popPendingImplicitToken
1712     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1713               case ts of
1714               [] -> POk s Nothing
1715               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1716
1717 setPendingImplicitTokens :: [Located Token] -> P ()
1718 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1719
1720 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1721 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1722
1723 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1724 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1725
1726 -- for reasons of efficiency, flags indicating language extensions (eg,
1727 -- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
1728 -- integer
1729
1730 genericsBit :: Int
1731 genericsBit = 0 -- {| and |}
1732 ffiBit :: Int
1733 ffiBit     = 1
1734 parrBit :: Int
1735 parrBit    = 2
1736 arrowsBit :: Int
1737 arrowsBit  = 4
1738 thBit :: Int
1739 thBit      = 5
1740 ipBit :: Int
1741 ipBit      = 6
1742 explicitForallBit :: Int
1743 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1744 bangPatBit :: Int
1745 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1746                 -- (doesn't affect the lexer)
1747 tyFamBit :: Int
1748 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1749 haddockBit :: Int
1750 haddockBit = 10 -- Lex and parse Haddock comments
1751 magicHashBit :: Int
1752 magicHashBit = 11 -- "#" in both functions and operators
1753 kindSigsBit :: Int
1754 kindSigsBit = 12 -- Kind signatures on type variables
1755 recursiveDoBit :: Int
1756 recursiveDoBit = 13 -- mdo
1757 unicodeSyntaxBit :: Int
1758 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1759 unboxedTuplesBit :: Int
1760 unboxedTuplesBit = 15 -- (# and #)
1761 datatypeContextsBit :: Int
1762 datatypeContextsBit = 16
1763 transformComprehensionsBit :: Int
1764 transformComprehensionsBit = 17
1765 qqBit :: Int
1766 qqBit      = 18 -- enable quasiquoting
1767 inRulePragBit :: Int
1768 inRulePragBit = 19
1769 rawTokenStreamBit :: Int
1770 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1771 recBit :: Int
1772 recBit = 22 -- rec
1773 alternativeLayoutRuleBit :: Int
1774 alternativeLayoutRuleBit = 23
1775 relaxedLayoutBit :: Int
1776 relaxedLayoutBit = 24
1777 nondecreasingIndentationBit :: Int
1778 nondecreasingIndentationBit = 25
1779
1780 always :: Int -> Bool
1781 always           _     = True
1782 genericsEnabled :: Int -> Bool
1783 genericsEnabled  flags = testBit flags genericsBit
1784 parrEnabled :: Int -> Bool
1785 parrEnabled      flags = testBit flags parrBit
1786 arrowsEnabled :: Int -> Bool
1787 arrowsEnabled    flags = testBit flags arrowsBit
1788 thEnabled :: Int -> Bool
1789 thEnabled        flags = testBit flags thBit
1790 ipEnabled :: Int -> Bool
1791 ipEnabled        flags = testBit flags ipBit
1792 explicitForallEnabled :: Int -> Bool
1793 explicitForallEnabled flags = testBit flags explicitForallBit
1794 bangPatEnabled :: Int -> Bool
1795 bangPatEnabled   flags = testBit flags bangPatBit
1796 -- tyFamEnabled :: Int -> Bool
1797 -- tyFamEnabled     flags = testBit flags tyFamBit
1798 haddockEnabled :: Int -> Bool
1799 haddockEnabled   flags = testBit flags haddockBit
1800 magicHashEnabled :: Int -> Bool
1801 magicHashEnabled flags = testBit flags magicHashBit
1802 -- kindSigsEnabled :: Int -> Bool
1803 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1804 unicodeSyntaxEnabled :: Int -> Bool
1805 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1806 unboxedTuplesEnabled :: Int -> Bool
1807 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1808 datatypeContextsEnabled :: Int -> Bool
1809 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1810 qqEnabled :: Int -> Bool
1811 qqEnabled        flags = testBit flags qqBit
1812 -- inRulePrag :: Int -> Bool
1813 -- inRulePrag       flags = testBit flags inRulePragBit
1814 rawTokenStreamEnabled :: Int -> Bool
1815 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1816 alternativeLayoutRule :: Int -> Bool
1817 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1818 relaxedLayout :: Int -> Bool
1819 relaxedLayout flags = testBit flags relaxedLayoutBit
1820 nondecreasingIndentation :: Int -> Bool
1821 nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
1822
1823 -- PState for parsing options pragmas
1824 --
1825 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1826 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1827                                  lex_state = [bol, option_prags, 0]
1828                              }
1829
1830 -- create a parse state
1831 --
1832 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1833 mkPState flags buf loc =
1834   PState {
1835       buffer          = buf,
1836       dflags        = flags,
1837       messages      = emptyMessages,
1838       last_loc      = mkSrcSpan loc loc,
1839       last_len      = 0,
1840       loc           = loc,
1841       extsBitmap    = fromIntegral bitmap,
1842       context       = [],
1843       lex_state     = [bol, 0],
1844       alr_pending_implicit_tokens = [],
1845       alr_next_token = Nothing,
1846       alr_last_loc = noSrcSpan,
1847       alr_context = [],
1848       alr_expecting_ocurly = Nothing,
1849       alr_justClosedExplicitLetBlock = False
1850     }
1851     where
1852       bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
1853                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1854                .|. parrBit           `setBitIf` xopt Opt_ParallelArrays  flags
1855                .|. arrowsBit         `setBitIf` xopt Opt_Arrows          flags
1856                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1857                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes     flags
1858                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams  flags
1859                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll  flags
1860                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
1861                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
1862                .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
1863                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
1864                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
1865                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
1866                .|. recBit            `setBitIf` xopt Opt_DoRec  flags
1867                .|. recBit            `setBitIf` xopt Opt_Arrows flags
1868                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
1869                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
1870                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1871                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1872                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1873                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1874                .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
1875                .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
1876       --
1877       setBitIf :: Int -> Bool -> Int
1878       b `setBitIf` cond | cond      = bit b
1879                         | otherwise = 0
1880
1881 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1882 addWarning option srcspan warning
1883  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1884        let warning' = mkWarnMsg srcspan alwaysQualify warning
1885            ws' = if dopt option d then ws `snocBag` warning' else ws
1886        in POk s{messages=(ws', es)} ()
1887
1888 getMessages :: PState -> Messages
1889 getMessages PState{messages=ms} = ms
1890
1891 getContext :: P [LayoutContext]
1892 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1893
1894 setContext :: [LayoutContext] -> P ()
1895 setContext ctx = P $ \s -> POk s{context=ctx} ()
1896
1897 popContext :: P ()
1898 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1899                               last_len = len, last_loc = last_loc }) ->
1900   case ctx of
1901         (_:tl) -> POk s{ context = tl } ()
1902         []     -> PFailed last_loc (srcParseErr buf len)
1903
1904 -- Push a new layout context at the indentation of the last token read.
1905 -- This is only used at the outer level of a module when the 'module'
1906 -- keyword is missing.
1907 pushCurrentContext :: P ()
1908 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1909     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1910
1911 getOffside :: P Ordering
1912 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1913                 let offs = srcSpanStartCol loc in
1914                 let ord = case stk of
1915                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1916                                         compare offs n
1917                         _            -> GT
1918                 in POk s ord
1919
1920 -- ---------------------------------------------------------------------------
1921 -- Construct a parse error
1922
1923 srcParseErr
1924   :: StringBuffer       -- current buffer (placed just after the last token)
1925   -> Int                -- length of the previous token
1926   -> Message
1927 srcParseErr buf len
1928   = hcat [ if null token
1929              then ptext (sLit "parse error (possibly incorrect indentation)")
1930              else hcat [ptext (sLit "parse error on input "),
1931                         char '`', text token, char '\'']
1932     ]
1933   where token = lexemeToString (offsetBytes (-len) buf) len
1934
1935 -- Report a parse failure, giving the span of the previous token as
1936 -- the location of the error.  This is the entry point for errors
1937 -- detected during parsing.
1938 srcParseFail :: P a
1939 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1940                             last_loc = last_loc } ->
1941     PFailed last_loc (srcParseErr buf len)
1942
1943 -- A lexical error is reported at a particular position in the source file,
1944 -- not over a token range.
1945 lexError :: String -> P a
1946 lexError str = do
1947   loc <- getSrcLoc
1948   (AI end buf) <- getInput
1949   reportLexError loc end buf str
1950
1951 -- -----------------------------------------------------------------------------
1952 -- This is the top-level function: called from the parser each time a
1953 -- new token is to be read from the input.
1954
1955 lexer :: (Located Token -> P a) -> P a
1956 lexer cont = do
1957   alr <- extension alternativeLayoutRule
1958   let lexTokenFun = if alr then lexTokenAlr else lexToken
1959   tok@(L _span _tok__) <- lexTokenFun
1960   --trace ("token: " ++ show _tok__) $ do
1961   cont tok
1962
1963 lexTokenAlr :: P (Located Token)
1964 lexTokenAlr = do mPending <- popPendingImplicitToken
1965                  t <- case mPending of
1966                       Nothing ->
1967                           do mNext <- popNextToken
1968                              t <- case mNext of
1969                                   Nothing -> lexToken
1970                                   Just next -> return next
1971                              alternativeLayoutRuleToken t
1972                       Just t ->
1973                           return t
1974                  setAlrLastLoc (getLoc t)
1975                  case unLoc t of
1976                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
1977                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
1978                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
1979                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
1980                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1981                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1982                      _       -> return ()
1983                  return t
1984
1985 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
1986 alternativeLayoutRuleToken t
1987     = do context <- getALRContext
1988          lastLoc <- getAlrLastLoc
1989          mExpectingOCurly <- getAlrExpectingOCurly
1990          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
1991          setJustClosedExplicitLetBlock False
1992          dflags <- getDynFlags
1993          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
1994              thisLoc = getLoc t
1995              thisCol = srcSpanStartCol thisLoc
1996              newLine = (lastLoc == noSrcSpan)
1997                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
1998          case (unLoc t, context, mExpectingOCurly) of
1999              -- This case handles a GHC extension to the original H98
2000              -- layout rule...
2001              (ITocurly, _, Just alrLayout) ->
2002                  do setAlrExpectingOCurly Nothing
2003                     let isLet = case alrLayout of
2004                                 ALRLayoutLet -> True
2005                                 _ -> False
2006                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
2007                     return t
2008              -- ...and makes this case unnecessary
2009              {-
2010              -- I think our implicit open-curly handling is slightly
2011              -- different to John's, in how it interacts with newlines
2012              -- and "in"
2013              (ITocurly, _, Just _) ->
2014                  do setAlrExpectingOCurly Nothing
2015                     setNextToken t
2016                     lexTokenAlr
2017              -}
2018              (_, ALRLayout _ col : ls, Just expectingOCurly)
2019               | (thisCol > col) ||
2020                 (thisCol == col &&
2021                  isNonDecreasingIntentation expectingOCurly) ->
2022                  do setAlrExpectingOCurly Nothing
2023                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2024                     setNextToken t
2025                     return (L thisLoc ITocurly)
2026               | otherwise ->
2027                  do setAlrExpectingOCurly Nothing
2028                     setPendingImplicitTokens [L lastLoc ITccurly]
2029                     setNextToken t
2030                     return (L lastLoc ITocurly)
2031              (_, _, Just expectingOCurly) ->
2032                  do setAlrExpectingOCurly Nothing
2033                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2034                     setNextToken t
2035                     return (L thisLoc ITocurly)
2036              -- We do the [] cases earlier than in the spec, as we
2037              -- have an actual EOF token
2038              (ITeof, ALRLayout _ _ : ls, _) ->
2039                  do setALRContext ls
2040                     setNextToken t
2041                     return (L thisLoc ITccurly)
2042              (ITeof, _, _) ->
2043                  return t
2044              -- the other ITeof case omitted; general case below covers it
2045              (ITin, _, _)
2046               | justClosedExplicitLetBlock ->
2047                  return t
2048              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2049               | newLine ->
2050                  do setPendingImplicitTokens [t]
2051                     setALRContext ls
2052                     return (L thisLoc ITccurly)
2053              -- This next case is to handle a transitional issue:
2054              (ITwhere, ALRLayout _ col : ls, _)
2055               | newLine && thisCol == col && transitional ->
2056                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2057                                thisLoc
2058                                (transitionalAlternativeLayoutWarning
2059                                     "`where' clause at the same depth as implicit layout block")
2060                     setALRContext ls
2061                     setNextToken t
2062                     -- Note that we use lastLoc, as we may need to close
2063                     -- more layouts, or give a semicolon
2064                     return (L lastLoc ITccurly)
2065              -- This next case is to handle a transitional issue:
2066              (ITvbar, ALRLayout _ col : ls, _)
2067               | newLine && thisCol == col && transitional ->
2068                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2069                                thisLoc
2070                                (transitionalAlternativeLayoutWarning
2071                                     "`|' at the same depth as implicit layout block")
2072                     setALRContext ls
2073                     setNextToken t
2074                     -- Note that we use lastLoc, as we may need to close
2075                     -- more layouts, or give a semicolon
2076                     return (L lastLoc ITccurly)
2077              (_, ALRLayout _ col : ls, _)
2078               | newLine && thisCol == col ->
2079                  do setNextToken t
2080                     return (L thisLoc ITsemi)
2081               | newLine && thisCol < col ->
2082                  do setALRContext ls
2083                     setNextToken t
2084                     -- Note that we use lastLoc, as we may need to close
2085                     -- more layouts, or give a semicolon
2086                     return (L lastLoc ITccurly)
2087              -- We need to handle close before open, as 'then' is both
2088              -- an open and a close
2089              (u, _, _)
2090               | isALRclose u ->
2091                  case context of
2092                  ALRLayout _ _ : ls ->
2093                      do setALRContext ls
2094                         setNextToken t
2095                         return (L thisLoc ITccurly)
2096                  ALRNoLayout _ isLet : ls ->
2097                      do let ls' = if isALRopen u
2098                                      then ALRNoLayout (containsCommas u) False : ls
2099                                      else ls
2100                         setALRContext ls'
2101                         when isLet $ setJustClosedExplicitLetBlock True
2102                         return t
2103                  [] ->
2104                      do let ls = if isALRopen u
2105                                     then [ALRNoLayout (containsCommas u) False]
2106                                     else ls
2107                         setALRContext ls
2108                         -- XXX This is an error in John's code, but
2109                         -- it looks reachable to me at first glance
2110                         return t
2111              (u, _, _)
2112               | isALRopen u ->
2113                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2114                     return t
2115              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2116                  do setALRContext ls
2117                     setPendingImplicitTokens [t]
2118                     return (L thisLoc ITccurly)
2119              (ITin, ALRLayout _ _ : ls, _) ->
2120                  do setALRContext ls
2121                     setNextToken t
2122                     return (L thisLoc ITccurly)
2123              -- the other ITin case omitted; general case below covers it
2124              (ITcomma, ALRLayout _ _ : ls, _)
2125               | topNoLayoutContainsCommas ls ->
2126                  do setALRContext ls
2127                     setNextToken t
2128                     return (L thisLoc ITccurly)
2129              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2130                  do setALRContext ls
2131                     setPendingImplicitTokens [t]
2132                     return (L thisLoc ITccurly)
2133              -- the other ITwhere case omitted; general case below covers it
2134              (_, _, _) -> return t
2135
2136 transitionalAlternativeLayoutWarning :: String -> SDoc
2137 transitionalAlternativeLayoutWarning msg
2138     = text "transitional layout will not be accepted in the future:"
2139    $$ text msg
2140
2141 isALRopen :: Token -> Bool
2142 isALRopen ITcase        = True
2143 isALRopen ITif          = True
2144 isALRopen ITthen        = True
2145 isALRopen IToparen      = True
2146 isALRopen ITobrack      = True
2147 isALRopen ITocurly      = True
2148 -- GHC Extensions:
2149 isALRopen IToubxparen   = True
2150 isALRopen ITparenEscape = True
2151 isALRopen _             = False
2152
2153 isALRclose :: Token -> Bool
2154 isALRclose ITof     = True
2155 isALRclose ITthen   = True
2156 isALRclose ITelse   = True
2157 isALRclose ITcparen = True
2158 isALRclose ITcbrack = True
2159 isALRclose ITccurly = True
2160 -- GHC Extensions:
2161 isALRclose ITcubxparen = True
2162 isALRclose _        = False
2163
2164 isNonDecreasingIntentation :: ALRLayout -> Bool
2165 isNonDecreasingIntentation ALRLayoutDo = True
2166 isNonDecreasingIntentation _           = False
2167
2168 containsCommas :: Token -> Bool
2169 containsCommas IToparen = True
2170 containsCommas ITobrack = True
2171 -- John doesn't have {} as containing commas, but records contain them,
2172 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2173 -- (defaultInstallDirs).
2174 containsCommas ITocurly = True
2175 -- GHC Extensions:
2176 containsCommas IToubxparen = True
2177 containsCommas _        = False
2178
2179 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2180 topNoLayoutContainsCommas [] = False
2181 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2182 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2183
2184 lexToken :: P (Located Token)
2185 lexToken = do
2186   inp@(AI loc1 buf) <- getInput
2187   sc <- getLexState
2188   exts <- getExts
2189   case alexScanUser exts inp sc of
2190     AlexEOF -> do
2191         let span = mkSrcSpan loc1 loc1
2192         setLastToken span 0
2193         return (L span ITeof)
2194     AlexError (AI loc2 buf) ->
2195         reportLexError loc1 loc2 buf "lexical error"
2196     AlexSkip inp2 _ -> do
2197         setInput inp2
2198         lexToken
2199     AlexToken inp2@(AI end buf2) _ t -> do
2200         setInput inp2
2201         let span = mkSrcSpan loc1 end
2202         let bytes = byteDiff buf buf2
2203         span `seq` setLastToken span bytes
2204         t span buf bytes
2205
2206 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2207 reportLexError loc1 loc2 buf str
2208   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2209   | otherwise =
2210   let
2211         c = fst (nextChar buf)
2212   in
2213   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2214     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2215     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2216
2217 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2218 lexTokenStream buf loc dflags = unP go initState
2219     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2220           initState = mkPState dflags' buf loc
2221           go = do
2222             ltok <- lexer return
2223             case ltok of
2224               L _ ITeof -> return []
2225               _ -> liftM (ltok:) go
2226
2227 linePrags = Map.singleton "line" (begin line_prag2)
2228
2229 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2230                                  ("options_ghc", lex_string_prag IToptions_prag),
2231                                  ("options_haddock", lex_string_prag ITdocOptions),
2232                                  ("language", token ITlanguage_prag),
2233                                  ("include", lex_string_prag ITinclude_prag)])
2234
2235 ignoredPrags = Map.fromList (map ignored pragmas)
2236                where ignored opt = (opt, nested_comment lexToken)
2237                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2238                      options_pragmas = map ("options_" ++) impls
2239                      -- CFILES is a hugs-only thing.
2240                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2241
2242 oneWordPrags = Map.fromList([("rules", rulePrag),
2243                            ("inline", token (ITinline_prag Inline FunLike)),
2244                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2245                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2246                                           -- Spelling variant
2247                            ("notinline", token (ITinline_prag NoInline FunLike)),
2248                            ("specialize", token ITspec_prag),
2249                            ("source", token ITsource_prag),
2250                            ("warning", token ITwarning_prag),
2251                            ("deprecated", token ITdeprecated_prag),
2252                            ("scc", token ITscc_prag),
2253                            ("generated", token ITgenerated_prag),
2254                            ("core", token ITcore_prag),
2255                            ("unpack", token ITunpack_prag),
2256                            ("ann", token ITann_prag)])
2257
2258 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2259                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2260                              ("specialize inline", token (ITspec_inline_prag True)),
2261                              ("specialize notinline", token (ITspec_inline_prag False))])
2262
2263
2264 dispatch_pragmas :: Map String Action -> Action
2265 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2266                                        Just found -> found span buf len
2267                                        Nothing -> lexError "unknown pragma"
2268
2269 known_pragma :: Map String Action -> AlexAccPred Int
2270 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2271                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2272
2273 clean_pragma :: String -> String
2274 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2275                     where unprefix prag' = case stripPrefix "{-#" prag' of
2276                                              Just rest -> rest
2277                                              Nothing -> prag'
2278                           canonical prag' = case prag' of
2279                                               "noinline" -> "notinline"
2280                                               "specialise" -> "specialize"
2281                                               "constructorlike" -> "conlike"
2282                                               _ -> prag'
2283                           canon_ws s = unwords (map canonical (words s))
2284 }