compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# LANGUAGE BangPatterns #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  38 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  39 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  40 -- But alex still generates some code that causes the "lazy unlifted bindings"
  41 -- warning, and old compilers don't know about it so we can't easily turn
  42 -- it off, so for now we use the sledge hammer:
  43 {-# OPTIONS_GHC -w #-}
  44
  45 {-# OPTIONS_GHC -funbox-strict-fields #-}
  46
  47 module Lexer (
  48    Token(..), lexer, pragState, mkPState, PState(..),
  49    P(..), ParseResult(..), getSrcLoc,
  50    getPState, getDynFlags, withThisPackage,
  51    failLocMsgP, failSpanMsgP, srcParseFail,
  52    getMessages,
  53    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  54    getLexState, popLexState, pushLexState,
  55    extension, bangPatEnabled, datatypeContextsEnabled,
  56    addWarning,
  57    lexTokenStream
  58   ) where
  59
  60 import Bag
  61 import ErrUtils
  62 import Outputable
  63 import StringBuffer
  64 import FastString
  65 import SrcLoc
  66 import UniqFM
  67 import DynFlags
  68 import Module
  69 import Ctype
  70 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..) )
  71 import Util             ( readRational )
  72
  73 import Control.Monad
  74 import Data.Bits
  75 import Data.Char
  76 import Data.List
  77 import Data.Maybe
  78 import Data.Map (Map)
  79 import qualified Data.Map as Map
  80 import Data.Ratio
  81 }
  82
  83 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  84 $whitechar   = [\ \n\r\f\v $unispace]
  85 $white_no_nl = $whitechar # \n
  86 $tab         = \t
  87
  88 $ascdigit  = 0-9
  89 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  90 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  91 $digit     = [$ascdigit $unidigit]
  92
  93 $special   = [\(\)\,\;\[\]\`\{\}]
  94 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  95 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  96 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  97
  98 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  99 $asclarge  = [A-Z]
 100 $large     = [$asclarge $unilarge]
 101
 102 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 103 $ascsmall  = [a-z]
 104 $small     = [$ascsmall $unismall \_]
 105
 106 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 107 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 108
 109 $octit     = 0-7
 110 $hexit     = [$decdigit A-F a-f]
 111 $symchar   = [$symbol \:]
 112 $nl        = [\n\r]
 113 $idchar    = [$small $large $digit \']
 114
 115 $pragmachar = [$small $large $digit]
 116
 117 $docsym    = [\| \^ \* \$]
 118
 119 @varid     = $small $idchar*
 120 @conid     = $large $idchar*
 121
 122 @varsym    = $symbol $symchar*
 123 @consym    = \: $symchar*
 124
 125 @decimal     = $decdigit+
 126 @octal       = $octit+
 127 @hexadecimal = $hexit+
 128 @exponent    = [eE] [\-\+]? @decimal
 129
 130 -- we support the hierarchical module name extension:
 131 @qual = (@conid \.)+
 132
 133 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 134
 135 -- normal signed numerical literals can only be explicitly negative,
 136 -- not explicitly positive (contrast @exponent)
 137 @negative = \-
 138 @signed = @negative ?
 139
 140 haskell :-
 141
 142 -- everywhere: skip whitespace and comments
 143 $white_no_nl+                           ;
 144 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 145
 146 -- Everywhere: deal with nested comments.  We explicitly rule out
 147 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 148 -- (this can happen even though pragmas will normally take precedence due to
 149 -- longest-match, because pragmas aren't valid in every state, but comments
 150 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 151 -- set.
 152
 153 "{-" / { isNormalComment } { nested_comment lexToken }
 154
 155 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 156 -- more dashes followed by a symbol should be parsed as a varsym, so we
 157 -- have to exclude those.
 158
 159 -- Since Haddock comments aren't valid in every state, we need to rule them
 160 -- out here.
 161
 162 -- The following two rules match comments that begin with two dashes, but
 163 -- continue with a different character. The rules test that this character
 164 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 165 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 166 -- have a Haddock comment). The rules then munch the rest of the line.
 167
 168 "-- " ~[$docsym \#] .* { lineCommentToken }
 169 "--" [^$symbol : \ ] .* { lineCommentToken }
 170
 171 -- Next, match Haddock comments if no -haddock flag
 172
 173 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 174
 175 -- Now, when we've matched comments that begin with 2 dashes and continue
 176 -- with a different character, we need to match comments that begin with three
 177 -- or more dashes (which clearly can't be Haddock comments). We only need to
 178 -- make sure that the first non-dash character isn't a symbol, and munch the
 179 -- rest of the line.
 180
 181 "---"\-* [^$symbol :] .* { lineCommentToken }
 182
 183 -- Since the previous rules all match dashes followed by at least one
 184 -- character, we also need to match a whole line filled with just dashes.
 185
 186 "--"\-* / { atEOL } { lineCommentToken }
 187
 188 -- We need this rule since none of the other single line comment rules
 189 -- actually match this case.
 190
 191 "-- " / { atEOL } { lineCommentToken }
 192
 193 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 194 -- blank lines) until we find a non-whitespace character, then do layout
 195 -- processing.
 196 --
 197 -- One slight wibble here: what if the line begins with {-#? In
 198 -- theory, we have to lex the pragma to see if it's one we recognise,
 199 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 200 -- as a nested comment.  We don't bother with this: if the line begins
 201 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 202 <bol> {
 203   \n                                    ;
 204   ^\# (line)?                           { begin line_prag1 }
 205   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 206   ^\# \! .* \n                          ; -- #!, for scripts
 207   ()                                    { do_bol }
 208 }
 209
 210 -- after a layout keyword (let, where, do, of), we begin a new layout
 211 -- context if the curly brace is missing.
 212 -- Careful! This stuff is quite delicate.
 213 <layout, layout_do> {
 214   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 215         -- we might encounter {-# here, but {- has been handled already
 216   \n                                    ;
 217   ^\# (line)?                           { begin line_prag1 }
 218 }
 219
 220 -- do is treated in a subtly different way, see new_layout_context
 221 <layout>    ()                          { new_layout_context True }
 222 <layout_do> ()                          { new_layout_context False }
 223
 224 -- after a new layout context which was found to be to the left of the
 225 -- previous context, we have generated a '{' token, and we now need to
 226 -- generate a matching '}' token.
 227 <layout_left>  ()                       { do_layout_left }
 228
 229 <0,option_prags> \n                             { begin bol }
 230
 231 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 232                                 { dispatch_pragmas linePrags }
 233
 234 -- single-line line pragmas, of the form
 235 --    # <line> "<file>" <extra-stuff> \n
 236 <line_prag1> $decdigit+                 { setLine line_prag1a }
 237 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 238 <line_prag1b> .*                        { pop }
 239
 240 -- Haskell-style line pragmas, of the form
 241 --    {-# LINE <line> "<file>" #-}
 242 <line_prag2> $decdigit+                 { setLine line_prag2a }
 243 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 244 <line_prag2b> "#-}"|"-}"                { pop }
 245    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 246    -- with older versions of GHC which generated these.
 247
 248 <0,option_prags> {
 249   "{-#" $whitechar* $pragmachar+
 250         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 251                                  { dispatch_pragmas twoWordPrags }
 252
 253   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 254                                  { dispatch_pragmas oneWordPrags }
 255
 256   -- We ignore all these pragmas, but don't generate a warning for them
 257   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 258                                  { dispatch_pragmas ignoredPrags }
 259
 260   -- ToDo: should only be valid inside a pragma:
 261   "#-}"                                 { endPrag }
 262 }
 263
 264 <option_prags> {
 265   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 266                                    { dispatch_pragmas fileHeaderPrags }
 267
 268   "-- #"                                 { multiline_doc_comment }
 269 }
 270
 271 <0> {
 272   -- In the "0" mode we ignore these pragmas
 273   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 274                      { nested_comment lexToken }
 275 }
 276
 277 <0> {
 278   "-- #" .* { lineCommentToken }
 279 }
 280
 281 <0,option_prags> {
 282   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 283                     (nested_comment lexToken) }
 284 }
 285
 286 -- '0' state: ordinary lexemes
 287
 288 -- Haddock comments
 289
 290 <0,option_prags> {
 291   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 292   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 293 }
 294
 295 -- "special" symbols
 296
 297 <0> {
 298   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 299   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 300 }
 301
 302 <0> {
 303   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 304   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 305   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 306   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 307   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 308   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 309   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 310   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 311
 312 -- For backward compatibility, accept the old dollar syntax
 313   "[$" @varid "|"  / { ifExtension qqEnabled }
 314                      { lex_quasiquote_tok }
 315
 316   "[" @varid "|"  / { ifExtension qqEnabled }
 317                      { lex_quasiquote_tok }
 318 }
 319
 320 <0> {
 321   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 322                                         { special IToparenbar }
 323   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 324 }
 325
 326 <0> {
 327   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 328 }
 329
 330 <0> {
 331   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 332          { token IToubxparen }
 333   "#)" / { ifExtension unboxedTuplesEnabled }
 334          { token ITcubxparen }
 335 }
 336
 337 <0> {
 338   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 339   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 340 }
 341
 342 <0,option_prags> {
 343   \(                                    { special IToparen }
 344   \)                                    { special ITcparen }
 345   \[                                    { special ITobrack }
 346   \]                                    { special ITcbrack }
 347   \,                                    { special ITcomma }
 348   \;                                    { special ITsemi }
 349   \`                                    { special ITbackquote }
 350
 351   \{                                    { open_brace }
 352   \}                                    { close_brace }
 353 }
 354
 355 <0,option_prags> {
 356   @qual @varid                  { idtoken qvarid }
 357   @qual @conid                  { idtoken qconid }
 358   @varid                        { varid }
 359   @conid                        { idtoken conid }
 360 }
 361
 362 <0> {
 363   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 364   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 365   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 366   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 367 }
 368
 369 -- ToDo: - move `var` and (sym) into lexical syntax?
 370 --       - remove backquote from $special?
 371 <0> {
 372   @qual @varsym       / { ifExtension oldQualOps } { idtoken qvarsym }
 373   @qual @consym       / { ifExtension oldQualOps } { idtoken qconsym }
 374   @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
 375   @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
 376   @varsym                                          { varsym }
 377   @consym                                          { consym }
 378 }
 379
 380 -- For the normal boxed literals we need to be careful
 381 -- when trying to be close to Haskell98
 382 <0> {
 383   -- Normal integral literals (:: Num a => a, from Integer)
 384   @decimal           { tok_num positive 0 0 decimal }
 385   0[oO] @octal       { tok_num positive 2 2 octal }
 386   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 387
 388   -- Normal rational literals (:: Fractional a => a, from Rational)
 389   @floating_point    { strtoken tok_float }
 390 }
 391
 392 <0> {
 393   -- Unboxed ints (:: Int#) and words (:: Word#)
 394   -- It's simpler (and faster?) to give separate cases to the negatives,
 395   -- especially considering octal/hexadecimal prefixes.
 396   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 397   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 398   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 399   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 400   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 401   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 402
 403   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 404   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 405   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 406
 407   -- Unboxed floats and doubles (:: Float#, :: Double#)
 408   -- prim_{float,double} work with signed literals
 409   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 410   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 411 }
 412
 413 -- Strings and chars are lexed by hand-written code.  The reason is
 414 -- that even if we recognise the string or char here in the regex
 415 -- lexer, we would still have to parse the string afterward in order
 416 -- to convert it to a String.
 417 <0> {
 418   \'                            { lex_char_tok }
 419   \"                            { lex_string_tok }
 420 }
 421
 422 {
 423 -- -----------------------------------------------------------------------------
 424 -- The token type
 425
 426 data Token
 427   = ITas                        -- Haskell keywords
 428   | ITcase
 429   | ITclass
 430   | ITdata
 431   | ITdefault
 432   | ITderiving
 433   | ITdo
 434   | ITelse
 435   | IThiding
 436   | ITif
 437   | ITimport
 438   | ITin
 439   | ITinfix
 440   | ITinfixl
 441   | ITinfixr
 442   | ITinstance
 443   | ITlet
 444   | ITmodule
 445   | ITnewtype
 446   | ITof
 447   | ITqualified
 448   | ITthen
 449   | ITtype
 450   | ITwhere
 451   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 452
 453   | ITforall                    -- GHC extension keywords
 454   | ITforeign
 455   | ITexport
 456   | ITlabel
 457   | ITdynamic
 458   | ITsafe
 459   | ITthreadsafe
 460   | ITinterruptible
 461   | ITunsafe
 462   | ITstdcallconv
 463   | ITccallconv
 464   | ITprimcallconv
 465   | ITmdo
 466   | ITfamily
 467   | ITgroup
 468   | ITby
 469   | ITusing
 470
 471         -- Pragmas
 472   | ITinline_prag InlineSpec RuleMatchInfo
 473   | ITspec_prag                 -- SPECIALISE
 474   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 475   | ITsource_prag
 476   | ITrules_prag
 477   | ITwarning_prag
 478   | ITdeprecated_prag
 479   | ITline_prag
 480   | ITscc_prag
 481   | ITgenerated_prag
 482   | ITcore_prag                 -- hdaume: core annotations
 483   | ITunpack_prag
 484   | ITann_prag
 485   | ITclose_prag
 486   | IToptions_prag String
 487   | ITinclude_prag String
 488   | ITlanguage_prag
 489
 490   | ITdotdot                    -- reserved symbols
 491   | ITcolon
 492   | ITdcolon
 493   | ITequal
 494   | ITlam
 495   | ITvbar
 496   | ITlarrow
 497   | ITrarrow
 498   | ITat
 499   | ITtilde
 500   | ITdarrow
 501   | ITminus
 502   | ITbang
 503   | ITstar
 504   | ITdot
 505
 506   | ITbiglam                    -- GHC-extension symbols
 507
 508   | ITocurly                    -- special symbols
 509   | ITccurly
 510   | ITocurlybar                 -- {|, for type applications
 511   | ITccurlybar                 -- |}, for type applications
 512   | ITvocurly
 513   | ITvccurly
 514   | ITobrack
 515   | ITopabrack                  -- [:, for parallel arrays with -XParr
 516   | ITcpabrack                  -- :], for parallel arrays with -XParr
 517   | ITcbrack
 518   | IToparen
 519   | ITcparen
 520   | IToubxparen
 521   | ITcubxparen
 522   | ITsemi
 523   | ITcomma
 524   | ITunderscore
 525   | ITbackquote
 526
 527   | ITvarid   FastString        -- identifiers
 528   | ITconid   FastString
 529   | ITvarsym  FastString
 530   | ITconsym  FastString
 531   | ITqvarid  (FastString,FastString)
 532   | ITqconid  (FastString,FastString)
 533   | ITqvarsym (FastString,FastString)
 534   | ITqconsym (FastString,FastString)
 535   | ITprefixqvarsym (FastString,FastString)
 536   | ITprefixqconsym (FastString,FastString)
 537
 538   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 539
 540   | ITchar       Char
 541   | ITstring     FastString
 542   | ITinteger    Integer
 543   | ITrational   Rational
 544
 545   | ITprimchar   Char
 546   | ITprimstring FastString
 547   | ITprimint    Integer
 548   | ITprimword   Integer
 549   | ITprimfloat  Rational
 550   | ITprimdouble Rational
 551
 552   -- Template Haskell extension tokens
 553   | ITopenExpQuote              --  [| or [e|
 554   | ITopenPatQuote              --  [p|
 555   | ITopenDecQuote              --  [d|
 556   | ITopenTypQuote              --  [t|
 557   | ITcloseQuote                --  |]
 558   | ITidEscape   FastString     --  $x
 559   | ITparenEscape               --  $(
 560   | ITvarQuote                  --  '
 561   | ITtyQuote                   --  ''
 562   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 563
 564   -- Arrow notation extension
 565   | ITproc
 566   | ITrec
 567   | IToparenbar                 --  (|
 568   | ITcparenbar                 --  |)
 569   | ITlarrowtail                --  -<
 570   | ITrarrowtail                --  >-
 571   | ITLarrowtail                --  -<<
 572   | ITRarrowtail                --  >>-
 573
 574   | ITunknown String            -- Used when the lexer can't make sense of it
 575   | ITeof                       -- end of file token
 576
 577   -- Documentation annotations
 578   | ITdocCommentNext  String     -- something beginning '-- |'
 579   | ITdocCommentPrev  String     -- something beginning '-- ^'
 580   | ITdocCommentNamed String     -- something beginning '-- $'
 581   | ITdocSection      Int String -- a section heading
 582   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 583   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 584   | ITlineComment     String     -- comment starting by "--"
 585   | ITblockComment    String     -- comment in {- -}
 586
 587 #ifdef DEBUG
 588   deriving Show -- debugging
 589 #endif
 590
 591 {-
 592 isSpecial :: Token -> Bool
 593 -- If we see M.x, where x is a keyword, but
 594 -- is special, we treat is as just plain M.x,
 595 -- not as a keyword.
 596 isSpecial ITas          = True
 597 isSpecial IThiding      = True
 598 isSpecial ITqualified   = True
 599 isSpecial ITforall      = True
 600 isSpecial ITexport      = True
 601 isSpecial ITlabel       = True
 602 isSpecial ITdynamic     = True
 603 isSpecial ITsafe        = True
 604 isSpecial ITthreadsafe  = True
 605 isSpecial ITinterruptible = True
 606 isSpecial ITunsafe      = True
 607 isSpecial ITccallconv   = True
 608 isSpecial ITstdcallconv = True
 609 isSpecial ITprimcallconv = True
 610 isSpecial ITmdo         = True
 611 isSpecial ITfamily      = True
 612 isSpecial ITgroup   = True
 613 isSpecial ITby      = True
 614 isSpecial ITusing   = True
 615 isSpecial _             = False
 616 -}
 617
 618 -- the bitmap provided as the third component indicates whether the
 619 -- corresponding extension keyword is valid under the extension options
 620 -- provided to the compiler; if the extension corresponding to *any* of the
 621 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 622 -- facilitates using a keyword in two different extensions that can be
 623 -- activated independently)
 624 --
 625 reservedWordsFM :: UniqFM (Token, Int)
 626 reservedWordsFM = listToUFM $
 627         map (\(x, y, z) -> (mkFastString x, (y, z)))
 628        [( "_",          ITunderscore,   0 ),
 629         ( "as",         ITas,           0 ),
 630         ( "case",       ITcase,         0 ),
 631         ( "class",      ITclass,        0 ),
 632         ( "data",       ITdata,         0 ),
 633         ( "default",    ITdefault,      0 ),
 634         ( "deriving",   ITderiving,     0 ),
 635         ( "do",         ITdo,           0 ),
 636         ( "else",       ITelse,         0 ),
 637         ( "hiding",     IThiding,       0 ),
 638         ( "if",         ITif,           0 ),
 639         ( "import",     ITimport,       0 ),
 640         ( "in",         ITin,           0 ),
 641         ( "infix",      ITinfix,        0 ),
 642         ( "infixl",     ITinfixl,       0 ),
 643         ( "infixr",     ITinfixr,       0 ),
 644         ( "instance",   ITinstance,     0 ),
 645         ( "let",        ITlet,          0 ),
 646         ( "module",     ITmodule,       0 ),
 647         ( "newtype",    ITnewtype,      0 ),
 648         ( "of",         ITof,           0 ),
 649         ( "qualified",  ITqualified,    0 ),
 650         ( "then",       ITthen,         0 ),
 651         ( "type",       ITtype,         0 ),
 652         ( "where",      ITwhere,        0 ),
 653         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 654
 655     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 656         ( "mdo",        ITmdo,           bit recursiveDoBit),
 657         ( "family",     ITfamily,        bit tyFamBit),
 658     ( "group",  ITgroup,     bit transformComprehensionsBit),
 659     ( "by",     ITby,        bit transformComprehensionsBit),
 660     ( "using",  ITusing,     bit transformComprehensionsBit),
 661
 662         ( "foreign",    ITforeign,       bit ffiBit),
 663         ( "export",     ITexport,        bit ffiBit),
 664         ( "label",      ITlabel,         bit ffiBit),
 665         ( "dynamic",    ITdynamic,       bit ffiBit),
 666         ( "safe",       ITsafe,          bit ffiBit),
 667         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 668         ( "interruptible", ITinterruptible, bit ffiBit),
 669         ( "unsafe",     ITunsafe,        bit ffiBit),
 670         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 671         ( "ccall",      ITccallconv,     bit ffiBit),
 672         ( "prim",       ITprimcallconv,  bit ffiBit),
 673
 674         ( "rec",        ITrec,           bit recBit),
 675         ( "proc",       ITproc,          bit arrowsBit)
 676      ]
 677
 678 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 679 reservedSymsFM = listToUFM $
 680     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 681       [ ("..",  ITdotdot,   always)
 682         -- (:) is a reserved op, meaning only list cons
 683        ,(":",   ITcolon,    always)
 684        ,("::",  ITdcolon,   always)
 685        ,("=",   ITequal,    always)
 686        ,("\\",  ITlam,      always)
 687        ,("|",   ITvbar,     always)
 688        ,("<-",  ITlarrow,   always)
 689        ,("->",  ITrarrow,   always)
 690        ,("@",   ITat,       always)
 691        ,("~",   ITtilde,    always)
 692        ,("=>",  ITdarrow,   always)
 693        ,("-",   ITminus,    always)
 694        ,("!",   ITbang,     always)
 695
 696         -- For data T (a::*) = MkT
 697        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 698         -- For 'forall a . t'
 699        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 700
 701        ,("-<",  ITlarrowtail, arrowsEnabled)
 702        ,(">-",  ITrarrowtail, arrowsEnabled)
 703        ,("-<<", ITLarrowtail, arrowsEnabled)
 704        ,(">>-", ITRarrowtail, arrowsEnabled)
 705
 706        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 707        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 708        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 709                                 explicitForallEnabled i)
 710        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 711        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 712
 713        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 714        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 716        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 717
 718        ,("★", ITstar, unicodeSyntaxEnabled)
 719
 720         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 721         -- form part of a large operator.  This would let us have a better
 722         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 723        ]
 724
 725 -- -----------------------------------------------------------------------------
 726 -- Lexer actions
 727
 728 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 729
 730 special :: Token -> Action
 731 special tok span _buf _len = return (L span tok)
 732
 733 token, layout_token :: Token -> Action
 734 token t span _buf _len = return (L span t)
 735 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 736
 737 idtoken :: (StringBuffer -> Int -> Token) -> Action
 738 idtoken f span buf len = return (L span $! (f buf len))
 739
 740 skip_one_varid :: (FastString -> Token) -> Action
 741 skip_one_varid f span buf len
 742   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 743
 744 strtoken :: (String -> Token) -> Action
 745 strtoken f span buf len =
 746   return (L span $! (f $! lexemeToString buf len))
 747
 748 init_strtoken :: Int -> (String -> Token) -> Action
 749 -- like strtoken, but drops the last N character(s)
 750 init_strtoken drop f span buf len =
 751   return (L span $! (f $! lexemeToString buf (len-drop)))
 752
 753 begin :: Int -> Action
 754 begin code _span _str _len = do pushLexState code; lexToken
 755
 756 pop :: Action
 757 pop _span _buf _len = do _ <- popLexState
 758                          lexToken
 759
 760 pop_and :: Action -> Action
 761 pop_and act span buf len = do _ <- popLexState
 762                               act span buf len
 763
 764 {-# INLINE nextCharIs #-}
 765 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 766 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 767
 768 notFollowedBy :: Char -> AlexAccPred Int
 769 notFollowedBy char _ _ _ (AI _ buf)
 770   = nextCharIs buf (/=char)
 771
 772 notFollowedBySymbol :: AlexAccPred Int
 773 notFollowedBySymbol _ _ _ (AI _ buf)
 774   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 775
 776 -- We must reject doc comments as being ordinary comments everywhere.
 777 -- In some cases the doc comment will be selected as the lexeme due to
 778 -- maximal munch, but not always, because the nested comment rule is
 779 -- valid in all states, but the doc-comment rules are only valid in
 780 -- the non-layout states.
 781 isNormalComment :: AlexAccPred Int
 782 isNormalComment bits _ _ (AI _ buf)
 783   | haddockEnabled bits = notFollowedByDocOrPragma
 784   | otherwise           = nextCharIs buf (/='#')
 785   where
 786     notFollowedByDocOrPragma
 787        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 788
 789 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 790 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 791
 792 {-
 793 haddockDisabledAnd p bits _ _ (AI _ buf)
 794   = if haddockEnabled bits then False else (p buf)
 795 -}
 796
 797 atEOL :: AlexAccPred Int
 798 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 799
 800 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 801 ifExtension pred bits _ _ _ = pred bits
 802
 803 multiline_doc_comment :: Action
 804 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 805   where
 806     worker commentAcc input docType oneLine = case alexGetChar input of
 807       Just ('\n', input')
 808         | oneLine -> docCommentEnd input commentAcc docType buf span
 809         | otherwise -> case checkIfCommentLine input' of
 810           Just input -> worker ('\n':commentAcc) input docType False
 811           Nothing -> docCommentEnd input commentAcc docType buf span
 812       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 813       Nothing -> docCommentEnd input commentAcc docType buf span
 814
 815     checkIfCommentLine input = check (dropNonNewlineSpace input)
 816       where
 817         check input = case alexGetChar input of
 818           Just ('-', input) -> case alexGetChar input of
 819             Just ('-', input) -> case alexGetChar input of
 820               Just (c, _) | c /= '-' -> Just input
 821               _ -> Nothing
 822             _ -> Nothing
 823           _ -> Nothing
 824
 825         dropNonNewlineSpace input = case alexGetChar input of
 826           Just (c, input')
 827             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 828             | otherwise -> input
 829           Nothing -> input
 830
 831 lineCommentToken :: Action
 832 lineCommentToken span buf len = do
 833   b <- extension rawTokenStreamEnabled
 834   if b then strtoken ITlineComment span buf len else lexToken
 835
 836 {-
 837   nested comments require traversing by hand, they can't be parsed
 838   using regular expressions.
 839 -}
 840 nested_comment :: P (Located Token) -> Action
 841 nested_comment cont span _str _len = do
 842   input <- getInput
 843   go "" (1::Int) input
 844   where
 845     go commentAcc 0 input = do setInput input
 846                                b <- extension rawTokenStreamEnabled
 847                                if b
 848                                  then docCommentEnd input commentAcc ITblockComment _str span
 849                                  else cont
 850     go commentAcc n input = case alexGetChar input of
 851       Nothing -> errBrace input span
 852       Just ('-',input) -> case alexGetChar input of
 853         Nothing  -> errBrace input span
 854         Just ('\125',input) -> go commentAcc (n-1) input
 855         Just (_,_)          -> go ('-':commentAcc) n input
 856       Just ('\123',input) -> case alexGetChar input of
 857         Nothing  -> errBrace input span
 858         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 859         Just (_,_)       -> go ('\123':commentAcc) n input
 860       Just (c,input) -> go (c:commentAcc) n input
 861
 862 nested_doc_comment :: Action
 863 nested_doc_comment span buf _len = withLexedDocType (go "")
 864   where
 865     go commentAcc input docType _ = case alexGetChar input of
 866       Nothing -> errBrace input span
 867       Just ('-',input) -> case alexGetChar input of
 868         Nothing -> errBrace input span
 869         Just ('\125',input) ->
 870           docCommentEnd input commentAcc docType buf span
 871         Just (_,_) -> go ('-':commentAcc) input docType False
 872       Just ('\123', input) -> case alexGetChar input of
 873         Nothing  -> errBrace input span
 874         Just ('-',input) -> do
 875           setInput input
 876           let cont = do input <- getInput; go commentAcc input docType False
 877           nested_comment cont span buf _len
 878         Just (_,_) -> go ('\123':commentAcc) input docType False
 879       Just (c,input) -> go (c:commentAcc) input docType False
 880
 881 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 882                  -> P (Located Token)
 883 withLexedDocType lexDocComment = do
 884   input@(AI _ buf) <- getInput
 885   case prevChar buf ' ' of
 886     '|' -> lexDocComment input ITdocCommentNext False
 887     '^' -> lexDocComment input ITdocCommentPrev False
 888     '$' -> lexDocComment input ITdocCommentNamed False
 889     '*' -> lexDocSection 1 input
 890     '#' -> lexDocComment input ITdocOptionsOld False
 891     _ -> panic "withLexedDocType: Bad doc type"
 892  where
 893     lexDocSection n input = case alexGetChar input of
 894       Just ('*', input) -> lexDocSection (n+1) input
 895       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 896       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 897
 898 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 899 -- off again at the end of the pragma.
 900 rulePrag :: Action
 901 rulePrag span _buf _len = do
 902   setExts (.|. bit inRulePragBit)
 903   return (L span ITrules_prag)
 904
 905 endPrag :: Action
 906 endPrag span _buf _len = do
 907   setExts (.&. complement (bit inRulePragBit))
 908   return (L span ITclose_prag)
 909
 910 -- docCommentEnd
 911 -------------------------------------------------------------------------------
 912 -- This function is quite tricky. We can't just return a new token, we also
 913 -- need to update the state of the parser. Why? Because the token is longer
 914 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 915 -- it writes the wrong token length to the parser state. This function is
 916 -- called afterwards, so it can just update the state.
 917
 918 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 919                  SrcSpan -> P (Located Token)
 920 docCommentEnd input commentAcc docType buf span = do
 921   setInput input
 922   let (AI loc nextBuf) = input
 923       comment = reverse commentAcc
 924       span' = mkSrcSpan (srcSpanStart span) loc
 925       last_len = byteDiff buf nextBuf
 926
 927   span `seq` setLastToken span' last_len
 928   return (L span' (docType comment))
 929
 930 errBrace :: AlexInput -> SrcSpan -> P a
 931 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 932
 933 open_brace, close_brace :: Action
 934 open_brace span _str _len = do
 935   ctx <- getContext
 936   setContext (NoLayout:ctx)
 937   return (L span ITocurly)
 938 close_brace span _str _len = do
 939   popContext
 940   return (L span ITccurly)
 941
 942 qvarid, qconid :: StringBuffer -> Int -> Token
 943 qvarid buf len = ITqvarid $! splitQualName buf len False
 944 qconid buf len = ITqconid $! splitQualName buf len False
 945
 946 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 947 -- takes a StringBuffer and a length, and returns the module name
 948 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 949 -- because of hierarchical module names.
 950 splitQualName orig_buf len parens = split orig_buf orig_buf
 951   where
 952     split buf dot_buf
 953         | orig_buf `byteDiff` buf >= len  = done dot_buf
 954         | c == '.'                        = found_dot buf'
 955         | otherwise                       = split buf' dot_buf
 956       where
 957        (c,buf') = nextChar buf
 958
 959     -- careful, we might get names like M....
 960     -- so, if the character after the dot is not upper-case, this is
 961     -- the end of the qualifier part.
 962     found_dot buf -- buf points after the '.'
 963         | isUpper c    = split buf' buf
 964         | otherwise    = done buf
 965       where
 966        (c,buf') = nextChar buf
 967
 968     done dot_buf =
 969         (lexemeToFastString orig_buf (qual_size - 1),
 970          if parens -- Prelude.(+)
 971             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 972             else lexemeToFastString dot_buf (len - qual_size))
 973       where
 974         qual_size = orig_buf `byteDiff` dot_buf
 975
 976 varid :: Action
 977 varid span buf len =
 978   fs `seq`
 979   case lookupUFM reservedWordsFM fs of
 980         Just (keyword,0)    -> do
 981                 maybe_layout keyword
 982                 return (L span keyword)
 983         Just (keyword,exts) -> do
 984                 b <- extension (\i -> exts .&. i /= 0)
 985                 if b then do maybe_layout keyword
 986                              return (L span keyword)
 987                      else return (L span (ITvarid fs))
 988         _other -> return (L span (ITvarid fs))
 989   where
 990         fs = lexemeToFastString buf len
 991
 992 conid :: StringBuffer -> Int -> Token
 993 conid buf len = ITconid fs
 994   where fs = lexemeToFastString buf len
 995
 996 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
 997 qvarsym buf len = ITqvarsym $! splitQualName buf len False
 998 qconsym buf len = ITqconsym $! splitQualName buf len False
 999 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
1000 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1001
1002 varsym, consym :: Action
1003 varsym = sym ITvarsym
1004 consym = sym ITconsym
1005
1006 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1007     -> P (Located Token)
1008 sym con span buf len =
1009   case lookupUFM reservedSymsFM fs of
1010         Just (keyword,exts) -> do
1011                 b <- extension exts
1012                 if b then return (L span keyword)
1013                      else return (L span $! con fs)
1014         _other -> return (L span $! con fs)
1015   where
1016         fs = lexemeToFastString buf len
1017
1018 -- Variations on the integral numeric literal.
1019 tok_integral :: (Integer -> Token)
1020      -> (Integer -> Integer)
1021  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1022      -> Int -> Int
1023      -> (Integer, (Char->Int)) -> Action
1024 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1025   return $ L span $ itint $! transint $ parseUnsignedInteger
1026      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1027
1028 -- some conveniences for use with tok_integral
1029 tok_num :: (Integer -> Integer)
1030         -> Int -> Int
1031         -> (Integer, (Char->Int)) -> Action
1032 tok_num = tok_integral ITinteger
1033 tok_primint :: (Integer -> Integer)
1034             -> Int -> Int
1035             -> (Integer, (Char->Int)) -> Action
1036 tok_primint = tok_integral ITprimint
1037 tok_primword :: Int -> Int
1038              -> (Integer, (Char->Int)) -> Action
1039 tok_primword = tok_integral ITprimword positive
1040 positive, negative :: (Integer -> Integer)
1041 positive = id
1042 negative = negate
1043 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1044 decimal = (10,octDecDigit)
1045 octal = (8,octDecDigit)
1046 hexadecimal = (16,hexDigit)
1047
1048 -- readRational can understand negative rationals, exponents, everything.
1049 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1050 tok_float        str = ITrational   $! readRational str
1051 tok_primfloat    str = ITprimfloat  $! readRational str
1052 tok_primdouble   str = ITprimdouble $! readRational str
1053
1054 -- -----------------------------------------------------------------------------
1055 -- Layout processing
1056
1057 -- we're at the first token on a line, insert layout tokens if necessary
1058 do_bol :: Action
1059 do_bol span _str _len = do
1060         pos <- getOffside
1061         case pos of
1062             LT -> do
1063                 --trace "layout: inserting '}'" $ do
1064                 popContext
1065                 -- do NOT pop the lex state, we might have a ';' to insert
1066                 return (L span ITvccurly)
1067             EQ -> do
1068                 --trace "layout: inserting ';'" $ do
1069                 _ <- popLexState
1070                 return (L span ITsemi)
1071             GT -> do
1072                 _ <- popLexState
1073                 lexToken
1074
1075 -- certain keywords put us in the "layout" state, where we might
1076 -- add an opening curly brace.
1077 maybe_layout :: Token -> P ()
1078 maybe_layout t = do -- If the alternative layout rule is enabled then
1079                     -- we never create an implicit layout context here.
1080                     -- Layout is handled XXX instead.
1081                     -- The code for closing implicit contexts, or
1082                     -- inserting implicit semi-colons, is therefore
1083                     -- irrelevant as it only applies in an implicit
1084                     -- context.
1085                     alr <- extension alternativeLayoutRule
1086                     unless alr $ f t
1087     where f ITdo    = pushLexState layout_do
1088           f ITmdo   = pushLexState layout_do
1089           f ITof    = pushLexState layout
1090           f ITlet   = pushLexState layout
1091           f ITwhere = pushLexState layout
1092           f ITrec   = pushLexState layout
1093           f _       = return ()
1094
1095 -- Pushing a new implicit layout context.  If the indentation of the
1096 -- next token is not greater than the previous layout context, then
1097 -- Haskell 98 says that the new layout context should be empty; that is
1098 -- the lexer must generate {}.
1099 --
1100 -- We are slightly more lenient than this: when the new context is started
1101 -- by a 'do', then we allow the new context to be at the same indentation as
1102 -- the previous context.  This is what the 'strict' argument is for.
1103 --
1104 new_layout_context :: Bool -> Action
1105 new_layout_context strict span _buf _len = do
1106     _ <- popLexState
1107     (AI l _) <- getInput
1108     let offset = srcLocCol l
1109     ctx <- getContext
1110     relaxed <- extension relaxedLayout
1111     let strict' = strict || not relaxed
1112     case ctx of
1113         Layout prev_off : _  |
1114            (strict'     && prev_off >= offset  ||
1115             not strict' && prev_off > offset) -> do
1116                 -- token is indented to the left of the previous context.
1117                 -- we must generate a {} sequence now.
1118                 pushLexState layout_left
1119                 return (L span ITvocurly)
1120         _ -> do
1121                 setContext (Layout offset : ctx)
1122                 return (L span ITvocurly)
1123
1124 do_layout_left :: Action
1125 do_layout_left span _buf _len = do
1126     _ <- popLexState
1127     pushLexState bol  -- we must be at the start of a line
1128     return (L span ITvccurly)
1129
1130 -- -----------------------------------------------------------------------------
1131 -- LINE pragmas
1132
1133 setLine :: Int -> Action
1134 setLine code span buf len = do
1135   let line = parseUnsignedInteger buf len 10 octDecDigit
1136   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1137         -- subtract one: the line number refers to the *following* line
1138   _ <- popLexState
1139   pushLexState code
1140   lexToken
1141
1142 setFile :: Int -> Action
1143 setFile code span buf len = do
1144   let file = lexemeToFastString (stepOn buf) (len-2)
1145   setAlrLastLoc noSrcSpan
1146   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1147   _ <- popLexState
1148   pushLexState code
1149   lexToken
1150
1151
1152 -- -----------------------------------------------------------------------------
1153 -- Options, includes and language pragmas.
1154
1155 lex_string_prag :: (String -> Token) -> Action
1156 lex_string_prag mkTok span _buf _len
1157     = do input <- getInput
1158          start <- getSrcLoc
1159          tok <- go [] input
1160          end <- getSrcLoc
1161          return (L (mkSrcSpan start end) tok)
1162     where go acc input
1163               = if isString input "#-}"
1164                    then do setInput input
1165                            return (mkTok (reverse acc))
1166                    else case alexGetChar input of
1167                           Just (c,i) -> go (c:acc) i
1168                           Nothing -> err input
1169           isString _ [] = True
1170           isString i (x:xs)
1171               = case alexGetChar i of
1172                   Just (c,i') | c == x    -> isString i' xs
1173                   _other -> False
1174           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1175
1176
1177 -- -----------------------------------------------------------------------------
1178 -- Strings & Chars
1179
1180 -- This stuff is horrible.  I hates it.
1181
1182 lex_string_tok :: Action
1183 lex_string_tok span _buf _len = do
1184   tok <- lex_string ""
1185   end <- getSrcLoc
1186   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1187
1188 lex_string :: String -> P Token
1189 lex_string s = do
1190   i <- getInput
1191   case alexGetChar' i of
1192     Nothing -> lit_error i
1193
1194     Just ('"',i)  -> do
1195         setInput i
1196         magicHash <- extension magicHashEnabled
1197         if magicHash
1198           then do
1199             i <- getInput
1200             case alexGetChar' i of
1201               Just ('#',i) -> do
1202                    setInput i
1203                    if any (> '\xFF') s
1204                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1205                     else let s' = mkZFastString (reverse s) in
1206                          return (ITprimstring s')
1207                         -- mkZFastString is a hack to avoid encoding the
1208                         -- string in UTF-8.  We just want the exact bytes.
1209               _other ->
1210                 return (ITstring (mkFastString (reverse s)))
1211           else
1212                 return (ITstring (mkFastString (reverse s)))
1213
1214     Just ('\\',i)
1215         | Just ('&',i) <- next -> do
1216                 setInput i; lex_string s
1217         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1218                            -- is_space only works for <= '\x7f' (#3751)
1219                 setInput i; lex_stringgap s
1220         where next = alexGetChar' i
1221
1222     Just (c, i1) -> do
1223         case c of
1224           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1225           c | isAny c -> do setInput i1; lex_string (c:s)
1226           _other -> lit_error i
1227
1228 lex_stringgap :: String -> P Token
1229 lex_stringgap s = do
1230   i <- getInput
1231   c <- getCharOrFail i
1232   case c of
1233     '\\' -> lex_string s
1234     c | is_space c -> lex_stringgap s
1235     _other -> lit_error i
1236
1237
1238 lex_char_tok :: Action
1239 -- Here we are basically parsing character literals, such as 'x' or '\n'
1240 -- but, when Template Haskell is on, we additionally spot
1241 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1242 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1243 -- So we have to do two characters of lookahead: when we see 'x we need to
1244 -- see if there's a trailing quote
1245 lex_char_tok span _buf _len = do        -- We've seen '
1246    i1 <- getInput       -- Look ahead to first character
1247    let loc = srcSpanStart span
1248    case alexGetChar' i1 of
1249         Nothing -> lit_error  i1
1250
1251         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1252                   th_exts <- extension thEnabled
1253                   if th_exts then do
1254                         setInput i2
1255                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1256                    else lit_error i1
1257
1258         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1259                   setInput i2
1260                   lit_ch <- lex_escape
1261                   i3 <- getInput
1262                   mc <- getCharOrFail i3 -- Trailing quote
1263                   if mc == '\'' then finish_char_tok loc lit_ch
1264                                 else lit_error i3
1265
1266         Just (c, i2@(AI _end2 _))
1267                 | not (isAny c) -> lit_error i1
1268                 | otherwise ->
1269
1270                 -- We've seen 'x, where x is a valid character
1271                 --  (i.e. not newline etc) but not a quote or backslash
1272            case alexGetChar' i2 of      -- Look ahead one more character
1273                 Just ('\'', i3) -> do   -- We've seen 'x'
1274                         setInput i3
1275                         finish_char_tok loc c
1276                 _other -> do            -- We've seen 'x not followed by quote
1277                                         -- (including the possibility of EOF)
1278                                         -- If TH is on, just parse the quote only
1279                         th_exts <- extension thEnabled
1280                         let (AI end _) = i1
1281                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1282                                    else lit_error i2
1283
1284 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1285 finish_char_tok loc ch  -- We've already seen the closing quote
1286                         -- Just need to check for trailing #
1287   = do  magicHash <- extension magicHashEnabled
1288         i@(AI end _) <- getInput
1289         if magicHash then do
1290                 case alexGetChar' i of
1291                         Just ('#',i@(AI end _)) -> do
1292                                 setInput i
1293                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1294                         _other ->
1295                                 return (L (mkSrcSpan loc end) (ITchar ch))
1296             else do
1297                    return (L (mkSrcSpan loc end) (ITchar ch))
1298
1299 isAny :: Char -> Bool
1300 isAny c | c > '\x7f' = isPrint c
1301         | otherwise  = is_any c
1302
1303 lex_escape :: P Char
1304 lex_escape = do
1305   i0 <- getInput
1306   c <- getCharOrFail i0
1307   case c of
1308         'a'   -> return '\a'
1309         'b'   -> return '\b'
1310         'f'   -> return '\f'
1311         'n'   -> return '\n'
1312         'r'   -> return '\r'
1313         't'   -> return '\t'
1314         'v'   -> return '\v'
1315         '\\'  -> return '\\'
1316         '"'   -> return '\"'
1317         '\''  -> return '\''
1318         '^'   -> do i1 <- getInput
1319                     c <- getCharOrFail i1
1320                     if c >= '@' && c <= '_'
1321                         then return (chr (ord c - ord '@'))
1322                         else lit_error i1
1323
1324         'x'   -> readNum is_hexdigit 16 hexDigit
1325         'o'   -> readNum is_octdigit  8 octDecDigit
1326         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1327
1328         c1 ->  do
1329            i <- getInput
1330            case alexGetChar' i of
1331             Nothing -> lit_error i0
1332             Just (c2,i2) ->
1333               case alexGetChar' i2 of
1334                 Nothing -> do lit_error i0
1335                 Just (c3,i3) ->
1336                    let str = [c1,c2,c3] in
1337                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1338                                      Just rest <- [stripPrefix p str] ] of
1339                           (escape_char,[]):_ -> do
1340                                 setInput i3
1341                                 return escape_char
1342                           (escape_char,_:_):_ -> do
1343                                 setInput i2
1344                                 return escape_char
1345                           [] -> lit_error i0
1346
1347 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1348 readNum is_digit base conv = do
1349   i <- getInput
1350   c <- getCharOrFail i
1351   if is_digit c
1352         then readNum2 is_digit base conv (conv c)
1353         else lit_error i
1354
1355 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1356 readNum2 is_digit base conv i = do
1357   input <- getInput
1358   read i input
1359   where read i input = do
1360           case alexGetChar' input of
1361             Just (c,input') | is_digit c -> do
1362                let i' = i*base + conv c
1363                if i' > 0x10ffff
1364                   then setInput input >> lexError "numeric escape sequence out of range"
1365                   else read i' input'
1366             _other -> do
1367               setInput input; return (chr i)
1368
1369
1370 silly_escape_chars :: [(String, Char)]
1371 silly_escape_chars = [
1372         ("NUL", '\NUL'),
1373         ("SOH", '\SOH'),
1374         ("STX", '\STX'),
1375         ("ETX", '\ETX'),
1376         ("EOT", '\EOT'),
1377         ("ENQ", '\ENQ'),
1378         ("ACK", '\ACK'),
1379         ("BEL", '\BEL'),
1380         ("BS", '\BS'),
1381         ("HT", '\HT'),
1382         ("LF", '\LF'),
1383         ("VT", '\VT'),
1384         ("FF", '\FF'),
1385         ("CR", '\CR'),
1386         ("SO", '\SO'),
1387         ("SI", '\SI'),
1388         ("DLE", '\DLE'),
1389         ("DC1", '\DC1'),
1390         ("DC2", '\DC2'),
1391         ("DC3", '\DC3'),
1392         ("DC4", '\DC4'),
1393         ("NAK", '\NAK'),
1394         ("SYN", '\SYN'),
1395         ("ETB", '\ETB'),
1396         ("CAN", '\CAN'),
1397         ("EM", '\EM'),
1398         ("SUB", '\SUB'),
1399         ("ESC", '\ESC'),
1400         ("FS", '\FS'),
1401         ("GS", '\GS'),
1402         ("RS", '\RS'),
1403         ("US", '\US'),
1404         ("SP", '\SP'),
1405         ("DEL", '\DEL')
1406         ]
1407
1408 -- before calling lit_error, ensure that the current input is pointing to
1409 -- the position of the error in the buffer.  This is so that we can report
1410 -- a correct location to the user, but also so we can detect UTF-8 decoding
1411 -- errors if they occur.
1412 lit_error :: AlexInput -> P a
1413 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1414
1415 getCharOrFail :: AlexInput -> P Char
1416 getCharOrFail i =  do
1417   case alexGetChar' i of
1418         Nothing -> lexError "unexpected end-of-file in string/character literal"
1419         Just (c,i)  -> do setInput i; return c
1420
1421 -- -----------------------------------------------------------------------------
1422 -- QuasiQuote
1423
1424 lex_quasiquote_tok :: Action
1425 lex_quasiquote_tok span buf len = do
1426   let quoter = tail (lexemeToString buf (len - 1))
1427                 -- 'tail' drops the initial '[',
1428                 -- while the -1 drops the trailing '|'
1429   quoteStart <- getSrcLoc
1430   quote <- lex_quasiquote ""
1431   end <- getSrcLoc
1432   return (L (mkSrcSpan (srcSpanStart span) end)
1433            (ITquasiQuote (mkFastString quoter,
1434                           mkFastString (reverse quote),
1435                           mkSrcSpan quoteStart end)))
1436
1437 lex_quasiquote :: String -> P String
1438 lex_quasiquote s = do
1439   i <- getInput
1440   case alexGetChar' i of
1441     Nothing -> lit_error i
1442
1443     Just ('\\',i)
1444         | Just ('|',i) <- next -> do
1445                 setInput i; lex_quasiquote ('|' : s)
1446         | Just (']',i) <- next -> do
1447                 setInput i; lex_quasiquote (']' : s)
1448         where next = alexGetChar' i
1449
1450     Just ('|',i)
1451         | Just (']',i) <- next -> do
1452                 setInput i; return s
1453         where next = alexGetChar' i
1454
1455     Just (c, i) -> do
1456          setInput i; lex_quasiquote (c : s)
1457
1458 -- -----------------------------------------------------------------------------
1459 -- Warnings
1460
1461 warn :: DynFlag -> SDoc -> Action
1462 warn option warning srcspan _buf _len = do
1463     addWarning option srcspan warning
1464     lexToken
1465
1466 warnThen :: DynFlag -> SDoc -> Action -> Action
1467 warnThen option warning action srcspan buf len = do
1468     addWarning option srcspan warning
1469     action srcspan buf len
1470
1471 -- -----------------------------------------------------------------------------
1472 -- The Parse Monad
1473
1474 data LayoutContext
1475   = NoLayout
1476   | Layout !Int
1477   deriving Show
1478
1479 data ParseResult a
1480   = POk PState a
1481   | PFailed
1482         SrcSpan         -- The start and end of the text span related to
1483                         -- the error.  Might be used in environments which can
1484                         -- show this span, e.g. by highlighting it.
1485         Message         -- The error message
1486
1487 data PState = PState {
1488         buffer     :: StringBuffer,
1489         dflags     :: DynFlags,
1490         messages   :: Messages,
1491         last_loc   :: SrcSpan,  -- pos of previous token
1492         last_len   :: !Int,     -- len of previous token
1493         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1494         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1495         context    :: [LayoutContext],
1496         lex_state  :: [Int],
1497         -- Used in the alternative layout rule:
1498         -- These tokens are the next ones to be sent out. They are
1499         -- just blindly emitted, without the rule looking at them again:
1500         alr_pending_implicit_tokens :: [Located Token],
1501         -- This is the next token to be considered or, if it is Nothing,
1502         -- we need to get the next token from the input stream:
1503         alr_next_token :: Maybe (Located Token),
1504         -- This is what we consider to be the locatino of the last token
1505         -- emitted:
1506         alr_last_loc :: SrcSpan,
1507         -- The stack of layout contexts:
1508         alr_context :: [ALRContext],
1509         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1510         -- us what sort of layout the '{' will open:
1511         alr_expecting_ocurly :: Maybe ALRLayout,
1512         -- Have we just had the '}' for a let block? If so, than an 'in'
1513         -- token doesn't need to close anything:
1514         alr_justClosedExplicitLetBlock :: Bool
1515      }
1516         -- last_loc and last_len are used when generating error messages,
1517         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1518         -- current token to happyError, we could at least get rid of last_len.
1519         -- Getting rid of last_loc would require finding another way to
1520         -- implement pushCurrentContext (which is only called from one place).
1521
1522 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1523                               Bool{- is it a 'let' block? -}
1524                 | ALRLayout ALRLayout Int
1525 data ALRLayout = ALRLayoutLet
1526                | ALRLayoutWhere
1527                | ALRLayoutOf
1528                | ALRLayoutDo
1529
1530 newtype P a = P { unP :: PState -> ParseResult a }
1531
1532 instance Monad P where
1533   return = returnP
1534   (>>=) = thenP
1535   fail = failP
1536
1537 returnP :: a -> P a
1538 returnP a = a `seq` (P $ \s -> POk s a)
1539
1540 thenP :: P a -> (a -> P b) -> P b
1541 (P m) `thenP` k = P $ \ s ->
1542         case m s of
1543                 POk s1 a         -> (unP (k a)) s1
1544                 PFailed span err -> PFailed span err
1545
1546 failP :: String -> P a
1547 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1548
1549 failMsgP :: String -> P a
1550 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1551
1552 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1553 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1554
1555 failSpanMsgP :: SrcSpan -> SDoc -> P a
1556 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1557
1558 getPState :: P PState
1559 getPState = P $ \s -> POk s s
1560
1561 getDynFlags :: P DynFlags
1562 getDynFlags = P $ \s -> POk s (dflags s)
1563
1564 withThisPackage :: (PackageId -> a) -> P a
1565 withThisPackage f
1566  = do   pkg     <- liftM thisPackage getDynFlags
1567         return  $ f pkg
1568
1569 extension :: (Int -> Bool) -> P Bool
1570 extension p = P $ \s -> POk s (p $! extsBitmap s)
1571
1572 getExts :: P Int
1573 getExts = P $ \s -> POk s (extsBitmap s)
1574
1575 setExts :: (Int -> Int) -> P ()
1576 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1577
1578 setSrcLoc :: SrcLoc -> P ()
1579 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1580
1581 getSrcLoc :: P SrcLoc
1582 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1583
1584 setLastToken :: SrcSpan -> Int -> P ()
1585 setLastToken loc len = P $ \s -> POk s {
1586   last_loc=loc,
1587   last_len=len
1588   } ()
1589
1590 data AlexInput = AI SrcLoc StringBuffer
1591
1592 alexInputPrevChar :: AlexInput -> Char
1593 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1594
1595 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1596 alexGetChar (AI loc s)
1597   | atEnd s   = Nothing
1598   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1599                 --trace (show (ord c)) $
1600                 Just (adj_c, (AI loc' s'))
1601   where (c,s') = nextChar s
1602         loc'   = advanceSrcLoc loc c
1603
1604         non_graphic     = '\x0'
1605         upper           = '\x1'
1606         lower           = '\x2'
1607         digit           = '\x3'
1608         symbol          = '\x4'
1609         space           = '\x5'
1610         other_graphic   = '\x6'
1611
1612         adj_c
1613           | c <= '\x06' = non_graphic
1614           | c <= '\x7f' = c
1615           -- Alex doesn't handle Unicode, so when Unicode
1616           -- character is encountered we output these values
1617           -- with the actual character value hidden in the state.
1618           | otherwise =
1619                 case generalCategory c of
1620                   UppercaseLetter       -> upper
1621                   LowercaseLetter       -> lower
1622                   TitlecaseLetter       -> upper
1623                   ModifierLetter        -> other_graphic
1624                   OtherLetter           -> lower -- see #1103
1625                   NonSpacingMark        -> other_graphic
1626                   SpacingCombiningMark  -> other_graphic
1627                   EnclosingMark         -> other_graphic
1628                   DecimalNumber         -> digit
1629                   LetterNumber          -> other_graphic
1630                   OtherNumber           -> other_graphic
1631                   ConnectorPunctuation  -> symbol
1632                   DashPunctuation       -> symbol
1633                   OpenPunctuation       -> other_graphic
1634                   ClosePunctuation      -> other_graphic
1635                   InitialQuote          -> other_graphic
1636                   FinalQuote            -> other_graphic
1637                   OtherPunctuation      -> symbol
1638                   MathSymbol            -> symbol
1639                   CurrencySymbol        -> symbol
1640                   ModifierSymbol        -> symbol
1641                   OtherSymbol           -> symbol
1642                   Space                 -> space
1643                   _other                -> non_graphic
1644
1645 -- This version does not squash unicode characters, it is used when
1646 -- lexing strings.
1647 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1648 alexGetChar' (AI loc s)
1649   | atEnd s   = Nothing
1650   | otherwise = c `seq` loc' `seq` s' `seq`
1651                 --trace (show (ord c)) $
1652                 Just (c, (AI loc' s'))
1653   where (c,s') = nextChar s
1654         loc'   = advanceSrcLoc loc c
1655
1656 getInput :: P AlexInput
1657 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1658
1659 setInput :: AlexInput -> P ()
1660 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1661
1662 pushLexState :: Int -> P ()
1663 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1664
1665 popLexState :: P Int
1666 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1667
1668 getLexState :: P Int
1669 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1670
1671 popNextToken :: P (Maybe (Located Token))
1672 popNextToken
1673     = P $ \s@PState{ alr_next_token = m } ->
1674               POk (s {alr_next_token = Nothing}) m
1675
1676 setAlrLastLoc :: SrcSpan -> P ()
1677 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1678
1679 getAlrLastLoc :: P SrcSpan
1680 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1681
1682 getALRContext :: P [ALRContext]
1683 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1684
1685 setALRContext :: [ALRContext] -> P ()
1686 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1687
1688 getJustClosedExplicitLetBlock :: P Bool
1689 getJustClosedExplicitLetBlock
1690  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1691
1692 setJustClosedExplicitLetBlock :: Bool -> P ()
1693 setJustClosedExplicitLetBlock b
1694  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1695
1696 setNextToken :: Located Token -> P ()
1697 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1698
1699 popPendingImplicitToken :: P (Maybe (Located Token))
1700 popPendingImplicitToken
1701     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1702               case ts of
1703               [] -> POk s Nothing
1704               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1705
1706 setPendingImplicitTokens :: [Located Token] -> P ()
1707 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1708
1709 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1710 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1711
1712 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1713 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1714
1715 -- for reasons of efficiency, flags indicating language extensions (eg,
1716 -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
1717 -- integer
1718
1719 genericsBit :: Int
1720 genericsBit = 0 -- {| and |}
1721 ffiBit :: Int
1722 ffiBit     = 1
1723 parrBit :: Int
1724 parrBit    = 2
1725 arrowsBit :: Int
1726 arrowsBit  = 4
1727 thBit :: Int
1728 thBit      = 5
1729 ipBit :: Int
1730 ipBit      = 6
1731 explicitForallBit :: Int
1732 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1733 bangPatBit :: Int
1734 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1735                 -- (doesn't affect the lexer)
1736 tyFamBit :: Int
1737 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1738 haddockBit :: Int
1739 haddockBit = 10 -- Lex and parse Haddock comments
1740 magicHashBit :: Int
1741 magicHashBit = 11 -- "#" in both functions and operators
1742 kindSigsBit :: Int
1743 kindSigsBit = 12 -- Kind signatures on type variables
1744 recursiveDoBit :: Int
1745 recursiveDoBit = 13 -- mdo
1746 unicodeSyntaxBit :: Int
1747 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1748 unboxedTuplesBit :: Int
1749 unboxedTuplesBit = 15 -- (# and #)
1750 datatypeContextsBit :: Int
1751 datatypeContextsBit = 16
1752 transformComprehensionsBit :: Int
1753 transformComprehensionsBit = 17
1754 qqBit :: Int
1755 qqBit      = 18 -- enable quasiquoting
1756 inRulePragBit :: Int
1757 inRulePragBit = 19
1758 rawTokenStreamBit :: Int
1759 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1760 newQualOpsBit :: Int
1761 newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
1762 recBit :: Int
1763 recBit = 22 -- rec
1764 alternativeLayoutRuleBit :: Int
1765 alternativeLayoutRuleBit = 23
1766 relaxedLayoutBit :: Int
1767 relaxedLayoutBit = 24
1768
1769 always :: Int -> Bool
1770 always           _     = True
1771 genericsEnabled :: Int -> Bool
1772 genericsEnabled  flags = testBit flags genericsBit
1773 parrEnabled :: Int -> Bool
1774 parrEnabled      flags = testBit flags parrBit
1775 arrowsEnabled :: Int -> Bool
1776 arrowsEnabled    flags = testBit flags arrowsBit
1777 thEnabled :: Int -> Bool
1778 thEnabled        flags = testBit flags thBit
1779 ipEnabled :: Int -> Bool
1780 ipEnabled        flags = testBit flags ipBit
1781 explicitForallEnabled :: Int -> Bool
1782 explicitForallEnabled flags = testBit flags explicitForallBit
1783 bangPatEnabled :: Int -> Bool
1784 bangPatEnabled   flags = testBit flags bangPatBit
1785 -- tyFamEnabled :: Int -> Bool
1786 -- tyFamEnabled     flags = testBit flags tyFamBit
1787 haddockEnabled :: Int -> Bool
1788 haddockEnabled   flags = testBit flags haddockBit
1789 magicHashEnabled :: Int -> Bool
1790 magicHashEnabled flags = testBit flags magicHashBit
1791 -- kindSigsEnabled :: Int -> Bool
1792 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1793 unicodeSyntaxEnabled :: Int -> Bool
1794 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1795 unboxedTuplesEnabled :: Int -> Bool
1796 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1797 datatypeContextsEnabled :: Int -> Bool
1798 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1799 qqEnabled :: Int -> Bool
1800 qqEnabled        flags = testBit flags qqBit
1801 -- inRulePrag :: Int -> Bool
1802 -- inRulePrag       flags = testBit flags inRulePragBit
1803 rawTokenStreamEnabled :: Int -> Bool
1804 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1805 newQualOps :: Int -> Bool
1806 newQualOps       flags = testBit flags newQualOpsBit
1807 oldQualOps :: Int -> Bool
1808 oldQualOps flags = not (newQualOps flags)
1809 alternativeLayoutRule :: Int -> Bool
1810 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1811 relaxedLayout :: Int -> Bool
1812 relaxedLayout flags = testBit flags relaxedLayoutBit
1813
1814 -- PState for parsing options pragmas
1815 --
1816 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1817 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1818                                  lex_state = [bol, option_prags, 0]
1819                              }
1820
1821 -- create a parse state
1822 --
1823 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1824 mkPState flags buf loc =
1825   PState {
1826       buffer          = buf,
1827       dflags        = flags,
1828       messages      = emptyMessages,
1829       last_loc      = mkSrcSpan loc loc,
1830       last_len      = 0,
1831       loc           = loc,
1832       extsBitmap    = fromIntegral bitmap,
1833       context       = [],
1834       lex_state     = [bol, 0],
1835       alr_pending_implicit_tokens = [],
1836       alr_next_token = Nothing,
1837       alr_last_loc = noSrcSpan,
1838       alr_context = [],
1839       alr_expecting_ocurly = Nothing,
1840       alr_justClosedExplicitLetBlock = False
1841     }
1842     where
1843       bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
1844                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1845                .|. parrBit           `setBitIf` xopt Opt_PArr         flags
1846                .|. arrowsBit         `setBitIf` xopt Opt_Arrows       flags
1847                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1848                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes flags
1849                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams flags
1850                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll flags
1851                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
1852                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
1853                .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
1854                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
1855                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
1856                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
1857                .|. recBit            `setBitIf` xopt Opt_DoRec  flags
1858                .|. recBit            `setBitIf` xopt Opt_Arrows flags
1859                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
1860                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
1861                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1862                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1863                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1864                .|. newQualOpsBit `setBitIf` xopt Opt_NewQualifiedOperators flags
1865                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1866                .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
1867       --
1868       setBitIf :: Int -> Bool -> Int
1869       b `setBitIf` cond | cond      = bit b
1870                         | otherwise = 0
1871
1872 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1873 addWarning option srcspan warning
1874  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1875        let warning' = mkWarnMsg srcspan alwaysQualify warning
1876            ws' = if dopt option d then ws `snocBag` warning' else ws
1877        in POk s{messages=(ws', es)} ()
1878
1879 getMessages :: PState -> Messages
1880 getMessages PState{messages=ms} = ms
1881
1882 getContext :: P [LayoutContext]
1883 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1884
1885 setContext :: [LayoutContext] -> P ()
1886 setContext ctx = P $ \s -> POk s{context=ctx} ()
1887
1888 popContext :: P ()
1889 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1890                               last_len = len, last_loc = last_loc }) ->
1891   case ctx of
1892         (_:tl) -> POk s{ context = tl } ()
1893         []     -> PFailed last_loc (srcParseErr buf len)
1894
1895 -- Push a new layout context at the indentation of the last token read.
1896 -- This is only used at the outer level of a module when the 'module'
1897 -- keyword is missing.
1898 pushCurrentContext :: P ()
1899 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1900     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1901
1902 getOffside :: P Ordering
1903 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1904                 let offs = srcSpanStartCol loc in
1905                 let ord = case stk of
1906                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1907                                         compare offs n
1908                         _            -> GT
1909                 in POk s ord
1910
1911 -- ---------------------------------------------------------------------------
1912 -- Construct a parse error
1913
1914 srcParseErr
1915   :: StringBuffer       -- current buffer (placed just after the last token)
1916   -> Int                -- length of the previous token
1917   -> Message
1918 srcParseErr buf len
1919   = hcat [ if null token
1920              then ptext (sLit "parse error (possibly incorrect indentation)")
1921              else hcat [ptext (sLit "parse error on input "),
1922                         char '`', text token, char '\'']
1923     ]
1924   where token = lexemeToString (offsetBytes (-len) buf) len
1925
1926 -- Report a parse failure, giving the span of the previous token as
1927 -- the location of the error.  This is the entry point for errors
1928 -- detected during parsing.
1929 srcParseFail :: P a
1930 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1931                             last_loc = last_loc } ->
1932     PFailed last_loc (srcParseErr buf len)
1933
1934 -- A lexical error is reported at a particular position in the source file,
1935 -- not over a token range.
1936 lexError :: String -> P a
1937 lexError str = do
1938   loc <- getSrcLoc
1939   (AI end buf) <- getInput
1940   reportLexError loc end buf str
1941
1942 -- -----------------------------------------------------------------------------
1943 -- This is the top-level function: called from the parser each time a
1944 -- new token is to be read from the input.
1945
1946 lexer :: (Located Token -> P a) -> P a
1947 lexer cont = do
1948   alr <- extension alternativeLayoutRule
1949   let lexTokenFun = if alr then lexTokenAlr else lexToken
1950   tok@(L _span _tok__) <- lexTokenFun
1951   --trace ("token: " ++ show _tok__) $ do
1952   cont tok
1953
1954 lexTokenAlr :: P (Located Token)
1955 lexTokenAlr = do mPending <- popPendingImplicitToken
1956                  t <- case mPending of
1957                       Nothing ->
1958                           do mNext <- popNextToken
1959                              t <- case mNext of
1960                                   Nothing -> lexToken
1961                                   Just next -> return next
1962                              alternativeLayoutRuleToken t
1963                       Just t ->
1964                           return t
1965                  setAlrLastLoc (getLoc t)
1966                  case unLoc t of
1967                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
1968                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
1969                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
1970                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
1971                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1972                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1973                      _       -> return ()
1974                  return t
1975
1976 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
1977 alternativeLayoutRuleToken t
1978     = do context <- getALRContext
1979          lastLoc <- getAlrLastLoc
1980          mExpectingOCurly <- getAlrExpectingOCurly
1981          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
1982          setJustClosedExplicitLetBlock False
1983          dflags <- getDynFlags
1984          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
1985              thisLoc = getLoc t
1986              thisCol = srcSpanStartCol thisLoc
1987              newLine = (lastLoc == noSrcSpan)
1988                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
1989          case (unLoc t, context, mExpectingOCurly) of
1990              -- This case handles a GHC extension to the original H98
1991              -- layout rule...
1992              (ITocurly, _, Just alrLayout) ->
1993                  do setAlrExpectingOCurly Nothing
1994                     let isLet = case alrLayout of
1995                                 ALRLayoutLet -> True
1996                                 _ -> False
1997                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
1998                     return t
1999              -- ...and makes this case unnecessary
2000              {-
2001              -- I think our implicit open-curly handling is slightly
2002              -- different to John's, in how it interacts with newlines
2003              -- and "in"
2004              (ITocurly, _, Just _) ->
2005                  do setAlrExpectingOCurly Nothing
2006                     setNextToken t
2007                     lexTokenAlr
2008              -}
2009              (_, ALRLayout _ col : ls, Just expectingOCurly)
2010               | (thisCol > col) ||
2011                 (thisCol == col &&
2012                  isNonDecreasingIntentation expectingOCurly) ->
2013                  do setAlrExpectingOCurly Nothing
2014                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2015                     setNextToken t
2016                     return (L thisLoc ITocurly)
2017               | otherwise ->
2018                  do setAlrExpectingOCurly Nothing
2019                     setPendingImplicitTokens [L lastLoc ITccurly]
2020                     setNextToken t
2021                     return (L lastLoc ITocurly)
2022              (_, _, Just expectingOCurly) ->
2023                  do setAlrExpectingOCurly Nothing
2024                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2025                     setNextToken t
2026                     return (L thisLoc ITocurly)
2027              -- We do the [] cases earlier than in the spec, as we
2028              -- have an actual EOF token
2029              (ITeof, ALRLayout _ _ : ls, _) ->
2030                  do setALRContext ls
2031                     setNextToken t
2032                     return (L thisLoc ITccurly)
2033              (ITeof, _, _) ->
2034                  return t
2035              -- the other ITeof case omitted; general case below covers it
2036              (ITin, _, _)
2037               | justClosedExplicitLetBlock ->
2038                  return t
2039              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2040               | newLine ->
2041                  do setPendingImplicitTokens [t]
2042                     setALRContext ls
2043                     return (L thisLoc ITccurly)
2044              -- This next case is to handle a transitional issue:
2045              (ITwhere, ALRLayout _ col : ls, _)
2046               | newLine && thisCol == col && transitional ->
2047                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2048                                thisLoc
2049                                (transitionalAlternativeLayoutWarning
2050                                     "`where' clause at the same depth as implicit layout block")
2051                     setALRContext ls
2052                     setNextToken t
2053                     -- Note that we use lastLoc, as we may need to close
2054                     -- more layouts, or give a semicolon
2055                     return (L lastLoc ITccurly)
2056              -- This next case is to handle a transitional issue:
2057              (ITvbar, ALRLayout _ col : ls, _)
2058               | newLine && thisCol == col && transitional ->
2059                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2060                                thisLoc
2061                                (transitionalAlternativeLayoutWarning
2062                                     "`|' at the same depth as implicit layout block")
2063                     setALRContext ls
2064                     setNextToken t
2065                     -- Note that we use lastLoc, as we may need to close
2066                     -- more layouts, or give a semicolon
2067                     return (L lastLoc ITccurly)
2068              (_, ALRLayout _ col : ls, _)
2069               | newLine && thisCol == col ->
2070                  do setNextToken t
2071                     return (L thisLoc ITsemi)
2072               | newLine && thisCol < col ->
2073                  do setALRContext ls
2074                     setNextToken t
2075                     -- Note that we use lastLoc, as we may need to close
2076                     -- more layouts, or give a semicolon
2077                     return (L lastLoc ITccurly)
2078              -- We need to handle close before open, as 'then' is both
2079              -- an open and a close
2080              (u, _, _)
2081               | isALRclose u ->
2082                  case context of
2083                  ALRLayout _ _ : ls ->
2084                      do setALRContext ls
2085                         setNextToken t
2086                         return (L thisLoc ITccurly)
2087                  ALRNoLayout _ isLet : ls ->
2088                      do let ls' = if isALRopen u
2089                                      then ALRNoLayout (containsCommas u) False : ls
2090                                      else ls
2091                         setALRContext ls'
2092                         when isLet $ setJustClosedExplicitLetBlock True
2093                         return t
2094                  [] ->
2095                      do let ls = if isALRopen u
2096                                     then [ALRNoLayout (containsCommas u) False]
2097                                     else ls
2098                         setALRContext ls
2099                         -- XXX This is an error in John's code, but
2100                         -- it looks reachable to me at first glance
2101                         return t
2102              (u, _, _)
2103               | isALRopen u ->
2104                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2105                     return t
2106              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2107                  do setALRContext ls
2108                     setPendingImplicitTokens [t]
2109                     return (L thisLoc ITccurly)
2110              (ITin, ALRLayout _ _ : ls, _) ->
2111                  do setALRContext ls
2112                     setNextToken t
2113                     return (L thisLoc ITccurly)
2114              -- the other ITin case omitted; general case below covers it
2115              (ITcomma, ALRLayout _ _ : ls, _)
2116               | topNoLayoutContainsCommas ls ->
2117                  do setALRContext ls
2118                     setNextToken t
2119                     return (L thisLoc ITccurly)
2120              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2121                  do setALRContext ls
2122                     setPendingImplicitTokens [t]
2123                     return (L thisLoc ITccurly)
2124              -- the other ITwhere case omitted; general case below covers it
2125              (_, _, _) -> return t
2126
2127 transitionalAlternativeLayoutWarning :: String -> SDoc
2128 transitionalAlternativeLayoutWarning msg
2129     = text "transitional layout will not be accepted in the future:"
2130    $$ text msg
2131
2132 isALRopen :: Token -> Bool
2133 isALRopen ITcase        = True
2134 isALRopen ITif          = True
2135 isALRopen ITthen        = True
2136 isALRopen IToparen      = True
2137 isALRopen ITobrack      = True
2138 isALRopen ITocurly      = True
2139 -- GHC Extensions:
2140 isALRopen IToubxparen   = True
2141 isALRopen ITparenEscape = True
2142 isALRopen _             = False
2143
2144 isALRclose :: Token -> Bool
2145 isALRclose ITof     = True
2146 isALRclose ITthen   = True
2147 isALRclose ITelse   = True
2148 isALRclose ITcparen = True
2149 isALRclose ITcbrack = True
2150 isALRclose ITccurly = True
2151 -- GHC Extensions:
2152 isALRclose ITcubxparen = True
2153 isALRclose _        = False
2154
2155 isNonDecreasingIntentation :: ALRLayout -> Bool
2156 isNonDecreasingIntentation ALRLayoutDo = True
2157 isNonDecreasingIntentation _           = False
2158
2159 containsCommas :: Token -> Bool
2160 containsCommas IToparen = True
2161 containsCommas ITobrack = True
2162 -- John doesn't have {} as containing commas, but records contain them,
2163 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2164 -- (defaultInstallDirs).
2165 containsCommas ITocurly = True
2166 -- GHC Extensions:
2167 containsCommas IToubxparen = True
2168 containsCommas _        = False
2169
2170 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2171 topNoLayoutContainsCommas [] = False
2172 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2173 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2174
2175 lexToken :: P (Located Token)
2176 lexToken = do
2177   inp@(AI loc1 buf) <- getInput
2178   sc <- getLexState
2179   exts <- getExts
2180   case alexScanUser exts inp sc of
2181     AlexEOF -> do
2182         let span = mkSrcSpan loc1 loc1
2183         setLastToken span 0
2184         return (L span ITeof)
2185     AlexError (AI loc2 buf) ->
2186         reportLexError loc1 loc2 buf "lexical error"
2187     AlexSkip inp2 _ -> do
2188         setInput inp2
2189         lexToken
2190     AlexToken inp2@(AI end buf2) _ t -> do
2191         setInput inp2
2192         let span = mkSrcSpan loc1 end
2193         let bytes = byteDiff buf buf2
2194         span `seq` setLastToken span bytes
2195         t span buf bytes
2196
2197 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2198 reportLexError loc1 loc2 buf str
2199   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2200   | otherwise =
2201   let
2202         c = fst (nextChar buf)
2203   in
2204   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2205     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2206     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2207
2208 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2209 lexTokenStream buf loc dflags = unP go initState
2210     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2211           initState = mkPState dflags' buf loc
2212           go = do
2213             ltok <- lexer return
2214             case ltok of
2215               L _ ITeof -> return []
2216               _ -> liftM (ltok:) go
2217
2218 linePrags = Map.singleton "line" (begin line_prag2)
2219
2220 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2221                                  ("options_ghc", lex_string_prag IToptions_prag),
2222                                  ("options_haddock", lex_string_prag ITdocOptions),
2223                                  ("language", token ITlanguage_prag),
2224                                  ("include", lex_string_prag ITinclude_prag)])
2225
2226 ignoredPrags = Map.fromList (map ignored pragmas)
2227                where ignored opt = (opt, nested_comment lexToken)
2228                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2229                      options_pragmas = map ("options_" ++) impls
2230                      -- CFILES is a hugs-only thing.
2231                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2232
2233 oneWordPrags = Map.fromList([("rules", rulePrag),
2234                            ("inline", token (ITinline_prag Inline FunLike)),
2235                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2236                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2237                                           -- Spelling variant
2238                            ("notinline", token (ITinline_prag NoInline FunLike)),
2239                            ("specialize", token ITspec_prag),
2240                            ("source", token ITsource_prag),
2241                            ("warning", token ITwarning_prag),
2242                            ("deprecated", token ITdeprecated_prag),
2243                            ("scc", token ITscc_prag),
2244                            ("generated", token ITgenerated_prag),
2245                            ("core", token ITcore_prag),
2246                            ("unpack", token ITunpack_prag),
2247                            ("ann", token ITann_prag)])
2248
2249 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2250                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2251                              ("specialize inline", token (ITspec_inline_prag True)),
2252                              ("specialize notinline", token (ITspec_inline_prag False))])
2253
2254
2255 dispatch_pragmas :: Map String Action -> Action
2256 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2257                                        Just found -> found span buf len
2258                                        Nothing -> lexError "unknown pragma"
2259
2260 known_pragma :: Map String Action -> AlexAccPred Int
2261 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2262                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2263
2264 clean_pragma :: String -> String
2265 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2266                     where unprefix prag' = case stripPrefix "{-#" prag' of
2267                                              Just rest -> rest
2268                                              Nothing -> prag'
2269                           canonical prag' = case prag' of
2270                                               "noinline" -> "notinline"
2271                                               "specialise" -> "specialize"
2272                                               "constructorlike" -> "conlike"
2273                                               _ -> prag'
2274                           canon_ws s = unwords (map canonical (words s))
2275 }